Walk Forward Optimization & Scikit-Learn

After looking over Scikit-Learn's documentation, I realized that there wasn’t a cross-validation iterator for walk-forward optimization.

Someone on GitHub attempted to merge this feature, but I thought the implementation was too complicated.

So I wrote my own:

class WalkForward(object):
    """Walk-forward (expanding-window) cross-validation iterator.

    Iterating yields ``fold_count`` pairs ``(train, cv)`` of NumPy index
    arrays.  For every pair, ``train`` covers indices ``0 .. cv_start - 1``
    and ``cv`` covers the next contiguous slice, so the training window
    grows with each fold and never includes indices at or after the
    validation slice.

    Requires ``math`` and ``numpy`` (as ``np``) to be importable in the
    enclosing module.

    Parameters
    ----------
    timestamps : sized
        Only ``len(timestamps)`` is used; it fixes the number of samples.
    fold_count : int, default 1
        Number of validation folds to produce.  Must be at least 1.
    pretraining_size : int or None, default None
        Number of leading samples used only for training in the first fold.
        A falsy value (``None`` or ``0``) selects the default
        ``int(n / (fold_count + 1))``.
    is_verbose : bool, default False
        When true, print the train and validation sizes of every fold.

    Raises
    ------
    ValueError
        If ``fold_count`` is smaller than 1.

    Notes
    -----
    The per-fold length is rounded up, so when the samples after the
    pre-training window do not divide evenly the trailing fold(s) are
    shorter and can even be empty.
    """

    def __init__(self, timestamps, fold_count=1, pretraining_size=None, is_verbose=False):
        # Fail early: a zero fold_count would otherwise surface later as a
        # ZeroDivisionError inside __iter__.
        if fold_count < 1:
            raise ValueError("fold_count must be at least 1, got %r" % (fold_count,))

        self.n = len(timestamps)
        self.fold_count = fold_count
        # `or` means both None and 0 fall back to the default size.
        self.pretraining_size = pretraining_size or int(self.n / (self.fold_count + 1.0))
        self.is_verbose = is_verbose

    def __iter__(self):
        """Yield ``(train_indices, cv_indices)`` for each fold in order."""
        cv_end = self.pretraining_size
        # Round up so that fold_count folds always reach the last sample.
        fold_length = math.ceil(float(self.n - self.pretraining_size) / self.fold_count)

        for _ in range(self.fold_count):
            cv_start = cv_end
            # Clamp so the final fold never runs past the end of the data.
            cv_end = min(int(cv_start + fold_length), self.n)

            train = np.arange(0, cv_start)
            cv = np.arange(cv_start, cv_end)

            if self.is_verbose:
                # A single formatted string prints identically on Python 2
                # and Python 3.
                print("%d %d" % (len(train), len(cv)))

            yield train, cv

    def __len__(self):
        """Return the number of ``(train, cv)`` pairs ``__iter__`` yields."""
        # The pre-training window is not a fold of its own, so it must not
        # be counted here.
        return self.fold_count