import getopt
import os
import sys
from glob import glob

import numpy as np

import constants as c
from utils import process_clip

# Lazy integer range that works on both Python 2 and Python 3. The default
# clip count is in the millions, so a materialized list must be avoided.
try:
    _lazy_range = xrange  # Python 2
except NameError:
    _lazy_range = range  # Python 3: range is already lazy


def process_training_data(num_clips):
    """
    Processes random training clips from the full training data. Saves to
    TRAIN_DIR_CLIPS by default.

    Numbering continues after the entries already present in
    c.TRAIN_DIR_CLIPS, so repeated runs append new clips instead of
    overwriting earlier ones.

    @param num_clips: The number of clips to process. Default = 5000000
                      (set in main()).

    @warning: This can take a couple of hours to complete with large numbers
              of clips.
    """
    # Every existing entry in the clips directory counts as a previous clip.
    # NOTE(review): the plain string concatenation assumes c.TRAIN_DIR_CLIPS
    # ends with a path separator -- confirm against constants.get_dir.
    num_prev_clips = len(glob(c.TRAIN_DIR_CLIPS + '*'))

    for clip_num in _lazy_range(num_prev_clips, num_clips + num_prev_clips):
        clip = process_clip()

        # savez_compressed appends '.npz' to the file name by itself.
        np.savez_compressed(c.TRAIN_DIR_CLIPS + str(clip_num), clip)

        # Report progress every 100 clips.
        if (clip_num + 1) % 100 == 0:
            print('Processed %d clips' % (clip_num + 1))


def usage():
    """
    Prints the command line options of this script to stdout.
    """
    print('Options:')
    print('-n/--num_clips= <# clips to process for training> (Default = 5000000)')
    print('-t/--train_dir= <Directory of full training frames>')
    print('-c/--clips_dir= <Save directory for processed clips>')
    print(" (I suggest making this a hidden dir so the filesystem doesn't freeze")
    print(" with so many files. DON'T `ls` THIS DIRECTORY!)")
    print('-o/--overwrite (Overwrites the previous data in clips_dir)')
    print('-H/--help (Prints usage)')


def main():
    """
    Parses the command line, configures the directories on the constants
    module and processes the requested number of training clips.

    Exits with status 2 on invalid command line input, 1 if the training
    directory does not exist, and 0 after printing help.
    """
    ##
    # Handle command line input
    ##

    num_clips = 5000000
    overwrite = False

    try:
        opts, _ = getopt.getopt(sys.argv[1:], 'n:t:c:oH',
                                ['num_clips=', 'train_dir=', 'clips_dir=',
                                 'overwrite', 'help'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Help wins over every other option and must never trigger side effects
    # (such as clearing a directory) before the usage text is shown.
    if any(name in ('-H', '--help') for name, _value in opts):
        usage()
        sys.exit(0)

    for opt, arg in opts:
        if opt in ('-n', '--num_clips'):
            try:
                num_clips = int(arg)
            except ValueError:
                # A non-numeric clip count is a usage error, not a crash.
                usage()
                sys.exit(2)
        elif opt in ('-t', '--train_dir'):
            c.TRAIN_DIR = c.get_dir(arg)
        elif opt in ('-c', '--clips_dir'):
            c.TRAIN_DIR_CLIPS = c.get_dir(arg)
        elif opt in ('-o', '--overwrite'):
            # Only remember the request here. Clearing right away would wipe
            # the default directory whenever -o precedes -c on the command
            # line.
            overwrite = True

    # Validate before destroying anything. An explicit check is used because
    # assert statements are removed when Python runs with -O.
    if not os.path.exists(c.TRAIN_DIR):
        sys.stderr.write('Training directory does not exist: %s\n'
                         % c.TRAIN_DIR)
        sys.exit(1)

    if overwrite:
        c.clear_dir(c.TRAIN_DIR_CLIPS)

    # set train frame dimensions
    c.FULL_HEIGHT, c.FULL_WIDTH = c.get_train_frame_dims()

    ##
    # Process data for training
    ##

    process_training_data(num_clips)


if __name__ == '__main__':
    main()