Python torch.nn.DistributedDataParallel() Examples

The following are 2 code examples of torch.nn.DistributedDataParallel(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.nn, or try the search function.

Example #1

Source File: i3d_learner.py From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License

4 votes

def __init__(self,
            use_cuda=None, # use cuda or not
            use_tsm=False, # use the Temporal Shift module or not
            use_nl=False, # use the Non-local module or not
            use_tc=False, # use the Timeception module or not
            use_lstm=False, # use LSTM module or not
            freeze_i3d=False, # freeze i3d layers when training Timeception
            batch_size_train=10, # size for each batch for training
            batch_size_test=50, # size for each batch for testing
            batch_size_extract_features=40, # size for each batch for extracting features
            max_steps=2000, # total number of steps for training
            num_steps_per_update=2, # gradient accumulation (for large batch size that does not fit into memory)
            init_lr=0.1, # initial learning rate
            weight_decay=0.000001, # L2 regularization
            momentum=0.9, # SGD parameters
            milestones=[500, 1500], # MultiStepLR parameters
            gamma=0.1, # MultiStepLR parameters
            num_of_action_classes=2, # currently we only have two classes (0 and 1, which means no and yes)
            num_steps_per_check=50, # the number of steps to save a model and log information
            parallel=True, # use nn.DistributedDataParallel or not
            augment=True, # use data augmentation or not
            num_workers=12, # number of workers for the dataloader
            mode="rgb", # can be "rgb" or "flow" or "rgbd"
            p_frame="../data/rgb/", # path to load video frames
            code_testing=False # a special flag for testing if the code works
            ):
        """Store the learner's configuration as instance attributes.

        ``use_cuda`` is forwarded to the parent initializer; every other
        argument is saved on ``self`` under the same name. The meaning of
        each argument is given by the comment next to it in the signature.

        Notes:
            * ``milestones`` is copied into a new list, so the instance
              never aliases the signature's default list or a list the
              caller keeps using.
            * When ``code_testing`` is true, ``max_steps`` is overridden
              to 10 regardless of the value passed in.
        """
        super().__init__(use_cuda=use_cuda)

        # Model-structure switches
        self.use_tsm = use_tsm
        self.use_nl = use_nl
        self.use_tc = use_tc
        self.use_lstm = use_lstm
        self.freeze_i3d = freeze_i3d

        # Batch sizes and training schedule
        self.batch_size_train = batch_size_train
        self.batch_size_test = batch_size_test
        self.batch_size_extract_features = batch_size_extract_features
        self.max_steps = max_steps
        self.num_steps_per_update = num_steps_per_update

        # Optimizer and learning-rate scheduler settings
        self.init_lr = init_lr
        self.weight_decay = weight_decay
        self.momentum = momentum
        # Copy: the default list object is shared by every call that omits
        # this argument, so storing it directly would let one instance's
        # in-place change leak into all later instances.
        self.milestones = list(milestones)
        self.gamma = gamma

        # Task, logging, and data-loading settings
        self.num_of_action_classes = num_of_action_classes
        self.num_steps_per_check = num_steps_per_check
        self.parallel = parallel
        self.augment = augment
        self.num_workers = num_workers
        self.mode = mode
        self.p_frame = p_frame

        # Internal parameters
        self.image_size = 224 # 224 is the input for the i3d network structure
        # Starts disabled; presumably switched on elsewhere once parallel
        # execution is known to be possible -- TODO confirm against the rest of the class.
        self.can_parallel = False

        # Code testing mode: cap training at a handful of steps
        self.code_testing = code_testing
        if code_testing:
            self.max_steps = 10

Example #2

Source File: cnn_learner.py From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License

4 votes

def __init__(self,
            use_cuda=None, # use cuda or not
            batch_size_train=6, # size for each batch for training
            batch_size_test=40, # size for each batch for testing
            batch_size_extract_features=40, # size for each batch for extracting features
            max_steps=2000, # total number of steps for training
            num_steps_per_update=2, # gradient accumulation (for large batch size that does not fit into memory)
            init_lr=0.01, # initial learning rate
            weight_decay=0.000001, # L2 regularization
            momentum=0.9, # SGD parameters
            milestones=[500, 1500], # MultiStepLR parameters
            gamma=0.1, # MultiStepLR parameters
            num_of_action_classes=2, # currently we only have two classes (0 and 1, which means no and yes)
            num_steps_per_check=50, # the number of steps to save a model and log information
            parallel=True, # use nn.DistributedDataParallel or not
            augment=True, # use data augmentation or not
            num_workers=12, # number of workers for the dataloader
            mode="rgb", # can be "rgb" or "flow"
            p_frame="../data/rgb/", # path to load video frames
            method="cnn", # the method for the model
            freeze_cnn=False, # freeze the CNN model while training or not
            code_testing=False # a special flag for testing if the code works
            ):
        """Store the learner's configuration as instance attributes.

        ``use_cuda`` is forwarded to the parent initializer; every other
        argument is saved on ``self`` under the same name. The meaning of
        each argument is given by the comment next to it in the signature.

        Notes:
            * ``milestones`` is copied into a new list, so the instance
              never aliases the signature's default list or a list the
              caller keeps using.
            * When ``code_testing`` is true, ``max_steps`` is overridden
              to 10 regardless of the value passed in.
        """
        super().__init__(use_cuda=use_cuda)

        # Batch sizes and training schedule
        self.batch_size_train = batch_size_train
        self.batch_size_test = batch_size_test
        self.batch_size_extract_features = batch_size_extract_features
        self.max_steps = max_steps
        self.num_steps_per_update = num_steps_per_update

        # Optimizer and learning-rate scheduler settings
        self.init_lr = init_lr
        self.weight_decay = weight_decay
        self.momentum = momentum
        # Copy: the default list object is shared by every call that omits
        # this argument, so storing it directly would let one instance's
        # in-place change leak into all later instances.
        self.milestones = list(milestones)
        self.gamma = gamma

        # Task, logging, and data-loading settings
        self.num_of_action_classes = num_of_action_classes
        self.num_steps_per_check = num_steps_per_check
        self.parallel = parallel
        self.augment = augment
        self.num_workers = num_workers
        self.mode = mode
        self.p_frame = p_frame

        # Model selection
        self.method = method
        self.freeze_cnn = freeze_cnn

        # Internal parameters
        self.image_size = 224 # 224 is the input for the ResNet18 network structure
        # Starts disabled; presumably switched on elsewhere once parallel
        # execution is known to be possible -- TODO confirm against the rest of the class.
        self.can_parallel = False

        # Code testing mode: cap training at a handful of steps
        self.code_testing = code_testing
        if code_testing:
            self.max_steps = 10