Python cntk.Trainer() Examples

The following are 5 code examples of cntk.Trainer(), drawn from open-source projects. You can go to the original project or source file via the link above each example, or browse the other available functions and classes of the cntk module.
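
Before the project examples, here is a minimal, self-contained sketch of the pattern they all follow: build a model, pair a loss with a metric, hand both to a learner and a cntk.Trainer, and call train_minibatch. The toy model and random data below are illustrative and not taken from any of the projects (assumes CNTK 2.x).

import numpy as np
import cntk as C

# Toy two-class model: input/label variables, one dense layer, loss and metric.
features = C.input_variable(2, np.float32)
labels = C.input_variable(2, np.float32)
model = C.layers.Dense(2)(features)
loss = C.cross_entropy_with_softmax(model, labels)
metric = C.classification_error(model, labels)

# cntk.Trainer takes the model, a (loss, metric) pair, and one or more learners.
learner = C.sgd(model.parameters, C.learning_parameter_schedule(0.1))
trainer = C.Trainer(model, (loss, metric), [learner])

# One training step on random data.
x = np.random.rand(16, 2).astype(np.float32)
y = np.eye(2, dtype=np.float32)[np.random.randint(0, 2, 16)]
trainer.train_minibatch({features: x, labels: y})
print(trainer.previous_minibatch_loss_average)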
Example #1
Source File: cart_pole_dqn_cntk.py    From ai-gym with MIT License
def __init__(self):

        #### Construct the model ####
        observation = cntk.ops.input_variable(STATE_DIM, np.float32, name="s")
        q_target = cntk.ops.input_variable(NUM_ACTIONS, np.float32, name="q")

        # Define the structure of the neural network
        self.model = self.create_multi_layer_neural_network(observation, NUM_ACTIONS, 2)

        #### Define the trainer ####
        self.learning_rate = cntk.learner.training_parameter_schedule(0.0001, cntk.UnitType.sample)
        self.momentum = cntk.learner.momentum_as_time_constant_schedule(0.99)

        self.loss = cntk.ops.reduce_mean(cntk.ops.square(self.model - q_target), axis=0)
        mean_error = cntk.ops.reduce_mean(cntk.ops.square(self.model - q_target), axis=0)

        learner = cntk.adam_sgd(self.model.parameters, self.learning_rate, momentum=self.momentum)
        self.trainer = cntk.Trainer(self.model, self.loss, mean_error, learner) 
Example #2
Source File: atari_breakout_dqn_cntk.py    From ai-gym with MIT License
def __init__(self):

        #### Construct the model ####
        observation = cntk.ops.input_variable(STATE_DIMS, np.float32, name="s")
        q_target = cntk.ops.input_variable(NUM_ACTIONS, np.float32, name="q")

        # Define the structure of the neural network
        self.model = self.create_convolutional_neural_network(observation, NUM_ACTIONS)

        #### Define the trainer ####
        self.learning_rate = cntk.learner.training_parameter_schedule(0.0001, cntk.UnitType.sample)
        self.momentum = cntk.learner.momentum_as_time_constant_schedule(0.99)

        self.loss = cntk.ops.reduce_mean(cntk.ops.square(self.model - q_target), axis=0)
        mean_error = cntk.ops.reduce_mean(cntk.ops.square(self.model - q_target), axis=0)

        learner = cntk.adam_sgd(self.model.parameters, self.learning_rate, momentum=self.momentum)
        self.trainer = cntk.Trainer(self.model, self.loss, mean_error, learner) 
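
Examples #1 and #2 are written against the pre-release learner names (cntk.learner.training_parameter_schedule, cntk.adam_sgd). A rough equivalent of the same trainer setup with the released CNTK 2.x names is sketched below; the two-layer dense network stands in for the projects' model-building helpers, and the dimensions are illustrative assumptions.

import numpy as np
import cntk

STATE_DIM, NUM_ACTIONS = 4, 2  # illustrative sizes, not the projects' values

observation = cntk.input_variable(STATE_DIM, np.float32, name="s")
q_target = cntk.input_variable(NUM_ACTIONS, np.float32, name="q")

# Stand-in for create_multi_layer_neural_network / create_convolutional_neural_network
hidden = cntk.layers.Dense(64, activation=cntk.relu)(observation)
model = cntk.layers.Dense(NUM_ACTIONS)(hidden)

# Same squared-error objective as above, used for both the loss and the metric.
loss = cntk.reduce_mean(cntk.square(model - q_target), axis=0)

lr = cntk.learning_rate_schedule(0.0001, cntk.UnitType.sample)
momentum = cntk.momentum_as_time_constant_schedule(0.99)
learner = cntk.adam(model.parameters, lr, momentum)
trainer = cntk.Trainer(model, (loss, loss), [learner])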
Example #3
Source File: test_ops_compoud.py    From ngraph-python with Apache License 2.0
def _create_model_and_execute_test(params):
    # Create CNTK model
    input_var = C.input_variable(params['input_dim'], np.float32)
    params['input_var'] = input_var
    params['act_fun'] = C.layers.blocks.identity
    params['init_fun'] = C.glorot_uniform()

    model = params['create_model'](params)

    label_var = C.input_variable((params['label_dim']), np.float32)
    loss = C.cross_entropy_with_softmax(model, label_var)
    eval_error = C.classification_error(model, label_var)

    lr_schedule = C.learning_rate_schedule(0.05, C.UnitType.minibatch)
    learner = C.sgd(model.parameters, lr_schedule)
    trainer = C.Trainer(model, (loss, eval_error), [learner])

    input_value, label_value = _generate_random_sample(
        params['batch_size'],
        params['input_dim'],
        params['label_dim']
    )

    # Import to ngraph
    ng_loss, placeholders = CNTKImporter(batch_size=params['batch_size']).import_model(loss)
    parallel_update = CommonSGDOptimizer(0.05).minimize(ng_loss, ng_loss.variables())

    transformer = ng.transformers.make_transformer()
    update_fun = transformer.computation([ng_loss, parallel_update], *placeholders)

    # Execute on CNTK
    trainer.train_minibatch({input_var: input_value, label_var: label_value})
    cntk_ret = trainer.previous_minibatch_loss_average

    # Execute on ngraph
    input_value = np.moveaxis(input_value, 0, -1)
    label_value = np.moveaxis(label_value, 0, -1)
    ng_ret = update_fun(input_value, label_value)[0]

    return cntk_ret, ng_ret 
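
A hypothetical call site for this helper, comparing the loss reported by CNTK against the loss computed by the imported ngraph model (the params dict is whatever the surrounding test builds, and the tolerance below is an assumption):

cntk_loss, ng_loss = _create_model_and_execute_test(params)
# Average each side before comparing; ng_loss may be a per-sample array.
assert np.isclose(np.mean(cntk_loss), np.mean(ng_loss), atol=1e-4)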
Example #4
Source File: train_end2end.py    From end2end_AU_speech with MIT License
def build_graph(config):
    assert(config['type'] in ["cnn", "lstm", "gru", "bilstm", "bigru"])
    if config["type"] == "cnn":
        # static model
        features = C.input_variable(input_dim_model, name="input")
        labels = C.input_variable(label_dim, name="label")
    else:
        # recurrent model
        features = C.sequence.input_variable(input_dim_model, name="input")
        labels = C.sequence.input_variable(label_dim, name="label")
    netoutput = create_model(features, config["type"], config["encoder"], config["pretrained_model"], config["e3_clone"])

    if config["l2_loss_type"] == 1:
        print("Use standard l2 loss")
        ce = l2_loss(netoutput, labels)
    elif config["l2_loss_type"] == 2:
        print("Use variance normalized l2 loss")
        ce = std_normalized_l2_loss(netoutput, labels)
    else:
        raise ValueError("Unsupported loss type")

    # enforce sparsity output
    if config["l1_reg"] > sys.float_info.epsilon:
        ce = ce + config["l1_reg"] * l1_reg_loss(netoutput)
    
    # performance metrics
    pe = C.squared_error(netoutput, labels)

    if config["constlr"]:
        lr_schedule = config["lr"]
    else:
        if config["lr_list"] is not None:
            print("use learning rate schedule from file")
            lr_schedule = config["lr_list"]
        else:
            if config["type"] != "cnn": # default learning rate for recurrent model
                lr_schedule = [0.005] + [0.0025]*2 + [0.001]*4 + [0.0005]*8 + [0.00025]*16 + [0.0001]*1000 + [0.00005]*1000 + [0.000025]
            elif config["lr_schedule"] == 1: # learning rate for CNN
                lr_schedule = [0.005] + [0.0025]*2 + [0.00125]*3 + [0.0005]*4 + [0.00025]*5 + [0.0001]
            elif config["lr_schedule"] == 2:
                lr_schedule = [0.005] + [0.0025]*2 + [0.00125]*3 + [0.0005]*4 + [0.00025]*5 + [0.0001]*100 + [0.00005]*50 + [0.000025]*50 + [0.00001]
            else:
                raise ValueError("unknown learning rate")
    learning_rate = C.learning_parameter_schedule_per_sample(lr_schedule, epoch_size=config["epoch_size"])
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=300)
    
    learner = C.adam(netoutput.parameters, lr=learning_rate, momentum=momentum_schedule,
                        l2_regularization_weight=0.0001,
                        gradient_clipping_threshold_per_sample=3.0, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(netoutput, (ce, pe), [learner])

    return features, labels, netoutput, trainer


#-----------------------------------
# training procedure
#-----------------------------------

# create reader 
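
A hypothetical way to drive the graph returned by build_graph (config, num_epochs and the minibatches iterator below are placeholders; the original file continues with its own reader and training loop, which are not shown here):

features, labels, netoutput, trainer = build_graph(config)
for epoch in range(num_epochs):
    for x_batch, y_batch in minibatches():  # user-supplied data iterator
        trainer.train_minibatch({features: x_batch, labels: y_batch})
    trainer.summarize_training_progress()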
Example #5
Source File: language_understanding.py    From nlp-services with MIT License
def train(self, x, y, reader, model_func, max_epochs=10, task="slot_tagging"):
        log.info("Training...")

        # Instantiate the model function; x is the input (feature) variable
        model = model_func(x)

        # Instantiate the loss and error function
        loss, label_error = self.create_criterion_function_preferred(model, y)

        # training config
        epoch_size = 18000  # 18000 samples is half the dataset size
        minibatch_size = 70

        # LR schedule over epochs
        # In CNTK, an epoch is how often we get out of the minibatch loop to
        # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
        lr_per_sample = [3e-4] * 4 + [1.5e-4]
        lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
        lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)

        # Momentum schedule
        momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)

        # We use the Adam optimizer, which is known to work well on this dataset
        # Feel free to try other optimizers from
        # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
        learner = C.adam(
            parameters=model.parameters,
            lr=lr_schedule,
            momentum=momentums,
            gradient_clipping_threshold_per_sample=15,
            gradient_clipping_with_truncation=True)

        # Setup the progress updater
        progress_printer = C.logging.ProgressPrinter(tag="Training", num_epochs=max_epochs)

        # Instantiate the trainer
        trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

        # process mini batches and perform model training
        C.logging.log_number_of_parameters(model)

        # Assign the data fields to be read from the input
        if task == "slot_tagging":
            data_map = {x: reader.streams.query, y: reader.streams.slot_labels}
        else:
            data_map = {x: reader.streams.query, y: reader.streams.intent}

        t = 0
        for epoch in range(max_epochs):  # loop over epochs
            epoch_end = (epoch + 1) * epoch_size
            while t < epoch_end:  # loop over mini batches on the epoch
                data = reader.next_minibatch(minibatch_size, input_map=data_map)  # fetch mini batch
                trainer.train_minibatch(data)  # update model with it
                t += data[y].num_samples  # samples so far
            trainer.summarize_training_progress()

        return model