Python utils.batchify() Examples

The following are 7 code examples of utils.batchify(). You can go to the original project or source file by following the references above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
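Before diving in: the DREAM examples below all consume batchify() as a generator that yields (baskets, lens, uids) tuples, plus reordered and history baskets when is_reordered is set. A minimal sketch of such a generator, inferred purely from those call sites (the record layout and the field names 'user_id', 'baskets', 'reorder_baskets', 'history_baskets' are assumptions, not the project's actual implementation):

def batchify(ub, batch_size, is_reordered=False):
    '''
        Yield mini-batches of users' baskets from ub.
        Sketch only: assumes ub is an indexable collection of per-user
        records; the real utils.batchify() may differ.
    '''
    for start in range(0, len(ub), batch_size):
        batch = ub[start:start + batch_size]
        uids = [record['user_id'] for record in batch]          # assumed field name
        baskets = [record['baskets'] for record in batch]       # assumed field name
        lens = [len(b) for b in baskets]                        # basket-sequence lengths
        if is_reordered:
            r_baskets = [record['reorder_baskets'] for record in batch]  # assumed
            h_baskets = [record['history_baskets'] for record in batch]  # assumed
            yield baskets, lens, uids, r_baskets, h_baskets
        else:
            yield baskets, lens, uids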
Example #1
Source File: eval.py    From DREAM with MIT License
def eval_pred(dr_model, ub):
    '''
        evaluate dream model for predicting next basket on all training users
        in batches
    '''
    item_embedding = dr_model.encode.weight
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_model.config.batch_size)
    start_time = time()
    id_u, score_u = [], [] # user's id, user's score
    num_batchs = ceil(len(ub) / dr_model.config.batch_size)
    for i, x in enumerate(batchify(ub, dr_model.config.batch_size)):
        baskets, lens, uids = x
        _, dynamic_user, _ = dr_model(baskets, lens, dr_hidden)  # shape: batch_size, max_len, embedding_size
        dr_hidden = repackage_hidden(dr_hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            du_latest = du[l - 1].unsqueeze(0)  # shape: 1, embedding_size
            score_up = torch.mm(du_latest, item_embedding.t())  # shape: 1, num_item
            score_u.append(score_up.cpu().data.numpy())
            id_u.append(uid)
    elapsed = time() - start_time
    print('[Predicting] Elapsed: {:02.2f}'.format(elapsed))
    return score_u, id_u
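Several of the DREAM examples call repackage_hidden() to cut the RNN hidden state loose from the previous batch's autograd graph. A minimal sketch, following the standard PyTorch word-language-model recipe (an assumption, since the repo's own helper is not shown here):

import torch

def repackage_hidden(h):
    # Detach hidden states from the graph they came from, so that
    # backpropagation through time stops at the batch boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)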
Example #2
Source File: train.py    From DREAM with MIT License
def evaluate_reorder_dream():
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(test_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(test_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, _, r_baskets, h_baskets = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)
        dr_hidden = repackage_hidden(dr_hidden)
        total_loss += loss.item()

    # Logging
    elapsed = (time() - start_time) * 1000 / num_batchs
    total_loss = total_loss / num_batchs / dr_config.batch_size
    print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'.format(epoch, elapsed, total_loss))
    return total_loss 
Example #3
Source File: ptb_main.py    From online-normalization with BSD 3-Clause "New" or "Revised" License
def main(args):
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print('=> load data...')
    corpus = data.Corpus(args.data)

    eval_batch_size = 10
    train_loader = batchify(corpus.train, args.batch_size, device)
    val_loader = batchify(corpus.valid, eval_batch_size, device)

    ntokens = len(corpus.dictionary)
    main_worker(train_loader, val_loader, ntokens, args, device) 
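Note that this batchify() has a different signature from the DREAM one: it takes a flat token tensor and a device, and returns the batched data directly. A plausible sketch, modeled on the PyTorch word_language_model example that this script appears to follow (an assumption about this repo's utils):

import torch

def batchify(data, bsz, device):
    # Trim off any tokens that would not cleanly divide into bsz columns.
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    # Reshape into bsz parallel sequences, one per column.
    return data.view(bsz, -1).t().contiguous().to(device)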
Example #4
Source File: eval.py    From DREAM with MIT License
def eval_batch(dr_model, ub, up, batch_size, is_reordered=False):
    '''
        Using dr_model to predict (u,p) score in batch
        Parameters:
        - ub: users' baskets
        - up: users' history purchases
        - batch_size
    '''
    # turn on evaluation mode
    dr_model.eval()
    is_cuda = dr_model.config.cuda
    item_embedding = dr_model.encode.weight
    dr_hidden = dr_model.init_hidden(batch_size)

    id_u, item_u, score_u, dynam_u = [], [], [], []
    start_time = time()
    num_batchs = ceil(len(ub) / batch_size)
    for i, x in enumerate(batchify(ub, batch_size, is_reordered)):
        if is_reordered:
            baskets, lens, uids, r_baskets, h_baskets = x
        else:
            baskets, lens, uids = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        for uid, l, du in zip(uids, lens, dynamic_user):
            du_latest = du[l - 1].unsqueeze(0)
            # calculate the <u,p> score for every historical <u,p> pair
            history_item = [int(pid) for pid in up[up.user_id == uid]['product_id'].values[0]]
            history_item = torch.cuda.LongTensor(history_item) if is_cuda else torch.LongTensor(history_item)
            score_up = torch.mm(du_latest, item_embedding[history_item].t()).cpu().data.numpy()[0]
            id_u.append(uid)
            dynam_u.append(du_latest.cpu().data.numpy()[0])
            item_u.append(history_item.cpu().numpy())
            score_u.append(score_up)
        # Logging
        elapsed = time() - start_time
        start_time = time()
        print('[Predicting]| Batch {:5d} / {:5d} | seconds/batch {:02.02f}'.format(i, num_batchs, elapsed))
    return id_u, item_u, score_u, dynam_u 
Example #5
Source File: eval.py    From DREAM with MIT License
def get_dynamic_u(uid, dr_model, ub, dr_hidden):
    '''
        get the latest dynamic representation of user uid;
        dr_hidden must be provided by the caller
    '''
    for i, x in enumerate(batchify(ub, 1)):
        baskets, lens, uids = x
        _, dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
    return dynamic_user[0][lens[0] - 1].unsqueeze(0) 
Example #6
Source File: train.py    From DREAM with MIT License
def train_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size)):
        baskets, lens, _ = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = bpr_loss(baskets, dynamic_user, dr_model.encode.weight, dr_config)
        loss.backward()

        # Clip to avoid exploding gradients
        torch.nn.utils.clip_grad_norm_(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters(): # Update parameters by -lr*grad
        #    p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.item()
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss / dr_config.log_interval / dr_config.batch_size
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'
                  .format(epoch, i, num_batchs, elapsed, cur_loss))
            writer.add_scalar('model/train_loss', cur_loss, epoch * num_batchs + i)
            writer.add_scalar('model/grad_norm', grad_norm, epoch * num_batchs + i)
            writer.add_scalar('model/weight_update_ratio', weight_update_ratio, epoch * num_batchs + i) 
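Examples #6 and #7 log a gradient norm via get_grad_norm(), whose source is not shown here. A plausible sketch of such a helper, assuming it returns the total L2 norm over all parameter gradients (name and behavior inferred from the call site):

def get_grad_norm(model):
    # Total L2 norm over all parameter gradients (assumed semantics).
    total = 0.0
    for p in model.parameters():
        if p.grad is not None:
            total += p.grad.data.norm(2).item() ** 2
    return total ** 0.5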
Example #7
Source File: train.py    From DREAM with MIT License
def train_reorder_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, ids, r_baskets, h_baskets = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)

        try:
            loss.backward()
        except RuntimeError:  # for debugging
            print('caching')
            tmp = {'baskets': baskets, 'ids': ids, 'r_baskets': r_baskets, 'h_baskets': h_baskets,
                   'dynamic_user': dynamic_user, 'item_embedding': dr_model.encode.weight}
            print(baskets)
            print(ids)
            print(r_baskets)
            print(h_baskets)
            print(dr_model.encode.weight)
            print(dynamic_user.data)
            with open('tmp.pkl', 'wb') as f:
                pickle.dump(tmp, f, pickle.HIGHEST_PROTOCOL)
            break

        # Clip to avoid exploding gradients
        torch.nn.utils.clip_grad_norm_(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters(): # Update parameters by -lr*grad
        #    p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.item()
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss / dr_config.log_interval / dr_config.batch_size
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'
                  .format(epoch, i, num_batchs, elapsed, cur_loss))