Python torch.nn.CrossEntropyLoss() Examples

The following are 30 code examples of torch.nn.CrossEntropyLoss(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other available functions and classes of the torch.nn module.
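Before the project examples, here is a minimal, self-contained sketch of the basic call: the criterion takes raw (unnormalized) logits of shape (N, C) and integer class indices of shape (N,), and applies log-softmax internally.

import torch
import torch.nn as nn

# CrossEntropyLoss combines log-softmax and negative log-likelihood:
# pass raw logits, not probabilities.
loss_fn = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)           # batch of 4 examples, 10 classes
targets = torch.tensor([1, 0, 9, 3])  # one class index per example
loss = loss_fn(logits, targets)       # scalar: mean loss over the batch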
Example #1
Source File: modeling.py    From BERT-for-Chinese-Question-Answering with Apache License 2.0
def forward(self, input_ids, token_type_ids, attention_mask, labels=None):
        pooled_outputs = []
        for i in range(input_ids.size(1)):
            _, pooled_output = self.bert(input_ids[:, i, :], token_type_ids[:, i, :], attention_mask[:, i, :])
            pooled_output = self.dropout(pooled_output)
            pooled_outputs.append(pooled_output.unsqueeze_(1))

        logits = self.classifier(torch.cat(pooled_outputs, 1).view(-1, self.hidden_size))
        logits = logits.view(-1, input_ids.size(1))

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss, logits
        else:
            return logits 
Example #2
Source File: train.py    From pytorch-multigpu with MIT License
def main():
    best_acc = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='../data', train=True, download=True, 
                            transform=transforms_train)

    train_loader = DataLoader(dataset_train, batch_size=args.batch_size, 
                              shuffle=True, num_workers=args.num_worker)

    # the 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')

    net = pyramidnet()
    net = nn.DataParallel(net)
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # optimizer = optim.SGD(net.parameters(), lr=args.lr, 
    #                       momentum=0.9, weight_decay=1e-4)
    
    train(net, criterion, optimizer, train_loader, device) 
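The train function called on the last line is defined elsewhere in the project; a minimal sketch of what such a loop typically looks like (the real one also tracks accuracy and timing):

def train(net, criterion, optimizer, train_loader, device):
    # one epoch over the training set
    net.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(net(inputs), targets)
        loss.backward()
        optimizer.step()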
Example #3
Source File: Patient2Vec.py    From Patient2Vec with MIT License
def get_loss(pred, y, criterion, mtr, a=0.5):
    """
    To calculate loss
    :param pred: predicted value
    :param y: actual value
    :param criterion: nn.CrossEntropyLoss
    :param mtr: beta matrix
    :param a: weight on the penalty term
    """
    mtr_t = torch.transpose(mtr, 1, 2)
    aa = torch.bmm(mtr, mtr_t)
    loss_fn = 0
    for i in range(aa.size()[0]):
        aai = torch.add(aa[i, ], Variable(torch.neg(torch.eye(mtr.size()[1]))))
        loss_fn += torch.trace(torch.mul(aai, aai).data)
    loss_fn /= aa.size()[0]
    loss = torch.add(criterion(pred, y), Variable(torch.FloatTensor([loss_fn * a])))
    return loss 
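Since torch.trace(torch.mul(aai, aai)) only reads the diagonal of mtr @ mtr.T - I, the Python loop can be vectorized; a sketch of the same computation without the deprecated Variable API (the function name is mine):

def get_loss_vectorized(pred, y, criterion, mtr, a=0.5):
    # trace((A - I) * (A - I)) touches only the diagonal of A - I,
    # so the penalty reduces to sum_j (A[j, j] - 1)^2 per batch element
    aa = torch.bmm(mtr, torch.transpose(mtr, 1, 2))
    diag = torch.diagonal(aa, dim1=1, dim2=2)        # (batch, n)
    penalty = ((diag - 1.0) ** 2).sum(dim=1).mean()
    return criterion(pred, y) + a * penalty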
Example #4
Source File: modeling.py    From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International
def forward(self, input_ids, token_type_ids=None, attention_mask=None, start_positions=None, end_positions=None):
        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, splitting can add an extra dimension; squeeze it away
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs; we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2
            return total_loss
        else:
            return start_logits, end_logits 
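The clamp-to-ignored_index trick works because targets equal to ignore_index contribute nothing to the loss, even though that index is one past the valid class range; a small demonstration with toy shapes:

import torch
import torch.nn as nn

seq_len = 8
loss_fct = nn.CrossEntropyLoss(ignore_index=seq_len)
start_logits = torch.randn(2, seq_len)
start_positions = torch.tensor([3, seq_len])  # the second target is ignored
loss = loss_fct(start_logits, start_positions)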
Example #5
Source File: run_cmrc2019_baseline.py    From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International
def forward(self, input_ids, token_type_ids=None, attention_mask=None, answer_mask=None,positions=None):
        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        answer_mask = answer_mask.to(dtype=next(self.parameters()).dtype)
        logits = self.qa_outputs(sequence_output).squeeze(-1)
        #logits = logits*answer_mask_
        logits = logits + (1-answer_mask) * -10000.0

        if positions is not None:
            # If we are on multi-GPU, splitting can add an extra dimension; squeeze it away
            if len(positions.size()) > 1:
                positions = positions.squeeze(-1)
            # sometimes the positions are outside our model inputs; we ignore these terms
            ignored_index = logits.size(1)
            positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            total_loss = loss_fct(logits, positions)
            return total_loss
        else:
            return logits 
Example #6
Source File: utils.py    From deep-learning-note with MIT License
def train_cnn(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print('training on', device)
    loss = nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec' %
              (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start)) 
Example #7
Source File: trainer.py    From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0
def _loss(self, learning_algorithm, label_paddingId, use_crf=False):
        """
        :param learning_algorithm:
        :param label_paddingId:
        :param use_crf:
        :return:
        """
        if use_crf:
            loss_function = self.model.crf_layer.neg_log_likelihood_loss
            return loss_function
        elif learning_algorithm == "SGD":
            loss_function = nn.CrossEntropyLoss(ignore_index=label_paddingId, reduction="sum")
            return loss_function
        else:
            loss_function = nn.CrossEntropyLoss(ignore_index=label_paddingId, reduction="mean")
            return loss_function 
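The difference between the two branches: reduction="sum" totals the loss over non-ignored tokens, while reduction="mean" divides that total by their count. A quick check with toy shapes:

import torch
import torch.nn as nn

logits = torch.randn(4, 6)
labels = torch.tensor([0, 5, 0, 2])  # pretend 0 is the label padding id
s = nn.CrossEntropyLoss(ignore_index=0, reduction="sum")(logits, labels)
m = nn.CrossEntropyLoss(ignore_index=0, reduction="mean")(logits, labels)
# two targets survive the ignore_index, so s == m * 2 up to float error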
Example #8
Source File: loss.py    From overhaul-distillation with MIT License
def FocalLoss(self, logit, target, gamma=2, alpha=0.5):
        n, c, h, w = logit.size()
        criterion = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_index,
                                        size_average=self.size_average)
        if self.cuda:
            criterion = criterion.cuda()

        logpt = -criterion(logit, target.long())
        pt = torch.exp(logpt)
        if alpha is not None:
            logpt *= alpha
        loss = -((1 - pt) ** gamma) * logpt

        if self.batch_average:
            loss /= n

        return loss 
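The identity being exploited: with softmax cross-entropy, pt = exp(-CE) recovers the probability the model assigns to the true class, which is what lets focal loss be built on top of nn.CrossEntropyLoss. A per-example sketch (the method above instead applies the modulating factor to the already-reduced loss):

import torch
import torch.nn as nn

ce = nn.CrossEntropyLoss(reduction="none")
logits = torch.randn(3, 5)
targets = torch.tensor([1, 4, 2])
logpt = -ce(logits, targets)               # log-probability of the true class
pt = logpt.exp()                           # probability of the true class
focal = (-((1 - pt) ** 2) * logpt).mean()  # gamma = 2, no alpha weighting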
Example #9
Source File: 53_machine_translation.py    From deep-learning-note with MIT License
def train(encoder, decoder, dataset, lr, batch_size, num_epochs):
    enc_optimizer = torch.optim.Adam(encoder.parameters(), lr=lr)
    dec_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)

    loss = nn.CrossEntropyLoss(reduction='none')
    data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)
    for epoch in range(num_epochs):
        l_sum = 0.0
        start = time.time()
        for X, Y in data_iter:
            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            l = batch_loss(encoder, decoder, X, Y, loss)
            l.backward()
            enc_optimizer.step()
            dec_optimizer.step()
            l_sum += l.item()
        if (epoch + 1) % 10 == 0:
            print("epoch %d, loss %.3f, time: %.1f sec" % (epoch + 1, l_sum / len(data_iter), time.time() - start)) 
Example #10
Source File: main.py    From transferlearning with MIT License
def test(model, data_tar, e):
    total_loss_test = 0
    correct = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for batch_id, (data, target) in enumerate(data_tar):
            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
            model.eval()
            ypred, _, _ = model(data, data)
            loss = criterion(ypred, target)
            pred = ypred.data.max(1)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
            total_loss_test += loss.data
        accuracy = correct * 100. / len(data_tar.dataset)
        res = 'Test: total loss: {:.6f}, correct: [{}/{}], testing accuracy: {:.4f}%'.format(
            total_loss_test, correct, len(data_tar.dataset), accuracy
        )
    tqdm.write(res)
    RESULT_TEST.append([e, total_loss_test, accuracy])
    log_test.write(res + '\n') 
Example #11
Source File: components.py    From interpret-text with MIT License
def __init__(self, args, model):
        """Initialize an instance of the wrapper

        :param args: arguments containing training and structure parameters
        :type args: ModelArguments
        :param model: A classifier module, ex. BERT or RNN classifier module
        :type model: BertForSequenceClassification or ClassifierModule
        """
        self.args = args
        self.model = model
        self.opt = None

        self.num_epochs = args.num_pretrain_epochs
        self.epochs_since_improv = 0
        self.best_test_acc = 0
        self.avg_accuracy = 0
        self.test_accs = []
        self.train_accs = []

        self.loss_func = nn.CrossEntropyLoss(reduction="none") 
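reduction="none" returns one loss per example instead of a scalar, leaving the reduction to the caller; a short sketch of why that is useful:

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss(reduction="none")
logits = torch.randn(4, 3)
labels = torch.tensor([0, 2, 1, 1])
per_example = loss_func(logits, labels)  # shape (4,), one loss per example
# the caller can mask or reweight before reducing, e.g.:
mask = torch.tensor([1.0, 1.0, 0.0, 1.0])
loss = (per_example * mask).sum() / mask.sum()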
Example #12
Source File: bert_tagger.py    From mrc-for-flat-nested-ner with Apache License 2.0
def forward(self, input_ids, token_type_ids=None, attention_mask=None, 
        labels=None, input_mask=None):

        last_bert_layer, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, \
            output_all_encoded_layers=False)
        last_bert_layer = last_bert_layer.view(-1, self.hidden_size)
        last_bert_layer = self.dropout(last_bert_layer)
        logits = self.classifier(last_bert_layer) 

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            if input_mask is not None:
                # the original computed masked_logits with torch.masked_select
                # but never used it; score only the positions the mask selects
                active = input_mask.view(-1) == 1
                loss = loss_fct(logits.view(-1, self.num_labels)[active],
                                labels.view(-1)[active])
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            return loss 
        else:
            return logits 
Example #13
Source File: bert_mrc_ner.py    From mrc-for-flat-nested-ner with Apache License 2.0
def forward(self, input_ids, token_type_ids=None, attention_mask=None, 
        start_positions=None, end_positions=None):

        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        sequence_output = sequence_output.view(-1, self.hidden_size)  
        
        start_logits = self.start_outputs(sequence_output) 
        end_logits = self.end_outputs(sequence_output) 


        if start_positions is not None and end_positions is not None:
            loss_fct = CrossEntropyLoss() 
        
            start_loss = loss_fct(start_logits.view(-1, 2), start_positions.view(-1))
            end_loss = loss_fct(end_logits.view(-1, 2), end_positions.view(-1))
            # the original added an undefined span_loss here; drop it
            total_loss = start_loss + end_loss
            # total_loss = (start_loss + end_loss) / 2
            return total_loss 
        else:
            return start_logits, end_logits 
Example #14
Source File: __init__.py    From dfw with MIT License
def get_loss(args):
    if args.loss == 'svm':
        loss_fn = MultiClassHingeLoss()
    elif args.loss == 'ce':
        loss_fn = nn.CrossEntropyLoss()
    else:
        raise ValueError("unknown loss: {}".format(args.loss))

    print('L2 regularization: \t {}'.format(args.l2))
    print('\nLoss function:')
    print(loss_fn)

    if args.cuda:
        loss_fn = loss_fn.cuda()

    return loss_fn 
Example #15
Source File: SemBranch.py    From Semantic-Aware-Scene-Recognition with MIT License
def __init__(self, scene_classes, semantic_classes=151):
        super(SemBranch, self).__init__()

        # Semantic Branch
        self.in_block_sem = nn.Sequential(
            nn.Conv2d(semantic_classes + 1, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.in_block_sem_1 = BasicBlockSem(64, 128, kernel_size=3, stride=2, padding=1)
        self.in_block_sem_2 = BasicBlockSem(128, 256, kernel_size=3, stride=2, padding=1)
        self.in_block_sem_3 = BasicBlockSem(256, 512, kernel_size=3, stride=2, padding=1)

        # Semantic Scene Classification Layers
        self.dropout = nn.Dropout(0.3)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc_SEM = nn.Linear(512, scene_classes)

        # Loss
        self.criterion = nn.CrossEntropyLoss() 
Example #16
Source File: utility_functions.py    From MaskTrack with MIT License
def cross_entropy_loss_weighted(output, labels):

    temp = labels.data.cpu().numpy()
    freqCount = scipystats.itemfreq(temp)
    total = freqCount[0][1]+freqCount[1][1]
    perc_1 = freqCount[1][1]/total
    perc_0 = freqCount[0][1]/total

    weight_array = [perc_1, perc_0]

    if torch.cuda.is_available():
        weight_tensor = torch.FloatTensor(weight_array).cuda()
    else:
        weight_tensor = torch.FloatTensor(weight_array)

    ce_loss = nn.CrossEntropyLoss(weight=weight_tensor)
    images, channels, height, width = output.data.shape
    loss = ce_loss(output, labels.long().view(images, height, width))
    return loss 
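scipy.stats.itemfreq was deprecated and later removed from SciPy; a torch-native sketch of the same inverse-frequency weighting, assuming binary labels 0/1 (the function name is mine):

import torch
import torch.nn as nn

def cross_entropy_loss_weighted_torch(output, labels):
    flat = labels.long().view(-1)
    counts = torch.bincount(flat, minlength=2).float()
    # weight class 0 by the frequency of class 1 and vice versa
    weights = counts.flip(0) / counts.sum()
    ce_loss = nn.CrossEntropyLoss(weight=weights.to(output.device))
    n, c, h, w = output.shape
    return ce_loss(output, labels.long().view(n, h, w))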
Example #17
Source File: loss.py    From Fast_Seg with Apache License 2.0
def __init__(self, ignore_label, reduction='elementwise_mean', thresh=0.6, min_kept=256,
                 down_ratio=1, use_weight=False):
        super(OhemCrossEntropy2dTensor, self).__init__()
        self.ignore_label = ignore_label
        self.thresh = float(thresh)
        self.min_kept = int(min_kept)
        self.down_ratio = down_ratio
        if use_weight:
            weight = torch.FloatTensor(
                [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489,
                 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955,
                 1.0865, 1.1529, 1.0507])
            self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction,
                                                       weight=weight,
                                                       ignore_index=ignore_label)
        else:
            self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction,
                                                       ignore_index=ignore_label) 
Example #18
Source File: modeling.py    From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            return loss
        else:
            return logits 
Example #19
Source File: loss_funcs.py    From mrc-for-flat-nested-ner with Apache License 2.0
def cross_entropy_loss():
    # loss 
    loss = nn.CrossEntropyLoss()
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.empty(3, dtype=torch.long).random_(5)
    output = loss(input, target)
    output.backward() 
Example #20
Source File: bert_basic_layer.py    From mrc-for-flat-nested-ner with Apache License 2.0
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
                next_sentence_label=None):
        sequence_output, pooled_output, attn = self.bert(input_ids, token_type_ids, attention_mask,
                                                         output_all_encoded_layers=False)
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

        if masked_lm_labels is not None and next_sentence_label is not None:
            loss_fct = CrossEntropyLoss(ignore_index=-1)
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
            total_loss = masked_lm_loss + next_sentence_loss
            return total_loss, attn
        else:
            return prediction_scores, seq_relationship_score, attn 
Example #21
Source File: modeling.py    From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        flat_input_ids = input_ids.view(-1, input_ids.size(-1))
        flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
        flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1))
        _, pooled_output = self.bert(flat_input_ids, flat_token_type_ids, flat_attention_mask, output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        reshaped_logits = logits.view(-1, self.num_choices)

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(reshaped_logits, labels)
            return loss
        else:
            return reshaped_logits 
Example #22
Source File: bert_qa.py    From mrc-for-flat-nested-ner with Apache License 2.0
def forward(self, input_ids, token_type_ids=None, attention_mask=None, 
        start_positions=None, end_positions=None):

        sequence_output, _ = self.bert(input_ids, token_type_ids, 
            attention_mask, output_all_encoded_layers=False)
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        if start_positions is not None and end_positions is not None:
            # if we are on multi-GPU, splitting can add an extra dimension; squeeze it away
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs;
            # we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2 
            return total_loss 
        else:
            return start_logits, end_logits 
Example #23
Source File: cross_entropy.py    From MobileNetV3-pytorch with MIT License
def __init__(self, weight=None, ignore_index=-100, reduction='mean'):
        super(CrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction) 
Example #24
Source File: common_utils.py    From interpret-text with MIT License
def create_pytorch_multiclass_classifier(X, y):
    # get the number of unique classes
    numClasses = np.unique(y).shape[0]
    # create a simple (dummy) PyTorch DNN model for multiclass classification
    epochs = 12
    torch_X = torch.Tensor(X).float()
    torch_y = torch.Tensor(y).long()
    # Create network structure
    net = _common_pytorch_generator(X.shape[1], numClasses=numClasses)
    # Train the model
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    return _train_pytorch_model(epochs, criterion, optimizer, net, torch_X, torch_y) 
Example #25
Source File: common_utils.py    From interpret-text with MIT License
def create_pytorch_classifier(X, y):
    # create a simple (dummy) PyTorch DNN model for binary classification
    epochs = 12
    torch_X = torch.Tensor(X).float()
    torch_y = torch.Tensor(y).long()
    # Create network structure
    net = _common_pytorch_generator(X.shape[1], numClasses=2)
    # Train the model
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    return _train_pytorch_model(epochs, criterion, optimizer, net, torch_X, torch_y) 
Example #26
Source File: validate.py    From ghostnet with Apache License 2.0
def main():
    args = parser.parse_args()

    model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.dropout)
    model.load_state_dict(torch.load('./models/state_dict_93.98.pth'))

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    elif args.num_gpu < 1:
        model = model
    else:
        model = model.cuda()
    print('GhostNet created.')
    
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    model.eval()
    
    validate_loss_fn = nn.CrossEntropyLoss().cuda()
    eval_metrics = validate(model, loader, validate_loss_fn, args)
    print(eval_metrics) 
Example #27
Source File: trainer.py    From Hash-Embeddings with MIT License
def __init__(self,
                 model,
                 criterion=nn.CrossEntropyLoss,
                 optimizer=torch.optim.Adam,
                 verbose=3,
                 seed=123,
                 metric="accuracy",
                 isCuda=torch.cuda.is_available()):
        self.model = model
        np.random.seed(seed)
        torch.random.manual_seed(seed)

        self.isCuda = isCuda
        if self.isCuda:
            assert torch.cuda.is_available()
            print("Using CUDA")
            self.model = self.model.cuda()
            torch.cuda.manual_seed(seed)

        self.verbose = verbose
        self.seed = seed
        self.eval_metric = metric
        if metric == "accuracy":
            self.eval_metric = evaluate_accuracy
        # instantiate the criterion and optimizer classes passed in
        # (the original stored the uninstantiated classes first, then
        # immediately overwrote them)
        self.criterion = criterion()
        self.optimizer = optimizer(model.parameters()) 
Example #28
Source File: train.py    From MomentumContrast.pytorch with MIT License
def train(model_q, model_k, device, train_loader, queue, optimizer, epoch, temp=0.07):
    model_q.train()
    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        x_q = data[0]
        x_k = data[1]

        x_q, x_k = x_q.to(device), x_k.to(device)
        q = model_q(x_q)
        k = model_k(x_k)
        k = k.detach()

        N = data[0].shape[0]
        K = queue.shape[0]
        l_pos = torch.bmm(q.view(N, 1, -1), k.view(N, -1, 1))
        l_neg = torch.mm(q.view(N, -1), queue.T.view(-1, K))

        logits = torch.cat([l_pos.view(N, 1), l_neg], dim=1)

        labels = torch.zeros(N, dtype=torch.long)
        labels = labels.to(device)

        cross_entropy_loss = nn.CrossEntropyLoss()
        loss = cross_entropy_loss(logits/temp, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        momentum_update(model_q, model_k)

        queue = queue_data(queue, k)
        queue = dequeue_data(queue)

    total_loss /= len(train_loader.dataset)

    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, total_loss)) 
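The contrastive (InfoNCE) objective above reduces to ordinary cross-entropy in which column 0 always holds the positive pair; a toy illustration:

import torch
import torch.nn as nn

N, K, temp = 2, 4, 0.07
l_pos = torch.full((N, 1), 0.9)            # similarity to the positive key
l_neg = torch.randn(N, K) * 0.1            # similarities to queued negatives
logits = torch.cat([l_pos, l_neg], dim=1)
labels = torch.zeros(N, dtype=torch.long)  # the positive sits at index 0
loss = nn.CrossEntropyLoss()(logits / temp, labels)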
Example #29
Source File: modeling.py    From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, next_sentence_label=None):
        sequence_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                                   output_all_encoded_layers=False)
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

        if masked_lm_labels is not None and next_sentence_label is not None:
            loss_fct = CrossEntropyLoss(ignore_index=-1)
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
            total_loss = masked_lm_loss + next_sentence_loss
            return total_loss
        else:
            return prediction_scores, seq_relationship_score 
Example #30
Source File: loss.py    From overhaul-distillation with MIT License
def CrossEntropyLoss(self, logit, target):
        n, c, h, w = logit.size()
        criterion = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_index,
                                        size_average=self.size_average)
        if self.cuda:
            criterion = criterion.cuda()

        loss = criterion(logit, target.long())

        if self.batch_average:
            loss /= n

        return loss
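For completeness: nn.CrossEntropyLoss handles the 4-D segmentation case above natively, with logits of shape (N, C, H, W) and an integer target map of shape (N, H, W):

import torch
import torch.nn as nn

n, c, h, w = 2, 19, 8, 8
logit = torch.randn(n, c, h, w)            # per-pixel class scores
target = torch.randint(0, c, (n, h, w))    # per-pixel class indices
loss = nn.CrossEntropyLoss()(logit, target)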