Python warpctc_pytorch.CTCLoss() Examples

The following are 8 code examples of warpctc_pytorch.CTCLoss(). Each example is taken from an open-source project; the source file and license are noted above it. You may also want to check out the other available functions and classes of the warpctc_pytorch module.
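Before the project-specific examples, here is a minimal sketch of calling warpctc_pytorch.CTCLoss directly. The tensor names and sizes below are illustrative assumptions; the shapes follow the warp-ctc convention of time-major, unnormalized activations and a single concatenated 1-D integer target tensor (the blank symbol is index 0, so targets use labels >= 1).

import torch
from warpctc_pytorch import CTCLoss

ctc_loss = CTCLoss()

# Unnormalized network outputs of shape (seq_len, batch, alphabet_size);
# warp-ctc applies softmax internally, so no log_softmax is taken here.
acts = torch.randn(20, 4, 10, requires_grad=True)

# Targets for the whole batch concatenated into one 1-D IntTensor (no blanks),
# plus per-utterance label counts and frame counts as CPU IntTensors.
targets = torch.randint(1, 10, (12,), dtype=torch.int)
target_lens = torch.tensor([3, 3, 3, 3], dtype=torch.int)
act_lens = torch.tensor([20, 20, 20, 20], dtype=torch.int)

loss = ctc_loss(acts, targets, act_lens, target_lens)
loss.backward()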
Example #1
Source File: ctc.py    From adviser with GNU General Public License v3.0
def __init__(self, odim, eprojs, dropout_rate, ctc_type='warpctc', reduce=True):
        super().__init__()
        self.dropout_rate = dropout_rate
        self.loss = None
        self.ctc_lo = torch.nn.Linear(eprojs, odim)
        self.ctc_type = ctc_type

        if self.ctc_type == 'builtin':
            reduction_type = 'sum' if reduce else 'none'
            self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)
        elif self.ctc_type == 'warpctc':
            import warpctc_pytorch as warp_ctc
            self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce)
        else:
            raise ValueError('ctc_type must be "builtin" or "warpctc": {}'
                             .format(self.ctc_type))

        self.ignore_id = -1
        self.reduce = reduce 
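The constructor above only selects a backend; at call time the two backends expect different inputs. The sketch below is a hedged illustration (the tensor names and shapes are assumptions, not code from the adviser project): torch.nn.CTCLoss takes (T, B, C) log-probabilities, while warp-ctc takes raw (T, B, C) activations with integer length tensors.

import torch

T, B, C = 50, 4, 20                             # frames, batch, output dim (blank = 0)
logits = torch.randn(T, B, C)                   # time-major, as both losses expect
hlens = torch.full((B,), T, dtype=torch.int32)  # input lengths per utterance
ys = [torch.randint(1, C, (8,), dtype=torch.int32) for _ in range(B)]
olens = torch.tensor([len(y) for y in ys], dtype=torch.int32)
targets = torch.cat(ys)                         # concatenated target format

# builtin backend: reduction='sum', inputs must be log-probabilities
builtin_loss = torch.nn.CTCLoss(reduction='sum')
loss_builtin = builtin_loss(logits.log_softmax(2), targets.long(), hlens.long(), olens.long())

# warpctc backend (if installed): raw activations and CPU int tensors
try:
    import warpctc_pytorch as warp_ctc
    warpctc_loss = warp_ctc.CTCLoss(size_average=True)
    loss_warpctc = warpctc_loss(logits, targets, hlens, olens)
except ImportError:
    loss_warpctc = None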
Example #2
Source File: ctc.py    From espnet with Apache License 2.0
def __init__(self, odim, eprojs, dropout_rate, ctc_type="warpctc", reduce=True):
        super().__init__()
        self.dropout_rate = dropout_rate
        self.loss = None
        self.ctc_lo = torch.nn.Linear(eprojs, odim)

        # For PyTorch >= 1.2.0, the built-in CTC is always used
        self.ctc_type = (
            ctc_type
            if LooseVersion(torch.__version__) < LooseVersion("1.2.0")
            else "builtin"
        )
        if ctc_type != self.ctc_type:
            logging.warning(f"CTC was set to {self.ctc_type} due to PyTorch version.")
        if self.ctc_type == "builtin":
            reduction_type = "sum" if reduce else "none"
            self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)
        elif self.ctc_type == "warpctc":
            import warpctc_pytorch as warp_ctc

            self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce)
        else:
            raise ValueError(
                'ctc_type must be "builtin" or "warpctc": {}'.format(self.ctc_type)
            )

        self.ignore_id = -1
        self.reduce = reduce 
Example #3
Source File: ctc.py    From espnet with Apache License 2.0
def __init__(
        self,
        odim: int,
        encoder_output_size: int,
        dropout_rate: float = 0.0,
        ctc_type: str = "builtin",
        reduce: bool = True,
    ):
        assert check_argument_types()
        super().__init__()
        eprojs = encoder_output_size
        self.dropout_rate = dropout_rate
        self.ctc_lo = torch.nn.Linear(eprojs, odim)
        self.ctc_type = ctc_type

        if self.ctc_type == "builtin":
            reduction_type = "sum" if reduce else "none"
            self.ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)
        elif self.ctc_type == "warpctc":
            import warpctc_pytorch as warp_ctc

            self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce)
        else:
            raise ValueError(
                f'ctc_type must be "builtin" or "warpctc": {self.ctc_type}'
            )

        self.reduce = reduce 
Example #4
Source File: crnn.py    From Text-Recognition with GNU Lesser General Public License v2.1
def __init__(self, abc, config, profiler, imgH, nc, rnn_hidden_size=256, rnn_num_layers=2, leakyRelu=False):
		
		super(CRNN_resnet, self).__init__()
		assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

		self.abc = abc
		self.num_classes = len(abc)+1
		self.hidden_size = rnn_hidden_size

		self.use_pretrained_rnn = False

		self.config = config
		self.seed()
		self.profiler = profiler

		self.loss_name = self.config['lossf']
		
		self.prev_lr = config['lr'][1]
		self.is_gray = (nc == 1)

		resnet = torchvision.models.resnet.resnet18(pretrained=True)

		for param in resnet.parameters():
		    param.requires_grad = False

		features = list(resnet.children())[:-2]
		self.cnn = nn.Sequential(
			*features,
			)#nn.Conv2d(1024, 512, 2)

		self.rnn = nn.Sequential(
			BidirectionalLSTM(512, self.hidden_size, self.hidden_size),
			BidirectionalLSTM(self.hidden_size, self.hidden_size, self.num_classes))

		if config['lossf'] == 'CTC':
			log.info('Using CTC')
			self.lossf = CTCLoss()

		if self.config['optimizer'] == 'Adam':
			log.info('Using Adam optimizer')
			self.opt = optim.Adam(self.parameters(), lr=config['lr'][1], weight_decay=config['weight_decay'])
		
		elif self.config['optimizer'] == 'SGD':
			log.info('Using SGD optimizer')
			self.opt = optim.SGD(self.parameters(), lr=config['lr'][1], momentum=config['momentum'], weight_decay=config['weight_decay'])

		if self.config['PreTrained_net'] and self.config['PreTrained_model']['check'] == False and self.use_pretrained_rnn:
			self.custom_pick()

		if config['varying_width']:
			self.list_or_tensor = 'list'
		else:
			self.list_or_tensor = 'tensor' 
Example #5
Source File: ctc.py    From neural_sp with Apache License 2.0
def __init__(self,
                 eos,
                 blank,
                 enc_n_units,
                 vocab,
                 dropout=0.,
                 lsm_prob=0.,
                 fc_list=None,
                 param_init=0.1,
                 backward=False):

        super(CTC, self).__init__()

        self.eos = eos
        self.blank = blank
        self.vocab = vocab
        self.lsm_prob = lsm_prob
        self.bwd = backward

        self.space = -1  # TODO(hirofumi): fix later

        # for posterior plot
        self.prob_dict = {}
        self.data_dict = {}

        # Fully-connected layers before the softmax
        if fc_list is not None and len(fc_list) > 0:
            _fc_list = [int(fc) for fc in fc_list.split('_')]
            fc_layers = OrderedDict()
            for i in range(len(_fc_list)):
                input_dim = enc_n_units if i == 0 else _fc_list[i - 1]
                fc_layers['fc' + str(i)] = nn.Linear(input_dim, _fc_list[i])
                fc_layers['dropout' + str(i)] = nn.Dropout(p=dropout)
            fc_layers['fc' + str(len(_fc_list))] = nn.Linear(_fc_list[-1], vocab)
            self.output = nn.Sequential(fc_layers)
        else:
            self.output = nn.Linear(enc_n_units, vocab)

        import warpctc_pytorch
        self.warpctc_loss = warpctc_pytorch.CTCLoss(size_average=True)

        self.forced_aligner = CTCForcedAligner() 
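For reference, the fc_list branch above expands an underscore-separated size string into a stack of Linear/Dropout layers before the final projection to the vocabulary. Below is a small standalone illustration with made-up sizes (enc_n_units=320, vocab=100, fc_list='512_256'); these values are assumptions, not neural_sp defaults.

from collections import OrderedDict
import torch.nn as nn

enc_n_units, vocab, dropout = 320, 100, 0.1
_fc_list = [int(fc) for fc in '512_256'.split('_')]
fc_layers = OrderedDict()
for i, n_units in enumerate(_fc_list):
    input_dim = enc_n_units if i == 0 else _fc_list[i - 1]
    fc_layers['fc' + str(i)] = nn.Linear(input_dim, n_units)
    fc_layers['dropout' + str(i)] = nn.Dropout(p=dropout)
fc_layers['fc' + str(len(_fc_list))] = nn.Linear(_fc_list[-1], vocab)
output = nn.Sequential(fc_layers)
# Linear(320, 512) -> Dropout -> Linear(512, 256) -> Dropout -> Linear(256, 100)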
Example #6
Source File: test_loss.py    From espnet with Apache License 2.0
def test_ctc_loss(in_length, out_length, use_warpctc):
    pytest.importorskip("torch")
    if use_warpctc:
        pytest.importorskip("warpctc_pytorch")
        import warpctc_pytorch

        torch_ctcloss = warpctc_pytorch.CTCLoss(size_average=True)
    else:
        if LooseVersion(torch.__version__) < LooseVersion("1.0"):
            pytest.skip("pytorch < 1.0 doesn't support CTCLoss")
        _ctcloss_sum = torch.nn.CTCLoss(reduction="sum")

        def torch_ctcloss(th_pred, th_target, th_ilen, th_olen):
            th_pred = th_pred.log_softmax(2)
            loss = _ctcloss_sum(th_pred, th_target, th_ilen, th_olen)
            # Batch-size average
            loss = loss / th_pred.size(1)
            return loss

    n_out = 7
    input_length = numpy.array(in_length, dtype=numpy.int32)
    label_length = numpy.array(out_length, dtype=numpy.int32)
    np_pred = [
        numpy.random.rand(il, n_out).astype(numpy.float32) for il in input_length
    ]
    np_target = [
        numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    # NOTE: np_pred[i] seems to be transposed and used with axis=-1 in e2e_asr.py
    ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2)
    ch_target = F.pad_sequence(np_target, padding=-1)
    ch_loss = F.connectionist_temporal_classification(
        ch_pred, ch_target, 0, input_length, label_length
    ).data

    th_pred = pad_list([torch.from_numpy(x) for x in np_pred], 0.0).transpose(0, 1)
    th_target = torch.from_numpy(numpy.concatenate(np_target))
    th_ilen = torch.from_numpy(input_length)
    th_olen = torch.from_numpy(label_length)
    th_loss = torch_ctcloss(th_pred, th_target, th_ilen, th_olen).numpy()
    numpy.testing.assert_allclose(th_loss, ch_loss, 0.05) 
Example #7
Source File: test_loss.py    From espnet with Apache License 2.0
def test_attn_loss():
    n_out = 7
    _eos = n_out - 1
    n_batch = 3
    label_length = numpy.array([4, 2, 3], dtype=numpy.int32)
    np_pred = numpy.random.rand(n_batch, max(label_length) + 1, n_out).astype(
        numpy.float32
    )
    # NOTE: 0 is only used for CTC and never appears in the attention target
    np_target = [
        numpy.random.randint(1, n_out - 1, size=ol, dtype=numpy.int32)
        for ol in label_length
    ]

    eos = numpy.array([_eos], "i")
    ys_out = [F.concat([y, eos], axis=0) for y in np_target]

    # padding for ys with -1
    # pys: utt x olen
    # NOTE: -1 is default ignore index for chainer
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)
    y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out))
    ch_loss = F.softmax_cross_entropy(y_all, F.concat(pad_ys_out, axis=0))

    # NOTE: this index 0 is only used for CTC, not attention, so it can be ignored
    # unfortunately, torch cross_entropy does not accept out-of-bound ids
    th_ignore = 0
    th_pred = torch.from_numpy(y_all.data)
    th_target = pad_list([torch.from_numpy(t.data).long() for t in ys_out], th_ignore)
    if LooseVersion(torch.__version__) < LooseVersion("1.0"):
        reduction_str = "elementwise_mean"
    else:
        reduction_str = "mean"
    th_loss = torch.nn.functional.cross_entropy(
        th_pred, th_target.view(-1), ignore_index=th_ignore, reduction=reduction_str
    )
    print(ch_loss)
    print(th_loss)

    # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size
    # while chainer's default setting does
    loss_data = float(th_loss)
    numpy.testing.assert_allclose(loss_data, ch_loss.data, 0.05) 
Example #8
Source File: train.py    From pytorch-asr with GNU General Public License v3.0
def __init__(self, vlog=None, tlog=None, batch_size=8, init_lr=1e-4, max_norm=400,
                 use_cuda=False, log_dir='logs_densenet_ctc', model_prefix='densenet_ctc',
                 checkpoint=False, continue_from=None, opt_type="sgd", *args, **kwargs):
        # training parameters
        self.batch_size = batch_size
        self.init_lr = init_lr
        self.max_norm = max_norm
        self.use_cuda = use_cuda
        self.log_dir = log_dir
        self.model_prefix = model_prefix
        self.checkpoint = checkpoint
        self.epoch = 0

        # visual logging
        self.vlog = vlog
        if self.vlog is not None:
            self.vlog.add_plot(title='loss', xlabel='epoch')
        self.tlog = tlog

        # setup model
        self.model = densenet_custom(num_classes=p.NUM_CTC_LABELS)
        if self.use_cuda:
            self.model.cuda()

        # setup loss
        self.loss = CTCLoss(blank=0, size_average=True)

        # setup optimizer
        assert opt_type in OPTIMIZER_TYPES
        parameters = self.model.parameters()
        if opt_type == "sgd":
            logger.info("using SGD")
            self.optimizer = torch.optim.SGD(parameters, lr=self.init_lr, momentum=0.9)
            self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=5)
        elif opt_type == "sgdr":
            logger.info("using SGDR")
            self.optimizer = torch.optim.SGD(parameters, lr=self.init_lr, momentum=0.9)
            #self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=1, gamma=0.5)
            self.lr_scheduler = CosineAnnealingWithRestartsLR(self.optimizer, T_max=5, T_mult=2)
        elif opt_type == "adam":
            logger.info("using AdamW")
            self.optimizer = torch.optim.Adam(parameters, lr=self.init_lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.0005, l2_reg=False)
            self.lr_scheduler = None

        # setup decoder for test
        self.decoder = LatGenCTCDecoder()

        if continue_from is not None:
            self.load(continue_from)
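The constructor above sets up the CTC loss, optimizer, scheduler, and max_norm but does not show the update itself. Below is a hedged sketch of the corresponding training step; the method name, argument names, and the (T, B, num_labels) model output shape are assumptions for illustration, not code from pytorch-asr.

def train_step(self, xs, ys, frame_lens, label_lens):
        # assumed: xs are padded input features, ys is a concatenated 1-D IntTensor of labels
        self.model.train()
        self.optimizer.zero_grad()
        ys_hat = self.model(xs)  # assumed shape: (T, B, p.NUM_CTC_LABELS)
        loss = self.loss(ys_hat, ys.int(), frame_lens.int(), label_lens.int())
        loss.backward()
        # clip gradients with the max_norm stored in __init__
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_norm)
        self.optimizer.step()
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()
        return float(loss)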