Python torch.cuda() Examples

The following are 30 code examples of torch.cuda(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.

Example #1

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #2

Source File: model_classes.py From e2e-model-learning with Apache License 2.0

6 votes

def forward(self, log_prices):
        """Assemble batched QP inputs from ``log_prices`` and run ``QPFunction``.

        The linear term is the column-wise concatenation of ``exp(log_prices)``,
        its negation, and a constant row ``-(lam * B)`` of length ``T``. The
        stored ``Q``, ``A``, ``b``, ``Ae`` and ``be`` are expanded along a new
        leading batch axis, and everything is cast to double before the call.

        Returns whatever ``QPFunction(verbose=True)`` returns for those inputs.
        NOTE(review): the constant row is moved with ``.cuda()``, so a CUDA
        device is required here.
        """
        prices = torch.exp(log_prices)
        batch = prices.size(0)
        horizon = self.T

        def tile_matrix(m):
            # (r, c) -> (batch, r, c)
            return m.unsqueeze(0).expand(batch, m.size(0), m.size(1))

        def tile_vector(v):
            # (n,) -> (batch, n)
            return v.unsqueeze(0).expand(batch, v.size(0))

        Q = tile_matrix(self.Q)

        penalty = Variable(self.lam * self.B * torch.ones(horizon))
        penalty = penalty.unsqueeze(0).expand(batch, horizon).cuda()
        c = torch.cat([prices, -prices, -penalty], 1)

        A = tile_matrix(self.A)
        b = tile_vector(self.b)
        Ae = tile_matrix(self.Ae)
        be = tile_vector(self.be)

        solver = QPFunction(verbose=True)
        qp_inputs = [t.double() for t in (Q, c, A, b, Ae, be)]
        return solver(*qp_inputs)

Example #3

Source File: model_classes.py From e2e-model-learning with Apache License 2.0

6 votes

def __init__(self, X, Y, hidden_layer_sizes):
        """Create the linear layer, the non-linear stack and the ``sig`` parameter.

        ``self.lin`` is initialised from the least-squares solution of ``Y`` on
        ``[X, 1]`` (normal equations): all rows but the last become the weight,
        the last row becomes the bias. ``self.net`` chains
        Linear -> BatchNorm1d -> ReLU -> Dropout(0.2) per hidden width, followed
        by a final Linear to ``Y.shape[1]`` outputs.

        NOTE(review): ``reduce`` is called without an initial value, so an
        empty ``hidden_layer_sizes`` raises ``TypeError``; ``sig`` is created
        with ``.cuda()`` and therefore needs a CUDA device.
        """
        super(Net, self).__init__()

        # Closed-form least squares with a bias column appended to X.
        design = np.hstack([X, np.ones((X.shape[0], 1))])
        gram = design.T.dot(design)
        theta = np.linalg.solve(gram, design.T.dot(Y))

        n_in, n_out = X.shape[1], Y.shape[1]
        self.lin = nn.Linear(n_in, n_out)
        weight, bias = self.lin.parameters()
        weight.data = torch.Tensor(theta[:-1, :].T)
        bias.data = torch.Tensor(theta[-1, :])

        # One [Linear, BatchNorm, ReLU, Dropout] group per consecutive width pair.
        widths = [n_in] + hidden_layer_sizes
        groups = [
            [nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), nn.ReLU(), nn.Dropout(p=0.2)]
            for fan_in, fan_out in zip(widths[:-1], widths[1:])
        ]
        layers = reduce(operator.add, groups)
        layers += [nn.Linear(widths[-1], n_out)]
        self.net = nn.Sequential(*layers)
        self.sig = Parameter(torch.ones(1, n_out).cuda())

Example #4

Source File: nets.py From e2e-model-learning with Apache License 2.0

6 votes

def run_weighted_rmse_net_helper(X_train, Y_train, X_test, Y_test, params, weights, i):
    """Train a fresh ``Net`` with weighted RMSE and compute new weights.

    The last column of ``X_train`` / ``X_test`` is dropped before use. The
    model is trained for 100 calls to ``batch_train_weightrmse`` (step index
    ``i*100 + j``). The training predictions are then passed through
    ``SolveScheduling`` and ``task_loss_no_mean`` to produce ``weights2``.

    Returns ``(model, weights2)``.
    """
    def on_gpu(array):
        # Wrap an array as a Variable and move it to the GPU.
        return Variable(torch.Tensor(array)).cuda()

    train_features = X_train[:, :-1]
    X_train_ = on_gpu(train_features)
    Y_train_ = on_gpu(Y_train)
    # The test tensors are built as in the original but not used below.
    X_test_ = on_gpu(X_test[:, :-1])
    Y_test_ = on_gpu(Y_test)

    model = model_classes.Net(train_features, Y_train, [200, 200])
    model.cuda()
    opt = optim.Adam(model.parameters(), lr=1e-3)
    solver = model_classes.SolveScheduling(params)

    for j in range(100):
        model.train()
        batch_train_weightrmse(
            100, i*100 + j, X_train_.data, Y_train_.data, model, opt, weights.data)

    # Rebalance weights from the task loss of the scheduled training predictions.
    model.eval()
    mu_pred_train, sig_pred_train = model(X_train_)
    Y_sched_train = solver(mu_pred_train.double(), sig_pred_train.double())
    task_loss = task_loss_no_mean(Y_sched_train.float(), Y_train_, params)
    weights2 = task_loss.cuda()
    model.set_sig(X_train_, Y_train_)

    return model, weights2

Example #5

Source File: task_net.py From e2e-model-learning with Apache License 2.0

6 votes

def __init__(self, params, eps=1e-2):
        """Precompute the constant QP tensors on the GPU from ``params``.

        With ``k = len(params['d'])`` and ``n = 1 + 2*k``:
        ``Q`` is an n x n diagonal matrix of the ``*_quad`` coefficients,
        ``p`` is the length-n vector of ``*_lin`` coefficients, ``G`` stacks
        two k x n blocks on top of ``-I_n``, and ``h`` is ``[-d, d, 0_n]``.
        ``one`` is a 1-element tensor and ``eps_eye`` is ``eps * I_n`` with a
        leading batch axis.
        """
        super(SolveNewsvendor, self).__init__()
        k = len(params['d'])
        n_vars = 1 + 2*k

        quad_diag = [params['c_quad']] + [params['b_quad']]*k + [params['h_quad']]*k
        self.Q = Variable(torch.diag(torch.Tensor(quad_diag)).cuda())

        lin_coeffs = [params['c_lin']] + [params['b_lin']]*k + [params['h_lin']]*k
        self.p = Variable(torch.Tensor(lin_coeffs).cuda())

        # torch.cat copies its inputs, so the shared pieces can be reused.
        neg_eye_k = -torch.eye(k)
        zeros_kk = torch.zeros(k, k)
        upper = torch.cat([-torch.ones(k, 1), neg_eye_k, zeros_kk], 1)
        lower = torch.cat([torch.ones(k, 1), zeros_kk, neg_eye_k], 1)
        stacked = torch.cat([upper, lower, -torch.eye(n_vars)], 0)
        self.G = Variable(stacked.cuda())

        rhs = np.concatenate([-params['d'], params['d'], np.zeros(n_vars)])
        self.h = Variable(torch.Tensor(rhs).cuda())

        self.one = Variable(torch.Tensor([1])).cuda()
        self.eps_eye = eps * Variable(torch.eye(n_vars).cuda()).unsqueeze(0)

Example #6

Source File: mle_net.py From e2e-model-learning with Apache License 2.0

6 votes

def __init__(self, params, eps=1e-2):
        """Build the fixed QP data (``Q``, ``p``, ``G``, ``h``) as GPU Variables.

        ``k`` is ``len(params['d'])`` and the problem has ``1 + 2*k`` variables.
        ``Q`` is diagonal with entries ``c_quad``, then ``b_quad`` k times, then
        ``h_quad`` k times; ``p`` uses the matching ``*_lin`` values. ``G`` is
        the row-wise stack of ``[-1, -I, 0]``, ``[1, 0, -I]`` and ``-I``; ``h``
        is ``[-d, d, 0]``. ``one`` and ``eps_eye`` are small helper tensors.
        """
        super(SolveNewsvendor, self).__init__()
        k = len(params['d'])
        dim = 1 + 2*k

        def repeated(prefix):
            # [c_<prefix>] followed by k copies each of b_<prefix> and h_<prefix>.
            return ([params['c_' + prefix]]
                    + [params['b_' + prefix]]*k
                    + [params['h_' + prefix]]*k)

        self.Q = Variable(torch.diag(torch.Tensor(repeated('quad'))).cuda())
        self.p = Variable(torch.Tensor(repeated('lin')).cuda())

        row_blocks = [
            torch.cat([-torch.ones(k, 1), -torch.eye(k), torch.zeros(k, k)], 1),
            torch.cat([torch.ones(k, 1), torch.zeros(k, k), -torch.eye(k)], 1),
            -torch.eye(dim),
        ]
        self.G = Variable(torch.cat(row_blocks, 0).cuda())

        h_values = np.concatenate([-params['d'], params['d'], np.zeros(dim)])
        self.h = Variable(torch.Tensor(h_values).cuda())

        self.one = Variable(torch.Tensor([1])).cuda()
        self.eps_eye = eps * Variable(torch.eye(dim).cuda()).unsqueeze(0)

Example #7

Source File: mle_net.py From e2e-model-learning with Apache License 2.0

6 votes

def forward(self, y):
        """Scale the stored QP by ``y`` per sample, solve, and return column 0.

        For each row ``y[i]`` a diagonal scaling ``diag([1, y[i]+1e-8, y[i]+1e-8])``
        multiplies ``Q`` elementwise; ``p`` is multiplied elementwise by
        ``[1, y, y]``. ``G`` and ``h`` are expanded along a batch axis and the
        equality constraints are passed as empty tensors.

        Returns ``out[:, :1]`` of the float-cast ``QPFunction`` result.
        """
        nBatch, k = y.size()

        eps2 = 1e-8
        diagonals = []
        for i in range(nBatch):
            shifted = y[i] + eps2
            scale_i = torch.diag(torch.cat([self.one, shifted, shifted]))
            diagonals.append(scale_i.unsqueeze(0))
        Q_scale = torch.cat(diagonals, 0)
        Q = self.Q.unsqueeze(0).expand_as(Q_scale).mul(Q_scale)

        ones_column = Variable(torch.ones(nBatch, 1).cuda())
        p_scale = torch.cat([ones_column, y, y], 1)
        p = self.p.unsqueeze(0).expand_as(p_scale).mul(p_scale)

        G = self.G.unsqueeze(0).expand(nBatch, self.G.size(0), self.G.size(1))
        h = self.h.unsqueeze(0).expand(nBatch, self.h.size(0))
        # Empty tensor stands in for the (absent) equality constraints.
        e = Variable(torch.Tensor().cuda()).double()

        solver = QPFunction(verbose=False)
        out = solver(Q.double(), p.double(), G.double(), h.double(), e, e).float()

        return out[:, :1]

Example #8

Source File: structured_attention.py From ITDD with MIT License

6 votes

def forward(self, input):
        """Turn a batch of square score matrices into edge/root marginals.

        ``input`` is indexed as ``(batch, i, j)`` with square ``(i, j)`` slices;
        the diagonal of each slice holds the root scores. For every batch item
        a Laplacian is built from ``exp(input) + self.eps`` with its first row
        replaced by the exponentiated root scores. It is inverted and the
        result is combined with ``exp(input)`` into off-diagonal values plus
        a diagonal of root values.
        NOTE(review): this looks like the Matrix-Tree marginal computation of
        Koo et al. (2007) — confirm against the originating paper.

        The diagonal mask is created once on ``input.device`` instead of being
        rebuilt with a hard-coded ``.cuda()`` each iteration, so the method
        also works for CPU tensors and for tensors on a non-default GPU.

        Returns a tensor with the same shape as ``input``.
        """
        laplacian = input.exp() + self.eps
        output = input.clone()
        # Loop-invariant: True on the diagonal, on the same device as the input.
        diag_mask = torch.eye(input.size(1), device=input.device).ne(0)
        for b in range(input.size(0)):
            lap = laplacian[b].masked_fill(diag_mask, 0)
            # Off-diagonal entries negated, column sums on the diagonal.
            lap = -lap + torch.diag(lap.sum(0))
            # store roots on diagonal
            lap[0] = input[b].diag().exp()
            inv_laplacian = lap.inverse()

            # factor[i, j] == inv_laplacian[j, j]
            factor = (inv_laplacian.diag().unsqueeze(1)
                      .expand_as(input[b]).transpose(0, 1))
            term1 = input[b].exp().mul(factor).clone()
            term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
            term1[:, 0] = 0
            term2[0] = 0
            output[b] = term1 - term2
            roots_output = input[b].diag().exp().mul(
                inv_laplacian.transpose(0, 1)[0])
            output[b] = output[b] + torch.diag(roots_output)
        return output

Example #9

Source File: perf_stats.py From ClassyVision with MIT License

6 votes

def _process_cuda_events(self):
        """
        Drain finished timers from the front of ``self._cuda_pending_timers``.

        For the timer at the head of the queue, every ``(start, end)`` event
        pair is queried. If any event is not ready yet the method returns
        immediately and leaves that timer (and everything behind it) queued.
        Otherwise the summed elapsed time, converted from milliseconds to
        seconds, is pushed to ``self._cuda_stats[timer.name]`` and the timer is
        removed from the queue.
        """
        while self._cuda_pending_timers:
            head = self._cuda_pending_timers[0]
            total_seconds = 0.0

            for start_event, end_event in head._cuda_event_intervals:
                both_ready = start_event.query() and end_event.query()
                if not both_ready:
                    # Head timer still depends on an unfinished event: stop here.
                    return
                # elapsed_time() is in ms; host timers use seconds.
                total_seconds += start_event.elapsed_time(end_event) / 1000.0

            # Every interval of this timer is complete: record and dequeue.
            self._cuda_stats[head.name].update(total_seconds)
            self._cuda_pending_timers.popleft()

Example #10

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #11

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #12

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #13

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #14

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #15

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #16

Source File: gpipe.py From torchgpipe with Apache License 2.0

6 votes

def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to ``message``.

    The result is ``message``, a blank line, and then a fixed advisory text
    that sketches how to call ``balance_by_time``. It ends with a newline.
    """
    advice_lines = (
        "If your model is still under development, its optimal balance would change",
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive",
        "automatic balancing:",
        "",
        "  from torchgpipe import GPipe",
        "  from torchgpipe.balance import balance_by_time",
        "",
        "  partitions = torch.cuda.device_count()",
        "  sample = torch.empty(...)",
        "  balance = balance_by_time(partitions, model, sample)",
        "",
        "  model = GPipe(model, balance, ...)",
        "",  # trailing empty entry yields the final newline
    )
    return f"{message}\n\n" + "\n".join(advice_lines)

Example #17

Source File: Beam.py From SEASS with MIT License

6 votes

def __init__(self, size, cuda=False):
        """Initialise an empty beam holding ``size`` hypotheses.

        ``cuda`` selects whether tensors are created through ``torch.cuda`` or
        ``torch``. Scores start at zero; the first output step is filled with
        ``s2s.Constants.PAD`` except position 0, which is set to
        ``s2s.Constants.BOS``. The history lists start empty.
        """
        self.size = size
        self.done = False

        # Namespace used to construct tensors (GPU or CPU).
        if cuda:
            self.tt = torch.cuda
        else:
            self.tt = torch

        # The score for each translation on the beam.
        self.scores = self.tt.FloatTensor(size).zero_()
        self.all_scores = []
        self.all_length = []

        # The backpointers at each time-step.
        self.prevKs = []

        # The outputs at each time-step.
        first_step = self.tt.LongTensor(size).fill_(s2s.Constants.PAD)
        first_step[0] = s2s.Constants.BOS
        self.nextYs = [first_step]

        # The attentions (matrix) for each time.
        self.attn = []

    # Get the outputs for the current timestep.

Example #18

Source File: StructuredAttention.py From video-caption-openNMT.pytorch with MIT License

6 votes

def forward(self, input):
        """Turn a batch of square score matrices into edge/root marginals.

        ``input`` is indexed as ``(batch, i, j)`` with square ``(i, j)`` slices
        whose diagonal holds the root scores. Per batch item, a Laplacian is
        formed from ``exp(input) + self.eps`` with its first row replaced by
        the exponentiated root scores. It is inverted and the result is
        combined with ``exp(input)`` into off-diagonal values plus a diagonal
        of root values.
        NOTE(review): presumably the Matrix-Tree marginals of Koo et al.
        (2007) — confirm.

        The diagonal mask is built once, on ``input.device``, rather than with
        a hard-coded ``.cuda()`` inside the loop. The original failed for CPU
        inputs and for inputs on a GPU other than the current one.
        NOTE(review): ``torch.eye(..., device=...)`` needs PyTorch >= 0.4.

        Returns a tensor with the same shape as ``input``.
        """
        laplacian = input.exp() + self.eps
        output = input.clone()
        # Loop-invariant diagonal mask, allocated next to the input.
        diag_mask = Variable(
            torch.eye(input.size(1), device=input.device).ne(0))
        for b in range(input.size(0)):
            lap = laplacian[b].masked_fill(diag_mask, 0)
            # Off-diagonal entries negated, column sums on the diagonal.
            lap = -lap + torch.diag(lap.sum(0))
            # store roots on diagonal
            lap[0] = input[b].diag().exp()
            inv_laplacian = lap.inverse()

            # factor[i, j] == inv_laplacian[j, j]
            factor = (inv_laplacian.diag().unsqueeze(1)
                      .expand_as(input[b]).transpose(0, 1))
            term1 = input[b].exp().mul(factor).clone()
            term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
            term1[:, 0] = 0
            term2[0] = 0
            output[b] = term1 - term2
            roots_output = input[b].diag().exp().mul(
                inv_laplacian.transpose(0, 1)[0])
            output[b] = output[b] + torch.diag(roots_output)
        return output

Example #19

Source File: Translator.py From SEASS with MIT License

5 votes

def buildData(self, srcBatch, goldBatch):
        """Convert source (and optional gold) batches to indices and wrap them.

        Every item of ``srcBatch`` is mapped through ``self.src_dict`` with the
        UNK word as fallback. When ``goldBatch`` is truthy its items are mapped
        through ``self.tgt_dict`` with UNK, BOS and EOS words; otherwise the
        target data is ``None``.

        Returns an ``s2s.Dataset`` built with ``self.opt.batch_size``,
        ``self.opt.cuda`` and ``volatile=True``.
        """
        constants = s2s.Constants

        srcData = []
        for tokens in srcBatch:
            srcData.append(self.src_dict.convertToIdx(tokens, constants.UNK_WORD))

        if goldBatch:
            tgtData = []
            for tokens in goldBatch:
                tgtData.append(self.tgt_dict.convertToIdx(
                    tokens,
                    constants.UNK_WORD,
                    constants.BOS_WORD,
                    constants.EOS_WORD))
        else:
            tgtData = None

        return s2s.Dataset(srcData, tgtData, self.opt.batch_size, self.opt.cuda, volatile=True)

Example #20

Source File: data_parallel.py From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License

5 votes

def async_copy_to(obj, dev, main_stream=None):
    """Recursively copy every tensor inside ``obj`` to CUDA device ``dev``.

    Args:
        obj: A tensor, a mapping, a sequence, or any other object.
        dev: Target device passed to ``Tensor.cuda``.
        main_stream: Optional stream; when given, each copied tensor calls
            ``record_stream(main_stream)`` on its data.

    Returns:
        The copied tensor for tensors (``non_blocking=True``), a ``dict`` for
        mappings, a ``list`` for sequences, and ``obj`` itself otherwise.

    Two defects of the original are fixed here. The ABCs are taken from
    ``collections.abc``, because the ``collections.Mapping`` /
    ``collections.Sequence`` aliases were removed in Python 3.10. Strings are
    returned unchanged: a ``str`` is a ``Sequence`` of one-character ``str``
    objects, so the original recursed until ``RecursionError``.
    """
    # Local import keeps the block self-contained regardless of file imports.
    from collections.abc import Mapping, Sequence

    if torch.is_tensor(obj):
        v = obj.cuda(dev, non_blocking=True)
        if main_stream is not None:
            v.data.record_stream(main_stream)
        return v
    if isinstance(obj, str):
        return obj
    if isinstance(obj, Mapping):
        return {k: async_copy_to(o, dev, main_stream) for k, o in obj.items()}
    if isinstance(obj, Sequence):
        return [async_copy_to(o, dev, main_stream) for o in obj]
    return obj

Example #21

Source File: train.py From fastNLP with Apache License 2.0

5 votes

def evaluate(args):
    """Load data and a saved model, run ``Tester`` on the test split, print the result.

    The device is ``'cuda'`` when ``torch.cuda.is_available()`` and ``'cpu'``
    otherwise. Metrics are a ``SpanFPreRecMetric`` configured from the loaded
    data bundle.
    """
    data = get_data(args)
    test_data = data['test']
    model = load_model_from_path(args)

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    span_metric = SpanFPreRecMetric(
        tag_vocab=data['tag_vocab'],
        encoding_type=data['encoding_type'],
        ignore_labels=data['ignore_labels'],
    )
    tester = Tester(
        data=test_data,
        model=model,
        batch_size=args.batch_size,
        num_workers=2,
        device=device,
        metrics=span_metric,
    )
    results = tester.test()
    print(results)

Example #22

Source File: train.py From fastNLP with Apache License 2.0

5 votes

def train(args):
    """Build data, model and optimizer from ``args``, run ``Trainer``, print the result.

    The device is ``'cuda'`` when ``torch.cuda.is_available()`` and ``'cpu'``
    otherwise. Validation uses a ``SpanFPreRecMetric`` with ``'f1'`` as the
    metric key; no callbacks are registered.
    """
    data = get_data(args)
    model = get_model(args)
    optimizer = get_optim(args)

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    span_metric = SpanFPreRecMetric(
        tag_vocab=data['tag_vocab'],
        encoding_type=data['encoding_type'],
        ignore_labels=data['ignore_labels'],
    )
    trainer_kwargs = dict(
        train_data=data['train'],
        model=model,
        optimizer=optimizer,
        loss=None,
        batch_size=args.batch_size,
        n_epochs=args.epochs,
        num_workers=4,
        metrics=span_metric,
        metric_key='f1',
        dev_data=data['dev'],
        save_path=args.save_path,
        device=device,
        callbacks=[],
        check_code_level=-1,
    )
    trainer = Trainer(**trainer_kwargs)

    outcome = trainer.train()
    print(outcome)

Example #23

Source File: data_parallel.py From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License

5 votes

def _async_copy(inputs, device_ids):
    """Copy ``inputs[n]`` to ``device_ids[n]`` and return the copies as a tuple.

    ``inputs`` must be exactly a ``tuple`` or ``list`` with one entry per
    device; both conditions are enforced with ``assert``. Each copy is made by
    ``async_copy_to`` inside a ``cuda.device(dev)`` context.
    """
    expected = len(device_ids)
    assert type(inputs) in (tuple, list)
    assert len(inputs) == expected

    copies = []
    for item, target in zip(inputs, device_ids):
        with cuda.device(target):
            copied = async_copy_to(item, target)
        copies.append(copied)

    return tuple(copies)

Example #24

Source File: trainer.py From fairseq with MIT License

5 votes

def _build_optimizer(self):
        """Create ``self._optimizer`` and ``self._lr_scheduler`` from ``self.args``.

        Only parameters of the model and criterion with ``requires_grad`` set
        are optimised. With ``fp16``/``bf16`` an FP16 optimizer is built (the
        memory-efficient variant when the matching flag is set); otherwise the
        regular optimizer is built. In both branches an informational note is
        logged based on the compute capability of CUDA device 0 when
        ``self.cuda`` is truthy. ``use_bmuf`` wraps the optimizer in
        ``FairseqBMUF``.
        """
        args = self.args
        params = [
            p
            for p in chain(self.model.parameters(), self.criterion.parameters())
            if p.requires_grad
        ]

        if args.fp16 or args.bf16:
            if self.cuda and torch.cuda.get_device_capability(0)[0] < 7:
                logger.info(
                    "NOTE: your device does NOT support faster training with --fp16, "
                    "please switch to FP32 which is likely to be faster"
                )
            if args.memory_efficient_fp16 or args.memory_efficient_bf16:
                optimizer_cls = optim.MemoryEfficientFP16Optimizer
            else:
                optimizer_cls = optim.FP16Optimizer
            self._optimizer = optimizer_cls.build_optimizer(args, params)
        else:
            if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7:
                logger.info("NOTE: your device may support faster training with --fp16")
            self._optimizer = optim.build_optimizer(args, params)

        if args.use_bmuf:
            self._optimizer = optim.FairseqBMUF(args, self._optimizer)

        # The scheduler is created right after the optimizer so that the
        # initial learning rate is applied immediately.
        scheduler = lr_scheduler.build_lr_scheduler(args, self.optimizer)
        self._lr_scheduler = scheduler
        self._lr_scheduler.step_update(0)

Example #25

Source File: data_parallel.py From EMANet with GNU General Public License v3.0

5 votes

def _get_stream(device):
    """Return the cached background copy stream for ``device``.

    ``device == -1`` yields ``None``. The module-level ``_streams`` list is
    created on first use with one slot per ``cuda.device_count()``, and a
    ``cuda.Stream`` is created for a device the first time it is requested.
    """
    global _streams
    if device == -1:
        return None

    if _streams is None:
        _streams = [None] * cuda.device_count()

    cached = _streams[device]
    if cached is None:
        cached = cuda.Stream(device)
        _streams[device] = cached
    return cached

Example #26

Source File: gpipe.py From torchgpipe with Apache License 2.0

5 votes

def __iter__(self) -> Iterable[nn.Module]:
        """Yield every item of every partition in ``self.partitions``, in order."""
        for partition in self.partitions:
            for layer in partition:
                yield layer

    # GPipe should manage the device of each partition.
    # Deny cuda(), cpu(), and to() with device, by TypeError.

Example #27

Source File: gpipe.py From torchgpipe with Apache License 2.0

5 votes

def cuda(self, device: Optional[Device] = None) -> 'GPipe':
        """Refuse to move the module: unconditionally raises ``MOVING_DENIED``.

        ``device`` is never read and the method never returns.
        NOTE(review): ``MOVING_DENIED`` is defined outside this view;
        presumably a ``TypeError`` instance — confirm at its definition.
        """
        raise MOVING_DENIED

Example #28

Source File: data_parallel.py From EMANet with GNU General Public License v3.0

5 votes

def _async_copy_stream(inputs, device_ids):
    """Copy ``inputs[n]`` to ``device_ids[n]`` on per-device background streams.

    ``inputs`` must be exactly a ``tuple`` or ``list`` with one entry per
    device (checked with ``assert``). For each device, the copy is issued by
    ``async_copy_to`` inside ``cuda.stream(stream)``, where ``stream`` comes
    from ``_get_stream``. Afterwards the device's current stream waits on that
    background stream.

    Returns the copies as a list.
    """
    expected = len(device_ids)
    assert type(inputs) in (tuple, list)
    assert len(inputs) == expected

    copy_streams = []
    for d in device_ids:
        copy_streams.append(_get_stream(d))

    results = []
    for item, target, side_stream in zip(inputs, device_ids, copy_streams):
        with cuda.device(target):
            main_stream = cuda.current_stream()
            with cuda.stream(side_stream):
                copied = async_copy_to(item, target, main_stream=main_stream)
                results.append(copied)
            # Make the main stream wait until the background copy is done.
            main_stream.wait_stream(side_stream)

    return results

Example #29

Source File: gpipe.py From torchgpipe with Apache License 2.0

5 votes

def cuda(self, device: Optional[Device] = None) -> 'GPipe':
        """Refuse to move the module: unconditionally raises ``MOVING_DENIED``.

        ``device`` is never read and the method never returns.
        NOTE(review): ``MOVING_DENIED`` is defined outside this view;
        presumably a ``TypeError`` instance — confirm at its definition.
        """
        raise MOVING_DENIED

Example #30

Source File: gpipe.py From torchgpipe with Apache License 2.0

5 votes

def __iter__(self) -> Iterable[nn.Module]:
        """Yield every item of every partition in ``self.partitions``, in order."""
        for partition in self.partitions:
            for layer in partition:
                yield layer

    # GPipe should manage the device of each partition.
    # Deny cuda(), cpu(), and to() with device, by TypeError.