Python torch.cuda() Examples
The following are 30 code examples of torch.cuda().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module torch, or try the search function.
Example #1
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #2
Source File: model_classes.py From e2e-model-learning with Apache License 2.0 | 6 votes |
def forward(self, log_prices):
    """Assemble batched solver inputs from *log_prices* and run ``QPFunction``.

    Prices are recovered with ``torch.exp``. The vector passed as the second
    solver argument is ``[prices, -prices, -(lam * B * ones(T))]`` joined
    along dim 1, and the stored ``Q``, ``A``, ``b``, ``Ae`` and ``be`` are
    broadcast over the batch. Everything is cast to double for the solver.

    Returns:
        Whatever ``QPFunction(verbose=True)`` yields for those inputs.
    """
    prices = torch.exp(log_prices)
    batch = prices.size(0)
    horizon = self.T

    Q_batch = self.Q.unsqueeze(0).expand(batch, self.Q.size(0), self.Q.size(1))

    # Constant tail of the vector term, repeated for each batch row. It is
    # moved with .cuda() unconditionally, so this path needs a CUDA device.
    tail = Variable(self.lam * self.B * torch.ones(horizon))
    tail = tail.unsqueeze(0).expand(batch, horizon).cuda()
    c_batch = torch.cat([prices, -prices, -tail], 1)

    A_batch = self.A.unsqueeze(0).expand(batch, self.A.size(0), self.A.size(1))
    b_batch = self.b.unsqueeze(0).expand(batch, self.b.size(0))
    Ae_batch = self.Ae.unsqueeze(0).expand(batch, self.Ae.size(0), self.Ae.size(1))
    be_batch = self.be.unsqueeze(0).expand(batch, self.be.size(0))

    solver = QPFunction(verbose=True)
    return solver(
        Q_batch.double(),
        c_batch.double(),
        A_batch.double(),
        b_batch.double(),
        Ae_batch.double(),
        be_batch.double(),
    )
Example #3
Source File: model_classes.py From e2e-model-learning with Apache License 2.0 | 6 votes |
def __init__(self, X, Y, hidden_layer_sizes):
    """Build the linear layer plus the non-linear stack.

    The linear layer is seeded with the least-squares solution obtained from
    ``X`` and ``Y``.

    Args:
        X: array exposing ``shape`` with two dimensions (rows are samples);
            used for sizing and for the least-squares initialisation.
        Y: array with the same number of rows as ``X``; its second dimension
            is the output width.
        hidden_layer_sizes: widths of the hidden layers, in order. Any
            sequence is accepted, including an empty one, in which case the
            non-linear stack is a single ``nn.Linear``.
    """
    super(Net, self).__init__()

    # Initialize linear layer with least squares solution
    X_ = np.hstack([X, np.ones((X.shape[0], 1))])
    Theta = np.linalg.solve(X_.T.dot(X_), X_.T.dot(Y))

    self.lin = nn.Linear(X.shape[1], Y.shape[1])
    W, b = self.lin.parameters()
    # The last row of Theta belongs to the appended column of ones (the bias).
    W.data = torch.Tensor(Theta[:-1, :].T)
    b.data = torch.Tensor(Theta[-1, :])

    # Set up non-linear network of
    # Linear -> BatchNorm -> ReLU -> Dropout layers
    # A plain loop is used instead of reduce(operator.add, ...) because
    # reduce raises TypeError on an empty list of hidden layers. list() lets
    # callers pass a tuple as well.
    layer_sizes = [X.shape[1]] + list(hidden_layer_sizes)
    layers = []
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        layers += [
            nn.Linear(n_in, n_out),
            nn.BatchNorm1d(n_out),
            nn.ReLU(),
            nn.Dropout(p=0.2),
        ]
    layers.append(nn.Linear(layer_sizes[-1], Y.shape[1]))
    self.net = nn.Sequential(*layers)

    # Created on the GPU unconditionally, so a CUDA device is required.
    self.sig = Parameter(torch.ones(1, Y.shape[1]).cuda())
Example #4
Source File: nets.py From e2e-model-learning with Apache License 2.0 | 6 votes |
# Train a ``model_classes.Net`` on the training split and return it together
# with ``weights2``, the result of ``task_loss_no_mean`` on the scheduled
# predictions.
#
# The last column of X_train / X_test is dropped before use. X_test_ and
# Y_test_ are moved to the GPU but are not read again in this function.
# Tensors and the model are moved with ``.cuda()``, so a CUDA device is
# required.
#
# NOTE(review): this copy of the function is collapsed onto one line, so the
# extent of the ``for j in range(100)`` body cannot be read off the layout --
# in particular whether the "Rebalance weights" steps run once after the loop
# or on every iteration. Restore the indentation from the upstream file
# before relying on this code.
def run_weighted_rmse_net_helper(X_train, Y_train, X_test, Y_test, params, weights, i): X_train_ = Variable(torch.Tensor(X_train[:,:-1])).cuda() Y_train_ = Variable(torch.Tensor(Y_train)).cuda() X_test_ = Variable(torch.Tensor(X_test[:,:-1])).cuda() Y_test_ = Variable(torch.Tensor(Y_test)).cuda() model = model_classes.Net(X_train[:,:-1], Y_train, [200, 200]) model.cuda() opt = optim.Adam(model.parameters(), lr=1e-3) solver = model_classes.SolveScheduling(params) for j in range(100): model.train() batch_train_weightrmse(100, i*100 + j, X_train_.data, Y_train_.data, model, opt, weights.data) # Rebalance weights model.eval() mu_pred_train, sig_pred_train = model(X_train_) Y_sched_train = solver(mu_pred_train.double(), sig_pred_train.double()) weights2 = task_loss_no_mean( Y_sched_train.float(), Y_train_, params).cuda() model.set_sig(X_train_, Y_train_) return model, weights2
Example #5
Source File: task_net.py From e2e-model-learning with Apache License 2.0 | 6 votes |
def __init__(self, params, eps=1e-2):
    """Precompute the constant tensors used by this module, all on the GPU.

    Reads ``params['d']`` (its length ``k`` fixes every size below) and the
    scalar entries ``c_quad``/``b_quad``/``h_quad`` and
    ``c_lin``/``b_lin``/``h_lin``. ``eps`` scales the identity stored in
    ``self.eps_eye``.
    """
    super(SolveNewsvendor, self).__init__()

    k = len(params['d'])
    n_vars = 1 + 2 * k

    # Diagonal matrix whose entries are laid out as [c, b * k, h * k].
    quad_coeffs = [params['c_quad']] + [params['b_quad']] * k + [params['h_quad']] * k
    self.Q = Variable(torch.diag(torch.Tensor(quad_coeffs)).cuda())

    # Vector with the same [c, b * k, h * k] layout.
    lin_coeffs = [params['c_lin']] + [params['b_lin']] * k + [params['h_lin']] * k
    self.p = Variable(torch.Tensor(lin_coeffs).cuda())

    # G stacks two k-row blocks on top of a negated identity.
    upper = torch.cat([-torch.ones(k, 1), -torch.eye(k), torch.zeros(k, k)], 1)
    lower = torch.cat([torch.ones(k, 1), torch.zeros(k, k), -torch.eye(k)], 1)
    self.G = Variable(torch.cat([upper, lower, -torch.eye(n_vars)], 0).cuda())

    h_values = np.concatenate([-params['d'], params['d'], np.zeros(n_vars)])
    self.h = Variable(torch.Tensor(h_values).cuda())

    self.one = Variable(torch.Tensor([1])).cuda()
    self.eps_eye = eps * Variable(torch.eye(n_vars).cuda()).unsqueeze(0)
Example #6
Source File: mle_net.py From e2e-model-learning with Apache License 2.0 | 6 votes |
def __init__(self, params, eps=1e-2):
    """Precompute the constant tensors used by this module, all on the GPU.

    Reads ``params['d']`` (its length ``k`` fixes every size below) and the
    scalar entries ``c_quad``/``b_quad``/``h_quad`` and
    ``c_lin``/``b_lin``/``h_lin``. ``eps`` scales the identity stored in
    ``self.eps_eye``.
    """
    super(SolveNewsvendor, self).__init__()

    k = len(params['d'])
    n_vars = 1 + 2 * k

    # Diagonal matrix whose entries are laid out as [c, b * k, h * k].
    quad_coeffs = [params['c_quad']] + [params['b_quad']] * k + [params['h_quad']] * k
    self.Q = Variable(torch.diag(torch.Tensor(quad_coeffs)).cuda())

    # Vector with the same [c, b * k, h * k] layout.
    lin_coeffs = [params['c_lin']] + [params['b_lin']] * k + [params['h_lin']] * k
    self.p = Variable(torch.Tensor(lin_coeffs).cuda())

    # G stacks two k-row blocks on top of a negated identity.
    upper = torch.cat([-torch.ones(k, 1), -torch.eye(k), torch.zeros(k, k)], 1)
    lower = torch.cat([torch.ones(k, 1), torch.zeros(k, k), -torch.eye(k)], 1)
    self.G = Variable(torch.cat([upper, lower, -torch.eye(n_vars)], 0).cuda())

    h_values = np.concatenate([-params['d'], params['d'], np.zeros(n_vars)])
    self.h = Variable(torch.Tensor(h_values).cuda())

    self.one = Variable(torch.Tensor([1])).cuda()
    self.eps_eye = eps * Variable(torch.eye(n_vars).cuda()).unsqueeze(0)
Example #7
Source File: mle_net.py From e2e-model-learning with Apache License 2.0 | 6 votes |
def forward(self, y):
    """Run ``QPFunction`` on terms scaled by *y* and return its first column.

    *y* must be 2-D (its size is unpacked into exactly two values). The
    stored ``Q`` and ``p`` are scaled per batch row by ``[1, y_i, y_i]``
    (with a small offset added inside the matrix scaling), while ``G`` and
    ``h`` are broadcast unchanged.

    Returns:
        ``out[:, :1]`` of the solver output, cast to float.
    """
    nBatch, _ = y.size()
    tiny = 1e-8

    # One diagonal scaling matrix per batch row:
    # diag([1, y_i + tiny, y_i + tiny]).
    blocks = []
    for i in range(nBatch):
        shifted = y[i] + tiny
        diagonal = torch.cat([self.one, shifted, shifted])
        blocks.append(torch.diag(diagonal).unsqueeze(0))
    Q_scale = torch.cat(blocks, 0)
    Q = self.Q.unsqueeze(0).expand_as(Q_scale).mul(Q_scale)

    ones_column = Variable(torch.ones(nBatch, 1).cuda())
    p_scale = torch.cat([ones_column, y, y], 1)
    p = self.p.unsqueeze(0).expand_as(p_scale).mul(p_scale)

    G = self.G.unsqueeze(0).expand(nBatch, self.G.size(0), self.G.size(1))
    h = self.h.unsqueeze(0).expand(nBatch, self.h.size(0))

    # An empty tensor is passed for both of the last two solver arguments.
    empty = Variable(torch.Tensor().cuda()).double()

    solver = QPFunction(verbose=False)
    solution = solver(Q.double(), p.double(), G.double(), h.double(), empty, empty).float()
    return solution[:, :1]
Example #8
Source File: structured_attention.py From ITDD with MIT License | 6 votes |
def forward(self, input):
    """Transform a batch of square score matrices, one matrix at a time.

    For each ``input[b]`` a Laplacian-style matrix is built from
    ``exp(input[b]) + self.eps``, inverted, and combined with the
    exponentiated scores. Presumably these are the edge and root marginals
    of a matrix-tree construction -- TODO confirm against the model
    description.

    Args:
        input: tensor indexed as ``(batch, n, n)``. It may live on any
            device; the diagonal mask is created on the same device.

    Returns:
        A tensor with the same shape as ``input``.
    """
    laplacian = input.exp() + self.eps
    output = input.clone()

    # Diagonal selector. It is the same for every batch item, so it is built
    # once, and on the input's own device rather than a hard-wired .cuda(),
    # so the module also works for CPU tensors.
    diag_mask = torch.eye(input.size(1), device=input.device).ne(0)

    for b in range(input.size(0)):
        scores = input[b].exp()

        lap = laplacian[b].masked_fill(diag_mask, 0)
        lap = -lap + torch.diag(lap.sum(0))
        # Row 0 is overwritten with the exponentiated diagonal (root) scores.
        lap[0] = input[b].diag().exp()
        inv_laplacian = lap.inverse()

        factor = inv_laplacian.diag().unsqueeze(1) \
            .expand_as(input[b]).transpose(0, 1)
        # mul() allocates a new tensor, so sharing `scores` is safe even
        # though term1 / term2 are modified in place below.
        term1 = scores.mul(factor).clone()
        term2 = scores.mul(inv_laplacian.transpose(0, 1)).clone()
        term1[:, 0] = 0
        term2[0] = 0
        output[b] = term1 - term2

        roots_output = input[b].diag().exp().mul(
            inv_laplacian.transpose(0, 1)[0])
        output[b] = output[b] + torch.diag(roots_output)
    return output
Example #9
Source File: perf_stats.py From ClassyVision with MIT License | 6 votes |
def _process_cuda_events(self):
    """
    Drain finished timers from the front of the pending queue.

    Timers are handled strictly in queue order. For the timer at the head,
    the elapsed time of each of its Cuda event intervals is summed; the
    total is pushed into ``self._cuda_stats[timer.name]`` and the timer is
    removed. Processing stops at the first timer that has an event which is
    not ready yet; that timer and everything behind it stay queued.
    """
    pending = self._cuda_pending_timers
    while len(pending) > 0:
        head = pending[0]
        total_seconds = 0.0
        for begin, finish in head._cuda_event_intervals:
            if not (begin.query() and finish.query()):
                # An event of this timer has not completed yet; try again on
                # a later call.
                return
            # elapsed_time() is in ms; convert to seconds for consistency
            # with "host" timers.
            total_seconds += begin.elapsed_time(finish) / 1000.0

        # Every interval of the head timer is accounted for: record the
        # total and dequeue it.
        self._cuda_stats[head.name].update(total_seconds)
        pending.popleft()
Example #10
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #11
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #12
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #13
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #14
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #15
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #16
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 6 votes |
def recommend_auto_balance(message: str) -> str:
    """Append the :mod:`torchgpipe.balance` recommendation to *message*.

    The result is *message*, a single space, and a fixed advisory that points
    at ``balance_by_time`` from ``torchgpipe.balance`` with a short usage
    sketch.
    """
    # NOTE(review): the usage sketch reads as several statements run together
    # on one line; the text is reproduced exactly as found in this copy --
    # confirm whether line breaks were intended.
    advisory = (
        "If your model is still under development, its optimal balance would change "
        "frequently. In this case, we highly recommend 'torchgpipe.balance' for naive "
        "automatic balancing: "
        "from torchgpipe import GPipe "
        "from torchgpipe.balance import balance_by_time "
        "partitions = torch.cuda.device_count() "
        "sample = torch.empty(...) "
        "balance = balance_by_time(partitions, model, sample) "
        "model = GPipe(model, balance, ...) "
    )
    return f"{message} {advisory}"
Example #17
Source File: Beam.py From SEASS with MIT License | 6 votes |
def __init__(self, size, cuda=False):
    """Set up an empty beam of width *size*.

    Args:
        size: number of entries kept on the beam.
        cuda: when truthy, tensors are created through ``torch.cuda``
            instead of ``torch``.
    """
    self.size = size
    self.done = False

    # Namespace that provides the tensor constructors used below.
    if cuda:
        self.tt = torch.cuda
    else:
        self.tt = torch

    # The score for each translation on the beam.
    self.scores = self.tt.FloatTensor(size).zero_()
    self.all_scores = []
    self.all_length = []

    # The backpointers at each time-step.
    self.prevKs = []

    # The outputs at each time-step: the first step is all PAD except for
    # slot 0, which holds BOS.
    first_step = self.tt.LongTensor(size).fill_(s2s.Constants.PAD)
    self.nextYs = [first_step]
    self.nextYs[0][0] = s2s.Constants.BOS

    # The attentions (matrix) for each time.
    self.attn = []

# Get the outputs for the current timestep.
Example #18
Source File: StructuredAttention.py From video-caption-openNMT.pytorch with MIT License | 6 votes |
def forward(self, input):
    """Transform a batch of square score matrices, one matrix at a time.

    For each ``input[b]`` a Laplacian-style matrix is built from
    ``exp(input[b]) + self.eps``, inverted, and combined with the
    exponentiated scores. Presumably these are the edge and root marginals
    of a matrix-tree construction -- TODO confirm against the model
    description.

    Args:
        input: tensor/Variable indexed as ``(batch, n, n)``. It may live on
            the CPU or on any GPU; the diagonal mask follows it.

    Returns:
        A tensor with the same shape as ``input``.
    """
    laplacian = input.exp() + self.eps
    output = input.clone()

    # Diagonal selector. It is the same for every batch item, so it is built
    # once. It is moved to the GPU only when the input is there (and to the
    # same GPU), instead of an unconditional .cuda() that breaks CPU inputs.
    diag_mask = torch.eye(input.size(1))
    if input.is_cuda:
        diag_mask = diag_mask.cuda(input.get_device())
    diag_mask = Variable(diag_mask.ne(0))

    for b in range(input.size(0)):
        scores = input[b].exp()

        lap = laplacian[b].masked_fill(diag_mask, 0)
        lap = -lap + torch.diag(lap.sum(0))
        # Row 0 is overwritten with the exponentiated diagonal (root) scores.
        lap[0] = input[b].diag().exp()
        inv_laplacian = lap.inverse()

        factor = inv_laplacian.diag().unsqueeze(1) \
            .expand_as(input[b]).transpose(0, 1)
        # mul() allocates a new tensor, so sharing `scores` is safe even
        # though term1 / term2 are modified in place below.
        term1 = scores.mul(factor).clone()
        term2 = scores.mul(inv_laplacian.transpose(0, 1)).clone()
        term1[:, 0] = 0
        term2[0] = 0
        output[b] = term1 - term2

        roots_output = input[b].diag().exp().mul(
            inv_laplacian.transpose(0, 1)[0])
        output[b] = output[b] + torch.diag(roots_output)
    return output
Example #19
Source File: Translator.py From SEASS with MIT License | 5 votes |
def buildData(self, srcBatch, goldBatch):
    """Convert raw batches to index form and wrap them in an ``s2s.Dataset``.

    Args:
        srcBatch: iterable of source items, each passed to
            ``self.src_dict.convertToIdx`` with the UNK word.
        goldBatch: iterable of target items, or a falsy value when there are
            no targets. Targets are converted with UNK, BOS and EOS words.

    Returns:
        ``s2s.Dataset`` built with ``volatile=True`` from the converted
        data; the target data is ``None`` when *goldBatch* is falsy.
    """
    srcData = []
    for sentence in srcBatch:
        srcData.append(
            self.src_dict.convertToIdx(sentence, s2s.Constants.UNK_WORD))

    if goldBatch:
        tgtData = []
        for sentence in goldBatch:
            tgtData.append(
                self.tgt_dict.convertToIdx(sentence,
                                           s2s.Constants.UNK_WORD,
                                           s2s.Constants.BOS_WORD,
                                           s2s.Constants.EOS_WORD))
    else:
        tgtData = None

    return s2s.Dataset(srcData, tgtData, self.opt.batch_size,
                       self.opt.cuda, volatile=True)
Example #20
Source File: data_parallel.py From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def async_copy_to(obj, dev, main_stream=None):
    """Recursively copy every tensor inside *obj* to device *dev* without blocking.

    Args:
        obj: a tensor, a mapping, a sequence, or any other object.
            Containers are walked recursively.
        dev: device handed to ``Tensor.cuda``.
        main_stream: optional stream; when given, ``record_stream`` is called
            with it on each copied tensor's data.

    Returns:
        The copied tensor for a tensor; a ``dict`` for a mapping; a ``list``
        for a sequence (so tuples come back as lists); anything else --
        including ``str`` and ``bytes`` -- unchanged.
    """
    # The container ABCs live in collections.abc; the aliases that used to be
    # reachable as collections.Mapping / collections.Sequence were removed in
    # Python 3.10.
    from collections.abc import Mapping, Sequence

    if torch.is_tensor(obj):
        v = obj.cuda(dev, non_blocking=True)
        if main_stream is not None:
            v.data.record_stream(main_stream)
        return v
    if isinstance(obj, (str, bytes)):
        # A str is a Sequence whose items are again str, so recursing into it
        # would never terminate; bytes would be exploded into a list of ints.
        return obj
    if isinstance(obj, Mapping):
        return {k: async_copy_to(o, dev, main_stream) for k, o in obj.items()}
    if isinstance(obj, Sequence):
        return [async_copy_to(o, dev, main_stream) for o in obj]
    return obj
Example #21
Source File: train.py From fastNLP with Apache License 2.0 | 5 votes |
def evaluate(args):
    """Load a saved model and print its ``Tester`` result on the ``'test'`` split.

    The device is ``'cuda'`` when ``torch.cuda.is_available()`` and
    ``'cpu'`` otherwise.
    """
    data = get_data(args)
    test_split = data['test']
    model = load_model_from_path(args)

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    tester_kwargs = dict(
        data=test_split,
        model=model,
        batch_size=args.batch_size,
        num_workers=2,
        device=device,
        metrics=SpanFPreRecMetric(
            tag_vocab=data['tag_vocab'],
            encoding_type=data['encoding_type'],
            ignore_labels=data['ignore_labels'],
        ),
    )
    tester = Tester(**tester_kwargs)
    print(tester.test())
Example #22
Source File: train.py From fastNLP with Apache License 2.0 | 5 votes |
def train(args):
    """Build model, optimizer and ``Trainer`` from *args*, then print the training result.

    The ``'train'`` and ``'dev'`` splits come from ``get_data(args)``. The
    device is ``'cuda'`` when ``torch.cuda.is_available()`` and ``'cpu'``
    otherwise.
    """
    data = get_data(args)
    train_split = data['train']
    dev_split = data['dev']
    model = get_model(args)
    optimizer = get_optim(args)

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    callbacks = []

    trainer_kwargs = dict(
        train_data=train_split,
        model=model,
        optimizer=optimizer,
        loss=None,
        batch_size=args.batch_size,
        n_epochs=args.epochs,
        num_workers=4,
        metrics=SpanFPreRecMetric(
            tag_vocab=data['tag_vocab'],
            encoding_type=data['encoding_type'],
            ignore_labels=data['ignore_labels'],
        ),
        metric_key='f1',
        dev_data=dev_split,
        save_path=args.save_path,
        device=device,
        callbacks=callbacks,
        check_code_level=-1,
    )
    trainer = Trainer(**trainer_kwargs)
    print(trainer.train())
Example #23
Source File: data_parallel.py From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _async_copy(inputs, device_ids):
    """Copy ``inputs[j]`` to ``device_ids[j]`` for every j.

    ``inputs`` must be exactly a tuple or a list, with one entry per device;
    both conditions are enforced with ``assert``. Each copy is made by
    ``async_copy_to`` inside a ``cuda.device`` context for its target.

    Returns:
        The copies, in input order, as a tuple.
    """
    expected = len(device_ids)
    assert type(inputs) in (tuple, list)
    assert len(inputs) == expected

    copies = []
    for item, target in zip(inputs, device_ids):
        with cuda.device(target):
            copies.append(async_copy_to(item, target))
    return tuple(copies)
Example #24
Source File: trainer.py From fairseq with MIT License | 5 votes |
def _build_optimizer(self):
    """Create ``self._optimizer`` and ``self._lr_scheduler`` from ``self.args``.

    Only parameters of the model and the criterion with a truthy
    ``requires_grad`` are optimised. With ``fp16``/``bf16`` set, one of the
    FP16 optimizer wrappers is used; otherwise ``optim.build_optimizer``.
    The result is wrapped in ``optim.FairseqBMUF`` when ``use_bmuf`` is set.
    An informational message about ``--fp16`` is logged depending on the
    compute capability of device 0.
    """
    trainable = [
        p
        for p in chain(self.model.parameters(), self.criterion.parameters())
        if p.requires_grad
    ]

    if not (self.args.fp16 or self.args.bf16):
        if self.cuda and torch.cuda.get_device_capability(0)[0] >= 7:
            logger.info("NOTE: your device may support faster training with --fp16")
        self._optimizer = optim.build_optimizer(self.args, trainable)
    else:
        if self.cuda and torch.cuda.get_device_capability(0)[0] < 7:
            logger.info(
                "NOTE: your device does NOT support faster training with --fp16, "
                "please switch to FP32 which is likely to be faster"
            )
        if self.args.memory_efficient_fp16 or self.args.memory_efficient_bf16:
            wrapper = optim.MemoryEfficientFP16Optimizer
        else:
            wrapper = optim.FP16Optimizer
        self._optimizer = wrapper.build_optimizer(self.args, trainable)

    if self.args.use_bmuf:
        self._optimizer = optim.FairseqBMUF(self.args, self._optimizer)

    # We should initialize the learning rate scheduler immediately after
    # building the optimizer, so that the initial learning rate is set.
    # NOTE(review): this reads self.optimizer (no underscore) -- presumably a
    # property exposing self._optimizer; confirm.
    self._lr_scheduler = lr_scheduler.build_lr_scheduler(self.args, self.optimizer)
    self._lr_scheduler.step_update(0)
Example #25
Source File: data_parallel.py From EMANet with GNU General Public License v3.0 | 5 votes |
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU

    Streams are created on first use and cached per device index in the
    module-level ``_streams`` list. ``-1`` yields ``None`` (presumably the
    index used for the CPU -- confirm with callers).
    """
    global _streams
    if device == -1:
        return None

    if _streams is None:
        # First call: one empty slot per visible device.
        _streams = [None] * cuda.device_count()

    cached = _streams[device]
    if cached is None:
        cached = cuda.Stream(device)
        _streams[device] = cached
    return cached
Example #26
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 5 votes |
def __iter__(self) -> Iterable[nn.Module]:
    """Yield every child of every partition, in partition order."""
    for part in self.partitions:
        for child in part:
            yield child

# GPipe should manage the device of each partition.
# Deny cuda(), cpu(), and to() with device, by TypeError.
Example #27
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 5 votes |
def cuda(self, device: Optional[Device] = None) -> 'GPipe':
    """Refuse to move the module: always raises ``MOVING_DENIED``.

    The ``device`` argument is accepted but never used, and nothing is
    returned. ``MOVING_DENIED`` is defined elsewhere in the module;
    presumably it is a ``TypeError`` -- confirm.
    """
    raise MOVING_DENIED
Example #28
Source File: data_parallel.py From EMANet with GNU General Public License v3.0 | 5 votes |
# Copy inputs[j] to device_ids[j] with async_copy_to, using the stream that
# _get_stream returns for that device, and return the copies as a list.
# `inputs` must be exactly a tuple or a list with one entry per device; both
# conditions are enforced with assert.
#
# NOTE(review): this copy of the function is collapsed onto one line, so the
# nesting of the `with` blocks -- in particular which context
# `main_stream.wait_stream(stream)` runs in -- cannot be read off the layout.
# Restore the indentation from the upstream file before relying on this code.
def _async_copy_stream(inputs, device_ids): nr_devs = len(device_ids) assert type(inputs) in (tuple, list) assert len(inputs) == nr_devs outputs = [] streams = [_get_stream(d) for d in device_ids] for i, dev, stream in zip(inputs, device_ids, streams): with cuda.device(dev): main_stream = cuda.current_stream() with cuda.stream(stream): outputs.append(async_copy_to(i, dev, main_stream=main_stream)) main_stream.wait_stream(stream) return outputs
Example #29
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 5 votes |
def cuda(self, device: Optional[Device] = None) -> 'GPipe':
    """Refuse to move the module: always raises ``MOVING_DENIED``.

    The ``device`` argument is accepted but never used, and nothing is
    returned. ``MOVING_DENIED`` is defined elsewhere in the module;
    presumably it is a ``TypeError`` -- confirm.
    """
    raise MOVING_DENIED
Example #30
Source File: gpipe.py From torchgpipe with Apache License 2.0 | 5 votes |
def __iter__(self) -> Iterable[nn.Module]:
    """Yield every child of every partition, in partition order."""
    for part in self.partitions:
        for child in part:
            yield child

# GPipe should manage the device of each partition.
# Deny cuda(), cpu(), and to() with device, by TypeError.