Python theano.tensor.shape_padright() Examples
The following are 17 code examples of theano.tensor.shape_padright(). You can go to the original project or source file by following the link above each example, or check out all available functions/classes of the module theano.tensor.
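shape_padright appends broadcastable dimensions of size 1 to the right of a tensor's shape, which is what makes the broadcasting tricks in the examples below work. A minimal sketch of its behavior (assuming Theano is installed; the variable names are illustrative):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                    # shape (B, D)
y = T.shape_padright(x)              # shape (B, D, 1): one broadcastable axis appended
z = T.shape_padright(x, n_ones=2)    # shape (B, D, 1, 1): two axes appended

f = theano.function([x], [y.shape, z.shape])
print(f(np.zeros((3, 4), dtype=theano.config.floatX)))
# -> [array([3, 4, 1]), array([3, 4, 1, 1])]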
Example #1
Source File: MoGNADE.py From NADE with BSD 3-Clause "New" or "Revised" License
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB)
    V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
Example #2
Source File: attention.py From attention-lvcsr with MIT License
def compute_weighted_averages(self, weights, attended):
    """Compute weighted averages of the attended sequence vectors.

    Parameters
    ----------
    weights : :class:`~theano.Variable`
        The weights. The shape must be equal to the attended shape
        without the last dimension.
    attended : :class:`~theano.Variable`
        The attended. The index in the sequence must be the first
        dimension.

    Returns
    -------
    weighted_averages : :class:`~theano.Variable`
        The weighted averages of the attended elements. The shape is
        equal to the attended shape with the first dimension dropped.

    """
    return (tensor.shape_padright(weights) * attended).sum(axis=0)
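The pattern above uses shape_padright to align a (sequence, batch) weight matrix with a (sequence, batch, features) tensor. A small self-contained sketch of the same computation (shapes and variable names are illustrative, not taken from attention-lvcsr):

import numpy as np
import theano
import theano.tensor as T

weights = T.matrix('weights')     # (seq_len, batch)
attended = T.tensor3('attended')  # (seq_len, batch, features)
# Pad weights to (seq_len, batch, 1) so they broadcast over features,
# then sum out the sequence axis.
avg = (T.shape_padright(weights) * attended).sum(axis=0)  # (batch, features)

f = theano.function([weights, attended], avg)
w = np.ones((5, 2), dtype=theano.config.floatX) / 5.0
a = np.random.randn(5, 2, 3).astype(theano.config.floatX)
print(np.allclose(f(w, a), a.mean(axis=0)))  # True: uniform weights give the mean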
Example #3
Source File: MoLaplaceNADE.py From NADE with BSD 3-Clause "New" or "Revised" License
def sym_logdensity(self, x):
    """ x is a matrix of column datapoints (VxB)
    V = n_visible, B = batch size """
    def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
        a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
        h = self.nonlinearity(a * activations_factor)  # BxH
        Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
        Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
        Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
        p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
        return (p, a, x)

    # First element is different (it is predicted from the bias only)
    a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
    p0 = T.zeros_like(x[0])
    x0 = T.ones_like(x[0])
    ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
    return (ps[-1], updates)
Example #4
Source File: noise.py From Mozi with MIT License
def _train_fprop(self, state_below):
    rd = theano_rand.binomial(size=(state_below.shape[0],), n=1,
                              p=(1 - self.ratio), dtype=floatX)
    return state_below * T.shape_padright(rd)
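Example #4 drops entire rows of state_below: the binomial mask has one entry per row, and shape_padright lets it broadcast across the feature axis. A hedged sketch of the same idea with an explicit RandomStreams (Mozi's theano_rand and floatX helpers are replaced here by standard Theano equivalents; the drop ratio is an assumed value):

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

rng = RandomStreams(seed=42)
state_below = T.matrix('state_below')          # (batch, features)
ratio = 0.5                                    # assumed drop probability
rd = rng.binomial(size=(state_below.shape[0],), n=1, p=1 - ratio,
                  dtype=theano.config.floatX)  # one keep/drop flag per row
out = state_below * T.shape_padright(rd)       # (batch, 1) mask broadcasts over features

f = theano.function([state_below], out)
print(f(np.ones((4, 3), dtype=theano.config.floatX)))  # whole rows zeroed at random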
Example #5
Source File: layers.py From kaggle-heart with MIT License
def get_output_for(self, inputs, **kwargs):
    # take the minimal working slice size, and use that one.
    return inputs[0] * T.shape_padright(inputs[1], n_ones=inputs[0].ndim - inputs[1].ndim)
Example #6
Source File: core.py From starry with MIT License
def compute_illumination(self, xyz, xs, ys, zs, Rs, sigr, on94_exact):
    """Compute the illumination profile when rendering maps."""
    if self.source_npts == 1:
        return self.compute_illumination_point_source(
            xyz, xs, ys, zs, sigr, on94_exact
        )
    else:
        # The effective size of the star as seen by the planet
        # is smaller. Only include points
        # that fall on this smaller disk.
        rs = tt.sqrt(xs ** 2 + ys ** 2 + zs ** 2)
        Reff = Rs * tt.sqrt(1 - ((Rs - 1) / rs) ** 2)
        dx = tt.shape_padright(Reff) * self.source_dx
        dy = tt.shape_padright(Reff) * self.source_dy

        # Note that the star is *closer* to the planet, hence the - sign
        dz = -tt.sqrt(Rs ** 2 - dx ** 2 - dy ** 2)

        # Compute the illumination for each point on the source disk
        I = self.compute_illumination_point_source(
            xyz,
            tt.reshape(tt.shape_padright(xs) + dx, (-1,)),
            tt.reshape(tt.shape_padright(ys) + dy, (-1,)),
            tt.reshape(tt.shape_padright(zs) + dz, (-1,)),
            sigr,
            on94_exact,
        )
        I = tt.reshape(I, (-1, tt.shape(xs)[0], self.source_npts))

        # Average over each profile
        return tt.sum(I, axis=2) / self.source_npts
Example #7
Source File: OrderlessMoGNADE.py From NADE with BSD 3-Clause "New" or "Revised" License
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
    """ x is a matrix of column datapoints (DxB)
    D = n_visible, B = batch size """
    logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

    # nnz = output_mask.sum(0)
    # sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

    # wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0)) #BxDxC
    # lp_current = log_sum_exp(wPhi, axis = 2) * output_mask #BxD
    # lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum() #1

    loglikelihood = logdensity.mean(dtype=floatX)
    loss = -loglikelihood

    dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
    gb_alpha = dp_dz_alpha.sum(0)  # DxC
    gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.sum(0)  # DxC
    gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
    gb_sigma = dp_dz_sigma.sum(0)  # DxC
    gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

    if self.n_layers > 1:
        gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu, "V_sigma": gV_sigma, "b_sigma": gb_sigma, "Ws": gWs, "bs": gbs, "W1": gW1, "b1": gb1, "Wflags": gWflags}
    else:
        gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
        gradients = {"V_alpha": gV_alpha, "b_alpha": gb_alpha, "V_mu": gV_mu, "b_mu": gb_mu, "V_sigma": gV_sigma, "b_sigma": gb_sigma, "W1": gW1, "b1": gb1, "Wflags": gWflags}

    # Gradients
    return (loss, gradients)
Example #8
Source File: OrderlessMoGNADE.py From NADE with BSD 3-Clause "New" or "Revised" License
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert(non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
    x = x.T  # BxD
    mask = mask.T  # BxD
    output_mask = constantX(1) - mask  # BxD
    D = constantX(self.n_visible)
    d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
    masked_input = x * mask  # BxD
    h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
    for l in xrange(self.n_layers - 1):
        h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
    z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
    z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
    z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
    temp = T.exp(z_alpha)  # + 1e-6
    # temp += T.shape_padright(temp.sum(2)/1e-3)
    Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
    Mu = z_mu  # BxDxC
    Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

    # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Mu = Mu * T.shape_padright(output_mask)
    # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
    # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

    Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
    logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
    return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
Example #9
Source File: theano_helpers.py From NADE with BSD 3-Clause "New" or "Revised" License
def log_sum_exp(x, axis=1):
    max_x = T.max(x, axis)
    return max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis))
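The helper above is the standard numerically stable log-sum-exp: subtracting the row maximum before exponentiating avoids overflow, and shape_padright re-aligns the reduced maximum with the original matrix. A quick numerical check against the naive formula (a sketch assuming only NumPy and Theano):

import numpy as np
import theano
import theano.tensor as T

def log_sum_exp(x, axis=1):
    max_x = T.max(x, axis)
    return max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis))

x = T.matrix('x')
f = theano.function([x], log_sum_exp(x))
a = np.random.randn(2, 5).astype(theano.config.floatX)
print(np.allclose(f(a), np.log(np.exp(a).sum(axis=1))))  # True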
Example #10
Source File: layers.py From kaggle-galaxies with BSD 3-Clause "New" or "Revised" License
def error(self, *args, **kwargs):
    input = self.input_layer.output(*args, **kwargs)
    # never actually dropout anything on the output layer, just pass it along!

    if self.error_measure == 'mse':
        error = T.mean((input - self.target_var) ** 2)
    elif self.error_measure == 'ce':  # cross entropy
        error = T.mean(T.nnet.binary_crossentropy(input, self.target_var))
    elif self.error_measure == 'nca':
        epsilon = 1e-8
        # dist_ij = - T.dot(input, input.T)
        # dist_ij = input
        dist_ij = T.sum((input.dimshuffle(0, 'x', 1) - input.dimshuffle('x', 0, 1)) ** 2, axis=2)
        p_ij_unnormalised = T.exp(-dist_ij) + epsilon
        p_ij_unnormalised = p_ij_unnormalised * (1 - T.eye(self.mb_size))  # set the diagonal to 0
        p_ij = p_ij_unnormalised / T.sum(p_ij_unnormalised, axis=1)
        return -T.mean(p_ij * self.target_var)
        # p_ij = p_ij_unnormalised / T.sum(p_ij_unnormalised, axis=1)
        # return np.mean(p_ij * self.target_var)
    elif self.error_measure == 'maha':
        # e = T.shape_padright(input - self.target_var)
        # e = (input - self.target_var).dimshuffle((0, 'x', 1))
        # error = T.sum(T.sum(self.target_cov_var * e, 2) ** 2) / self.mb_size
        e = (input - self.target_var)
        eTe = e.dimshuffle((0, 'x', 1)) * e.dimshuffle((0, 1, 'x'))
        error = T.sum(self.target_cov_var * eTe) / self.mb_size
    else:
        1 / 0  # unknown error measure: fail loudly

    return error
Example #11
Source File: recurrent.py From CAPTCHA-breaking with MIT License
def get_padded_shuffled_mask(self, train, X, pad=0):
    mask = self.get_input_mask(train)
    if mask is None:
        mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

    # mask is (nb_samples, time)
    mask = T.shape_padright(mask)    # (nb_samples, time, 1)
    mask = T.addbroadcast(mask, -1)  # (nb_samples, time, 1), last axis broadcastable
    mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

    if pad > 0:
        # left-pad in time with 0
        padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
        mask = T.concatenate([padding, mask], axis=0)
    return mask.astype('int8')
Example #12
Source File: strength_weighted_gru.py From gated-graph-transformer-network with MIT License
def step(self, ipt, state, state_strength, dropout_masks=None):
    """
    Perform a single step of the network

    Params:
        ipt: The current input. Should be an int tensor of shape (n_batch, self.input_width)
        state: The previous state. Should be a float tensor of shape (n_batch, self.output_width)
        state_strength: Strength of the previous state. Should be a float tensor of shape (n_batch)
        dropout_masks: Masks from get_dropout_masks

    Returns: The next output state, and the next output strength
    """
    if dropout_masks is not None:
        ipt_masks, state_masks = dropout_masks
        ipt = ipt * ipt_masks
        state = state * state_masks

    obs_state = state * T.shape_padright(state_strength)
    cat_ipt_state = T.concatenate([ipt, obs_state], 1)
    reset = do_layer(T.nnet.sigmoid, cat_ipt_state, self._reset_W, self._reset_b)
    update = do_layer(T.nnet.sigmoid, cat_ipt_state, self._update_W, self._update_b)
    update_state = update[:, :-1]
    update_strength = update[:, -1]

    cat_reset_ipt_state = T.concatenate([ipt, (reset * obs_state)], 1)
    candidate_act = do_layer(T.tanh, cat_reset_ipt_state, self._activation_W, self._activation_b)
    candidate_strength = do_layer(T.nnet.sigmoid, cat_reset_ipt_state, self._strength_W, self._strength_b).reshape(state_strength.shape)

    newstate = update_state * state + (1 - update_state) * candidate_act
    newstrength = update_strength * state_strength + (1 - update_strength) * candidate_strength

    return newstate, newstrength
Example #13
Source File: aggregate_representation.py From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
    """
    Convert the graph state to a representation vector, using sigmoid attention to scale representations

    Params:
        gstate: A GraphState giving the current state

    Returns: A representation vector of shape (n_batch, representation_width)
    """
    if dropout_masks is Ellipsis:
        dropout_masks = None
        append_masks = False
    else:
        append_masks = True

    flat_obs = T.concatenate([
        gstate.node_ids.reshape([-1, self._graph_spec.num_node_ids]),
        gstate.node_states.reshape([-1, self._graph_spec.node_state_size])], 1)
    flat_activations, dropout_masks = self._representation_stack.process(flat_obs, dropout_masks)
    activations = flat_activations.reshape([gstate.n_batch, gstate.n_nodes, self._representation_width + 1])

    activation_strengths = activations[:, :, 0]
    selector = T.shape_padright(T.nnet.sigmoid(activation_strengths) * gstate.node_strengths)
    representations = T.tanh(activations[:, :, 1:])

    result = T.tanh(T.sum(selector * representations, 1))
    if append_masks:
        return result, dropout_masks
    else:
        return result
Example #14
Source File: aggregate_representation_softmax.py From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
    """
    Convert the graph state to a representation vector, using softmax attention to scale representations

    Params:
        gstate: A GraphState giving the current state

    Returns: A representation vector of shape (n_batch, representation_width)
    """
    if dropout_masks is Ellipsis:
        dropout_masks = None
        append_masks = False
    else:
        append_masks = True

    flat_obs = T.concatenate([
        gstate.node_ids.reshape([-1, self._graph_spec.num_node_ids]),
        gstate.node_states.reshape([-1, self._graph_spec.node_state_size])], 1)
    flat_activations, dropout_masks = self._representation_stack.process(flat_obs, dropout_masks)
    activations = flat_activations.reshape([gstate.n_batch, gstate.n_nodes, self._representation_width + 1])

    activation_strengths = activations[:, :, 0]
    existence_penalty = T.log(gstate.node_strengths + EPSILON)  # TODO: consider removing epsilon here
    selector = T.shape_padright(T.nnet.softmax(activation_strengths + existence_penalty))
    representations = T.tanh(activations[:, :, 1:])

    result = T.sum(selector * representations, 1)
    if append_masks:
        return result, dropout_masks
    else:
        return result
Example #15
Source File: propagation.py From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
    """
    Process a graph state.
      1. Data is transferred from each node to each other node along both forward and backward edges.
         This data is processed with a Wx+b style update, and an optional transformation is applied
      2. Nodes sum the transferred data, weighted by the existence of the other node and the edge.
      3. Nodes perform a GRU update with this input

    Params:
        gstate: A GraphState giving the current state
    """
    if dropout_masks is Ellipsis:
        dropout_masks = None
        append_masks = False
    else:
        append_masks = True

    node_obs = T.concatenate([gstate.node_ids, gstate.node_states], 2)
    flat_node_obs = node_obs.reshape([-1, self._process_input_size])
    transformed, dropout_masks = self._transfer_stack.process(flat_node_obs, dropout_masks)
    transformed = transformed.reshape([gstate.n_batch, gstate.n_nodes, 2 * self._graph_spec.num_edge_types, self._transfer_size])
    scaled_transformed = transformed * T.shape_padright(T.shape_padright(gstate.node_strengths))
    # scaled_transformed is of shape (n_batch, n_nodes, 2*num_edge_types, transfer_size)
    # We want to multiply through by edge strengths, which are of shape
    # (n_batch, n_nodes, n_nodes, num_edge_types), both fwd and backward
    edge_strength_scale = T.concatenate([gstate.edge_strengths, gstate.edge_strengths.swapaxes(1, 2)], 3)
    # edge_strength_scale is of (n_batch, n_nodes, n_nodes, 2*num_edge_types)
    intermed = T.shape_padaxis(scaled_transformed, 2) * T.shape_padright(edge_strength_scale)
    # intermed is of shape (n_batch, n_nodes "source", n_nodes "dest", 2*num_edge_types, transfer_size)
    # now reduce along the "source" and "edge_types" dimensions to get dest activations
    # of shape (n_batch, n_nodes, transfer_size)
    reduced_result = T.sum(T.sum(intermed, 3), 1)

    # now add information from current node id
    full_input = T.concatenate([gstate.node_ids, reduced_result], 2)

    # we flatten to apply GRU
    flat_input = full_input.reshape([-1, self._graph_spec.num_node_ids + self._transfer_size])
    flat_state = gstate.node_states.reshape([-1, self._graph_spec.node_state_size])
    new_flat_state, dropout_masks = self._propagation_gru.step(flat_input, flat_state, dropout_masks)

    new_node_states = new_flat_state.reshape(gstate.node_states.shape)
    new_gstate = gstate.with_updates(node_states=new_node_states)
    if append_masks:
        return new_gstate, dropout_masks
    else:
        return new_gstate
Example #16
Source File: rws.py From reweighted-ws with GNU Affero General Public License v3.0
def log_likelihood(self, X, Y=None, n_samples=None):
    p_layers = self.p_layers
    q_layers = self.q_layers
    n_layers = len(p_layers)

    if n_samples is None:
        n_samples = self.n_samples

    batch_size = X.shape[0]

    # Get samples
    X = f_replicate_batch(X, n_samples)
    samples, log_p, log_q = self.sample_q(X, None)

    # Reshape and sum
    log_p_all = T.zeros((batch_size, n_samples))
    log_q_all = T.zeros((batch_size, n_samples))
    for l in xrange(n_layers):
        samples[l] = samples[l].reshape((batch_size, n_samples, p_layers[l].n_X))
        log_q[l] = log_q[l].reshape((batch_size, n_samples))
        log_p[l] = log_p[l].reshape((batch_size, n_samples))
        log_p_all += log_p[l]  # aggregate all layers
        log_q_all += log_q[l]  # aggregate all layers

    # Approximate log P(X)
    log_px = f_logsumexp(log_p_all - log_q_all, axis=1) - T.log(n_samples)

    # Calculate sampling weights
    log_pq = (log_p_all - log_q_all - T.log(n_samples))
    w_norm = f_logsumexp(log_pq, axis=1)
    log_w = log_pq - T.shape_padright(w_norm)
    w = T.exp(log_w)

    # Calculate KL(P|Q), Hp, Hq
    KL = [None] * n_layers
    Hp = [None] * n_layers
    Hq = [None] * n_layers
    for l in xrange(n_layers):
        KL[l] = T.sum(w * (log_p[l] - log_q[l]), axis=1)
        Hp[l] = f_logsumexp(log_w + log_p[l], axis=1)
        Hq[l] = T.sum(w * log_q[l], axis=1)

    return log_px, w, log_p_all, log_q_all, KL, Hp, Hq
Example #17
Source File: new_nodes_vote.py From gated-graph-transformer-network with MIT License
def get_candidates(self, gstate, input_vector, max_candidates, dropout_masks=None):
    """
    Get the current candidate new nodes. This is accomplished as follows:
      1. The proposer network, conditioned on the input vector, proposes multiple candidate nodes,
         along with a confidence
      2. Every existing node, conditioned on its own state and the candidate, votes on whether or not
         to accept this node
      3. A new node is created for each candidate node, with an existence strength given by
         confidence * [product of all votes], and an initial state as proposed
    This method directly returns these new nodes for comparison

    Params:
        gstate: A GraphState giving the current state
        input_vector: A tensor of the form (n_batch, input_width)
        max_candidates: Integer, limit on the number of candidates to produce

    Returns:
        new_strengths: A tensor of the form (n_batch, new_node_idx)
        new_ids: A tensor of the form (n_batch, new_node_idx, num_node_ids)
    """
    n_batch = gstate.n_batch
    n_nodes = gstate.n_nodes
    outputs_info = [self._proposer_gru.initial_state(n_batch)]
    proposer_step = lambda st, ipt, *dm: self._proposer_gru.step(ipt, st, dm if dropout_masks is not None else None)
    raw_proposal_acts, _ = theano.scan(proposer_step, n_steps=max_candidates,
                                       non_sequences=[input_vector] + (dropout_masks if dropout_masks is not None else []),
                                       outputs_info=outputs_info)

    # raw_proposal_acts is of shape (candidate, n_batch, blah)
    flat_raw_acts = raw_proposal_acts.reshape([-1, self._proposal_width])
    flat_processed_acts = self._proposer_stack.process(flat_raw_acts)
    candidate_strengths = T.nnet.sigmoid(flat_processed_acts[:, 0]).reshape([max_candidates, n_batch])
    candidate_ids = T.nnet.softmax(flat_processed_acts[:, 1:]).reshape([max_candidates, n_batch, self._graph_spec.num_node_ids])

    # Votes will be of shape (candidate, n_batch, n_nodes)
    # To generate this we want to assemble (candidate, n_batch, n_nodes, input_stuff),
    # squash to (parallel, input_stuff), do voting op, then unsquash
    candidate_id_part = T.shape_padaxis(candidate_ids, 2)
    node_id_part = T.shape_padaxis(gstate.node_ids, 0)
    node_state_part = T.shape_padaxis(gstate.node_states, 0)
    full_vote_input = broadcast_concat([node_id_part, node_state_part, candidate_id_part], 3)
    flat_vote_input = full_vote_input.reshape([-1, full_vote_input.shape[-1]])
    vote_result = self._vote_stack.process(flat_vote_input)
    final_votes_no = vote_result.reshape([max_candidates, n_batch, n_nodes])
    weighted_votes_yes = 1 - final_votes_no * T.shape_padleft(gstate.node_strengths)

    # Add in the strength vote
    all_votes = T.concatenate([T.shape_padright(candidate_strengths), weighted_votes_yes], 2)
    # Take the product -> (candidate, n_batch)
    chosen_strengths = T.prod(all_votes, 2)

    new_strengths = chosen_strengths.dimshuffle([1, 0])
    new_ids = candidate_ids.dimshuffle([1, 0, 2])
    return new_strengths, new_ids