Python theano.tensor.shape_padright() Examples

The following are 17 code examples of theano.tensor.shape_padright(). The original project and source file for each example are noted in the header above it. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
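As a quick orientation before the project examples, here is a minimal sketch of my own (not taken from any of the projects below) showing what shape_padright does: it appends one or more broadcastable length-1 dimensions on the right of a tensor.

import theano.tensor as T

x = T.matrix('x')                     # 2-D symbolic variable, shape (rows, cols)
y = T.shape_padright(x)               # 3-D, shape (rows, cols, 1); new axis is broadcastable
z = T.shape_padright(x, n_ones=2)     # 4-D, shape (rows, cols, 1, 1)

print(y.ndim, y.broadcastable)        # 3 (False, False, True)
print(z.ndim, z.broadcastable)        # 4 (False, False, True, True)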
Example #1
Source File: MoGNADE.py    From NADE with BSD 3-Clause "New" or "Revised" License
def sym_logdensity(self, x):
        """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
            return (p, a, x)
        # First element is different (it is predicted from the bias only)
        a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
        p0 = T.zeros_like(x[0])
        x0 = T.ones_like(x[0])
        ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                                sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                                outputs_info=[p0, a0, x0])
        return (ps[-1], updates) 
Example #2
Source File: attention.py    From attention-lvcsr with MIT License
def compute_weighted_averages(self, weights, attended):
        """Compute weighted averages of the attended sequence vectors.

        Parameters
        ----------
        weights : :class:`~theano.Variable`
            The weights. The shape must be equal to the attended shape
            without the last dimension.
        attended : :class:`~theano.Variable`
            The attended. The index in the sequence must be the first
            dimension.

        Returns
        -------
        weighted_averages : :class:`~theano.Variable`
            The weighted averages of the attended elements. The shape
            is equal to the attended shape with the first dimension
            dropped.

        """
        return (tensor.shape_padright(weights) * attended).sum(axis=0) 
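The docstring above describes the shapes involved; the following standalone sketch (shapes and data are illustrative, not from attention-lvcsr) shows the same broadcast: padding the weights on the right lets them multiply every feature of the attended sequence before summing over the sequence axis.

import numpy as np
import theano
import theano.tensor as T

attended = T.tensor3('attended')      # (seq_len, batch, dim)
weights = T.matrix('weights')         # (seq_len, batch)
averages = (T.shape_padright(weights) * attended).sum(axis=0)   # (batch, dim)

f = theano.function([weights, attended], averages)
w = np.full((5, 2), 0.2, dtype=theano.config.floatX)
a = np.random.randn(5, 2, 3).astype(theano.config.floatX)
print(f(w, a).shape)                  # (2, 3)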
Example #3
Source File: MoLaplaceNADE.py    From NADE with BSD 3-Clause "New" or "Revised" License
def sym_logdensity(self, x):
        """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
            return (p, a, x)
        # First element is different (it is predicted from the bias only)
        a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
        p0 = T.zeros_like(x[0])
        x0 = T.ones_like(x[0])
        ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                                sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                                outputs_info=[p0, a0, x0])
        return (ps[-1], updates) 
Example #4
Source File: noise.py    From Mozi with MIT License
def _train_fprop(self, state_below):
        rd = theano_rand.binomial(size=(state_below.shape[0],), n=1, p=(1-self.ratio), dtype=floatX)
        return state_below * T.shape_padright(rd) 
Example #5
Source File: layers.py    From kaggle-heart with MIT License
def get_output_for(self, inputs, **kwargs):
        # take the minimal working slice size, and use that one.
        return inputs[0] * T.shape_padright(inputs[1], n_ones=inputs[0].ndim-inputs[1].ndim) 
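The n_ones argument used above pads by exactly as many axes as are needed for the second input to broadcast against the first. A small sketch of that pattern (layer names and shapes are hypothetical, not from kaggle-heart):

import theano.tensor as T

data = T.tensor4('data')              # e.g. (batch, channels, height, width)
scale = T.matrix('scale')             # e.g. (batch, channels)
padded = T.shape_padright(scale, n_ones=data.ndim - scale.ndim)   # (batch, channels, 1, 1)
scaled = data * padded                # broadcasts the scale over height and width
print(padded.broadcastable)           # (False, False, True, True)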
Example #6
Source File: core.py    From starry with MIT License
def compute_illumination(self, xyz, xs, ys, zs, Rs, sigr, on94_exact):
        """Compute the illumination profile when rendering maps."""

        if self.source_npts == 1:

            return self.compute_illumination_point_source(
                xyz, xs, ys, zs, sigr, on94_exact
            )

        else:

            # The effective size of the star as seen by the planet
            # is smaller. Only include points
            # that fall on this smaller disk.
            rs = tt.sqrt(xs ** 2 + ys ** 2 + zs ** 2)
            Reff = Rs * tt.sqrt(1 - ((Rs - 1) / rs) ** 2)
            dx = tt.shape_padright(Reff) * self.source_dx
            dy = tt.shape_padright(Reff) * self.source_dy
            # Note that the star is *closer* to the planet, hence the - sign
            dz = -tt.sqrt(Rs ** 2 - dx ** 2 - dy ** 2)

            # Compute the illumination for each point on the source disk
            I = self.compute_illumination_point_source(
                xyz,
                tt.reshape(tt.shape_padright(xs) + dx, (-1,)),
                tt.reshape(tt.shape_padright(ys) + dy, (-1,)),
                tt.reshape(tt.shape_padright(zs) + dz, (-1,)),
                sigr,
                on94_exact,
            )
            I = tt.reshape(I, (-1, tt.shape(xs)[0], self.source_npts))

            # Average over each profile
            return tt.sum(I, axis=2) / self.source_npts 
Example #7
Source File: OrderlessMoGNADE.py    From NADE with BSD 3-Clause "New" or "Revised" License
def sym_masked_neg_loglikelihood_gradient(self, x, mask):
        """ x is a matrix of column datapoints (DxB) D = n_visible, Bfloat = batch size """
        logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h = self.sym_mask_logdensity_estimator_intermediate(x, mask)

#        nnz = output_mask.sum(0)
#        sparsity_multiplier = T.shape_padright(T.shape_padleft((B+1e-6)/(nnz+1e-6)))

#        wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0)) #BxDxC
#        lp_current = log_sum_exp(wPhi, axis = 2) * output_mask #BxD
#        lp_current_sum = (lp_current.sum(1) * D / (D-d)).sum() #1

        loglikelihood = logdensity.mean(dtype=floatX)
        loss = -loglikelihood

        dp_dz_alpha = T.grad(loss, z_alpha)  # BxDxC
        gb_alpha = dp_dz_alpha.sum(0)  # DxC
        gV_alpha = T.tensordot(h.T, dp_dz_alpha, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

        dp_dz_mu = T.grad(loss, z_mu)  # BxDxC
        dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
        gb_mu = dp_dz_mu.sum(0)  # DxC
        gV_mu = T.tensordot(h.T, dp_dz_mu, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

        dp_dz_sigma = T.grad(loss, z_sigma)  # BxDxC
        gb_sigma = dp_dz_sigma.sum(0)  # DxC
        gV_sigma = T.tensordot(h.T, dp_dz_sigma, [[1], [0]]).dimshuffle((1, 0, 2))  # DxHxC

        if self.n_layers > 1:
            gWs, gbs, gW1, gWflags, gb1 = T.grad(loss, [self.Ws, self.bs, self.W1, self.Wflags, self.b1])
            gradients = {"V_alpha":gV_alpha, "b_alpha":gb_alpha, "V_mu":gV_mu, "b_mu":gb_mu, "V_sigma":gV_sigma, "b_sigma":gb_sigma, "Ws":gWs, "bs":gbs, "W1":gW1, "b1":gb1, "Wflags":gWflags}
        else:
            gW1, gWflags, gb1 = T.grad(loss, [self.W1, self.Wflags, self.b1])
            gradients = {"V_alpha":gV_alpha, "b_alpha":gb_alpha, "V_mu":gV_mu, "b_mu":gb_mu, "V_sigma":gV_sigma, "b_sigma":gb_sigma, "W1":gW1, "b1":gb1, "Wflags":gWflags}
        # Gradients
        return (loss, gradients) 
Example #8
Source File: OrderlessMoGNADE.py    From NADE with BSD 3-Clause "New" or "Revised" License
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert(non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
        x = x.T  # BxD
        mask = mask.T  # BxD
        output_mask = constantX(1) - mask  # BxD
        D = constantX(self.n_visible)
        d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
        masked_input = x * mask  # BxD
        h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
        for l in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
        z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
        z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
        z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
        temp = T.exp(z_alpha)  # + 1e-6
        # temp += T.shape_padright(temp.sum(2)/1e-3)
        Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

        # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Mu = Mu * T.shape_padright(output_mask)
        # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

        Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
        logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h) 
Example #9
Source File: theano_helpers.py    From NADE with BSD 3-Clause "New" or "Revised" License
def log_sum_exp(x, axis=1):
    max_x = T.max(x, axis)
    return max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis)) 
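This helper is the standard numerically stable log-sum-exp: subtracting the per-row maximum (broadcast back with shape_padright) keeps the exponentials from overflowing. A quick numeric check of my own, using the same expression:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
max_x = T.max(x, axis=1)
lse = max_x + T.log(T.sum(T.exp(x - T.shape_padright(max_x, 1)), axis=1))

f = theano.function([x], lse)
vals = np.array([[1000.0, 1000.0]], dtype=theano.config.floatX)
print(f(vals))                        # ~1000.693; a naive log(sum(exp(x))) would overflow to inf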
Example #10
Source File: layers.py    From kaggle-galaxies with BSD 3-Clause "New" or "Revised" License
def error(self, *args, **kwargs):
        input = self.input_layer.output(*args, **kwargs)

        # never actually dropout anything on the output layer, just pass it along!

        if self.error_measure == 'mse':
            error = T.mean((input - self.target_var) ** 2)
        elif self.error_measure == 'ce': # cross entropy
            error = T.mean(T.nnet.binary_crossentropy(input, self.target_var))
        elif self.error_measure == 'nca':
            epsilon = 1e-8
            #dist_ij = - T.dot(input, input.T)
            # dist_ij = input
            dist_ij = T.sum((input.dimshuffle(0, 'x', 1) - input.dimshuffle('x', 0, 1)) ** 2, axis=2)
            p_ij_unnormalised = T.exp(-dist_ij) + epsilon
            p_ij_unnormalised = p_ij_unnormalised * (1 - T.eye(self.mb_size)) # set the diagonal to 0
            p_ij = p_ij_unnormalised / T.sum(p_ij_unnormalised, axis=1)
            return - T.mean(p_ij * self.target_var)

            # 
            # p_ij = p_ij_unnormalised / T.sum(p_ij_unnormalised, axis=1)
            # return np.mean(p_ij * self.target_var)
        elif self.error_measure == 'maha':
            # e = T.shape_padright(input - self.target_var)
            # e = (input - self.target_var).dimshuffle((0, 'x', 1))
            # error = T.sum(T.sum(self.target_cov_var * e, 2) ** 2) / self.mb_size

            e = (input - self.target_var)
            eTe = e.dimshuffle((0, 'x', 1)) * e.dimshuffle((0, 1, 'x'))
            error = T.sum(self.target_cov_var * eTe) / self.mb_size
        else:
            1 / 0  # deliberately crash on an unrecognized error_measure

        return error 
Example #11
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_padded_shuffled_mask(self, train, X, pad=0):
        mask = self.get_input_mask(train)
        if mask is None:
            mask = T.ones_like(X.sum(axis=-1))  # is there a better way to do this without a sum?

        # mask is (nb_samples, time)
        mask = T.shape_padright(mask)  # (nb_samples, time, 1)
        mask = T.addbroadcast(mask, -1)  # still (nb_samples, time, 1); mark the last axis broadcastable
        mask = mask.dimshuffle(1, 0, 2)  # (time, nb_samples, 1)

        if pad > 0:
            # left-pad in time with 0
            padding = alloc_zeros_matrix(pad, mask.shape[1], 1)
            mask = T.concatenate([padding, mask], axis=0)
        return mask.astype('int8') 
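A standalone check of the reshaping above (data and sizes are illustrative): shape_padright adds the trailing broadcastable axis and dimshuffle moves time to the front. The addbroadcast call in the original is harmless here because the axis added by shape_padright is already broadcastable.

import numpy as np
import theano
import theano.tensor as T

mask = T.matrix('mask')               # (nb_samples, time)
m = T.shape_padright(mask)            # (nb_samples, time, 1), last axis broadcastable
m = m.dimshuffle(1, 0, 2)             # (time, nb_samples, 1)

f = theano.function([mask], m)
print(f(np.ones((2, 4), dtype=theano.config.floatX)).shape)   # (4, 2, 1)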
Example #12
Source File: strength_weighted_gru.py    From gated-graph-transformer-network with MIT License
def step(self, ipt, state, state_strength, dropout_masks=None):
        """
        Perform a single step of the network

        Params:
            ipt: The current input. Should be an int tensor of shape (n_batch, self.input_width)
            state: The previous state. Should be a float tensor of shape (n_batch, self.output_width)
            state_strength: Strength of the previous state. Should be a float tensor of shape
                (n_batch)
            dropout_masks: Masks from get_dropout_masks

        Returns: The next output state, and the next output strength
        """
        if dropout_masks is not None:
            ipt_masks, state_masks = dropout_masks
            ipt = ipt*ipt_masks
            state = state*state_masks

        obs_state = state * T.shape_padright(state_strength)
        cat_ipt_state = T.concatenate([ipt, obs_state], 1)
        reset = do_layer( T.nnet.sigmoid, cat_ipt_state,
                            self._reset_W, self._reset_b )
        update = do_layer( T.nnet.sigmoid, cat_ipt_state,
                            self._update_W, self._update_b )
        update_state = update[:,:-1]
        update_strength = update[:,-1]

        cat_reset_ipt_state = T.concatenate([ipt, (reset * obs_state)], 1)
        candidate_act = do_layer( T.tanh, cat_reset_ipt_state,
                            self._activation_W, self._activation_b )
        candidate_strength = do_layer( T.nnet.sigmoid, cat_reset_ipt_state,
                            self._strength_W, self._strength_b ).reshape(state_strength.shape)

        newstate = update_state * state + (1-update_state) * candidate_act
        newstrength = update_strength * state_strength + (1-update_strength) * candidate_strength

        return newstate, newstrength 
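The core shape_padright use in this step is scaling each state row by its scalar strength. A minimal standalone version of just that line (variable names and sizes are mine, not the project's):

import numpy as np
import theano
import theano.tensor as T

state = T.matrix('state')                    # (n_batch, output_width)
state_strength = T.vector('state_strength')  # (n_batch,)
obs_state = state * T.shape_padright(state_strength)   # each row scaled by its strength

f = theano.function([state, state_strength], obs_state)
print(f(np.ones((3, 4), dtype=theano.config.floatX),
        np.array([1.0, 0.5, 0.0], dtype=theano.config.floatX)))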
Example #13
Source File: aggregate_representation.py    From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
        """
        Convert the graph state to a representation vector, using sigmoid attention to scale representations

        Params:
            gstate: A GraphState giving the current state

        Returns: A representation vector of shape (n_batch, representation_width)
        """
        if dropout_masks is Ellipsis:
            dropout_masks = None
            append_masks = False
        else:
            append_masks = True

        flat_obs = T.concatenate([
                        gstate.node_ids.reshape([-1, self._graph_spec.num_node_ids]),
                        gstate.node_states.reshape([-1, self._graph_spec.node_state_size])], 1)
        flat_activations, dropout_masks = self._representation_stack.process(flat_obs, dropout_masks)
        activations = flat_activations.reshape([gstate.n_batch, gstate.n_nodes, self._representation_width+1])

        activation_strengths = activations[:,:,0]
        selector = T.shape_padright(T.nnet.sigmoid(activation_strengths) * gstate.node_strengths)
        representations = T.tanh(activations[:,:,1:])

        result = T.tanh(T.sum(selector * representations, 1))
        if append_masks:
            return result, dropout_masks
        else:
            return result 
Example #14
Source File: aggregate_representation_softmax.py    From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
        """
        Convert the graph state to a representation vector, using softmax attention to scale representations

        Params:
            gstate: A GraphState giving the current state

        Returns: A representation vector of shape (n_batch, representation_width)
        """
        if dropout_masks is Ellipsis:
            dropout_masks = None
            append_masks = False
        else:
            append_masks = True

        flat_obs = T.concatenate([
                        gstate.node_ids.reshape([-1, self._graph_spec.num_node_ids]),
                        gstate.node_states.reshape([-1, self._graph_spec.node_state_size])], 1)
        flat_activations, dropout_masks = self._representation_stack.process(flat_obs, dropout_masks)
        activations = flat_activations.reshape([gstate.n_batch, gstate.n_nodes, self._representation_width+1])

        activation_strengths = activations[:,:,0]
        existence_penalty = T.log(gstate.node_strengths + EPSILON) # TODO: consider removing epsilon here
        selector = T.shape_padright(T.nnet.softmax(activation_strengths + existence_penalty))
        representations = T.tanh(activations[:,:,1:])

        result = T.sum(selector * representations, 1)
        if append_masks:
            return result, dropout_masks
        else:
            return result 
Example #15
Source File: propagation.py    From gated-graph-transformer-network with MIT License
def process(self, gstate, dropout_masks=Ellipsis):
        """
        Process a graph state.
          1. Data is transferred from each node to each other node along both forward and backward edges.
                This data is processed with a Wx+b style update, and an optional transformation is applied
          2. Nodes sum the transferred data, weighted by the existence of the other node and the edge.
          3. Nodes perform a GRU update with this input

        Params:
            gstate: A GraphState giving the current state
        """
        if dropout_masks is Ellipsis:
            dropout_masks = None
            append_masks = False
        else:
            append_masks = True

        node_obs = T.concatenate([gstate.node_ids, gstate.node_states],2)
        flat_node_obs = node_obs.reshape([-1, self._process_input_size])
        transformed, dropout_masks = self._transfer_stack.process(flat_node_obs,dropout_masks)
        transformed = transformed.reshape([gstate.n_batch, gstate.n_nodes, 2*self._graph_spec.num_edge_types, self._transfer_size])
        scaled_transformed = transformed * T.shape_padright(T.shape_padright(gstate.node_strengths))
        # scaled_transformed is of shape (n_batch, n_nodes, 2*num_edge_types, transfer_size)
        # We want to multiply  through by edge strengths, which are of shape
        # (n_batch, n_nodes, n_nodes, num_edge_types), both fwd and backward
        edge_strength_scale = T.concatenate([gstate.edge_strengths, gstate.edge_strengths.swapaxes(1,2)], 3)
        # edge_strength_scale is of (n_batch, n_nodes, n_nodes, 2*num_edge_types)
        intermed = T.shape_padaxis(scaled_transformed, 2) * T.shape_padright(edge_strength_scale)
        # intermed is of shape (n_batch, n_nodes "source", n_nodes "dest", 2*num_edge_types, transfer_size)
        # now reduce along the "source" and "edge_types" dimensions to get dest activations
        # of shape (n_batch, n_nodes, transfer_size)
        reduced_result = T.sum(T.sum(intermed, 3), 1)

        # now add information from current node id
        full_input = T.concatenate([gstate.node_ids, reduced_result], 2)

        # we flatten to apply GRU
        flat_input = full_input.reshape([-1, self._graph_spec.num_node_ids + self._transfer_size])
        flat_state = gstate.node_states.reshape([-1, self._graph_spec.node_state_size])
        new_flat_state, dropout_masks = self._propagation_gru.step(flat_input, flat_state, dropout_masks)

        new_node_states = new_flat_state.reshape(gstate.node_states.shape)

        new_gstate = gstate.with_updates(node_states=new_node_states)
        if append_masks:
            return new_gstate, dropout_masks
        else:
            return new_gstate 
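The double shape_padright and the shape_padaxis call above simply insert broadcastable axes so the strength tensors line up with the transfer tensors. A shape-only sketch (dimension names are illustrative, not the project's actual sizes):

import theano.tensor as T

node_strengths = T.matrix('node_strengths')          # (n_batch, n_nodes)
padded = T.shape_padright(T.shape_padright(node_strengths))
print(padded.ndim, padded.broadcastable)             # 4 (False, False, True, True)

scaled_transformed = T.tensor4('scaled_transformed') # (n_batch, n_nodes, 2*edge_types, transfer)
print(T.shape_padaxis(scaled_transformed, 2).ndim)   # 5: broadcastable axis inserted at position 2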
Example #16
Source File: rws.py    From reweighted-ws with GNU Affero General Public License v3.0
def log_likelihood(self, X, Y=None, n_samples=None):
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        if n_samples is None:
            n_samples = self.n_samples

        batch_size = X.shape[0]

        # Get samples
        X = f_replicate_batch(X, n_samples)
        samples, log_p, log_q = self.sample_q(X, None)

        # Reshape and sum
        log_p_all = T.zeros((batch_size, n_samples))
        log_q_all = T.zeros((batch_size, n_samples))
        for l in xrange(n_layers):
            samples[l] = samples[l].reshape((batch_size, n_samples, p_layers[l].n_X))
            log_q[l] = log_q[l].reshape((batch_size, n_samples))
            log_p[l] = log_p[l].reshape((batch_size, n_samples))
            log_p_all += log_p[l]   # aggregate all layers
            log_q_all += log_q[l]   # aggregate all layers

        # Approximate log P(X)
        log_px = f_logsumexp(log_p_all-log_q_all, axis=1) - T.log(n_samples)
        
        # Calculate sampling weights
        log_pq = (log_p_all-log_q_all-T.log(n_samples))
        w_norm = f_logsumexp(log_pq, axis=1)
        log_w = log_pq-T.shape_padright(w_norm)
        w = T.exp(log_w)

        # Calculate KL(P|Q), Hp, Hq
        KL = [None]*n_layers
        Hp = [None]*n_layers
        Hq = [None]*n_layers
        for l in xrange(n_layers):
            KL[l] = T.sum(w*(log_p[l]-log_q[l]), axis=1)
            Hp[l] = f_logsumexp(log_w+log_p[l], axis=1)
            Hq[l] = T.sum(w*log_q[l], axis=1)

        return log_px, w, log_p_all, log_q_all, KL, Hp, Hq 
Example #17
Source File: new_nodes_vote.py    From gated-graph-transformer-network with MIT License
def get_candidates(self, gstate, input_vector, max_candidates, dropout_masks=None):
        """
        Get the current candidate new nodes. This is accomplished as follows:
          1. The proposer network, conditioned on the input vector, proposes multiple candidate nodes,
                along with a confidence
          2. Every existing node, conditioned on its own state and the candidate, votes on whether or not
                to accept this node
          3. A new node is created for each candidate node, with an existence strength given by
                confidence * [product of all votes], and an initial state as proposed
        This method directly returns these new nodes for comparison

        Params:
            gstate: A GraphState giving the current state
            input_vector: A tensor of the form (n_batch, input_width)
            max_candidates: Integer, limit on the number of candidates to produce

        Returns:
            new_strengths: A tensor of the form (n_batch, new_node_idx)
            new_ids: A tensor of the form (n_batch, new_node_idx, num_node_ids)
        """
        n_batch = gstate.n_batch
        n_nodes = gstate.n_nodes
        outputs_info = [self._proposer_gru.initial_state(n_batch)]
        proposer_step = lambda st,ipt,*dm: self._proposer_gru.step(ipt,st,dm if dropout_masks is not None else None)
        raw_proposal_acts, _ = theano.scan(proposer_step, n_steps=max_candidates, non_sequences=[input_vector]+(dropout_masks if dropout_masks is not None else []), outputs_info=outputs_info)

        # raw_proposal_acts is of shape (candidate, n_batch, blah)
        flat_raw_acts = raw_proposal_acts.reshape([-1, self._proposal_width])
        flat_processed_acts = self._proposer_stack.process(flat_raw_acts)
        candidate_strengths = T.nnet.sigmoid(flat_processed_acts[:,0]).reshape([max_candidates, n_batch])
        candidate_ids = T.nnet.softmax(flat_processed_acts[:,1:]).reshape([max_candidates, n_batch, self._graph_spec.num_node_ids])

        # Votes will be of shape (candidate, n_batch, n_nodes)
        # To generate this we want to assemble (candidate, n_batch, n_nodes, input_stuff),
        # squash to (parallel, input_stuff), do voting op, then unsquash
        candidate_id_part = T.shape_padaxis(candidate_ids, 2)
        node_id_part = T.shape_padaxis(gstate.node_ids, 0)
        node_state_part = T.shape_padaxis(gstate.node_states, 0)
        full_vote_input = broadcast_concat([node_id_part, node_state_part, candidate_id_part], 3)
        flat_vote_input = full_vote_input.reshape([-1, full_vote_input.shape[-1]])
        vote_result = self._vote_stack.process(flat_vote_input)
        final_votes_no = vote_result.reshape([max_candidates, n_batch, n_nodes])
        weighted_votes_yes = 1 - final_votes_no * T.shape_padleft(gstate.node_strengths)
        # Add in the strength vote
        all_votes = T.concatenate([T.shape_padright(candidate_strengths), weighted_votes_yes], 2)
        # Take the product -> (candidate, n_batch)
        chosen_strengths = T.prod(all_votes, 2)

        new_strengths = chosen_strengths.dimshuffle([1,0])
        new_ids = candidate_ids.dimshuffle([1,0,2])
        return new_strengths, new_ids