Python numpy.array() Examples

The following are 50 code examples showing how to use numpy.array(). They are extracted from open source Python projects.
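Most of the examples below combine np.array() with a few other NumPy calls (np.amin/np.amax, reshape). As a quick orientation, here is a minimal, self-contained sketch of those basics; the values are invented for illustration.

import numpy as np

a = np.array([1, 2, 3])                  # 1-D array from a Python list
b = np.array([[1.0, 2.0], [3.0, 4.0]])   # 2-D array from nested lists
print(b.shape)                           # (2, 2)
print(np.amin(a), np.amax(a))            # 1 3
print(b.reshape(-1))                     # [1. 2. 3. 4.]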

Example 1
Project: pyku   Author: dubvulture   File: utils.py    (GNU General Public License v3.0)
def min_side(_, pos):
    """
    Given an object pixels' positions, return the minimum side length of its
    bounding box
    :param _: pixel values (unused)
    :param pos: pixel position (1-D)
    :return: minimum bounding box side length
    """
    xs = np.array([i // SSIZE for i in pos])  # floor division: grid coordinates must stay integral
    ys = np.array([i % SSIZE for i in pos])
    minx = np.amin(xs)
    miny = np.amin(ys)
    maxx = np.amax(xs)
    maxy = np.amax(ys)
    ct1 = compute_line(np.array([minx, miny]), np.array([minx, maxy]))
    ct2 = compute_line(np.array([minx, miny]), np.array([maxx, miny]))
    return min(ct1, ct2) 
Example 2
Project: namegenderclassifier   Author: joaoalvarenga   File: genderclassifier.py    (MIT License)
def train(self, dataset, train_split=0.8, dense_size=32, learning_rate=0.001, batch_size=32, epochs=50, activation='relu'):
        self.__load_dataset(dataset, train_split)

        train_x = np.array(self.__train_data[:, 0].tolist())
        train_y = to_categorical(self.__train_data[:, 1], 2)

        test_x = np.array(self.__test_data[:, 0].tolist())
        test_y = to_categorical(self.__test_data[:, 1], 2)

        print(train_x.shape)
        self.__model = Sequential()
        self.__model.add(Dense(dense_size, input_dim=train_x.shape[1], activation=activation, kernel_initializer='glorot_uniform'))
        self.__model.add(Dense(train_y.shape[1], activation='softmax', kernel_initializer='glorot_uniform'))
        # honor the learning_rate argument instead of a hard-coded 0.001
        self.__model.compile(optimizer=Adam(lr=learning_rate), loss='categorical_crossentropy', metrics=['categorical_accuracy'])

        self.__model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs, validation_data=(test_x, test_y), verbose=2) 
Example 3
Project: YellowFin_Pytorch   Author: JianGoForIt   File: nn1.py    (Apache License 2.0)
def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
    p = []
    l = []
    cnt = 0
    g = gen_minibatch1(tokens, features, mini_batch_size, False)
    for token, feature in g:
        if cnt % 100 == 0:
            print(cnt)
        cnt += 1
#         print token.size()
#         y_pred = get_predictions(token, word_attn, sent_attn)
#         print y_pred
        y_pred = get_predictions(token, feature, word_attn, sent_attn)
#         print y_pred
#         _, y_pred = torch.max(y_pred, 1)
#         y_pred = y_pred[:, 1]
#         print y_pred
        p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
    p = [item for sublist in p for item in sublist]
    p = np.array(p)
    return p 
Example 4
Project: rca-evaluation   Author: sieve-microservices   File: kshape.py    (license)
def _ncc_c(x, y):
    """
    >>> _ncc_c([1,2,3,4], [1,2,3,4])
    array([ 0.13333333,  0.36666667,  0.66666667,  1.        ,  0.66666667,
            0.36666667,  0.13333333])
    >>> _ncc_c([1,1,1], [1,1,1])
    array([ 0.33333333,  0.66666667,  1.        ,  0.66666667,  0.33333333])
    >>> _ncc_c([1,2,3], [-1,-1,-1])
    array([-0.15430335, -0.46291005, -0.9258201 , -0.77151675, -0.46291005])
    """
    den = np.array(norm(x) * norm(y))
    den[den == 0] = np.Inf

    x_len = len(x)
    fft_size = 1<<(2*x_len-1).bit_length()
    cc = ifft(fft(x, fft_size) * np.conj(fft(y, fft_size)))
    cc = np.concatenate((cc[-(x_len-1):], cc[:x_len]))
    return np.real(cc) / den 
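The fft_size line rounds the full cross-correlation length 2*x_len - 1 up to the next power of two so the FFTs run fast. A small standalone sketch of that arithmetic (values invented):

for x_len in (3, 4, 100):
    full = 2 * x_len - 1                # length of the full linear cross-correlation
    fft_size = 1 << full.bit_length()   # round up to a power of two
    print(x_len, full, fft_size)        # (3, 5, 8), (4, 7, 8), (100, 199, 256)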
Example 5
Project: gcForest   Author: pylablanche   File: GCForest.py    (license)
def _cascade_evaluation(self, X_test, y_test):
        """ Evaluate the accuracy of the cascade using X and y.

        :param X_test: np.array
            Array containing the test input samples.
            Must be of the same shape as training data.

        :param y_test: np.array
            Test target values.

        :return: float
            the cascade accuracy.
        """
        casc_pred_prob = np.mean(self.cascade_forest(X_test), axis=0)
        casc_pred = np.argmax(casc_pred_prob, axis=1)
        casc_accuracy = accuracy_score(y_true=y_test, y_pred=casc_pred)
        print('Layer validation accuracy = {}'.format(casc_accuracy))

        return casc_accuracy 
Example 6
Project: gcForest   Author: pylablanche   File: GCForest.py    (license)
def _create_feat_arr(self, X, prf_crf_pred):
        """ Concatenate the original feature vector with the predicition probabilities
        of a cascade layer.

        :param X: np.array
            Array containing the input samples.
            Must be of shape [n_samples, data] where data is a 1D array.

        :param prf_crf_pred: list
            Prediction probabilities by a cascade layer for X.

        :return: np.array
            Concatenation of X and the predicted probabilities.
            To be used for the next layer in a cascade forest.
        """
        swap_pred = np.swapaxes(prf_crf_pred, 0, 1)
        add_feat = swap_pred.reshape([np.shape(X)[0], -1])
        feat_arr = np.concatenate([add_feat, X], axis=1)

        return feat_arr 
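To see what the swapaxes/reshape pair does to the cascade predictions, here is a hedged sketch with invented shapes: two forests' class probabilities for three samples get flattened per sample and appended to X.

import numpy as np

prf_crf_pred = np.random.rand(2, 3, 4)              # [n_forests, n_samples, n_classes]
X = np.random.rand(3, 5)                            # [n_samples, n_features]
swap_pred = np.swapaxes(prf_crf_pred, 0, 1)         # [n_samples, n_forests, n_classes]
add_feat = swap_pred.reshape([np.shape(X)[0], -1])  # [n_samples, n_forests * n_classes]
print(np.concatenate([add_feat, X], axis=1).shape)  # (3, 13)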
Example 7
Project: RasterFairy   Author: Quasimondo   File: rfoptimizer.py    (BSD 3-Clause "New" or "Revised" License)
def shuffleBlock(self, cells, d, tlx, tly, cols, rows, width, height):
        if tlx + cols < width and tly + rows < height:
            temp = []
            for row in range(rows):
                for col in range(cols):
                    temp.append(d[cells[tlx + col][tly + row]])
            temp = np.array(temp)
            oldState = temp.copy()
            np.random.shuffle(temp)
            i = 0
            for row in range(rows):
                for col in range(cols):
                    d[cells[tlx + col][tly + row]] = temp[i]
                    i += 1
            return oldState
        else:
            return [] 
Example 8
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def normalize_array (solution, prediction):
    ''' Use min and max of solution as scaling factors to normalize prediction,
    then threshold it to [0, 1]. Binarize solution to {0, 1}. 
    This allows applying classification scores to all cases.
    In principle, this should not do anything to properly formatted 
    classification inputs and outputs.'''
    # Binarize solution
    sol = np.ravel(solution) # convert to 1-d array
    # boolean indexing instead of filter(), which returns an iterator on Python 3
    maxi = np.nanmax(sol[sol != float('inf')])   # max, ignoring NaN and +inf
    mini = np.nanmin(sol[sol != float('-inf')])  # min, ignoring NaN and -inf
    if maxi == mini:
        print('Warning, cannot normalize')
        return [solution, prediction]
    diff = maxi - mini
    mid = (maxi + mini)/2.
    new_solution = np.copy(solution)
    new_solution[solution>=mid] = 1
    new_solution[solution<mid] = 0
    # Normalize and threshold predictions (takes effect only if solution not in {0, 1})
    new_prediction = (np.copy(prediction) - float(mini))/float(diff)
    new_prediction[new_prediction>1] = 1 # and if predictions exceed the bounds [0, 1]
    new_prediction[new_prediction<0] = 0
    # Make probabilities smoother
    #new_prediction = np.power(new_prediction, (1./10))
    return [new_solution, new_prediction] 
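A quick sanity check of the behavior, calling the function above with invented numbers: a {0, 1} solution passes through unchanged, while an out-of-range prediction is rescaled and clipped to [0, 1].

import numpy as np

sol = np.array([0., 1., 1., 0.])
pred = np.array([-0.5, 0.2, 0.9, 1.7])
new_sol, new_pred = normalize_array(sol, pred)
print(new_sol)   # [0. 1. 1. 0.]
print(new_pred)  # [0.  0.2 0.9 1. ]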
Example 9
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def mvmean(R, axis=0):
    ''' Moving average to avoid rounding errors. A bit slow, but...
    Computes the mean along the given axis, except if this is a vector, in which case the mean is returned.
    Does NOT flatten.'''
    if len(R.shape) == 0: return R
    # running mean; on Python 3, reduce must be imported from functools
    average = lambda x: reduce(lambda i, j: (0, (j[0]/(j[0]+1.))*i[1] + (1./(j[0]+1))*j[1]), enumerate(x))[1]
    R = np.array(R)
    if len(R.shape) == 1: return average(R)
    if axis == 1:
        return np.array([average(row) for row in R])
    else:
        return np.array([average(col) for col in R.transpose()])
            
 
# ======= All metrics used for scoring in the challenge ========

### REGRESSION METRICS (work on raw solution and prediction)
# These can be computed on all solutions and predictions (classification included) 
Example 10
Project: AutoML5   Author: djajetic   File: data_io.py    (MIT License)
def data_binary_sparse (filename, nbr_features):	
	''' This function takes as an argument a file representing a binary sparse matrix
	binary_sparse_matrix[i][j] = a means matrix[i][j] = 1
	It converts it into a scipy CSR sparse matrix and returns it. '''
	
	data = data_converter.file_to_array (filename)
	nbr_samples = len(data)
	dok_sparse = dok_matrix ((nbr_samples, nbr_features)) # the construction is easier w/ dok_sparse
	print ("Converting {} to dok sparse matrix".format(filename))
	for row in range (nbr_samples):
		for feature in data[row]:
			dok_sparse[row, int(feature)-1] = 1
	print ("Converting {} to csr sparse matrix".format(filename))
	return dok_sparse.tocsr()
 
# ================ Copy results from input to output ========================== 
Example 11
Project: deep-summarization   Author: harpribot   File: stacked_simple.py    (MIT License)
def generate_one_summary(self, review):
        """
        Create summary for one review using Encoder Decoder Seq2Seq model

        :param review: The input review
        :return: Output Summary of the model
        """
        review = review.T
        review = [np.array([int(x)]) for x in review]
        feed_dict_rev = {self.enc_inp[t]: review[t] for t in range(self.seq_length)}
        feed_dict_rev.update({self.labels[t]: review[t] for t in range(self.seq_length)})
        summary = self.sess.run(self.dec_outputs_tst, feed_dict_rev)
        summary = [logits_t.argmax(axis=1) for logits_t in summary]
        summary = [x[0] for x in summary]

        return summary 
Example 12
Project: deep-summarization   Author: harpribot   File: simple.py    (MIT License)
def generate_one_summary(self, review):
        """
        Create summary for one review using Encoder Decoder Seq2Seq model

        :param review: The input review
        :return: Output Summary of the model
        """
        review = review.T
        review = [np.array([int(x)]) for x in review]
        feed_dict_rev = {self.enc_inp[t]: review[t] for t in range(self.seq_length)}
        feed_dict_rev.update({self.labels[t]: review[t] for t in range(self.seq_length)})
        summary = self.sess.run(self.dec_outputs_tst, feed_dict_rev)
        summary = [logits_t.argmax(axis=1) for logits_t in summary]
        summary = [x[0] for x in summary]

        return summary 
Example 13
Project: deep-summarization   Author: harpribot   File: extracter.py    (MIT License)
def __crawl_review(self):
        """
        Crawl review

        :return: review [numpy array]
        """
        review_list = []
        print('Crawling Reviews....')
        num_lines = 0
        with open(self.raw_data_file) as infile:
            for line in infile:
                if line.startswith('review/text'):
                    if num_lines >= self.num_reviews:
                        break
                    num_lines += 1
                    _,review = line.split('/text: ')
                    review_list.append(review)

        return np.array(review_list) 
Example 14
Project: deep-summarization   Author: harpribot   File: extracter.py    (MIT License)
def __crawl_summary(self):
        """
        Crawl summary

        :return: summary [numpy array]
        """
        summary_list = []
        print('Crawling Summary....')
        num_lines = 0
        with open(self.raw_data_file) as infile:
            for line in infile:
                if line.startswith('review/summary'):
                    if num_lines >= self.num_reviews:
                        break
                    num_lines += 1
                    _,summary = line.split('/summary: ')
                    summary_list.append(summary)

        return np.array(summary_list) 
Example 15
Project: DeepAnomaly   Author: adiyoss   File: dense_lstm_test.py    (MIT License)
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name, maxlen):
    db = read_data(path_test)

    X = create_sequences(db[:-maxlen], win_size=maxlen, step=maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], input_size))

    # build the model: 1 layer LSTM
    print('Build model...')
    model = Sequential()
    model.add(LSTM(hidden_size, return_sequences=False, input_shape=(maxlen, input_size)))
    model.add(Dense(maxlen))

    model.load_weights(save_dir + model_name)
    model.compile(loss='mse', optimizer='adam')

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    # prediction_container = np.array(prediction).flatten()
    Y = db[maxlen:]
    plt.plot(prediction, label='prediction')
    plt.plot(Y, label='true')
    plt.legend()
    plt.show() 
Example 16
Project: GELUs   Author: hendrycks   File: twitter_pos.py    (MIT License)
def word_list_to_embedding(words, embeddings, embedding_dimension=50):
    '''
    :param words: an n x (2*window_size + 1) matrix from data_to_mat
    :param embeddings: an embedding dictionary where keys are strings and values
    are embeddings; the output from embeddings_to_dict
    :param embedding_dimension: the dimension of the values in embeddings; in this
    assignment, embedding_dimension=50
    :return: an n x ((2*window_size + 1)*embedding_dimension) matrix where each entry of the
    words matrix is replaced with its embedding
    '''
    m, n = words.shape
    words = words.reshape((-1))

    return np.array([embeddings[w] for w in words], dtype=np.float32).reshape(m, n*embedding_dimension)

#
# End Twitter Helper Functions
# 
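A hedged usage sketch of the function above, with a toy two-word vocabulary and embedding_dimension=2 (all values invented): each cell of the window matrix is replaced by its embedding and each row is flattened.

import numpy as np

embeddings = {'the': np.array([0.1, 0.2], dtype=np.float32),
              'cat': np.array([0.3, 0.4], dtype=np.float32)}
words = np.array([['the', 'cat', 'the']])   # n = 1 window of width 3
out = word_list_to_embedding(words, embeddings, embedding_dimension=2)
print(out.shape)                            # (1, 6): 3 words * 2 dims per row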
Example 17
Project: mpiFFT4py   Author: spectralDNS   File: slab.py    (GNU Lesser General Public License v3.0)
def __init__(self, N, L, comm, precision,
                 communication="Alltoall",
                 padsize=1.5,
                 threads=1,
                 planner_effort=defaultdict(lambda: "FFTW_MEASURE")):
        R2C.__init__(self, N, L, comm, precision,
                     communication=communication,
                     padsize=padsize, threads=threads,
                     planner_effort=planner_effort)
        # Reuse all shapes from the R2C transform, simply resizing the final complex z-dimension:
        self.Nf = N[2]
        self.Nfp = int(self.padsize*self.N[2]) # Independent complex wavenumbers in z-direction for padded array

        # Rename since there's no real space
        self.original_shape_padded = self.real_shape_padded
        self.original_shape = self.real_shape
        self.transformed_shape = self.complex_shape
        self.original_local_slice = self.real_local_slice
        self.transformed_local_slice = self.complex_local_slice
        self.ks = (fftfreq(N[2])*N[2]).astype(int) 
Example 18
Project: almond-nnparser   Author: Stanford-Mobisocial-IoT-Lab   File: eval_output_embeddings.py    (license)
def bag_of_tokens(config, labels, label_lengths):
    if config.train_output_embeddings:
        with tf.variable_scope('embed', reuse=True):
            output_embeddings = tf.get_variable('output_embedding')
    else:
        output_embeddings = tf.constant(config.output_embedding_matrix)

    #everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
    #everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)

    labels = tf.constant(np.array(labels))
    embedded_output = tf.gather(output_embeddings, labels)
    print('embedded_output before', embedded_output)
    #mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
    # note: this multiplication will broadcast the mask along all elements of the depth dimension
    # (which is why we run the expand_dims to choose how to broadcast)
    #embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
    #print('embedded_output after', embedded_output)

    return tf.reduce_sum(embedded_output, axis=1) 
Example 19
Project: uwb_tracker_ros   Author: eth-ait   File: uwb_tracker_node.py    (license)
def _compute_process_and_covariance_matrices(self, dt):
        """Computes the transition and covariance matrix of the process model and measurement model.

        Args:
             dt (float): Timestep of the discrete transition.

        Returns:
            F (numpy.ndarray): Transition matrix.
            Q (numpy.ndarray): Process covariance matrix.
            R (numpy.ndarray): Measurement covariance matrix.
        """
        F = np.array(np.bmat([[np.eye(3), dt * np.eye(3)], [np.zeros((3, 3)), np.eye(3)]]))
        self.process_matrix = F
        q_p = self.process_covariance_position
        q_v = self.process_covariance_velocity
        Q = np.diag([q_p, q_p, q_p, q_v, q_v, q_v]) ** 2 * dt
        r = self.measurement_covariance
        R = r * np.eye(4)
        self.process_covariance = Q
        self.measurement_covariance = R
        return F, Q, R 
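The np.bmat call assembles the standard constant-velocity transition matrix F = [[I, dt*I], [0, I]]. A minimal sketch (dt invented) of what one propagation step does to a stacked [position, velocity] state:

import numpy as np

dt = 0.1
F = np.array(np.bmat([[np.eye(3), dt * np.eye(3)],
                      [np.zeros((3, 3)), np.eye(3)]]))
state = np.array([0., 0., 0., 1., 2., 3.])  # position (3) followed by velocity (3)
print(F.dot(state))                         # position advances by dt * velocity -> [0.1 0.2 0.3 1. 2. 3.]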
Example 20
Project: variational-text-tensorflow   Author: carpedm20   File: nvdm.py    (MIT License)
def sample(self, sample_size=20, text=None):
    """Sample the documents."""
    p = 1

    if text is not None:
      try:
        x, word_idxs = self.reader.get(text)
      except Exception as e:
        print(e)
        return
    else:
      x, word_idxs = self.reader.random()
    print(" [*] Text: %s" % " ".join([self.reader.idx2word[word_idx] for word_idx in word_idxs]))

    cur_ps = self.sess.run(self.p_x_i, feed_dict={self.x: x})
    word_idxs = np.array(cur_ps).argsort()[-sample_size:][::-1]
    ps = cur_ps[word_idxs]

    for idx, (cur_p, word_idx) in enumerate(zip(ps, word_idxs)):
      print("  [%d] %-20s: %.8f" % (idx+1, self.reader.idx2word[word_idx], cur_p))
      p *= cur_p

      print(" [*] perp : %8.f" % -np.log(p)) 
Example 21
Project: nanoQC   Author: wdecoster   File: nanoQC.py    (GNU General Public License v3.0)
def plot_nucleotide_diversity(ax, fqlists, invert=False):
    '''
    Create a FastQC-like "Per base sequence content" plot
    Plot fraction of nucleotides per position
    zip will stop when shortest read is exhausted
    '''
    if invert:
        fqlists = [list(reversed(read)) for read in fqlists]
    numreads = len(fqlists)
    sns.set_style("darkgrid")
    l_A, = ax.plot(
        np.array([pos.count('A') / numreads for pos in zip(*fqlists)]), 'green', label='A')
    l_T, = ax.plot(
        np.array([pos.count('T') / numreads for pos in zip(*fqlists)]), 'red', label='T')
    l_G, = ax.plot(
        np.array([pos.count('G') / numreads for pos in zip(*fqlists)]), 'black', label='G')
    l_C, = ax.plot(
        np.array([pos.count('C') / numreads for pos in zip(*fqlists)]), 'blue', label='C')
    if invert:
        ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
    return [l_A, l_T, l_G, l_C] 
Example 22
Project: nanoQC   Author: wdecoster   File: nanoQC.py    (GNU General Public License v3.0)
def plot_qual(ax, quallist, invert=False):
    '''
    Create a FastQC-like "Per base sequence quality" plot
    Plot average quality per position
    zip will stop when shortest read is exhausted
    '''
    sns.set_style("darkgrid")
    if invert:
        l_Q, = ax.plot(np.array([np.mean(position) for position in zip(
            *[list(reversed(read)) for read in quallist])]), 'orange', label="Quality")
        ax.set_xlabel('Position in read from end')
        ax.set_xticklabels(-1 * ax.get_xticks().astype(int))
    else:
        l_Q, = ax.plot(np.array([np.mean(position)
                                 for position in zip(*quallist)]), 'orange', label="Quality")
        ax.set_xlabel('Position in read from start')
    return l_Q 
Example 23
Project: pyfds   Author: emtpb   File: fields.py    (BSD 3-Clause "New" or "Revised" License)
def d_x2(self, factors=None):
        """Creates a sparse matrix for computing the second derivative with respect to x multiplied
        by factors given for every point. Uses central difference quotient.

        Args:
            factors: Factor for each point to be applied after derivation.

        Returns:
            Sparse matrix to calculate second derivatives of field components.
        """

        # use ones as factors if none are specified
        if factors is None:
            factors = np.array(1).repeat(self.num_points)

        return sp.dia_matrix((np.array([factors, -2*factors, factors]), [-1, 0, 1]),
                             shape=(self.num_points, self.num_points)) 
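To make the dia_matrix construction concrete, a small sketch (num_points=5 and unit factors, both invented) showing the [1, -2, 1] central-difference stencil it produces:

import numpy as np
import scipy.sparse as sp

n = 5
factors = np.ones(n)
d2 = sp.dia_matrix((np.array([factors, -2 * factors, factors]), [-1, 0, 1]),
                   shape=(n, n))
print(d2.toarray())   # tridiagonal: 1 on the off-diagonals, -2 on the main diagonal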
Example 24
Project: pyfds   Author: emtpb   File: fields.py    (BSD 3-Clause "New" or "Revised" License)
def d_x2(self, factors=None):
        """Creates a sparse matrix for computing the second derivative with respect to x multiplied
        by factors given for every point. Uses central difference quotient.

        Args:
            factors: Factor for each point to be applied after derivation.

        Returns:
            Sparse matrix to calculate second derivatives of field components.
        """

        # use ones as factors if none are specified
        if factors is None:
            factors = np.array(1).repeat(self.num_points)

        return sp.dia_matrix((np.array([factors, -2*factors, factors]), [-1, 0, 1]),
                             shape=(self.num_points, self.num_points)) 
Example 25
Project: pyfds   Author: emtpb   File: gfx.py    (BSD 3-Clause "New" or "Revised" License)
def plot_region(self, region):
        """Shows the given region in the field plot.

        Args:
            region: Region to be plotted.
        """

        if type(region) == reg.PointRegion:
            self.axes.plot(np.ones(2) * region.point_coordinates / self._x_axis_factor,
                           np.array([-1, 1]) * self.scale, color='black')
        elif type(region) == reg.LineRegion:
            self.axes.plot(np.ones(2) * region.line_coordinates[0] / self._x_axis_factor,
                           np.array([-1, 1]) * self.scale, color='black')
            self.axes.plot(np.ones(2) * region.line_coordinates[1] / self._x_axis_factor,
                           np.array([-1, 1]) * self.scale, color='black')
        else:
            raise TypeError('Unknown type in region list: {}'.format(type(region))) 
Example 26
Project: YellowFin_Pytorch   Author: JianGoForIt   File: nn1_stress_test.py    (Apache License 2.0)
def test_accuracy_full_batch(tokens, features, mini_batch_size, word_attn, sent_attn, th=0.5):
    p = []
    l = []
    cnt = 0
    g = gen_minibatch1(tokens, features, mini_batch_size, False)
    for token, feature in g:
        if cnt % 100 == 0:
            print(cnt)
        cnt += 1
#         print token.size()
#         y_pred = get_predictions(token, word_attn, sent_attn)
#         print y_pred
        y_pred = get_predictions(token, feature, word_attn, sent_attn)
#         print y_pred
#         _, y_pred = torch.max(y_pred, 1)
#         y_pred = y_pred[:, 1]
#         print y_pred
        p.append(np.ndarray.flatten(y_pred.data.cpu().numpy()))
    p = [item for sublist in p for item in sublist]
    p = np.array(p)
    return p 
Example 27
Project: treecat   Author: posterior   File: plotting.py    (Apache License 2.0)
def layout_tree(correlation):
    """Layout tree for visualization with e.g. matplotlib.

    Args:
        correlation: A [V, V]-shaped numpy array of latent correlations.

    Returns:
        A [V, 3]-shaped numpy array of spectral positions of vertices.
    """
    assert len(correlation.shape) == 2
    assert correlation.shape[0] == correlation.shape[1]
    assert correlation.dtype == np.float32

    laplacian = -correlation
    np.fill_diagonal(laplacian, 0)
    np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
    evals, evects = scipy.linalg.eigh(laplacian, eigvals=(1, 3))  # (lo, hi) inclusive: the 3 smallest nontrivial eigenvectors
    assert np.all(evals > 0)
    assert evects.shape[1] == 3
    return evects 
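layout_tree places vertices using eigenvectors of the graph Laplacian built from the correlation matrix (a spectral layout). A tiny check, with an invented 3x3 correlation, that the assembled matrix has the defining Laplacian property of zero column sums:

import numpy as np

correlation = np.array([[1.0, 0.5, 0.2],
                        [0.5, 1.0, 0.1],
                        [0.2, 0.1, 1.0]], np.float32)
laplacian = -correlation
np.fill_diagonal(laplacian, 0)
np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
print(laplacian.sum(axis=0))   # ~[0. 0. 0.]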
Example 28
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def __init__(self, N, V, tree_prior, config):
        """Initialize a model with an empty subsample.

        Args:
            N (int): Number of rows in the dataset.
            V (int): Number of columns (features) in the dataset.
            tree_prior: A [K]-shaped numpy array of prior edge log odds, where
                K is the number of edges in the complete graph on V vertices.
            config: A global config dict.
        """
        assert isinstance(N, int)
        assert isinstance(V, int)
        assert isinstance(tree_prior, np.ndarray)
        assert isinstance(config, dict)
        K = V * (V - 1) // 2  # Number of edges in complete graph.
        assert V <= 32768, 'Invalid # features > 32768: {}'.format(V)
        assert tree_prior.shape == (K, )
        assert tree_prior.dtype == np.float32
        self._config = config.copy()
        self._num_rows = N
        self._tree_prior = tree_prior
        self._tree = TreeStructure(V)
        assert self._tree.num_vertices == V
        self._program = make_propagation_program(self._tree.tree_grid)
        self._added_rows = set() 
Example 29
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def sample_tree(self):
        """Samples a random tree.

        Returns:
            A pair (edges, edge_logits), where:
                edges: A list of (vertex, vertex) pairs.
                edge_logits: A [K]-shaped numpy array of edge logits.
        """
        logger.info('TreeCatTrainer.sample_tree given %d rows',
                    len(self._added_rows))
        SERIES.sample_tree_num_rows.append(len(self._added_rows))
        complete_grid = self._tree.complete_grid
        edge_logits = self.compute_edge_logits()
        assert edge_logits.shape[0] == complete_grid.shape[1]
        assert edge_logits.dtype == np.float32
        edges = self.get_edges()
        edges = sample_tree(complete_grid, edge_logits, edges)
        return edges, edge_logits 
Example 30
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def compute_edge_logits(self):
        """Compute non-normalized logprob of all V(V-1)/2 candidate edges.

        This is used for sampling and estimating the latent tree.
        """
        V, E, K, M = self._VEKM
        vert_logits = logprob_dc(self._vert_ss, self._vert_prior, axis=1)
        if len(self._added_rows) == V:
            assignments = self._assignments
        else:
            assignments = self._assignments[sorted(self._added_rows), :]
        assignments = np.array(assignments, order='F')
        parallel = self._config['learning_parallel']
        result = treecat_compute_edge_logits(M, self._tree.complete_grid,
                                             self._gammaln_table, assignments,
                                             vert_logits, parallel)
        result += self._tree_prior
        return result 
Example 31
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def train(self):
        """Train a TreeCat model using subsample-annealed MCMC.

        Returns:
            A trained model as a dictionary with keys:
                config: A global config dict.
                tree: A TreeStructure instance with the learned latent
                    structure.
                edge_logits: A [K]-shaped array of all edge logits.
                suffstats: Sufficient statistics of features, vertices, and
                    edges and a ragged_index for the features array.
                assignments: An [N, V]-shaped numpy array of latent cluster
                    ids for each cell in the dataset, where N is the number of
                    data rows and V is the number of features.
        """
        model = TreeTrainer.train(self)
        model['assignments'] = self._assignments
        model['suffstats'] = {
            'ragged_index': self._table.ragged_index,
            'vert_ss': self._vert_ss,
            'edge_ss': self._edge_ss,
            'feat_ss': self._feat_ss,
            'meas_ss': self._meas_ss,
        }
        return model 
Example 32
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def __init__(self, data, tree_prior, config):
        """Initialize a model with an empty subsample.

        Args:
            data: An [N, V]-shaped numpy array of real-valued data.
            tree_prior: A [K]-shaped numpy array of prior edge log odds, where
                K is the number of edges in the complete graph on V vertices.
            config: A global config dict.
        """
        assert isinstance(data, np.ndarray)
        data = np.asarray(data, np.float32)
        assert len(data.shape) == 2
        N, V = data.shape
        D = config['model_latent_dim']
        E = V - 1  # Number of edges in the tree.
        TreeTrainer.__init__(self, N, V, tree_prior, config)
        self._data = data
        self._latent = np.zeros([N, V, D], np.float32)

        # This is symmetric positive definite.
        self._vert_ss = np.zeros([V, D, D], np.float32)
        # This is arbitrary (not necessarily symmetric).
        self._edge_ss = np.zeros([E, D, D], np.float32)
        # This represents (count, mean, covariance).
        self._feat_ss = np.zeros([V, D, 1 + 1 + D], np.float32) 
Example 33
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def train(self):
        """Train a TreeGauss model using subsample-annealed MCMC.

        Returns:
            A trained model as a dictionary with keys:
                config: A global config dict.
                tree: A TreeStructure instance with the learned latent
                    structure.
                edge_logits: A [K]-shaped array of all edge logits.
                suffstats: Sufficient statistics of features and vertices.
                latent: An [N, V, M]-shaped numpy array of latent states, where
                    N is the number of data rows, V is the number of features,
                    and M is the dimension of each latent variable.
        """
        model = TreeTrainer.train(self)
        model['latent'] = self._latent
        model['suffstats'] = {
            'vert_ss': self._vert_ss,
            'edge_ss': self._edge_ss,
            'feat_ss': self._feat_ss,
        }
        return model 
Example 34
Project: treecat   Author: posterior   File: training.py    (Apache License 2.0)
def train_ensemble(table, tree_prior, config):
    """Train a TreeCat ensemble model using subsample-annealed MCMC.

    The ensemble size is controlled by config['model_ensemble_size'].
    Let N be the number of data rows and V be the number of features.

    Args:
        table: A Table instance holding N rows of V features of data.
        tree_prior: A [K]-shaped numpy array of prior edge log odds, where
            K is the number of edges in the complete graph on V vertices.
        config: A global config dict.

    Returns:
        A trained model as a dictionary with keys:
            tree: A TreeStructure instance with the learned latent structure.
            suffstats: Sufficient statistics of features, vertices, and edges.
            assignments: An [N, V] numpy array of latent cluster ids for each
                cell in the dataset.
    """
    tasks = []
    for sub_seed in range(config['model_ensemble_size']):
        sub_config = config.copy()
        sub_config['seed'] += sub_seed
        tasks.append((table, tree_prior, sub_config))
    return parallel_map(_train_model, tasks) 
Example 35
Project: treecat   Author: posterior   File: serving_test.py    (Apache License 2.0)
def test_server_logprob_normalized(N, V, C, M):
    model = generate_fake_model(N, V, C, M)
    config = TINY_CONFIG.copy()
    config['model_num_clusters'] = M
    model['config'] = config
    server = TreeCatServer(model)

    # The total probability of all categorical rows should be 1.
    ragged_index = model['suffstats']['ragged_index']
    factors = []
    for v in range(V):
        C = ragged_index[v + 1] - ragged_index[v]
        factors.append([one_hot(c, C) for c in range(C)])
    data = np.array(
        [np.concatenate(columns) for columns in itertools.product(*factors)],
        dtype=np.int8)
    logprobs = server.logprob(data)
    logtotal = np.logaddexp.reduce(logprobs)
    assert logtotal == pytest.approx(0.0, abs=1e-5) 
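np.logaddexp.reduce sums probabilities in log space, so the assertion checks that the model's distribution over all possible categorical rows integrates to 1. The identity in isolation, with an invented distribution:

import numpy as np

logprobs = np.log(np.array([0.25, 0.25, 0.5]))   # a normalized distribution
print(np.logaddexp.reduce(logprobs))             # ~0.0, i.e. log(1)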
Example 36
Project: treecat   Author: posterior   File: serving.py    (Apache License 2.0)
def observed_perplexity(self, counts):
        """Compute perplexity = exp(entropy) of observed variables.

        Perplexity is an information theoretic measure of the number of
        clusters or latent classes. Perplexity is a real number in the range
        [1, M], where M is model_num_clusters.

        Args:
            counts: A [V]-shaped array of multinomial counts.

        Returns:
            A [V]-shaped numpy array of perplexity.
        """
        V, E, M, R = self._VEMR
        if counts is None:  # default to one count per variable when none are given
            counts = np.ones(V, dtype=np.int8)
        assert counts.shape == (V, )
        assert counts.dtype == np.int8
        assert np.all(counts > 0)
        observed_entropy = np.empty(V, dtype=np.float32)
        for v in range(V):
            beg, end = self._ragged_index[v:v + 2]
            probs = np.dot(self._feat_cond[beg:end, :], self._vert_probs[v, :])
            observed_entropy[v] = multinomial_entropy(probs, counts[v])
        return np.exp(observed_entropy) 
Example 37
Project: treecat   Author: posterior   File: serving.py    (Apache License 2.0)
def observed_perplexity(self, counts):
        """Compute perplexity = exp(entropy) of observed variables.

        Perplexity is an information theoretic measure of the number of
        clusters or observed classes. Perplexity is a real number in the range
        [1, dim[v]], where dim[v] is the number of categories in an observed
        categorical variable or 2 for an ordinal variable.

        Args:
            counts: A [V]-shaped array of multinomial counts.

        Returns:
            A [V]-shaped numpy array of perplexity.
        """
        result = self._ensemble[0].observed_perplexity(counts)
        for server in self._ensemble[1:]:
            result += server.observed_perplexity(counts)
        result /= len(self._ensemble)
        return result 
Example 38
Project: treecat   Author: posterior   File: serving.py    (Apache License 2.0)
def latent_correlation(self):
        """Compute correlation matrix among latent features.

        This computes the generalization of Pearson's correlation to discrete
        data. Let I(X;Y) be the mutual information. Then define correlation as

          rho(X,Y) = sqrt(1 - exp(-2 I(X;Y)))

        Returns:
            A [V, V]-shaped numpy array of feature-feature correlations.
        """
        result = self._ensemble[0].latent_correlation()
        for server in self._ensemble[1:]:
            result += server.latent_correlation()
        result /= len(self._ensemble)
        return result 
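The mapping rho(X,Y) = sqrt(1 - exp(-2 I(X;Y))) sends zero mutual information to zero correlation and large I to values approaching 1. A two-line check with invented values of I in nats:

import numpy as np

for mi in (0.0, 0.5, 2.0):
    print(mi, np.sqrt(1 - np.exp(-2 * mi)))   # 0.0, ~0.795, ~0.991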
Example 39
Project: treecat   Author: posterior   File: serving.py    (Apache License 2.0)
def logprob(self, rows, evidence=None):
        """Compute non-normalized log probabilies of many rows of data.

        If evidence is specified, compute conditional log probability;
        otherwise compute unconditional log probability.

        Args:
            rows: A list of rows of data, where each row is a sparse dict
                mapping feature name to feature value.
            evidence: An optional row of conditioning data, as a sparse dict
                mapping feature name to feature value.

        Returns:
            An [len(rows)]-shaped numpy array of log probabilities.
        """
        data = import_rows(self._schema, rows)
        if evidence is None:
            return self._server.logprob(data)
        else:
            ragged_evidence = import_rows(self._schema, [evidence])
            # log P(row | evidence) = log P(row, evidence) - log P(evidence)
            return (self._server.logprob(data + ragged_evidence) -
                    self._server.logprob(ragged_evidence)) 
Example 40
Project: treecat   Author: posterior   File: serving.py    (Apache License 2.0)
def sample(self, N, evidence=None):
        """Draw N samples from the posterior distribution.

        Args:
            N: The number of samples to draw.
            evidence: An optional single row of conditioning data, as a sparse
                dict mapping feature name to feature value.

        Returns:
            An [N, R]-shaped numpy array of sampled multinomial data.
        """
        if evidence is None:
            data = None
        else:
            data = import_rows(self._schema, [evidence])[0]
        ragged_samples = self._server.sample(N, self._counts, data)
        return export_rows(self._schema, ragged_samples) 
Example 41
Project: gcForest   Author: pylablanche   File: GCForest.py    (license)
def fit(self, X, y):
        """ Training the gcForest on input data X and associated target y.

        :param X: np.array
            Array containing the input samples.
            Must be of shape [n_samples, data] where data is a 1D array.

        :param y: np.array
            1D array containing the target values.
            Must be of shape [n_samples]
        """
        if np.shape(X)[0] != len(y):
            raise ValueError('Sizes of y and X do not match.')

        mgs_X = self.mg_scanning(X, y)
        _ = self.cascade_forest(mgs_X, y) 
Example 42
Project: RasterFairy   Author: Quasimondo   File: images2gif.py    (BSD 3-Clause "New" or "Revised" License)
def contest(self, b, g, r):
        """ Search for biased BGR values
                Finds closest neuron (min dist) and updates self.freq
                finds best neuron (min dist-self.bias) and returns position
                for frequently chosen neurons, self.freq[i] is high and self.bias[i] is negative
                self.bias[i] = self.GAMMA*((1/self.NETSIZE)-self.freq[i])"""
        i, j = self.SPECIALS, self.NETSIZE
        dists = abs(self.network[i:j] - np.array([b,g,r])).sum(1)
        bestpos = i + np.argmin(dists)
        biasdists = dists - self.bias[i:j]
        bestbiaspos = i + np.argmin(biasdists)
        self.freq[i:j] *= (1-self.BETA)
        self.bias[i:j] += self.BETAGAMMA * self.freq[i:j]
        self.freq[bestpos] += self.BETA
        self.bias[bestpos] -= self.BETAGAMMA
        return bestbiaspos 
Example 43
Project: RasterFairy   Author: Quasimondo   File: rasterfairy.py    (BSD 3-Clause "New" or "Revised" License)
def rasterMaskToGrid( rasterMask ):
    grid = []
    mask = rasterMask['mask']
    for y in range(rasterMask['height']):
        for x in range(rasterMask['width']):
            if mask[y,x]==0:
                grid.append([x,y])
    
    grid = np.array(grid, dtype=float)  # np.float was removed from NumPy; plain float is equivalent
    if not (rasterMask is None) and rasterMask['hex'] is True:
        f = math.sqrt(3.0)/2.0 
        offset = -0.5
        if np.argmin(rasterMask['mask'][0]) > np.argmin(rasterMask['mask'][1]):
            offset = 0.5
        for i in range(len(grid)):
            if (grid[i][1]%2.0==0.0):
                grid[i][0]-=offset
            grid[i][1] *= f
    return grid 
Example 44
Project: pyTBA   Author: Thing342   File: stats.py    (GNU General Public License v3.0)
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

        Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
    red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
    on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
    event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
    matrix that looks like this (sans labels):

                                #1  #2  #3  #4  #5  #6  #7
                    qm1_red     1   0   1   0   1   0   0
                    qm1_blue    0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)

    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2] 
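Using the docstring's toy event (teams 1-7, one qualification match pitting 1/3/5 against 2/4/6), the two alliance rows come out exactly as shown; a sketch with plain lists standing in for the Event object:

import numpy as np

teams = [1, 2, 3, 4, 5, 6, 7]
red, blue = [1, 3, 5], [2, 4, 6]
qm1_red = [1 if t in red else 0 for t in teams]     # [1, 0, 1, 0, 1, 0, 0]
qm1_blue = [1 if t in blue else 0 for t in teams]   # [0, 1, 0, 1, 0, 1, 0]
mat = np.array([qm1_red, qm1_blue])
print(mat.shape)                                    # (2, 7): one column per team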
Example 45
Project: Cat-Segmentation   Author: ardamavi   File: get_dataset.py    (Apache License 2.0)
def get_img(data_path):
    # Getting image array from path:
    img = imread(data_path)
    img = imresize(img, (64, 64))
    return img 
Example 46
Project: Cat-Segmentation   Author: ardamavi   File: get_dataset.py    (Apache License 2.0)
def get_dataset(dataset_path='Data/Train_Data'):
    # Getting all data from data path:
    try:
        X = np.load('Data/npy_train_data/X.npy')
        Y = np.load('Data/npy_train_data/Y.npy')
    except:
        inputs_path = dataset_path+'/input'
        images = listdir(inputs_path) # Getting images
        X = []
        Y = []
        for img in images:
            img_path = inputs_path+'/'+img

            x_img = get_img(img_path).astype('float32').reshape(64, 64, 3)
            x_img /= 255.

            y_img = get_img(img_path.replace('input/', 'mask/mask_')).astype('float32').reshape(64, 64, 1)
            y_img /= 255.

            X.append(x_img)
            Y.append(y_img)
        X = np.array(X)
        Y = np.array(Y)
        # Create dataset:
        if not os.path.exists('Data/npy_train_data/'):
            os.makedirs('Data/npy_train_data/')
        np.save('Data/npy_train_data/X.npy', X)
        np.save('Data/npy_train_data/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
    return X, X_test, Y, Y_test 
Example 47
Project: pyku   Author: dubvulture   File: test_groundtruth.py    (GNU General Public License v3.0)
def read_groundtruth():
    ret = []
    with open(
            os.path.join(
                os.path.abspath(os.path.dirname(__file__)),
                'groundtruth.txt'), 'rb') as lines:
        for line in lines:
            ret.append(line[:-2])

    return np.array(ret) 
Example 48
Project: pyku   Author: dubvulture   File: sudoku_steps.py    (GNU General Public License v3.0)
def extract_digits(self, image):
        """
        Extract digits from a binary image representing a sudoku
        :param image: binary image/sudoku
        :return: array of digits and their probabilities
        """
        prob = np.zeros(4, dtype=np.float32)
        digits = np.zeros((4, 9, 9), dtype=object)
        for i in range(4):
            labeled, features = label(image, structure=CROSS)
            objs = find_objects(labeled)
            for obj in objs:
                roi = image[obj]
                # center of bounding box
                cy = (obj[0].stop + obj[0].start) / 2
                cx = (obj[1].stop + obj[1].start) / 2
                dists = cdist([[cy, cx]], CENTROIDS, 'euclidean')
                pos = np.argmin(dists)
                cy, cx = pos % 9, pos // 9  # floor division so the result can index the grid
                # 28x28 image, center relative to sudoku
                prediction = self.classifier.classify(morph(roi))
                if isinstance(digits[i, cy, cx], int):  # cell still holds its initial 0
                    # Newly found digit
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
                elif prediction[0, 0] > digits[i, cy, cx][0, 0]:
                    # Overlapping! (noise), choose the most probable prediction
                    prob[i] -= digits[i, cy, cx][0, 0]
                    digits[i, cy, cx] = prediction
                    prob[i] += prediction[0, 0]
            image = np.rot90(image)
        logging.info(prob)
        return digits[np.argmax(prob)] 
Example 49
Project: pyku   Author: dubvulture   File: utils.py    (GNU General Public License v3.0)
def diagonal(_, pos):
    """
    Given an object pixels' positions, return the diagonal length of its
    bounding box
    :param _: pixel values (unused)
    :param pos: pixel position (1-D)
    :return: diagonal of bounding box
    """
    xs = np.array([i // SSIZE for i in pos])  # floor division: grid coordinates must stay integral
    ys = np.array([i % SSIZE for i in pos])
    minx = np.amin(xs)
    miny = np.amin(ys)
    maxx = np.amax(xs)
    maxy = np.amax(ys)
    return compute_line(np.array([minx, miny]), np.array([maxx, maxy])) 
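Both of these pyku helpers decode a flat pixel index into (row, column) coordinates by dividing by the grid width SSIZE. A quick sketch with an invented SSIZE of 4:

import numpy as np

SSIZE = 4                       # invented grid width for illustration
pos = np.array([0, 5, 10, 15])  # flat indices into a 4x4 image
xs, ys = pos // SSIZE, pos % SSIZE
print(list(zip(xs, ys)))        # pairs (0, 0), (1, 1), (2, 2), (3, 3)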
Example 50
Project: pyku   Author: dubvulture   File: utils.py    (GNU General Public License v3.0)
def compute_line(pt0, pt1):
    """
    Given 2 points, compute their distance
    :type pt0: numpy.array
    :type pt1: numpy.array
    :return: distance
    """
    return np.linalg.norm(pt0 - pt1)