Python numpy.sum() Examples

The following are 50 code examples showing how to use numpy.sum(), extracted from open source Python projects.
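As a quick orientation before the examples: np.sum() reduces over all elements by default, reduces along a given axis when one is passed, and counts True entries when applied to a boolean array. A minimal sketch:

import numpy as np

a = np.arange(6).reshape(2, 3)     # [[0, 1, 2], [3, 4, 5]]
np.sum(a)                          # 15: all elements
np.sum(a, axis=0)                  # array([3, 5, 7]): column sums
np.sum(a, axis=1, keepdims=True)   # array([[3], [12]]): row sums, 2-D shape kept
np.sum(a > 2)                      # 3: booleans count True values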

Example 1
Project: pylspm   Author: lseman   File: adequacy.py    (MIT License)
def KMO(data):

    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))

    for i in range(rows):
        for j in range(i, cols):
            A[i, j] = - (invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
            A[j, i] = A[i, j]

    num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
    den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
    kmo = num / den

    return kmo 
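A side note on the nested calls above: np.sum() on a pandas DataFrame reduces column-wise to a Series (in the pandas of this era), so np.sum(np.sum(df)) is needed to reach a scalar; a single call on the raw ndarray gives the same number. A small sketch with hypothetical data:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(4.0).reshape(2, 2))
total = np.sum(np.sum(df**2))         # column-wise reduction, then a scalar
assert total == np.sum(df.values**2)  # one call on the underlying ndarray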
Example 2
Project: pyku   Author: dubvulture   File: sudoku_steps.py    (GNU General Public License v3.0)
def remove_artifacts(self, image):
        """
        Remove the connected components that are not within the parameters
        Operates in place
        :param image: sudoku's thresholded image w/o grid
        :return: None
        """
        labeled, features = label(image, structure=CROSS)
        lbls = np.arange(1, features + 1)
        areas = extract_feature(image, labeled, lbls, np.sum,
                                np.uint32, 0)
        sides = extract_feature(image, labeled, lbls, min_side,
                                np.float32, 0, True)
        diags = extract_feature(image, labeled, lbls, diagonal,
                                np.float32, 0, True)

        for index in lbls:
            area = areas[index - 1] / 255
            side = sides[index - 1]
            diag = diags[index - 1]
            if side < 5 or side > 20 \
                    or diag < 15 or diag > 25 \
                    or area < 40:
                image[labeled == index] = 0
        return None 
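For context, extract_feature here wraps scipy.ndimage-style labeled reductions, and passing np.sum as the aggregation yields the pixel mass of each connected component (divided by 255 to get an area in pixels). A rough standalone sketch using scipy.ndimage directly, not the project's helper:

import numpy as np
from scipy import ndimage

image = np.array([[255, 255,   0],
                  [  0,   0,   0],
                  [  0,   0, 255]], dtype=np.uint8)
labeled, features = ndimage.label(image)               # cross connectivity by default
lbls = np.arange(1, features + 1)
areas = ndimage.sum(image, labeled, index=lbls) / 255  # pixels per component: [2., 1.]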
Example 3
Project: pyku   Author: dubvulture   File: sudoku.py    (GNU General Public License v3.0)
def remove_artifacts(self, image):
        """
        Remove the connected components that are not within the parameters
        Operates in place
        :param image: sudoku's thresholded image w/o grid
        :return: None
        """
        labeled, features = label(image, structure=CROSS)
        lbls = np.arange(1, features + 1)
        areas = extract_feature(image, labeled, lbls, np.sum,
                                np.uint32, 0)
        sides = extract_feature(image, labeled, lbls, min_side,
                                np.float32, 0, True)
        diags = extract_feature(image, labeled, lbls, diagonal,
                                np.float32, 0, True)

        for index in lbls:
            area = areas[index - 1] / 255
            side = sides[index - 1]
            diag = diags[index - 1]
            if side < 5 or side > 20 \
                    or diag < 15 or diag > 25 \
                    or area < 40:
                image[labeled == index] = 0
        return None 
Example 4
Project: namegenderclassifier   Author: joaoalvarenga   File: genderclassifier.py    (MIT License)
def evaluate(self, dataset):
        predictions = self.predict(dataset[:,0])
        confusion_matrix = sklearn_confusion_matrix(dataset[:,1], predictions, labels=self.__classes)

        precisions = []
        recalls = []
        accuracies = []

        for gender in self.__classes:
            idx = self.__classes_indexes[gender]
            precision = 1
            recall = 1
            if np.sum(confusion_matrix[idx,:]) > 0:
                precision = confusion_matrix[idx][idx]/np.sum(confusion_matrix[idx,:])
            if np.sum(confusion_matrix[:, idx]) > 0:
                recall = confusion_matrix[idx][idx]/np.sum(confusion_matrix[:, idx])
            precisions.append(precision)
            recalls.append(recall)

        precision = np.mean(precisions)
        recall = np.mean(recalls)
        f1 = (2*(precision*recall))/float(precision+recall)
        accuracy = np.sum(confusion_matrix.diagonal())/float(np.sum(confusion_matrix))

        return precision, recall, accuracy, f1 
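The per-class loop above can also be written as vectorized sums over the confusion matrix: the diagonal divided by row sums and by column sums gives the two per-class ratios, and the diagonal total over the grand total gives accuracy. A sketch (guards for empty classes omitted):

import numpy as np

cm = np.array([[50, 10],
               [ 5, 35]], dtype=float)
per_class_row = np.diag(cm) / np.sum(cm, axis=1)  # diagonal over row sums
per_class_col = np.diag(cm) / np.sum(cm, axis=0)  # diagonal over column sums
accuracy = np.sum(np.diag(cm)) / np.sum(cm)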
Example 5
Project: pylspm   Author: lseman   File: run_mpi.py    (MIT License)
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
#    print((1 / np.sum(f1)))
    return (1 / np.sum(f1)) 
Example 6
Project: pylspm   Author: lseman   File: run_mpi.py    (MIT License)
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main 
Example 7
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def xloads(self):
        # Xloadings
        A = self.data_.transpose().values
        B = self.fscores.transpose().values
        A_mA = A - A.mean(1)[:, None]
        B_mB = B - B.mean(1)[:, None]

        ssA = (A_mA**2).sum(1)
        ssB = (B_mB**2).sum(1)

        xloads_ = (np.dot(A_mA, B_mB.T) /
                   np.sqrt(np.dot(ssA[:, None], ssB[None])))
        xloads = pd.DataFrame(
            xloads_, index=self.manifests, columns=self.latent)

        return xloads 
Example 8
Project: pylspm   Author: lseman   File: boot_mpi.py    (MIT License)
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):  # signature completed: the body uses item, nclusters and population
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1)) 
Example 9
Project: pylspm   Author: lseman   File: boot.py    (MIT License)
def do_work_pso(self, item):
        output = pd.DataFrame(self.population[item].position)
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except:
                f1.append(10000)
        print((1 / np.sum(f1)))
        return (1 / np.sum(f1)) 
Example 10
Project: pylspm   Author: lseman   File: boot.py    (MIT License)
def do_work_tabu(self, item):
        output = pd.DataFrame(self.population[item])
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except:
                f1.append(10000)

        cost = (np.sum(f1))
        print(1 / cost)
        return [self.population[item], cost] 
Example 11
Project: pytorch_tutorial   Author: soravux   File: example4.py    (license)
def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # A different (control flow based) way to control dropout
        if self.training:
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        else:
            x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        if self.training:
            x = F.dropout(x, training=True)
        x = self.fc2(x)

        # Check for NaNs and infinites
        nans = np.sum(np.isnan(x.data.numpy()))
        infs = np.sum(np.isinf(x.data.numpy()))
        if nans > 0:
            print("There are {} NaN values at the output layer".format(nans))
        if infs > 0:
            print("There are {} infinite values at the output layer".format(infs))

        return F.log_softmax(x) 
Example 12
Project: pytorch_tutorial   Author: soravux   File: example4.py    (license)
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset))) 
Example 13
Project: spyking-circus   Author: spyking-circus   File: utils.py    (license)
def score_samples(self, X):
        """Return the log-likelihood of each sample
        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf
        Parameters
        ----------
        X: array, shape(n_samples, n_features)
            The data.
        Returns
        -------
        ll: array, shape (n_samples,)
            Log-likelihood of each sample under the current model
        """
        check_is_fitted(self, 'mean_')

        X = check_array(X)
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        precision = self.get_precision()
        log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
        log_like -= .5 * (n_features * log(2. * np.pi)
                          - fast_logdet(precision))
        return log_like 
Example 14
Project: youtube-8m   Author: wangheda   File: YM_labels_matrix.py    (license)
def main():
    files = tf.gfile.Glob(flags.FLAGS.src_path_1)
    labels_uni = np.zeros([4716,1])
    labels_matrix = np.zeros([4716,4716])
    for file in files:
        labels_all = get_video_input_feature(file)
        print(len(labels_all[0][2]),len(labels_all[0][3]),len(labels_all[0][4]),len(labels_all[0][5]))
        """
        for labels in labels_all:
            for i in range(len(labels)):
                labels_uni[labels[i]] += 1
                for j in range(len(labels)):
                    labels_matrix[labels[i],labels[j]] += 1
    labels_matrix = labels_matrix/labels_uni
    labels_matrix = labels_matrix/(np.sum(labels_matrix,axis=0)-1.0)
    for i in range(4716):
        labels_matrix[i,i] = 1.0
    np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')""" 
Example 15
Project: youtube-8m   Author: wangheda   File: eval_util.py    (license)
def calculate_gap(predictions, actuals, top_k=20):
  """Performs a local (numpy) calculation of the global average precision.

  Only the top_k predictions are taken for each of the videos.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.
    top_k: How many predictions to use per video.

  Returns:
    float: The global average precision.
  """
  gap_calculator = ap_calculator.AveragePrecisionCalculator()
  sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
  gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
  return gap_calculator.peek_ap_at_n() 
Example 16
Project: youtube-8m   Author: wangheda   File: eval_util.py    (license)
def calculate_gap(predictions, actuals, top_k=20):
  """Performs a local (numpy) calculation of the global average precision.

  Only the top_k predictions are taken for each of the videos.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.
    top_k: How many predictions to use per video.

  Returns:
    float: The global average precision.
  """
  gap_calculator = ap_calculator.AveragePrecisionCalculator()
  sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
  gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
  return gap_calculator.peek_ap_at_n() 
Example 17
Project: youtube-8m   Author: wangheda   File: inference-sample-error-analysis.py    (license)
def format_lines(video_ids, predictions, labels, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    n_recall = max(int(numpy.sum(labels[video_index])), 1)
    # labels
    label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
    label_predictions = [(class_index, predictions[video_index][class_index]) 
                           for class_index in label_indices]
    label_predictions = sorted(label_predictions, key=lambda p: -p[1])
    label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
    # predictions
    top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    top_k_predictions = [(class_index, predictions[video_index][class_index])
                         for class_index in top_k_indices]
    top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
    top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
    # compute PERR
    top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
    positives = [labels[video_index][class_index] 
                 for class_index in top_n_indices]
    perr = sum(positives) / float(n_recall)
    # URL
    url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
    yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n" 
Example 18
Project: youtube-8m   Author: wangheda   File: eval_util.py    (license)
def calculate_gap(predictions, actuals, top_k=20):
  """Performs a local (numpy) calculation of the global average precision.

  Only the top_k predictions are taken for each of the videos.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.
    top_k: How many predictions to use per video.

  Returns:
    float: The global average precision.
  """
  gap_calculator = ap_calculator.AveragePrecisionCalculator()
  sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
  gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
  return gap_calculator.peek_ap_at_n() 
Example 19
Project: MKLMM   Author: omerwe   File: kernels.py    (BSD 2-Clause "Simplified" License)
def getTrainKernel(self, params):
		self.checkParams(params)
		if (self.sameParams(params)): return self.cache['getTrainKernel']
		
		ell = np.exp(params[0])
		if (self.K_sq is None): K = sq_dist(self.X_scaled.T / ell)	#precompute squared distances
		else: K = self.K_sq / ell**2		
		self.cache['K_sq_scaled'] = K

		# # # #manual computation (just for sanity checks)
		# # # K1 = np.exp(-K / 2.0)
		# # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
		# # # for i1 in xrange(self.X_scaled.shape[0]):
			# # # for i2 in xrange(i1, self.X_scaled.shape[0]):
				# # # diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
				# # # K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
				# # # K2[i2, i1] = K2[i1, i2]				
		# # # print np.max((K1-K2)**2)
		# # # sys.exit(0)
		
		K_exp = np.exp(-K / 2.0)
		self.cache['getTrainKernel'] = K_exp
		self.saveParams(params)
		return K_exp 
Example 20
Project: MKLMM   Author: omerwe   File: kernels.py    (BSD 2-Clause "Simplified" License)
def getTrainTestKernel(self, params, Xtest):
		self.checkParams(params)
		ell2 = np.exp(2*params[0])
		
		z = Xtest / np.sqrt(Xtest.shape[1])
		S = 1 + self.X_scaled.dot(z.T)
		sz = 1 + np.sum(z**2, axis=1)
		sqrtEll2Psx = np.sqrt(ell2+self.sx)
		sqrtEll2Psz = np.sqrt(ell2+sz)
		K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
		return np.arcsin(K) 
Example 21
Project: pyTBA   Author: Thing342   File: stats.py    (GNU General Public License v3.0)
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

        Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
    red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
    on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
    event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
    matrix that looks like this (sans labels):

                                #1  #2  #3  #4  #5  #6  #7
                    qm1_red     1   0   1   0   1   0   0
                    qm1_blue    0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)

    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2] 
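Here numpy.sum(mat, axis=0) collapses the rows, counting how many alliances each team (column) appeared on; teams well below the average count are then pruned. A tiny sketch of that column-sum step:

import numpy

mat = numpy.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
numpy.sum(mat, axis=0)   # array([2, 2, 1]): appearances per team (column)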
Example 22
Project: pyku   Author: dubvulture   File: utils.py    (GNU General Public License v3.0)
def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle from pt0
    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle
    """
    a = pt0 - pt1
    b = pt0 - pt2
    return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b)) 
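For 1-D vectors np.sum(a * b) is exactly the dot product, so the return value is the standard cosine formula. A quick check:

import numpy as np

a = np.array([1.0, 0.0])
b = np.array([1.0, 1.0])
cos = np.sum(a * b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert np.isclose(cos, np.cos(np.pi / 4))   # a and b are 45 degrees apart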
Example 23
Project: pyku   Author: dubvulture   File: digit_classifier.py    (GNU General Public License v3.0)
def _zoning(image):
        """
        It works better with DSIZE = 28
        ~0.9967 precision and recall
        :param image:
        :return: #pixels/area ratio of each zone (7x7) as feature vector
        """
        zones = []
        for i in range(0, 28, 7):
            for j in range(0, 28, 7):
                roi = image[i:i+7, j:j+7]
                val = (np.sum(roi)/255) / 49.
                zones.append(val)
        return np.array(zones, np.float32) 
Example 24
Project: AutoML5   Author: djajetic   File: data_manager.py    (MIT License)
def getTypeProblem (self, solution_filename):
		''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
		if 'task' not in self.info.keys():
			solution = np.array(data_converter.file_to_array(solution_filename))
			target_num = solution.shape[1]
			self.info['target_num']=target_num
			if target_num == 1: # if we have only one column
				solution = np.ravel(solution) # flatten
				nbr_unique_values = len(np.unique(solution))
				if nbr_unique_values < len(solution)/8:
					# Classification
					self.info['label_num'] = nbr_unique_values
					if nbr_unique_values == 2:
						self.info['task'] = 'binary.classification'
						self.info['target_type'] = 'Binary'
					else:
						self.info['task'] = 'multiclass.classification'
						self.info['target_type'] = 'Categorical'
				else:
					# Regression
					self.info['label_num'] = 0
					self.info['task'] = 'regression'
					self.info['target_type'] = 'Numerical'     
			else:
				# Multilabel or multiclass       
				self.info['label_num'] = target_num
				self.info['target_type'] = 'Binary' 
				if any(item > 1 for item in map(np.sum,solution.astype(int))):
					self.info['task'] = 'multilabel.classification'     
				else:
					self.info['task'] = 'multiclass.classification'        
		return self.info['task'] 
Example 25
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def binarize_predictions(array, task='binary.classification'):
    ''' Turn predictions into decisions {0,1} by selecting the class with largest 
    score for multiclass problems and thresholding at 0.5 for other cases.'''
    # add a very small random value as tie breaker (a bit bad because this changes the score every time)
    # so to make sure we get the same result every time, we seed it    
    #eps = 1e-15
    #np.random.seed(sum(array.shape))
    #array = array + eps*np.random.rand(array.shape[0],array.shape[1])
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1]==1): 
        bin_array[array>=0.5] = 1
    else:        
        sample_num=array.shape[0]
        for i in range(sample_num):
            j = np.argmax(array[i,:])
            bin_array[i,j] = 1        
    return bin_array 
Example 26
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def acc_stat (solution, prediction):
	''' Return accuracy statistics TN, FP, TP, FN
	Assumes that solution and prediction are binary 0/1 vectors.'''
	# This uses floats so the results are floats
	TN = sum(np.multiply((1-solution), (1-prediction)))
	FN = sum(np.multiply(solution, (1-prediction)))
	TP = sum(np.multiply(solution, prediction))
	FP = sum(np.multiply((1-solution), prediction))
 	#print "TN =",TN
	#print "FP =",FP
	#print "TP =",TP
	#print "FN =",FN
	return (TN, FP, TP, FN) 
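With 0/1 vectors, each of these sums counts one cell of the 2x2 confusion table; the same counts fall out of boolean masks, since np.sum() counts True entries. A sketch:

import numpy as np

solution   = np.array([1, 1, 0, 0])
prediction = np.array([1, 0, 0, 1])
TP = np.sum((solution == 1) & (prediction == 1))   # 1
TN = np.sum((solution == 0) & (prediction == 0))   # 1
FP = np.sum((solution == 0) & (prediction == 1))   # 1
FN = np.sum((solution == 1) & (prediction == 0))   # 1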
Example 27
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score 
Example 28
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    #auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num=solution.shape[1]
    auc=np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:,k])
        s_ = solution[:,k]
        if sum(s_)==0: print('WARNING: no positive class example in class {}'.format(k+1))
        npos = sum(s_==1)
        nneg = sum(s_<1)
        auc[k] = (sum(r_[s_==1]) - npos*(npos+1)/2) / (nneg*npos)
    return 2*mvmean(auc)-1

    
### END CLASSIFICATION METRICS 
    
# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks 
Example 29
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def prior_log_loss(frac_pos, task = 'binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column'''
    eps = 1e-15   
    frac_pos_ = sp.maximum (eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum (eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Need to renormalize the lines in multiclass case
        # Only ONE label is 1 in the multiclass case active for each line
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_) 
    return base_log_loss
        
# sklearn implementations for comparison 
Example 30
Project: AutoML5   Author: djajetic   File: data_converter.py    (MIT License)
def num_lines (filename):
	''' Count the number of lines of file'''
	return sum(1 for line in open(filename)) 
Example 31
Project: AutoML5   Author: djajetic   File: data_converter.py    (MIT License)
def tp_filter(X, Y, feat_num=1000, verbose=True):
    ''' TP feature selection in the spirit of the winners of the KDD cup 2001
    Only for binary classification and sparse matrices'''
        
    if issparse(X) and len(Y.shape)==1  and len(set(Y))==2 and (sum(Y)/Y.shape[0])<0.1: 
        if verbose: print("========= Filtering features...")
        Posidx=Y>0
        #npos = sum(Posidx)
        #Negidx=Y<=0
        #nneg = sum(Negidx)
            
        nz=X.nonzero()
        mx=X[nz].max()
        if X[nz].min()==mx: # sparse binary
            if mx!=1: X[nz]=1
            tp=csr_matrix.sum(X[Posidx,:], axis=0)
            #fn=npos-tp
            #fp=csr_matrix.sum(X[Negidx,:], axis=0)
            #tn=nneg-fp
        else:
            tp=np.sum(X[Posidx,:]>0, axis=0)
            #tn=np.sum(X[Negidx,:]<=0, axis=0)
            #fn=np.sum(X[Posidx,:]<=0, axis=0)
            #fp=np.sum(X[Negidx,:]>0, axis=0)

        tp=np.ravel(tp)
        idx=sorted(range(len(tp)), key=tp.__getitem__, reverse=True)   
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num) 
Example 32
Project: AutoML5   Author: djajetic   File: models.py    (MIT License)
def predict(self, X):
        prediction = self.predict_method(X)
        # Calibrate proba
        if self.task != 'regression' and self.postprocessor!=None:          
            prediction = self.postprocessor.predict_proba(prediction)
        # Keep only 2nd column because the second one is 1-first    
        if self.target_num==1 and len(prediction.shape)>1 and prediction.shape[1]>1:
            prediction = prediction[:,1]
        # Make sure the normalization is correct
        if self.task=='multiclass.classification':
            eps = 1e-15
            norma = np.sum(prediction, axis=1)
            for k in range(prediction.shape[0]):
                prediction[k,:] /= sp.maximum(norma[k], eps)  
        return prediction 
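The normalization loop at the end can be collapsed into one vectorized step with keepdims, which keeps the row sum as a column so it broadcasts cleanly. A sketch (same eps clipping as the example):

import numpy as np

eps = 1e-15
prediction = np.array([[0.2, 0.2, 0.6],
                       [0.0, 0.0, 0.0]])
norma = np.sum(prediction, axis=1, keepdims=True)  # shape (n, 1)
prediction = prediction / np.maximum(norma, eps)   # rows now sum to 1 (or stay 0)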
Example 33
Project: AutoML5   Author: djajetic   File: models.py    (MIT License)
def fit(self, X, Y):
        if len(Y.shape)==1: 
            Y = np.array([Y]).transpose() # Transform vector into column matrix
            # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
        self.n_target = Y.shape[1]                 # Num target values = num col of Y
        self.n_label = len(set(Y.ravel()))         # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary )
        # Create the right number of copies of the predictor instance
        if len(self.predictors)!=self.n_target:
            predictorInstance = self.predictors[0]
            self.predictors = [predictorInstance]
            for i in range(1,self.n_target):
                self.predictors.append(copy.copy(predictorInstance))
        # Fit all predictors
        for i in range(self.n_target):
            # Update the number of desired predictors
            if hasattr(self.predictors[i], 'n_estimators'):
                self.predictors[i].n_estimators=self.n_estimators
            # Subsample if desired
            if self.balance:
                pos = Y[:,i]>0
                neg = Y[:,i]<=0
                if sum(pos)<sum(neg): 
                    chosen = pos
                    not_chosen = neg
                else: 
                    chosen = neg
                    not_chosen = pos
                num = sum(chosen)
                idx = np.array([k for k, flag in enumerate(not_chosen) if flag])  # Python 3 rewrite of the original filter/zip idiom
                np.random.shuffle(idx)
                chosen[idx[0:min(num, len(idx))]]=True
                # Train with chosen samples            
                self.predictors[i].fit(X[chosen,:],Y[chosen,i])
            else:
                self.predictors[i].fit(X,Y[:,i])
        return 
Example 34
Project: lang-reps   Author: chaitanyamalaviya   File: sequence2sequence.py    (license)
def get_batch_loss(self, input_batch, output_batch):

        dynet.renew_cg()

        # Dimension: maxSentLength * minibatch_size
        wids = []
        wids_reversed = []

        # List of lists to store whether an input is
        # present(1)/absent(0) for an example at a time step
        # masks = [] # Dimension: maxSentLength * minibatch_size

        # tot_words = 0
        maxSentLength = max([len(sent) for sent in input_batch])
        for j in range(maxSentLength):
            wids.append([(self.src_vocab[sent[j]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
            wids_reversed.append([(self.src_vocab[sent[len(sent)- j-1]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
            # mask = [(1 if len(sent)>j else 0) for sent in input_batch]
            # masks.append(mask)
            #tot_words += sum(mask)

        embedded_batch = self.embed_batch_seq(wids)
        embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
        encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

        # pass last hidden state of encoder to decoder
        return self.decode_batch(encoded_batch, output_batch) 
Example 35
Project: IntroToDeepLearning   Author: robb-brown   File: TensorFlowInterface.py    (MIT License)
def plotFields(layer,fieldShape=None,channel=None,figOffset=1,cmap=None,padding=0.01):
	# Receptive Fields Summary
	try:
		W = layer.W
	except:
		W = layer
	wp = W.eval().transpose();
	if len(np.shape(wp)) < 4:		# Fully connected layer, has no shape
		fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape)	
	else:			# Convolutional layer already has shape
		features, channels, iy, ix = np.shape(wp)
		if channel is not None:
			fields = wp[:,channel,:,:]
		else:
			fields = np.reshape(wp,[features*channels,iy,ix])

	perRow = int(math.floor(math.sqrt(fields.shape[0])))
	perColumn = int(math.ceil(fields.shape[0]/float(perRow)))

	fig = mpl.figure(figOffset); mpl.clf()
	
	# Using image grid
	from mpl_toolkits.axes_grid1 import ImageGrid
	grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
	for i in range(0,np.shape(fields)[0]):
		im = grid[i].imshow(fields[i],cmap=cmap); 

	grid.cbar_axes[0].colorbar(im)
	mpl.title('%s Receptive Fields' % layer.name)
	
	# old way
	# fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
	# tiled = []
	# for i in range(0,perColumn*perRow,perColumn):
	# 	tiled.append(np.hstack(fields2[i:i+perColumn]))
	# 
	# tiled = np.vstack(tiled)
	# mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
	mpl.figure(figOffset+1); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar() 
Example 36
Project: IntroToDeepLearning   Author: robb-brown   File: TensorFlowInterface.py    (MIT License)
def plotFields(layer,fieldShape=None,channel=None,maxFields=25,figName='ReceptiveFields',cmap=None,padding=0.01):
	# Receptive Fields Summary
	W = layer.W
	wp = W.eval().transpose();
	if len(np.shape(wp)) < 4:		# Fully connected layer, has no shape
		fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape)
	else:			# Convolutional layer already has shape
		features, channels, iy, ix = np.shape(wp)
		if channel is not None:
			fields = wp[:,channel,:,:]
		else:
			fields = np.reshape(wp,[features*channels,iy,ix])

	fieldsN = min(fields.shape[0],maxFields)
	perRow = int(math.floor(math.sqrt(fieldsN)))
	perColumn = int(math.ceil(fieldsN/float(perRow)))

	fig = mpl.figure(figName); mpl.clf()

	# Using image grid
	from mpl_toolkits.axes_grid1 import ImageGrid
	grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
	for i in range(0,fieldsN):
		im = grid[i].imshow(fields[i],cmap=cmap);

	grid.cbar_axes[0].colorbar(im)
	mpl.title('%s Receptive Fields' % layer.name)

	# old way
	# fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
	# tiled = []
	# for i in range(0,perColumn*perRow,perColumn):
	# 	tiled.append(np.hstack(fields2[i:i+perColumn]))
	#
	# tiled = np.vstack(tiled)
	# mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
	mpl.figure(figName+' Total'); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar() 
Example 37
Project: TDOSE   Author: kasperschmidt   File: tdose_utilities.py    (MIT License)
def analytic_convolution_gaussian(mu1,covar1,mu2,covar2):
    """
    The analytic vconvolution of two Gaussians is simply the sum of the two mean vectors
    and the two convariance matrixes

    --- INPUT ---
    mu1         The mean of the first gaussian
    covar1      The covariance matrix of of the first gaussian
    mu2         The mean of the second gaussian
    covar2      The covariance matrix of of the second gaussian

    """
    muconv    = mu1+mu2
    covarconv = covar1+covar2
    return muconv, covarconv

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
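This identity is easy to sanity-check empirically: the sum of independent samples from the two Gaussians should have mean mu1+mu2 and covariance covar1+covar2. A sketch with synthetic data:

import numpy as np

rng = np.random.RandomState(0)
mu1, covar1 = np.array([0.0, 1.0]), np.array([[1.0, 0.0], [0.0, 2.0]])
mu2, covar2 = np.array([3.0, 0.0]), np.array([[0.5, 0.0], [0.0, 0.5]])
x = rng.multivariate_normal(mu1, covar1, size=200000)
y = rng.multivariate_normal(mu2, covar2, size=200000)
s = x + y
print(np.mean(s, axis=0))   # close to mu1 + mu2 = [3, 1]
print(np.cov(s.T))          # close to covar1 + covar2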
Example 38
Project: TDOSE   Author: kasperschmidt   File: tdose_utilities.py    (MIT License)
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned

    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array           Array to reshape (combine pixels)
    newsize         New size of array
    pixcombine      The method to combine the pixels with. Choices are sum, mean and median

    """
    sh = newsize[0],array.shape[0]//newsize[0],newsize[1],array.shape[1]//newsize[1]
    # pdb.set_trace()  # debugging leftover; disabled so the function runs through
    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)  # ndarrays have no .median method

    return reshapedarray
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
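The reshape-then-reduce trick bins whole pixel blocks in one pass: for pixcombine='sum', each output pixel is the sum of one block, so the grand total is preserved. A quick check on a 4x4 array rebinned to 2x2:

import numpy as np

array = np.arange(16).reshape(4, 4)
sh = 2, 2, 2, 2                            # (rows, row_bin, cols, col_bin)
binned = array.reshape(sh).sum(-1).sum(1)
# binned == [[10, 18], [42, 50]] and the total is preserved:
assert binned.sum() == array.sum()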
Example 39
Project: pyballd   Author: Yurlungur   File: domain.py    (GNU Lesser General Public License v3.0)
def inner_product_to_infty(self,gf1,gf2):
        "Inner product on non-compact domain"
        factors = [s.get_scale_factor() for s in self.stencils]
        factor = np.prod(factors)
        integrand = (factor*gf1*self.weights2D*gf2*self.dRdX)
        integrand[-1] = 0
        integral = np.sum(integrand)
        return integral 
Example 40
Project: pyballd   Author: Yurlungur   File: orthopoly.py    (GNU Lesser General Public License v3.0)
def get_integration_weights(order,nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to
    represent.
    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXive:gr-qc/0609020v1
    """
    if nodes is None:   # the original guard, np.all(nodes == False), never fires for the None default
        nodes = get_quadrature_points(order)
    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order+1))
        weights[1:-1] = np.pi/order
        weights[0] = np.pi/(2*order)
        weights[-1] = weights[0]
        return weights
    elif poly == polynomial.legendre.Legendre:
        interior_weights = 2/((order+1)*order*poly.basis(order)(nodes[1:-1])**2)
        boundary_weights = np.array([1-0.5*np.sum(interior_weights)])
        weights = np.concatenate((boundary_weights,
                                  interior_weights,
                                  boundary_weights))
        return weights
    else:
        raise ValueError("Not a known polynomial type.")
        return False 
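For the Chebyshev branch these are the Chebyshev-Gauss-Lobatto weights (pi/N at interior nodes, pi/(2N) at the endpoints), and np.sum(weights * f(nodes)) then approximates the integral of f(x)/sqrt(1-x^2) over [-1, 1]. A quick check with f = 1, whose weighted integral is pi (node formula cos(pi*k/N) assumed):

import numpy as np

N = 8
nodes = np.cos(np.pi * np.arange(N + 1) / N)   # Gauss-Lobatto points (assumed layout)
weights = np.full(N + 1, np.pi / N)
weights[0] = weights[-1] = np.pi / (2 * N)
# integral of 1/sqrt(1 - x**2) over [-1, 1] is pi:
print(np.sum(weights * np.ones_like(nodes)))   # 3.141592...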
Example 41
Project: pyballd   Author: Yurlungur   File: orthopoly.py    (GNU Lesser General Public License v3.0)
def inner_product(self,gf1,gf2):
        """Calculates the 2D inner product between grid functions
        gf1 and gf2 using the appropriate quadrature rule
        """
        factors = [s.get_scale_factor() for s in self.stencils]
        factor = np.prod(factors)
        integrand = gf1*self.weights2D*gf2
        integral_unit_cell = np.sum(integrand)
        integral_physical = integral_unit_cell*factor
        return integral_physical 
Example 42
Project: mpiFFT4py   Author: spectralDNS   File: spectral_dns_solver.py    (GNU Lesser General Public License v3.0)
def compute_rhs(rhs):
    U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
    curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
    for i in range(3):
        U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)

    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)
    P_hat[:] = sum(rhs*K_over_K2, 0, out=P_hat)
    rhs -= P_hat*K
    rhs -= nu*K2*U_hat
    return rhs

# Initialize a Taylor Green vortex 
Example 43
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def gof(self):
        r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
        AVEmean = self.AVE().copy()

        totalblock = 0
        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            block = len(block.columns.values)
            totalblock += block
            AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block

        AVEmean = np.sum(AVEmean) / totalblock
        return np.sqrt(AVEmean * r2mean) 
Example 44
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def cr(self):
        # Composite Reliability
        composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            p = len(block.columns)

            if(p != 1):
                cor_mat = np.cov(block.T)
                evals, evecs = np.linalg.eig(cor_mat)
                U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

                indices = np.argsort(evals)
                indices = indices[::-1]
                evecs = evecs[:, indices]
                evals = evals[indices]

                loadings = V[0, :] * np.sqrt(evals[0])

                numerador = np.sum(abs(loadings))**2
                denominador = numerador + (p - np.sum(loadings ** 2))
                cr = numerador / denominador
                composite[self.latent[i]] = cr

            else:
                composite[self.latent[i]] = 1

        composite = composite.T
        return(composite) 
Example 45
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def r2adjusted(self):
        n = len(self.data_)
        r2 = self.r2.values

        r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            p = sum(self.LVariables['target'] == self.latent[i])
            r2adjusted[self.latent[i]] = r2[i] - \
                (p * (1 - r2[i])) / (n - p - 1)

        return r2adjusted.T 
Example 46
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def AVE(self):
        # AVE
        return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum()) 
Example 47
Project: pylspm   Author: lseman   File: pylspm.py    (MIT License)
def fornell(self):
        cor_ = pd.DataFrame.corr(self.fscores)**2
        AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
        for i in range(len(cor_)):
            cor_.ix[i, i] = AVE[i]

        return(cor_) 
Example 48
Project: pylspm   Author: lseman   File: gac.py    (MIT License)
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):

        output = pd.DataFrame(self.genes)
        output.columns = ['Split']
        dataSplit = pd.concat([data_, output], axis=1)
        f1 = []
        results = []
        for i in range(n_clusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
                                      regression, 0, 50, HOC='true'))

                sumOuterResid = pd.DataFrame.sum(
                    pd.DataFrame.sum(results[i].residuals()[1]**2))
                sumInnerResid = pd.DataFrame.sum(
                    pd.DataFrame.sum(results[i].residuals()[2]**2))
                f1.append(sumOuterResid + sumInnerResid)
            except:
                f1.append(10000)

        print((1 / np.sum(f1)))
        return (1 / np.sum(f1)) 
Example 49
Project: pylspm   Author: lseman   File: gac.py    (MIT License)
def roulettewheel(pop, fit):
    fit = fit - min(fit)
    sumf = sum(fit)
    if(sumf == 0):
        return pop[0]
    prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
    prob_ = uniform(0, 1)
#    print(prob)
    individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
    return pop[individuo] 
Example 50
Project: GYM_DRL   Author: Kyushik   File: CartPole_DQN.py    (license)
def xavier_initializer(shape):
	dim_sum = np.sum(shape)
	if len(shape) == 1:
		dim_sum += 1
	bound = np.sqrt(2.0 / dim_sum)
	return tf.random_uniform(shape, minval=-bound, maxval=bound)

# Assigning network variables to target network variables