Python scipy.maximum() Examples

The following are 30 code examples of scipy.maximum(), drawn from open-source projects; the original source file and project are noted above each example. Note that scipy.maximum was historically just a re-export of numpy.maximum (these top-level NumPy aliases were deprecated and have been removed in recent SciPy releases, so new code should call np.maximum directly), and the examples generally assume the conventional imports numpy as np and scipy as sp. You may also want to check out all available functions and classes of the module scipy, or try the search function.
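A minimal sketch of the clamping idiom most of these examples rely on, i.e. bounding probabilities away from 0 and 1 before taking logs:

import numpy as np
import scipy as sp  # in older SciPy releases, sp.maximum is np.maximum

eps = 1e-15
p = np.array([0.0, 0.3, 1.0])
# Clamp probabilities into [eps, 1 - eps] so log(p) and log(1 - p) stay finite
p = sp.maximum(eps, p)
p = sp.minimum(1 - eps, p)
print(p)  # approximately [1e-15, 0.3, 1 - 1e-15]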
Example #1
Source File: libscores.py    From AutoML with MIT License
def bac_metric (solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy. The binarization and 
    the normalization differ for the multi-label and multi-class case. '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn,fp,tp,fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp+fn)
    tpr = tp / pos_num # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num==1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn+fp)
        tnr = tn / neg_num # true negative rate (specificity)
        bac = 0.5*(tpr + tnr)
        base_bac = 0.5     # random predictions for binary case
    else: 
        bac = tpr
        base_bac = 1./label_num # random predictions for multiclass case
    bac = mvmean(bac)     # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score 
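To make the normalization concrete: in the binary case base_bac = 0.5, so a raw balanced accuracy of 0.75 maps to (0.75 - 0.5) / (1 - 0.5) = 0.5, random guessing (bac = 0.5) maps to 0, and a perfect classifier (bac = 1) maps to 1.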
Example #2
Source File: libscores.py    From automl_gpu with MIT License
def bac_metric (solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy. The binarization and 
    the normalization differ for the multi-label and multi-class case. '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn,fp,tp,fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp+fn)
    tpr = tp / pos_num # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num==1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn+fp)
        tnr = tn / neg_num # true negative rate (specificity)
        bac = 0.5*(tpr + tnr)
        base_bac = 0.5     # random predictions for binary case
    else: 
        bac = tpr
        base_bac = 1./label_num # random predictions for multiclass case
    bac = mvmean(bac)     # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score 
Example #3
Source File: math_util.py    From smappPy with GNU General Public License v2.0
def log_loss(actual, predicted, epsilon=1e-15):
    """
    Calculates and returns the log loss (error) of a set of predicted probabilities
    (hint: see sklearn classifier's predict_proba methods).

    Source: https://www.kaggle.com/wiki/LogarithmicLoss
    
    In plain English, this error metric is typically used where you have to predict 
    that something is true or false with a probability (likelihood) ranging from 
    definitely true (1) to equally true (0.5) to definitely false (0).

    Note: also see (and use) scikit-learn: 
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html#sklearn.metrics.log_loss
    """
    predicted = sp.maximum(epsilon, predicted)
    predicted = sp.minimum(1-epsilon, predicted)
    ll = sum(actual*sp.log(predicted) + sp.subtract(1,actual)*sp.log(sp.subtract(1,predicted)))
    ll = ll * -1.0/len(actual)
    return ll 
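A quick usage sketch for the function above (the numbers here are made up), with NumPy arrays of binary labels and predicted probabilities:

import numpy as np
actual = np.array([1, 0, 1, 1])
predicted = np.array([0.9, 0.1, 0.8, 0.35])
print(log_loss(actual, predicted))  # roughly 0.371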
Example #4
Source File: optics.py    From REDPy with GNU General Public License v3.0
def expandClusterOrder(SetOfObjects, point, epsilon):

    """
    Expands OPTICS ordered list of clustering structure
    
    SetOfObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time.

    """
    
    if SetOfObjects._core_dist[point] <= epsilon:
        while not SetOfObjects._processed[point]:
            SetOfObjects._processed[point] = True
            SetOfObjects._ordered_list.append(point)
            point = set_reach_dist(SetOfObjects, point, epsilon)
    else:
        SetOfObjects._processed[point] = True 
Example #5
Source File: libscores.py    From automl_gpu with MIT License
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column.'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Renormalize the lines in the multiclass case
        # Only ONE label is active (equal to 1) per line in the multiclass case
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss
        
# sklearn implementations for comparison 
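As a quick sanity check of the binary branch above: for a balanced problem (frac_pos = 0.5), the baseline is -0.5*log(0.5) - 0.5*log(0.5) = log(2) ≈ 0.693, i.e. the log loss of always predicting 0.5.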
Example #6
Source File: optics.py    From REDPy with GNU General Public License v3.0
def prep_optics(SetofObjects, epsilon):

    """
    Prep data set for main OPTICS loop
    
    SetofObjects: Instantiated instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time.
    
    Returns modified setOfObjects tree structure
    
    """

    for j in SetofObjects._index:
        # Find smallest nonzero distance
        SetofObjects._core_dist[j] = np.sort(SetofObjects.data[j,:])[1] 
Example #7
Source File: libscores.py    From automl-phase-2 with MIT License
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column.'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Renormalize the lines in the multiclass case
        # Only ONE label is active (equal to 1) per line in the multiclass case
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss
        
# sklearn implementations for comparison 
Example #8
Source File: libscores.py    From AutoML with MIT License
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column.'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'): # binary case
        frac_neg = 1-frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else: # multiclass case
        fp = frac_pos_ / sum(frac_pos_) # Renormalize the lines in the multiclass case
        # Only ONE label is active (equal to 1) per line in the multiclass case
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss
        
# sklearn implementations for comparison 
Example #9
Source File: libscores.py    From AutoML with MIT License
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp,
    # because exp is a non-linear operation.
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score 
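Continuing the balanced-binary example, the baseline log loss is log(2) ≈ 0.693, so base_pac = exp(-log(2)) = 0.5; a perfect predictor (pac = 1) then scores (1 - 0.5) / (1 - 0.5) = 1, while the prior predictor scores 0.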
Example #10
Source File: np_utils.py    From seq2seq-keyphrase with MIT License
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #11
Source File: np_utils.py    From KerasNeuralFingerprint with MIT License
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #12
Source File: libscores.py    From AutoML with MIT License
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1-eps, sp.maximum(eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #13
Source File: ml.py    From kaggle_avazu_benchmark with Apache License 2.0
def llfun(act, pred):
    p_true = pred[:, 1]
    epsilon = 1e-15
    p_true = sp.maximum(epsilon, p_true)
    p_true = sp.minimum(1 - epsilon, p_true)
    ll = sum(act * sp.log(p_true) + sp.subtract(1, act) * sp.log(sp.subtract(1, p_true)))
    ll = ll * -1.0 / len(act)
    return ll 
Example #14
Source File: libscores.py    From automl_gpu with MIT License
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1-eps, sp.maximum(eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #15
Source File: classify_nodes.py    From PyTorch-Luna16 with Apache License 2.0
def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1-epsilon, pred)
    ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred)))
    ll = ll * -1.0/len(act)
    return ll 
Example #16
Source File: libscores.py    From automl_gpu with MIT License
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp,
    # because exp is a non-linear operation.
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score 
Example #17
Source File: np_utils.py    From CopyNet with MIT License
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #18
Source File: log_loss.py    From classifier-calibration with MIT License
def log_loss(act, pred):
	epsilon = 1e-15
	pred = sp.maximum(epsilon, pred)
	pred = sp.minimum(1-epsilon, pred)
	ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred)))
	ll = ll * -1.0/len(act)
	return ll 
Example #19
Source File: utils.py    From pCVR with Apache License 2.0
def logloss(act, pred):
    '''
    The official loss function (as given by the competition organizers).
    :param act: ground-truth labels
    :param pred: predicted probabilities
    :return: log loss
    '''
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll 
Example #20
Source File: utils.py    From pCVR with Apache License 2.0
def my_logloss(act, pred):
    # K is assumed to be the Keras backend (e.g. from keras import backend as K)
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    ll = ll * -1.0 / K.shape(act)[0]

    return ll 
Example #21
Source File: optics.py    From REDPy with GNU General Public License v3.0
def set_reach_dist(SetOfObjects, point_index, epsilon):

    """
    Sets reachability distance and ordering. This function is the primary workhorse of
    the OPTICS algorithm.
    
    SetOfObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time. (float)

    """
    
    row = [SetOfObjects.data[point_index,:]]
    indices = np.argsort(row)
    distances = np.sort(row)

    if scipy.iterable(distances):

        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)

        if unprocessed.size > 0:
            return unprocessed[np.argsort(np.array(SetOfObjects._reachability[
                unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index 
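The scipy.maximum call above implements the OPTICS reachability definition, reach_dist(o, p) = max(core_dist(p), dist(p, o)), element-wise over the unprocessed neighbors. A toy illustration with made-up values:

import numpy as np
import scipy as sp

core_dist = 0.4
neighbor_dists = np.array([0.1, 0.5, 0.9])
# Distances smaller than the core distance are raised up to it
print(sp.maximum(neighbor_dists, core_dist))  # [0.4 0.5 0.9]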
Example #22
Source File: optics.py    From REDPy with GNU General Public License v3.0
def build_optics(SetOfObjects, epsilon):

    """
    Builds OPTICS ordered list of clustering structure
    
    SetOfObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time.

    """

    for point in SetOfObjects._index:
        if not SetOfObjects._processed[point]:
            expandClusterOrder(SetOfObjects, point, epsilon) 
Example #23
Source File: metric.py    From mljar-supervised with MIT License
def logloss(y_true, y_predicted):
    epsilon = 1e-6
    y_predicted = sp.maximum(epsilon, y_predicted)
    y_predicted = sp.minimum(1 - epsilon, y_predicted)
    ll = log_loss(y_true, y_predicted)
    return ll 
Example #24
Source File: models.py    From automl-phase-2 with MIT License
def predict(self, X):
        prediction = self.predict_method(X)
        # Calibrate proba
        if self.task != 'regression' and self.postprocessor is not None:
            prediction = self.postprocessor.predict_proba(prediction)
        # Keep only the 2nd column; the 1st column is just 1 minus the 2nd
        if self.target_num==1 and len(prediction.shape)>1 and prediction.shape[1]>1:
            prediction = prediction[:,1]
        # Make sure the normalization is correct
        if self.task=='multiclass.classification':
            eps = 1e-15
            norma = np.sum(prediction, axis=1)
            for k in range(prediction.shape[0]):
                prediction[k,:] /= sp.maximum(norma[k], eps)
        return prediction 
Example #25
Source File: libscores.py    From automl-phase-2 with MIT License
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1-eps, sp.maximum(eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #26
Source File: libscores.py    From automl-phase-2 with MIT License
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp,
    # because exp is a non-linear operation.
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score 
Example #27
Source File: libscores.py    From automl-phase-2 with MIT License
def bac_metric (solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy. The binarization and 
    the normalization differ for the multi-label and multi-class case. '''
    # Convert to 2d if necessary
    solution = np.array(solution, ndmin=2)
    if solution.shape[1] == 1:
        solution = solution.T
    prediction = np.array(prediction, ndmin=2)
    if prediction.shape[1] == 1:
        prediction = prediction.T
    # Carry on
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn,fp,tp,fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp+fn)
    tpr = tp / pos_num # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num==1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn+fp)
        tnr = tn / neg_num # true negative rate (specificity)
        bac = 0.5*(tpr + tnr)
        base_bac = 0.5     # random predictions for binary case
    else: 
        bac = tpr
        base_bac = 1./label_num # random predictions for multiclass case
    bac = mvmean(bac)     # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score 
Example #28
Source File: py_lh_20Sep2014.py    From Predict-click-through-rates-on-display-ads with MIT License
def logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    ll = sum(y*sp.log(p) + sp.subtract(1,y)*sp.log(sp.subtract(1,p)))
    ll = ll * -1.0/len(y)
    return ll

# B. Apply hash trick of the original csv row
# for simplicity, we treat both integer and categorical features as categorical
# INPUT:
#     csv_row: a csv dictionary, ex: {'Label': '1', 'I1': '357', 'I2': '', ...}
#     D: the max index that we can hash to
# OUTPUT:
#     x: a list of indices that its value is 1 
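A minimal sketch of the hash trick described in the comment above; the helper name and hashing scheme here are illustrative, not the original script's exact implementation:

def csv_row_to_indices(csv_row, D):
    # Many FTRL-style scripts reserve index 0 for a bias term
    x = [0]
    for key, value in csv_row.items():
        # Hash each 'field_value' pair into [0, D); the feature value
        # at every returned index is implicitly 1
        index = abs(hash(key + '_' + value)) % D
        x.append(index)
    return x

# e.g. csv_row_to_indices({'Label': '1', 'I1': '357'}, D=2**20)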
Example #29
Source File: np_utils.py    From CAPTCHA-breaking with MIT License
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #30
Source File: libscores.py    From automl_gpu with MIT License
def f1_metric (solution, prediction, task='binary.classification'):
    ''' Compute the normalized f1 measure. The binarization differs 
        for the multi-label and multi-class case. 
        A non-weighted average over classes is taken.
        The score is normalized.'''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn,fp,tp,fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    true_pos_num = sp.maximum(eps, tp+fn)
    found_pos_num = sp.maximum(eps, tp+fp)
    tp = sp.maximum(eps, tp)
    tpr = tp / true_pos_num      # true positive rate (recall)
    ppv = tp / found_pos_num     # positive predictive value (precision)
    arithmetic_mean = 0.5 * sp.maximum(eps, tpr+ppv)
    # Harmonic mean:
    f1 = tpr*ppv/arithmetic_mean
    # Average over all classes
    f1 = mvmean(f1)
    # Normalize: 0 for random, 1 for perfect
    if (task != 'multiclass.classification') or (label_num==1):
    # How to choose the "base_f1"?
    # For the binary/multilabel classification case, one may want to predict all 1.
    # In that case tpr = 1 and ppv = frac_pos. f1 = 2 * frac_pos / (1+frac_pos)
    #     frac_pos = mvmean(solution.ravel())
    #     base_f1 = 2 * frac_pos / (1+frac_pos)
    # or predict random values with probability 0.5, in which case
    #     base_f1 = 0.5
    # the first solution is better only if frac_pos > 1/3.
    # The solution in which we predict according to the class prior frac_pos gives
    # f1 = tpr = ppv = frac_pos, which is worse than 0.5 if frac_pos<0.5
    # So, because the f1 score is used if frac_pos is small (typically <0.1)
    # the best is to assume that base_f1=0.5
        base_f1 = 0.5
    # For the multiclass case, this is not possible (though it does not make much sense to
    # use f1 for multiclass problems), so the best would be to assign values at random to get 
    # tpr=ppv=frac_pos, where frac_pos=1/label_num
    else:
        base_f1=1./label_num
    score = (f1 - base_f1) / sp.maximum(eps, (1 - base_f1))
    return score