Python scipy.minimum() Examples

The following are 19 code examples of scipy.minimum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy , or try the search function .
Example #1
Source File: data.py    From BERT with Apache License 2.0 6 votes vote down vote up
def load_question(params):
    df = pd.read_csv(config.QUESTION_FILE)
    df["words"] = df.words.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    df["chars"] = df.chars.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    Q = {}
    Q["seq_len_word"] = sp.minimum(df["words"].apply(len).values, params["max_seq_len_word"])
    Q["seq_len_char"] = sp.minimum(df["chars"].apply(len).values, params["max_seq_len_char"])
    Q["words"] = pad_sequences(df["words"],
                               maxlen=params["max_seq_len_word"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_WORD)
    Q["chars"] = pad_sequences(df["chars"],
                               maxlen=params["max_seq_len_char"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_CHAR)
    return Q 
Example #2
Source File: math_util.py    From smappPy with GNU General Public License v2.0 6 votes vote down vote up
def log_loss(actual, predicted, epsilon=1e-15):
    """
    Calculates and returns the log loss (error) of a set of predicted probabilities
    (hint: see sklearn classifier's predict_proba methods).

    Source: https://www.kaggle.com/wiki/LogarithmicLoss
    
    In plain English, this error metric is typically used where you have to predict 
    that something is true or false with a probability (likelihood) ranging from 
    definitely true (1) to equally true (0.5) to definitely false(0).

    Note: also see (and use) scikitlearn: 
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html#sklearn.metrics.log_loss
    """
    predicted = sp.maximum(epsilon, predicted)
    predicted = sp.minimum(1-epsilon, predicted)
    ll = sum(actual*sp.log(predicted) + sp.subtract(1,actual)*sp.log(sp.subtract(1,predicted)))
    ll = ll * -1.0/len(actual)
    return ll 
Example #3
Source File: data.py    From tensorflow-DSMM with MIT License 6 votes vote down vote up
def load_question(params):
    df = pd.read_csv(config.QUESTION_FILE)
    df["words"] = df.words.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    df["chars"] = df.chars.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    Q = {}
    Q["seq_len_word"] = sp.minimum(df["words"].apply(len).values, params["max_seq_len_word"])
    Q["seq_len_char"] = sp.minimum(df["chars"].apply(len).values, params["max_seq_len_char"])
    Q["words"] = pad_sequences(df["words"],
                               maxlen=params["max_seq_len_word"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_WORD)
    Q["chars"] = pad_sequences(df["chars"],
                               maxlen=params["max_seq_len_char"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_CHAR)
    return Q 
Example #4
Source File: np_utils.py    From CopyNet with MIT License 5 votes vote down vote up
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #5
Source File: np_utils.py    From CAPTCHA-breaking with MIT License 5 votes vote down vote up
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #6
Source File: classify_nodes.py    From PyTorch-Luna16 with Apache License 2.0 5 votes vote down vote up
def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1-epsilon, pred)
    ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred)))
    ll = ll * -1.0/len(act)
    return ll 
Example #7
Source File: ml.py    From kaggle_avazu_benchmark with Apache License 2.0 5 votes vote down vote up
def llfun(act, pred):
    p_true = pred[:, 1]
    epsilon = 1e-15
    p_true = sp.maximum(epsilon, p_true)
    p_true = sp.minimum(1 - epsilon, p_true)
    ll = sum(act * sp.log(p_true) + sp.subtract(1, act) * sp.log(sp.subtract(1, p_true)))
    ll = ll * -1.0 / len(act)
    return ll 
Example #8
Source File: libscores.py    From AutoML with MIT License 5 votes vote down vote up
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing i to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #9
Source File: np_utils.py    From KerasNeuralFingerprint with MIT License 5 votes vote down vote up
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res 
Example #10
Source File: libscores.py    From automl_gpu with MIT License 5 votes vote down vote up
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing i to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #11
Source File: connectivity.py    From dynamo-release with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def mnn_from_list(knn_graph_list):
    """Apply reduce function to calculate the mutual kNN.
    """
    import functools

    mnn = (
        functools.reduce(scipy.sparse.csr.csr_matrix.minimum, knn_graph_list)
        if issparse(knn_graph_list[0])
        else functools.reduce(scipy.minimum, knn_graph_list)
    )

    return mnn 
Example #12
Source File: log_loss.py    From classifier-calibration with MIT License 5 votes vote down vote up
def log_loss( act, pred ):
	epsilon = 1e-15
	pred = sp.maximum(epsilon, pred)
	pred = sp.minimum(1-epsilon, pred)
	ll = sum(act*sp.log(pred) + sp.subtract(1,act)*sp.log(sp.subtract(1,pred)))
	ll = ll * -1.0/len(act)
	return ll 
Example #13
Source File: utils.py    From pCVR with Apache License 2.0 5 votes vote down vote up
def logloss(act, pred):
    '''
    官方给的损失函数
    :param act: 
    :param pred: 
    :return: 
    '''
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll 
Example #14
Source File: utils.py    From pCVR with Apache License 2.0 5 votes vote down vote up
def my_logloss(act, pred):
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    ll = ll * -1.0 / K.shape(act)[0]

    return ll 
Example #15
Source File: optics.py    From REDPy with GNU General Public License v3.0 5 votes vote down vote up
def set_reach_dist(SetOfObjects, point_index, epsilon):

    """
    Sets reachability distance and ordering. This function is the primary workhorse of
    the OPTICS algorithm.
    
    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time. (float)

    """
    
    row = [SetOfObjects.data[point_index,:]]
    indices = np.argsort(row)
    distances = np.sort(row)

    if scipy.iterable(distances):

        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)

        if unprocessed.size > 0:
            return unprocessed[np.argsort(np.array(SetOfObjects._reachability[
                unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index 
Example #16
Source File: metric.py    From mljar-supervised with MIT License 5 votes vote down vote up
def logloss(y_true, y_predicted):
    epsilon = 1e-6
    y_predicted = sp.maximum(epsilon, y_predicted)
    y_predicted = sp.minimum(1 - epsilon, y_predicted)
    ll = log_loss(y_true, y_predicted)
    return ll 
Example #17
Source File: libscores.py    From automl-phase-2 with MIT License 5 votes vote down vote up
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    
    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    
    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing i to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss 
Example #18
Source File: py_lh_20Sep2014.py    From Predict-click-through-rates-on-display-ads with MIT License 5 votes vote down vote up
def logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    ll = sum(y*sp.log(p) + sp.subtract(1,y)*sp.log(sp.subtract(1,p)))
    ll = ll * -1.0/len(y)
    return ll

# B. Apply hash trick of the original csv row
# for simplicity, we treat both integer and categorical features as categorical
# INPUT:
#     csv_row: a csv dictionary, ex: {'Lable': '1', 'I1': '357', 'I2': '', ...}
#     D: the max index that we can hash to
# OUTPUT:
#     x: a list of indices that its value is 1 
Example #19
Source File: np_utils.py    From seq2seq-keyphrase with MIT License 5 votes vote down vote up
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1-epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0/len(y)
    return res