Python sklearn.metrics.log_loss() Examples

The following are 30 code examples showing how to use sklearn.metrics.log_loss(). They are extracted from open source projects; the project, author, file, and license are noted above each example.

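Before the project examples, here is a minimal, self-contained sketch of the API itself, using made-up toy data: log_loss(y_true, y_pred) takes true labels and predicted probabilities, and for multiclass input it expects one probability column per class.

import numpy as np
from sklearn.metrics import log_loss

# binary: y_pred holds the probability of the positive class
print(log_loss([0, 1, 1, 0], [0.1, 0.9, 0.8, 0.3]))

# multiclass: one probability column per class, rows summing to 1
y_true = [0, 2, 1]
y_prob = np.array([[0.8, 0.1, 0.1],
                   [0.2, 0.2, 0.6],
                   [0.1, 0.7, 0.2]])
print(log_loss(y_true, y_prob, labels=[0, 1, 2]))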

Example 1
Project: edge2vec   Author: RoyZhengGao   File: multi_class_classification.py    License: BSD 3-Clause "New" or "Revised" License
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''

    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    # jaccard_similarity_score was renamed to jaccard_score in scikit-learn 0.21
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example 2
Project: edge2vec   Author: RoyZhengGao   File: link_prediction.py    License: BSD 3-Clause "New" or "Revised" License
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    # log_loss and roc_auc_score expect probabilities, not hard labels; see the note below
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example 3
Project: rosetta_recsys2019   Author: rosetta-ai   File: run_lgb.py    License: Apache License 2.0
def evaluate(val_df, clf):
    incorrect_session = {}
    val_df['scores'] = clf.predict(val_df.drop(data_drop_columns, axis=1))

    loss = log_loss(val_df.label.values, val_df.scores.values)
    grouped_val = val_df.groupby('session_id')
    rss_group = {i:[] for i in range(1,26)}
    rss = []
    for session_id, group in grouped_val:

        scores = group.scores
        sorted_arg = np.flip(np.argsort(scores))
        rss.append( group['label'].values[sorted_arg])
        rss_group[len(group)].append(group['label'].values[sorted_arg])
        if group['label'].values[sorted_arg][0] != 1:
            incorrect_session[session_id] = (sorted_arg.values, group['label'].values[sorted_arg])
    mrr = compute_mean_reciprocal_rank(rss)
    mrr_group = {i:(len(rss_group[i]), compute_mean_reciprocal_rank(rss_group[i])) for i in range(1,26)}
    print(mrr_group)
    if not configuration.debug:
        pickle.dump( incorrect_session, open(f'../output/{model_name}_val_incorrect_order.p','wb'))
    return mrr, mrr_group, loss 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_logistic.py    License: MIT License
def test_logreg_predict_proba_multinomial():
    X, y = make_classification(n_samples=10, n_features=20, random_state=0,
                               n_classes=3, n_informative=10)

    # Predicted probabilities using the true-entropy loss should give a
    # smaller loss than those using the ovr method.
    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert_greater(clf_ovr_loss, clf_multi_loss)

    # Predicted probabilities using the soft-max function should give a
    # smaller loss than those using the logistic function.
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert_greater(clf_wrong_loss, clf_multi_loss) 
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_logistic.py    License: MIT License
def test_warm_start_converge_LR():
    # Test that logistic regression converges on warm start, with
    # multi_class='multinomial'. Non-regression test for #10836

    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = np.array([1] * 100 + [-1] * 100)
    lr_no_ws = LogisticRegression(multi_class='multinomial',
                                  solver='sag', warm_start=False,
                                  random_state=0)
    lr_ws = LogisticRegression(multi_class='multinomial',
                               solver='sag', warm_start=True,
                               random_state=0)

    lr_no_ws_loss = log_loss(y, lr_no_ws.fit(X, y).predict_proba(X))
    for i in range(5):
        lr_ws.fit(X, y)
    lr_ws_loss = log_loss(y, lr_ws.predict_proba(X))
    assert_allclose(lr_no_ws_loss, lr_ws_loss, rtol=1e-5) 
Example 6
Project: FATE   Author: FederatedAI   File: hetero_stepwise.py    License: Apache License 2.0
def get_intercept_loss(self, model, data):
        y = np.array([x[1] for x in data.mapValues(lambda v: v.label).collect()])
        X = np.ones((len(y), 1))
        if model.model_name == 'HeteroLinearRegression' or model.model_name == 'HeteroPoissonRegression':
            intercept_model = LinearRegression(fit_intercept=False)
            trained_model = intercept_model.fit(X, y)
            pred = trained_model.predict(X)
            loss = metrics.mean_squared_error(y, pred) / 2
        elif model.model_name == 'HeteroLogisticRegression':
            intercept_model = LogisticRegression(penalty='l1', C=1e8, fit_intercept=False, solver='liblinear')
            trained_model = intercept_model.fit(X, y)
            pred = trained_model.predict(X)
            loss = metrics.log_loss(y, pred)
        else:
            raise ValueError("Unknown model received. Stepwise stopped.")
        self.intercept = intercept_model.intercept_
        return loss 
Example 7
Project: toxic_comments   Author: Donskov7   File: metrics.py    License: MIT License
def calc_metrics(y_true, y_hat, max_steps=1000):
    y_true = np.array(y_true)
    y_hat = np.array(y_hat)
    metrics = {}
    metrics['Logloss'] = float(log_loss(y_true, y_hat))
    metrics['AUC'] = roc_auc_score(y_true, y_hat)
    metrics['F1'] = []
    metrics['Precision'] = []
    metrics['Recall'] = []
    for i in range(1, max_steps):
        threshold = float(i) / max_steps
        y_tmp = y_hat > threshold
        metrics['F1'].append(f1_score(y_true, y_tmp))
        metrics['Precision'].append(precision_score(y_true, y_tmp))
        metrics['Recall'].append(recall_score(y_true, y_tmp))
    max_idx = np.argmax(metrics['F1'])
    metrics['F1'] = metrics['F1'][max_idx]
    metrics['Precision'] = metrics['Precision'][max_idx]
    metrics['Recall'] = metrics['Recall'][max_idx]
    metrics['Threshold'] = float(max_idx + 1) / max_steps
    return metrics 
Example 8
Project: devol   Author: joeddav   File: devol.py    License: MIT License
def _handle_broken_model(self, model, error):
        del model

        n = self.genome_handler.n_classes
        loss = log_loss(np.concatenate(([1], np.zeros(n - 1))), np.ones(n) / n)
        accuracy = 1 / n
        gc.collect()

        if K.backend() == 'tensorflow':
            K.clear_session()
            tf.reset_default_graph()

        print('An error occurred and the model could not train:')
        print(error)
        print(('Model assigned poor score. Please ensure that your model '
               'constraints live within your computational resources.'))
        return loss, accuracy 
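A side note on the fallback loss above: because 1-D arrays are passed, log_loss treats the input as n binary samples, so the value differs from the ln(n) one might expect from the categorical cross-entropy of a uniform guess. The single-sample, n-class form of that "worst reasonable" score looks like this (a minimal sketch, toy n assumed):

import math
import numpy as np
from sklearn.metrics import log_loss

n = 10
# one sample with true class 0 and a uniform predicted distribution over n classes
uniform_loss = log_loss([0], np.ones((1, n)) / n, labels=list(range(n)))
print(math.isclose(uniform_loss, math.log(n)))  # True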
Example 9
Project: kagglebook   Author: ghmagazine   File: ch06-06-wrapper.py    License: BSD 3-Clause "New" or "Revised" License
def evaluate(features):
    dtrain = xgb.DMatrix(tr_x[features], label=tr_y)
    dvalid = xgb.DMatrix(va_x[features], label=va_y)
    params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
    num_round = 10  # in practice, many more boosting rounds are needed
    early_stopping_rounds = 3
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    model = xgb.train(params, dtrain, num_round,
                      evals=watchlist, early_stopping_rounds=early_stopping_rounds,
                      verbose_eval=0)
    va_pred = model.predict(dvalid)
    score = log_loss(va_y, va_pred)

    return score


# ---------------------------------
# Greedy Forward Selection
# ---------------------------------- 
Example 10
Project: kagglebook   Author: ghmagazine   File: ch06-03-hopt_nn.py    License: BSD 3-Clause "New" or "Revised" License
def score(params):
    # Define the function to be minimized for a given set of parameters.
    # For hyperparameter search, this is the score obtained by training the
    # model with those parameters and evaluating its predictions.
    model = MLP(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # record the trial
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}


# run the parameter search with hyperopt
Example 11
Project: kagglebook   Author: ghmagazine   File: ch06-01-hopt.py    License: BSD 3-Clause "New" or "Revised" License
def score(params):
    # Define the evaluation metric to be minimized for a given set of parameters.
    # Concretely, return the score obtained by training the model with those
    # parameters and evaluating its predictions.

    # cast max_depth to int
    params['max_depth'] = int(params['max_depth'])

    # A Model class is assumed to be defined elsewhere: it trains with fit
    # and outputs predicted probabilities with predict.
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # record the trial
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}


# define the parameter space to search
Example 12
Project: kaggle_otto   Author: ahara   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def make_blender_cv(classifier, x, y, calibrate=False):
    # note: this is the pre-0.18 sklearn cross-validation API; current versions
    # use StratifiedKFold(n_splits=5).split(x, y) instead
    skf = StratifiedKFold(y, n_folds=5, random_state=23)
    scores, predictions = [], None
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic', cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(x[train_index, :], y[train_index])
        else:
            fitted_classifier = classifier.fit(x[train_index, :], y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        predictions = np.append(predictions, preds, axis=0) if predictions is not None else preds
    return scores, predictions 
Example 13
Project: stacking   Author: ikki407   File: base.py    License: MIT License
def eval_pred(y_true, y_pred, eval_type):
    # ll and AUC are presumably module-level aliases for sklearn's log_loss
    # and roc_auc_score in the source file
    if eval_type == 'logloss':  # add new eval_type options here
        loss = ll(y_true, y_pred)
        print("logloss: ", loss)
        return loss

    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print("AUC: ", loss)
        return loss

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print("rmse: ", loss)
        return loss




######### BaseModel Class ######### 
Example 14
Project: Kaggler   Author: jeongyoonlee   File: classification.py    License: MIT License
def logloss(y, p):
    """Bounded log loss error.

    Args:
        y (numpy.array): target
        p (numpy.array): prediction

    Returns:
        bounded log loss error
    """

    p[p < EPS] = EPS
    p[p > 1 - EPS] = 1 - EPS
    return log_loss(y, p) 
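The two masked assignments above bound p away from exactly 0 and 1, where log loss diverges; np.clip expresses the same operation in one call. A minimal sketch, with an assumed EPS (the source module defines its own constant):

import numpy as np

EPS = 1e-15  # assumed value for illustration

p = np.array([0.0, 0.3, 1.0])
p_bounded = np.clip(p, EPS, 1 - EPS)  # same effect as the two masked assignments
print(p_bounded)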
Example 15
Project: aboleth   Author: gradientinstitute   File: classification.py    License: Apache License 2.0
def print_k_result(ys, Ep, ll, acc, name):
    acc.append(accuracy_score(ys, Ep.argmax(axis=1)))
    ll.append(log_loss(ys, Ep))
    print("{}: accuracy = {:.4g}, log-loss = {:.4g}"
          .format(name, acc[-1], ll[-1])) 
Example 16
Project: automl_comparison   Author: mljar   File: compute.py    License: Apache License 2.0
def compute(package, dataset_id, seed):
    try:
        df = pd.read_csv('./data/{0}.csv'.format(dataset_id))
        x_cols = [c for c in df.columns if c != 'target']
        X = df[x_cols]
        y = df['target']

        X_train, X_test, y_train, y_test = \
            sklearn.model_selection.train_test_split(X, y, test_size = 0.3, random_state=seed)

        response = None
        if package == 'auto-sklearn':
            response = autosklearn_compute(X_train, y_train, X_test)
        elif package == 'h2o':
            response = h2o_compute(X_train, y_train, X_test)
        elif package == 'mljar':
            response = mljar_compute(X_train, y_train, X_test, dataset_id, seed)

        # Compute the logloss on test dataset
        ll = log_loss(y_test, response)

        with open('all_results.csv', 'a') as fout:
            # newline added so successive runs do not append onto one row
            fout.write('{0}, {1}, {2}, {3}\n'.format(package, dataset_id, seed, ll))

    except Exception as e:
        print('Exception:', str(e))
Example 17
Project: fairtest   Author: columbia   File: logit.py    License: Apache License 2.0
def train_and_test_model1(features_train, labels_train, features_test,
                          labels_test, features_test_original):
    model = LogisticRegression()
    model.fit(features_train, labels_train)
    # DataFrame.as_matrix() was removed in pandas 1.0; with newer pandas use .to_numpy()
    print("train", log_loss(labels_train, model.predict_proba(features_train.as_matrix())))

    cat_indexes = labels_test.cat.codes
    predict_probas = model.predict_proba(features_test.as_matrix())

    sumloss = .0
    losses = []
    for i in range(predict_probas.shape[0]):
      loss = (-1) * sp.log(max(min(predict_probas[i][cat_indexes[i]], 1 - 10**(-5)), 10**(-5)))
      sumloss += loss
      losses.append(loss)

    feature_list = features_test_original.columns.tolist()
    for feature in ["X","Y", "ZipCode", "Address", "Resolution", "Description",
                    "Dates", "Time", "Category", "Descript"]:
        if feature in feature_list:
            feature_list.remove(feature)
    feature_list_original  = ["X","Y", "ZipCode", "Address", "Resolution",
                              "Description", "Dates", "Time", "Category",
                              "Descript"]
    features_test_original = features_test_original[feature_list]
    print("Test Loss: %.5f" % (sumloss / predict_probas.shape[0]))
    print("test: %.5f" % log_loss(labels_test, model.predict_proba(features_test.as_matrix()))) 
Example 18
Project: gap   Author: sattree   File: exec.py    License: MIT License
def acc_and_f1(preds, y_true, label_list):
    label_list = [0, 1, 2]  # note: overrides the label_list argument
    acc = simple_accuracy(np.argmax(preds, axis=-1), y_true)
    f1 = f1_score(y_true=y_true, y_pred=np.argmax(preds, axis=-1), average='micro', labels=label_list)
    return {
        "acc": acc,
        "f1": f1,
        "acc_and_f1": (acc + f1) / 2,
        "log_loss": log_loss(y_true=y_true, y_pred=preds, labels=label_list),
    } 
Example 19
Project: gap   Author: sattree   File: mean_pool_model.py    License: MIT License
def train_evaluate(self, 
                       X, 
                       X_val=None, 
                       X_tst=None, 
                       batch_size=32, 
                       verbose=1,
                       return_probs=False,
                       n_trials=None,
                       **parameters):
        
        self.init_graph(X, batch_size, device='gpu', **parameters)
        
        self.fit(X, 
                 X_val=X_val, 
                 verbose=verbose, 
                 batch_size=batch_size, 
                 **parameters)
        
        _, y_true_val, probs_val = self.predict(X_val, batch_size, verbose=verbose, **parameters)
        
        probs_tst = None
        if X_tst is not None:
            _, y_true_tst, probs_tst = self.predict(X_tst, batch_size, verbose=verbose, **parameters)
           
        if verbose:
            print('Validation score: ', self.best_score)
            if X_tst is not None:
                print('Test score: ', log_loss(y_true_tst, probs_tst))
        
        if return_probs:
            return AttrDict(locals())
        
        return -self.best_score 
Example 20
Project: gap   Author: sattree   File: mean_pool_model.py    License: MIT License
def repeated_cv(self, 
                    X, 
                    X_val=None, 
                    X_tst=None, 
                    n_trials=5,
                    seed=None,
                    return_probs=True, 
                    **kwargs):
        
        if seed is None:
            seed = [seed]*n_trials
        probs = []
        probs_raw = []
        scores = []
        for i in range(n_trials):
            start = timer()
            res = self.train_evaluate_cv(X, 
                                         X_val, 
                                         X_tst,
                                         seed=seed[i],
                                         return_probs=True, 
                                         **kwargs)
            probs.append(res.probs)
            probs_raw += res.probs_raw
            y_true = res.y_true
            scores.append(res.score)
            if kwargs.get('verbose'):  # 'verbose' arrives via **kwargs; a bare name here would raise NameError
                print('Trial {} done in {}'.format(i, timer()-start))
            start = timer()
        probs = np.mean(probs, axis=0)
        
        if return_probs:
            print('Repeated bag scores: ', scores)
            print('Repeated bag mean: {} +/- {}'.format(np.mean(scores), np.std(scores)))
            print('CV Bagged score: ', log_loss(y_true, probs))
            return AttrDict(locals())
        
        return -log_loss(y_true, probs) 
Example 21
Project: gap   Author: sattree   File: fit_fold.py    License: MIT License
def fit_fold(fold_n, ckpt, model, X_trn, X_val, X_tst, batch_size, verbose, seed, parameters):
    start = timer()

    model = model(X_trn, ckpt, device='GPU:0', use_pretrained=True, use_swa=True, seed=seed, **parameters)
    model.fit(X_trn, X_val=X_val, verbose=verbose, batch_size=batch_size, use_swa=True, seed=seed)

    _, y_true_tst, probs_tst = model.predict(X_tst, batch_size, verbose=verbose, seed=seed, **parameters)

    tst_score = log_loss(y_true_tst, probs_tst)
    if verbose:
        print('Fold {} done in {}s. Test score - {}'.format(fold_n, int(timer()-start), tst_score))

    return model.best_score, model.best_score_epoch, y_true_tst, probs_tst, tst_score
Example 22
Project: gap   Author: sattree   File: score.py    License: MIT License
def get_score(probs, data):
    y_true = data['label']
    return round(log_loss(y_true, probs[:len(y_true), :])*100, 3) 
Example 23
Project: Very-deep-cnn-pytorch   Author: uvipen   File: utils.py    License: MIT License
def get_evaluation(y_true, y_prob, list_metrics):
    y_pred = np.argmax(y_prob, -1)
    output = {}
    if 'accuracy' in list_metrics:
        output['accuracy'] = metrics.accuracy_score(y_true, y_pred)
    if 'loss' in list_metrics:
        try:
            output['loss'] = metrics.log_loss(y_true, y_prob)
        except ValueError:
            output['loss'] = -1
    if 'confusion_matrix' in list_metrics:
        output['confusion_matrix'] = str(metrics.confusion_matrix(y_true, y_pred))
    return output 
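The try/except above handles the ValueError that log_loss raises when y_true does not contain every class that y_prob has a column for, which is common with small evaluation batches. Passing the full label set explicitly avoids the exception; a minimal sketch, with the class count assumed (in the snippet above it would be y_prob.shape[1]):

import numpy as np
from sklearn.metrics import log_loss

n_classes = 4  # assumed for illustration
y_true = [0, 2, 2]                      # classes 1 and 3 absent from this batch
y_prob = np.full((3, n_classes), 0.25)  # uniform probabilities for illustration
print(log_loss(y_true, y_prob, labels=list(range(n_classes))))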
Example 24
Project: nyaggle   Author: nyanp   File: run.py    License: MIT License
def _dispatch_eval_func(target_type: str, custom_eval: Optional[Callable] = None):
    default_eval_func = {
        'binary': roc_auc_score,
        'multiclass': log_loss,
        'continuous': mean_squared_error
    }
    return custom_eval if custom_eval is not None else default_eval_func[target_type] 
Example 25
Project: nyaggle   Author: nyanp   File: test_run.py    License: MIT License
def test_experiment_sample_submission_multiclass(tmpdir_name):
    X, y = make_classification_df(n_classes=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    sample_df = pd.DataFrame()
    sample_df['target_id_abc'] = np.arange(len(y_test)) + 10000
    for i in range(5):
        sample_df['target_class_{}'.format(i)] = 0

    params = {
        'objective': 'multiclass',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, sample_submission=sample_df)

    assert list(result.submission_df.columns) == ['target_id_abc',
                                                  'target_class_0',
                                                  'target_class_1',
                                                  'target_class_2',
                                                  'target_class_3',
                                                  'target_class_4'
                                                  ]
    log_loss_trained = log_loss(y_test, result.submission_df.drop('target_id_abc', axis=1), labels=[0, 1, 2, 3, 4])
    log_loss_default = log_loss(y_test, np.full((len(y_test), 5), 0.2), labels=[0, 1, 2, 3, 4])
    assert log_loss_trained < log_loss_default
Example 26
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_classification.py    License: MIT License
def test_log_loss_pandas_input():
    # case when input is a pandas series and dataframe gh-5715
    y_tr = np.array(["ham", "spam", "spam", "ham"])
    # note: rows of y_pr do not sum to 1; log_loss renormalizes them, and the
    # expected value below relies on that renormalization
    y_pr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]])
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TrueInputType, PredInputType in types:
        # y_pred dataframe, y_true series
        y_true, y_pred = TrueInputType(y_tr), PredInputType(y_pr)
        loss = log_loss(y_true, y_pred)
        assert_almost_equal(loss, 1.0383217, decimal=6) 
Example 27
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_logistic.py    License: MIT License
def test_LogisticRegression_elastic_net_objective(C, l1_ratio):
    # Check that training with a penalty matching the objective leads
    # to a lower objective.
    # Here we train a logistic regression with l2 (a) and elasticnet (b)
    # penalties, and compute the elasticnet objective. That of a should be
    # greater than that of b (both objectives are convex).
    X, y = make_classification(n_samples=1000, n_classes=2, n_features=20,
                               n_informative=10, n_redundant=0,
                               n_repeated=0, random_state=0)
    X = scale(X)

    lr_enet = LogisticRegression(penalty='elasticnet', solver='saga',
                                 random_state=0, C=C, l1_ratio=l1_ratio,
                                 fit_intercept=False)
    lr_l2 = LogisticRegression(penalty='l2', solver='saga', random_state=0,
                               C=C, fit_intercept=False)
    lr_enet.fit(X, y)
    lr_l2.fit(X, y)

    def enet_objective(lr):
        coef = lr.coef_.ravel()
        obj = C * log_loss(y, lr.predict_proba(X))
        obj += l1_ratio * np.sum(np.abs(coef))
        obj += (1. - l1_ratio) * 0.5 * np.dot(coef, coef)
        return obj

    assert enet_objective(lr_enet) < enet_objective(lr_l2) 
Example 28
Project: AIAlpha   Author: VivekPa   File: rfmodel.py    License: MIT License
def test_model(self, x, y, sample_weights=None):
        # model_acc = self.model.score(x, y, sample_weight=sample_weights)

        # zeros_count = y['y_values'].value_counts().loc[0]
        # null_acc = zeros_count/len(y)
        
        y_true = pd.DataFrame(index=y.index)
        y_true.loc[y['y_values'] == 1, 'up'] = 1
        y_true.loc[y['y_values'] == -1, 'down'] = 1
        y_true.loc[y['y_values'] == 0, 'no_ch'] = 1
        y_true = y_true.fillna(0)    

        y_pred = self.model.predict_proba(x)
        model_loss = log_loss(y_true, y_pred, sample_weight=sample_weights)

        base_case = pd.DataFrame(index=y.index)
        base_case['up'] = np.zeros(len(y))
        base_case['down'] = np.zeros(len(y))
        base_case['no_ch'] = np.ones(len(y))

        base_loss = log_loss(y_true, base_case)

        # print(f'Model accuracy: {model_acc}')
        # print(f'Null accuracy: {null_acc}')
        print(f'Model log loss: {model_loss}')
        print(f'Base log loss: {base_loss}') 
Example 29
Project: ibeis   Author: Erotemic   File: testem.py    License: Apache License 2.0
def try_rf_classifier():
    # TODO: Evaluate TPOT
    # http://www.randalolson.com/2016/05/08/tpot-a-python-tool-for-automating-data-science/
    # https://www.reddit.com/r/MachineLearning/comments/4ij8dw/tpot_a_python_tool_for_automating_machine_learning/
    # http://keras.io/ --- unifies tensorflow / theano
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.metrics import log_loss

    # http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_multiclass.html
    pairwise_feats, labels = random_case_set()
    X = pairwise_feats
    y = labels
    X_train, y_train = X[:600], y[:600]
    X_valid, y_valid = X[600:800], y[600:800]
    X_train_valid, y_train_valid = X[:800], y[:800]
    X_test, y_test = X[800:], y[800:]

    # Train uncalibrated random forest classifier on whole train and validation
    # data and evaluate on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train_valid, y_train_valid)
    clf_probs = clf.predict_proba(X_test)
    score = log_loss(y_test, clf_probs)
    print('score = %r' % (score,))

    # Train random forest classifier, calibrate on validation data and evaluate
    # on test data
    clf = RandomForestClassifier(n_estimators=25)
    clf.fit(X_train, y_train)
    clf_probs = clf.predict_proba(X_test)
    sig_clf = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
    sig_clf.fit(X_valid, y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test)
    sig_score = log_loss(y_test, sig_clf_probs)
    print('sig_score = %r' % (sig_score,)) 
Example 30
Project: Recommender-Systems-Samples   Author: wyl6   File: nfm.py    License: MIT License
def evaluate(self, feat_index, feat_val, label):
        y_pred = self.predict(feat_index, feat_val)
        print(type(y_pred), type(label))
        if self.metric_type == 'auc':
            return roc_auc_score(label, y_pred)
        elif self.metric_type == 'logloss':
            return log_loss(label, y_pred)
        elif self.metric_type == 'acc':
            return accuracy_score(label, (y_pred > 0.5).astype('int32'))