Python sklearn.metrics.log_loss() Examples

The following are 30 code examples of sklearn.metrics.log_loss(), drawn from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.
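For quick reference, a minimal self-contained call looks like this (a toy sketch, not taken from any of the projects below):

from sklearn.metrics import log_loss

y_true = [0, 1, 1, 0]
# log_loss expects predicted probabilities, not hard class labels
y_proba = [0.1, 0.9, 0.8, 0.3]
print(log_loss(y_true, y_proba))  # mean negative log-likelihood, about 0.198 here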
Example #1
Source File: multi_class_classification.py    From edge2vec with BSD 3-Clause "New" or "Revised" License
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_predict

def multi_class_classification(data_X, data_Y):
    '''
    Fit a multi-class SVM and print the related evaluation metrics.
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # log_loss needs predicted probabilities rather than the hard labels
    # returned by cross_val_predict, so it is left disabled here:
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example #2
Source File: link_prediction.py    From edge2vec with BSD 3-Clause "New" or "Revised" License
def evaluation_analysis(true_label, predicted):
    '''
    Print all metrics results.
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example #3
Source File: test_logistic.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_warm_start_converge_LR():
    # Test that logistic regression converges on warm start,
    # with multi_class='multinomial'. Non-regression test for #10836

    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = np.array([1] * 100 + [-1] * 100)
    lr_no_ws = LogisticRegression(multi_class='multinomial',
                                  solver='sag', warm_start=False,
                                  random_state=0)
    lr_ws = LogisticRegression(multi_class='multinomial',
                               solver='sag', warm_start=True,
                               random_state=0)

    lr_no_ws_loss = log_loss(y, lr_no_ws.fit(X, y).predict_proba(X))
    for i in range(5):
        lr_ws.fit(X, y)
    lr_ws_loss = log_loss(y, lr_ws.predict_proba(X))
    assert_allclose(lr_no_ws_loss, lr_ws_loss, rtol=1e-5) 
Example #4
Source File: test_logistic.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_logreg_predict_proba_multinomial():
    X, y = make_classification(n_samples=10, n_features=20, random_state=0,
                               n_classes=3, n_informative=10)

    # Predicted probabilities using the multinomial (cross-entropy) loss should
    # give a smaller loss than those using the ovr method.
    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert_greater(clf_ovr_loss, clf_multi_loss)

    # Predicted probabilities using the soft-max function should give a
    # smaller loss than those using the logistic function.
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert_greater(clf_wrong_loss, clf_multi_loss) 
Example #5
Source File: base.py    From stacking with MIT License
def eval_pred(y_true, y_pred, eval_type):
    # ll and AUC are aliases for sklearn.metrics.log_loss and roc_auc_score
    if eval_type == 'logloss':  # add new eval_type options here
        loss = ll(y_true, y_pred)
        print("logloss: ", loss)
        return loss

    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print("AUC: ", loss)
        return loss

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print("rmse: ", loss)
        return loss




######### BaseModel Class ######### 
Example #6
Source File: run_lgb.py    From rosetta_recsys2019 with Apache License 2.0
def evaluate(val_df, clf):
    incorrect_session = {}
    val_df['scores'] = clf.predict(val_df.drop(data_drop_columns, axis=1))

    loss = log_loss(val_df.label.values, val_df.scores.values)
    grouped_val = val_df.groupby('session_id')
    rss_group = {i:[] for i in range(1,26)}
    rss = []
    for session_id, group in grouped_val:

        scores = group.scores
        sorted_arg = np.flip(np.argsort(scores))
        rss.append( group['label'].values[sorted_arg])
        rss_group[len(group)].append(group['label'].values[sorted_arg])
        if group['label'].values[sorted_arg][0] != 1:
            incorrect_session[session_id] = (sorted_arg.values, group['label'].values[sorted_arg])
    mrr = compute_mean_reciprocal_rank(rss)
    mrr_group = {i:(len(rss_group[i]), compute_mean_reciprocal_rank(rss_group[i])) for i in range(1,26)}
    print(mrr_group)
    if not configuration.debug:
        pickle.dump( incorrect_session, open(f'../output/{model_name}_val_incorrect_order.p','wb'))
    return mrr, mrr_group, loss 
Example #7
Source File: utils.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def make_blender_cv(classifier, x, y, calibrate=False):
    skf = StratifiedKFold(y, n_folds=5, random_state=23)  # pre-0.18 scikit-learn API
    scores, predictions = [], None
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic', cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(x[train_index, :], y[train_index])
        else:
            fitted_classifier = classifier.fit(x[train_index, :], y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        predictions = np.append(predictions, preds, axis=0) if predictions is not None else preds
    return scores, predictions 
Example #8
Source File: hetero_stepwise.py    From FATE with Apache License 2.0
def get_intercept_loss(self, model, data):
        y = np.array([x[1] for x in data.mapValues(lambda v: v.label).collect()])
        X = np.ones((len(y), 1))
        if model.model_name == 'HeteroLinearRegression' or model.model_name == 'HeteroPoissonRegression':
            intercept_model = LinearRegression(fit_intercept=False)
            trained_model = intercept_model.fit(X, y)
            pred = trained_model.predict(X)
            loss = metrics.mean_squared_error(y, pred) / 2
        elif model.model_name == 'HeteroLogisticRegression':
            intercept_model = LogisticRegression(penalty='l1', C=1e8, fit_intercept=False, solver='liblinear')
            trained_model = intercept_model.fit(X, y)
            pred = trained_model.predict(X)
            loss = metrics.log_loss(y, pred)
        else:
            raise ValueError("Unknown model received. Stepwise stopped.")
        self.intercept = intercept_model.intercept_
        return loss 
Example #9
Source File: metrics.py    From toxic_comments with MIT License
def calc_metrics(y_true, y_hat, max_steps=1000):
    y_true = np.array(y_true)
    y_hat = np.array(y_hat)
    metrics = {}
    metrics['Logloss'] = float(log_loss(y_true, y_hat))
    metrics['AUC'] = roc_auc_score(y_true, y_hat)
    metrics['F1'] = []
    metrics['Precision'] = []
    metrics['Recall'] = []
    for i in range(1, max_steps):
        threshold = float(i) / max_steps
        y_tmp = y_hat > threshold
        metrics['F1'].append(f1_score(y_true, y_tmp))
        metrics['Precision'].append(precision_score(y_true, y_tmp))
        metrics['Recall'].append(recall_score(y_true, y_tmp))
    max_idx = np.argmax(metrics['F1'])
    metrics['F1'] = metrics['F1'][max_idx]
    metrics['Precision'] = metrics['Precision'][max_idx]
    metrics['Recall'] = metrics['Recall'][max_idx]
    metrics['Threshold'] = float(max_idx + 1) / max_steps
    return metrics 
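A minimal usage sketch for calc_metrics (hypothetical arrays; the function sweeps max_steps - 1 thresholds and reports Precision, Recall and Threshold at the F1-optimal one):

y_true = [0, 1, 1, 0, 1, 0]
y_hat = [0.2, 0.7, 0.9, 0.4, 0.6, 0.1]
m = calc_metrics(y_true, y_hat, max_steps=100)
print(m['Logloss'], m['AUC'], m['F1'], m['Threshold'])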
Example #10
Source File: ch06-01-hopt.py    From kagglebook with BSD 3-Clause "New" or "Revised" License
def score(params):
    # Define the evaluation metric to minimize for a given set of parameters:
    # train and predict with a model configured with those parameters,
    # and return the resulting score

    # cast max_depth to an integer
    params['max_depth'] = int(params['max_depth'])

    # assumes a Model class is defined elsewhere:
    # it trains via fit and outputs predicted probabilities via predict
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # record the trial
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}


# specify the parameter space to search
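A sketch of the search-space definition and search run that the trailing comment introduces (the space below is illustrative rather than the book's actual choice; assumes hyperopt and the tr_x/va_x data referenced above):

import numpy as np
from hyperopt import STATUS_OK, fmin, hp, tpe

space = {
    'max_depth': hp.quniform('max_depth', 3, 9, 1),
    'min_child_weight': hp.loguniform('min_child_weight', np.log(0.1), np.log(10.0)),
}
history = []
best = fmin(score, space, algo=tpe.suggest, max_evals=10)
print(f'best params: {best}')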
Example #11
Source File: devol.py    From devol with MIT License
def _handle_broken_model(self, model, error):
        del model

        n = self.genome_handler.n_classes
        loss = log_loss(np.concatenate(([1], np.zeros(n - 1))), np.ones(n) / n)
        accuracy = 1 / n
        gc.collect()

        if K.backend() == 'tensorflow':
            K.clear_session()
            tf.reset_default_graph()

        print('An error occurred and the model could not train:')
        print(error)
        print(('Model assigned poor score. Please ensure that your model '
               'constraints live within your computational resources.'))
        return loss, accuracy 
Example #12
Source File: ch06-03-hopt_nn.py    From kagglebook with BSD 3-Clause "New" or "Revised" License
def score(params):
    # Define the function to minimize for a given parameter set:
    # for model parameter search, return the score obtained by training
    # and predicting with a model configured with those parameters
    model = MLP(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # record the trial
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}


# run the parameter search with hyperopt
Example #13
Source File: ch06-06-wrapper.py    From kagglebook with BSD 3-Clause "New" or "Revised" License
def evaluate(features):
    dtrain = xgb.DMatrix(tr_x[features], label=tr_y)
    dvalid = xgb.DMatrix(va_x[features], label=va_y)
    params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
    num_round = 10  # in practice, many more rounds are needed
    early_stopping_rounds = 3
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    model = xgb.train(params, dtrain, num_round,
                      evals=watchlist, early_stopping_rounds=early_stopping_rounds,
                      verbose_eval=0)
    va_pred = model.predict(dvalid)
    score = log_loss(va_y, va_pred)

    return score


# ---------------------------------
# Greedy Forward Selection
# ---------------------------------
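A minimal sketch of the greedy forward selection loop this banner introduces (hypothetical, not the book's exact code): starting from an empty set, repeatedly add the feature that lowers the evaluate() log loss the most, and stop when no candidate improves it.

best_score = float('inf')
selected = set()
while True:
    candidates = [f for f in tr_x.columns if f not in selected]
    if not candidates:
        break
    scores = {f: evaluate(list(selected) + [f]) for f in candidates}
    best_feature = min(scores, key=scores.get)
    if scores[best_feature] >= best_score:
        break
    selected.add(best_feature)
    best_score = scores[best_feature]
print(f'selected features: {sorted(selected)}')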
Example #14
Source File: stack.py    From kaggle-avito with MIT License
def cv_method():
    tr_X, tr_y_true, te_X, te_y_true = get_train_data()
    if "nn" in args.model:
        tr_X = np.array(tr_X).astype(np.float32)
        tr_y_true = np.array(tr_y_true).astype(np.int32)
        model = get_nn_model(tr_X.shape)
        model.fit(tr_X, tr_y_true)
        write_dump("%s_model.dump"%args.model, model)
        if te_X:
            te_X = np.array(te_X).astype(np.float32)
            preds = model.predict_proba(te_X)[:, 1]
            np.savetxt("nn_preds.txt", preds)
            print(log_loss(te_y_true, preds))
    elif "xgb" in args.model:
        dtrain = xgb.DMatrix(tr_X, label=tr_y_true)
        if args.predict == "cv":
            if te_X:
                dtest = xgb.DMatrix(te_X, label=te_y_true)
            param = {
                    'max_depth':3,
                    'eta':0.1,
                    'silent':1,
                    'objective':'binary:logistic',
                    "eval_metric": "logloss",
                    "nthread": 9,
                    }
            if te_X:
                watchlist  = [(dtrain,'train'), (dtest, "eval")]
            else:
                watchlist  = [(dtrain,'train'),]
            num_round = 132
            bst = xgb.train(param, dtrain, num_round, watchlist)
            bst.save_model("%s_model.dump"%args.model)
            if te_X:
                preds = bst.predict(dtest)
                np.savetxt("xgb_preds.txt", preds) 
Example #15
Source File: negative_log_likelihood.py    From ramp-workflow with BSD 3-Clause "New" or "Revised" License
def __call__(self, y_true_proba, y_proba):
        score = log_loss(y_true_proba, y_proba)
        return score 
Example #16
Source File: model.py    From RecommenderSystems with MIT License
def evaluate(self, Xi, Xv, Xi_genre, Xv_genre, y):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :param y: label of each sample in the dataset
        :return: metric of the evaluation
        """
        y_pred = self.predict(Xi, Xv, Xi_genre, Xv_genre)
        y_pred = np.clip(y_pred,1e-6,1-1e-6)
        return self.eval_metric(y, y_pred), log_loss(y, y_pred) 
Example #17
Source File: perturb_importance.py    From jh-kaggle-util with Apache License 2.0
def calculate_importance_perturb(model):
  fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']

  x = jhkaggle.util.load_pandas("train-joined-{}.pkl".format(model.data_source))

  mask_test = np.array(x['fold'] == 1)
  x = x[mask_test]

  x.drop("id",axis=1,inplace=True)
  x.drop("fold",axis=1,inplace=True)
  y = x['target']
  x.drop("target",axis=1,inplace=True)
  columns = x.columns
  x = x.values

  errors = []

  for i in tqdm(range(x.shape[1])):
    # shuffle column i in place, score the perturbed data, then restore it
    hold = np.array(x[:, i])
    np.random.shuffle(x[:, i])
    
    pred = model.predict_model(model.model,x)
    if fit_type == jhkaggle.const.FIT_TYPE_REGRESSION:
        error = metrics.mean_squared_error(y, pred)
    else:
        error = metrics.log_loss(y, pred)
        
    errors.append(error)
    x[:, i] = hold
    
  max_error = np.max(errors)
  importance = [e/max_error for e in errors]

  data = {'name':columns,'error':errors,'importance':importance}
  result = pd.DataFrame(data, columns = ['name','error','importance'])
  result.sort_values(by=['importance'], ascending=[0], inplace=True)
  result.reset_index(inplace=True, drop=True)
  return result 
Example #18
Source File: utils.py    From open-solution-toxic-comments with MIT License
def multi_log_loss(y_true, y_pred):
    assert y_true.shape == y_pred.shape
    columns = y_true.shape[1]
    column_losses = []
    for i in range(0, columns):
        column_losses.append(log_loss(y_true[:, i], y_pred[:, i]))
    return np.array(column_losses).mean() 
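A usage sketch for multi_log_loss (hypothetical arrays): each column is one binary label, and the result is the mean of the per-column log losses.

import numpy as np

y_true = np.array([[1, 0], [0, 1], [1, 1]])
y_pred = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])
print(multi_log_loss(y_true, y_pred))  # mean of the two column-wise log losses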
Example #19
Source File: util.py    From jh-kaggle-util with Apache License 2.0
def _run_single(self):
        print("Training data: X_train: {}, Y_train: {}, X_test: {}".format(self.x_train.shape, len(self.y_train),
                                                                               self.x_submit.shape))
        self.model = self.train_model(self.x_train, self.y_train, None, None)

#        if not self.run_single_fold:
#            self.preds_oos = self.predict_model(self.model, self.x_train)

        #score = 0 #log_loss(fold_y_valid, self.preds_oos)

        #self.final_preds_train = self.preds_oos
        self.final_preds_submit = self.predict_model(self.model, self.x_submit)
        self.pred_denom = 1 
Example #20
Source File: util.py    From jh-kaggle-util with Apache License 2.0
def model_score(y_pred,y_valid):
    final_eval = jhkaggle.jhkaggle_config['FINAL_EVAL']
    if final_eval == jhkaggle.const.EVAL_R2:
        return r2_score(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_LOGLOSS:
        return log_loss(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_AUC:
        fpr, tpr, thresholds = roc_curve(y_valid, y_pred, pos_label=1)
        return auc(fpr, tpr)
    else:
        raise Exception(f"Unknown FINAL_EVAL: {final_eval}") 
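In the AUC branch, auc(fpr, tpr) computed from roc_curve matches roc_auc_score for binary targets; a quick check with toy data:

import numpy as np
from sklearn.metrics import auc, roc_auc_score, roc_curve

y_valid = [0, 0, 1, 1]
y_pred = [0.1, 0.4, 0.35, 0.8]
fpr, tpr, thresholds = roc_curve(y_valid, y_pred, pos_label=1)
assert np.isclose(auc(fpr, tpr), roc_auc_score(y_valid, y_pred))  # both 0.75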
Example #21
Source File: runner.py    From kagglebook with BSD 3-Clause "New" or "Revised" License
def train_fold(self, i_fold: Union[int, str]) -> Tuple[
        Model, Optional[np.array], Optional[np.array], Optional[float]]:
        """Train and evaluate on the specified cross-validation fold

        Called from other methods, and also usable on its own for checks
        and for parameter tuning

        :param i_fold: fold index ('all' to train on all the data)
        :return: tuple of (model instance, record indices, predictions, evaluation score)
        """
        # load the training data
        validation = i_fold != 'all'
        train_x = self.load_x_train()
        train_y = self.load_y_train()

        if validation:
            # set up the training and validation data
            tr_idx, va_idx = self.load_index_fold(i_fold)
            tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
            va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

            # train the model
            model = self.build_model(i_fold)
            model.train(tr_x, tr_y, va_x, va_y)

            # predict and evaluate on the validation data
            va_pred = model.predict(va_x)
            score = log_loss(va_y, va_pred, eps=1e-15, normalize=True)

            # return the model, indices, predictions and score
            return model, va_idx, va_pred, score
        else:
            # train on all of the training data
            model = self.build_model(i_fold)
            model.train(train_x, train_y)

            # return the model
            return model, None, None, None
Example #22
Source File: validate_sorted.py    From adversarial-validation with MIT License
def train_and_evaluate( y_train, x_train, y_val, x_val ):

	lr = LR()
	lr.fit( x_train, y_train )

	p = lr.predict_proba( x_val )
	p_bin = lr.predict( x_val )

	acc = accuracy( y_val, p_bin )
	auc = AUC( y_val, p[:,1] )
	ll = log_loss( y_val, p[:,1] )
	
	return ( auc, acc, ll ) 
Example #23
Source File: average.py    From fnc-1 with Apache License 2.0
def stack_cv(param):
    
    #x_meta, y_meta = load_data()
    sumw = param['w0'] + param['w1'] 
    pred_agree = (x_meta[:,0]*param['w0'] + x_meta[:,4]*param['w1']) / sumw
    pred_disagree = (x_meta[:,1]*param['w0'] + x_meta[:,5]*param['w1']) / sumw
    pred_discuss = (x_meta[:,2]*param['w0'] + x_meta[:,6]*param['w1']) / sumw
    pred_unrelated = (x_meta[:,3]*param['w0'] + x_meta[:,7]*param['w1']) / sumw

    pred_y = np.hstack([pred_agree.reshape((-1,1)), pred_disagree.reshape((-1,1)), pred_discuss.reshape((-1,1)), pred_unrelated.reshape((-1,1))])
    print('pred_agree.shape:', pred_agree.shape)
    print('pred_disagree.shape:', pred_disagree.shape)
    print('pred_discuss.shape:', pred_discuss.shape)
    print('pred_unrelated.shape:', pred_unrelated.shape)

    print('pred_y.shape:', pred_y.shape)
    print('y_meta.shape:', y_meta.shape)
    
    pred_y_label = np.argmax(pred_y, axis=1)
    predicted = [LABELS[int(a)] for a in pred_y_label]
    actual = [LABELS[int(a)] for a in y_meta]    

    score, _ = score_submission(actual, predicted)
    s_perf, _ = score_submission(actual, actual)

    cost = float(score) / s_perf

    #cost = log_loss(y_meta, pred_y, labels = [0, 1, 2, 3])
    
    return -1.0 * cost 
Example #24
Source File: test_basic.py    From ngboost with Apache License 2.0
def test_classification():
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95 
Example #25
Source File: classifier_utils.py    From human-rl with MIT License
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss
        
    # smallest prob given to an actual catastrophe 
Example #26
Source File: classification.py    From Kaggler with MIT License
def logloss(y, p):
    """Bounded log loss error.

    Args:
        y (numpy.array): target
        p (numpy.array): prediction

    Returns:
        bounded log loss error
    """

    p[p < EPS] = EPS
    p[p > 1 - EPS] = 1 - EPS
    return log_loss(y, p) 
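A usage sketch (EPS is a module-level constant in the original file; 1e-15 below is an assumed placeholder value). The clipping keeps the loss finite even for fully confident predictions at exactly 0 or 1:

import numpy as np
from sklearn.metrics import log_loss

EPS = 1e-15  # assumed stand-in for the module constant
y = np.array([0, 1, 1])
p = np.array([0.0, 1.0, 0.8])  # unclipped, p = 0 or 1 makes the raw log loss infinite
print(logloss(y, p))  # finite thanks to the clipping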
Example #27
Source File: metrics.py    From knowledge_graph_attention_network with MIT License
def logloss(ground_truth, prediction):
    # preds = [max(min(p, 1. - 10e-12), 10e-12) for p in prediction]
    logloss = log_loss(np.asarray(ground_truth), np.asarray(prediction))
    return logloss 
Example #28
Source File: utils.py    From QuickDraw with MIT License
def get_evaluation(y_true, y_prob, list_metrics):
    y_pred = np.argmax(y_prob, -1)
    output = {}
    if 'accuracy' in list_metrics:
        output['accuracy'] = metrics.accuracy_score(y_true, y_pred)
    if 'loss' in list_metrics:
        try:
            output['loss'] = metrics.log_loss(y_true, y_prob)
        except ValueError:
            output['loss'] = -1
    if 'confusion_matrix' in list_metrics:
        output['confusion_matrix'] = str(metrics.confusion_matrix(y_true, y_pred))
    return output 
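The try/except above guards against the ValueError that metrics.log_loss raises when, for example, a batch of y_true contains fewer classes than y_prob has columns. Passing labels explicitly is an alternative to the -1 fallback (a sketch):

import numpy as np
from sklearn import metrics

y_true = [0, 0, 2]  # class 1 happens to be absent from this batch
y_prob = np.array([[0.8, 0.1, 0.1],
                   [0.7, 0.2, 0.1],
                   [0.2, 0.2, 0.6]])
# without labels=, this raises ValueError; with it, the call succeeds
print(metrics.log_loss(y_true, y_prob, labels=[0, 1, 2]))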
Example #29
Source File: check_automl_with_regression.py    From mljar-supervised with MIT License
def test_fit_and_predict(self):
        seed = 1709

        df = pd.read_csv(
            "./tests/data/housing_regression_missing_values_missing_target.csv"
        )
        print(df.columns)
        x_cols = [c for c in df.columns if c != "MEDV"]
        X = df[x_cols]
        y = df["MEDV"]

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            X, y, test_size=0.3, random_state=seed
        )
        automl = AutoML(
            total_time_limit=10,
            algorithms=["Xgboost"],  # ["LightGBM", "RF", "NN", "CatBoost", "Xgboost"],
            start_random_models=1,
            hill_climbing_steps=0,
            top_models_to_improve=0,
            train_ensemble=True,
            verbose=True,
        )
        automl.fit(X_train, y_train)

        response = automl.predict(X_test)  # ["p_1"]
        print("Response", response)

        # Compute the logloss on test dataset
        # ll = log_loss(y_test, response)
        # print("(*) Dataset id {} logloss {}".format(dataset_id, ll)) 
Example #30
Source File: rgf.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License
def score(self, X, y, sample_weight=None):
        return log_loss(y, self.predict_proba(X))
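One caveat for a score method like this: scikit-learn's Estimator.score convention is "greater is better", while log loss is "lower is better", so utilities that maximize score need the sign flipped. A sketch of the usual scorer-based alternative (make_scorer arguments as in older scikit-learn releases; newer ones replace needs_proba with response_method):

from sklearn.metrics import log_loss, make_scorer

# negated so that model-selection utilities can maximize it;
# equivalent to the built-in scoring string 'neg_log_loss'
neg_log_loss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)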