Python sklearn.cross_validation.cross_val_predict() Examples

The following are code examples showing how to use sklearn.cross_validation.cross_val_predict(). They are taken from open-source Python projects. Note that the sklearn.cross_validation module was deprecated in scikit-learn 0.18 and removed in 0.20; the modern equivalent is sklearn.model_selection.cross_val_predict(). You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: scattertext   Author: JasonKessler   File: TermDocMatrix.py    Apache License 2.0 6 votes vote down vote up
def get_logistic_regression_coefs_l2(self, category,
                                         clf=RidgeClassifierCV()):
        ''' Computes l2-penalized logistic regression score.

        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        # NOTE(review): the default `clf` instance is created once at function
        # definition time and is mutated by fit() on every default call.
        # sklearn.cross_validation was removed in scikit-learn 0.20; fall back
        # to sklearn.model_selection on ImportError only, instead of a bare
        # except that would hide unrelated errors.
        try:
            from sklearn.cross_validation import cross_val_predict
        except ImportError:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        # Tf-idf-weight the stored term-document matrix before fitting.
        X = TfidfTransformer().fit_transform(self._X)
        clf.fit(X, y)
        # Out-of-fold predictions give an unbiased accuracy estimate.
        y_hat = cross_val_predict(clf, X, y)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_[0], acc, baseline
Example 2
Project: Yelp-Rating-Prediction   Author: akshaykamath   File: SVM_KFold_CrossValidation.py    MIT License 6 votes vote down vote up
def learn_model(reviews, stars):
    """Train a One-vs-Rest linear SVM and print 10-fold CV metrics.

    reviews : feature matrix, one row per review
    stars   : target labels (star ratings)

    NOTE(review): Python 2 print statements — this module targets Python 2.
    """
    svm = OneVsRestClassifier(SVC(C=1, kernel='linear', gamma=1, verbose=False, probability=False))

    print "-" * 60, "\n"
    print "Results with 10-fold cross validation:\n"
    print "-" * 60, "\n"

    # Out-of-fold predictions for every sample, 10 folds, single worker.
    predicted = cross_validation.cross_val_predict(svm, reviews, stars, cv=10, n_jobs=1)
    print "*" * 20
    print "\t Accuracy Score\t", metrics.accuracy_score(stars, predicted)
    print "*" * 20

    # NOTE(review): precision_score/recall_score without average= assume binary
    # labels; star ratings look multiclass — confirm against the sklearn version.
    print "Precision Score\t", metrics.precision_score(stars, predicted)
    print "Recall Score\t", metrics.recall_score(stars, predicted)
    print "\nClassification Report:\n\n", metrics.classification_report(stars, predicted)
    print "\nConfusion Matrix:\n\n", metrics.confusion_matrix(stars, predicted) 
Example 3
Project: jamespy_py3   Author: jskDr   File: linear_model.py    MIT License 6 votes vote down vote up
def fit(self, xy_file, fname_out):
		"""Fit via the parent grid search, then save CV predictions to CSV.

		xy_file   : path to a CSV holding 'X' (features) and 'y' (targets) columns
		fname_out : path the augmented DataFrame (with 'yp' predictions) is written to

		Returns self for chaining.
		"""

		df = read_csv( xy_file)
		X = df['X'].values
		y = df['y'].values
		
		# Parent class performs the grid search and sets self.best_estimator_.
		super().fit( X, y)

		# Out-of-fold predictions from the best estimator found above.
		yp = cross_validation.cross_val_predict( self.best_estimator_, X, y)

		# NOTE(review): df['y'].columns implies df has MultiIndex columns
		# (top level 'X'/'y') — confirm the expected CSV layout.
		m_idx = pd.MultiIndex.from_product([['yp'], df['y'].columns])
		yp_df = pd.DataFrame( yp, index = df.index, columns=m_idx)
		df_out = pd.concat([df, yp_df], axis = 1)

		df_out.to_csv( fname_out)

		return self 
Example 4
Project: jamespy_py3   Author: jskDr   File: _jgrid_r0.py    MIT License 6 votes vote down vote up
def _cv_r0( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
	"""Return cross-validated predictions for every input molecule.

	`method` names a linear_model estimator class ('Ridge' or 'Lasso').
	"""
	print(xM.shape, yV.shape)

	estimator_cls = getattr( linear_model, method)
	regressor = estimator_cls( alpha = alpha)
	folds = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
	predictions = cross_validation.cross_val_predict( regressor, xM, yV, cv = folds, n_jobs = n_jobs)

	if graph:
		print('The prediction output using cross-validation is given by:')
		jutil.cv_show( yV, predictions, grid_std = grid_std)

	return predictions
Example 5
Project: jamespy_py3   Author: jskDr   File: _jgrid_r0.py    MIT License 6 votes vote down vote up
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
	"""Return cross-validated predictions for every input molecule.

	`method` names a linear_model estimator class ('Ridge' or 'Lasso');
	`shuffle` controls whether KFold shuffles sample order.
	"""
	print(xM.shape, yV.shape)

	regressor = getattr( linear_model, method)( alpha = alpha)
	splitter = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)
	predicted = cross_validation.cross_val_predict( regressor, xM, yV, cv = splitter, n_jobs = n_jobs)

	if graph:
		print('The prediction output using cross-validation is given by:')
		jutil.cv_show( yV, predicted, grid_std = grid_std)

	return predicted
Example 6
Project: jamespy_py3   Author: jskDr   File: _jgrid_r0.py    MIT License 6 votes vote down vote up
def _cv_LOO_r0( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
	"""Leave-one-out CV predictions for every input molecule.

	`method` names a linear_model estimator class ('Ridge' or 'Lasso').
	"""
	# One fold per sample makes KFold equivalent to leave-one-out.
	fold_count = xM.shape[0]

	print(xM.shape, yV.shape)

	regressor = getattr( linear_model, method)( alpha = alpha)
	splitter = cross_validation.KFold( xM.shape[0], n_folds=fold_count)
	predicted = cross_validation.cross_val_predict( regressor, xM, yV, cv = splitter, n_jobs = n_jobs)

	if graph:
		print('The prediction output using cross-validation is given by:')
		jutil.cv_show( yV, predicted, grid_std = grid_std)

	return predicted
Example 7
Project: jamespy_py3   Author: jskDr   File: jgrid (james-90X3A's conflicted copy 2016-04-21).py    MIT License 6 votes vote down vote up
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
	"""Shuffled K-fold CV predictions for every input molecule.

	`method` names a linear_model estimator class ('Ridge' or 'Lasso').
	"""
	print(xM.shape, yV.shape)

	model_cls = getattr( linear_model, method)
	model = model_cls( alpha = alpha)
	fold_iter = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
	y_out = cross_validation.cross_val_predict( model, xM, yV, cv = fold_iter, n_jobs = n_jobs)

	if graph:
		print('The prediction output using cross-validation is given by:')
		jutil.cv_show( yV, y_out, grid_std = grid_std)

	return y_out
Example 8
Project: scattertext   Author: JasonKessler   File: TermDocMatrix.py    Apache License 2.0 5 votes vote down vote up
def get_logistic_regression_coefs_l1(self, category,
                                         clf=LassoCV(alphas=[0.1, 0.001],
                                                     max_iter=10000,
                                                     n_jobs=-1)):
        ''' Computes l1-penalized logistic regression score.

        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        # sklearn.cross_validation was removed in scikit-learn 0.20; catch
        # ImportError only (a bare except would hide unrelated failures).
        try:
            from sklearn.cross_validation import cross_val_predict
        except ImportError:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        # Lasso is a regressor, so train on a continuous encoding of the labels.
        y_continuous = self._get_continuous_version_boolean_y(y)
        X = self._X

        clf.fit(X, y_continuous)
        # Out-of-fold continuous predictions, thresholded at 0 into booleans.
        # cross_val_predict clones `clf`, so the coefficients from the fit
        # above are intact — no second fit is needed.
        y_hat = (cross_val_predict(clf, X, y_continuous) > 0)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_, acc, baseline
Example 9
Project: res   Author: bustios   File: cross_val_scores.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def cross_val_scores(clf, X, y, n_iters=10, n_folds=10, n_jobs=1):
    """Repeated stratified K-fold CV.

    Returns (mean accuracy, std accuracy, mean kappa, std kappa) over
    `n_iters` independent shufflings of the folds.
    """
    scores = np.zeros((n_iters, 2))
    for i in range(n_iters):  # `i` instead of `iter`: don't shadow the builtin
        # random_state=i to control the randomness for reproducibility
        cv = StratifiedKFold(y, n_folds, shuffle=True, random_state=i)
        # Pass cv by keyword: in sklearn.model_selection.cross_val_predict the
        # fourth positional argument is `groups`, not `cv`.
        y_pred = cross_val_predict(clf, X, y, cv=cv, n_jobs=n_jobs)
        scores[i, 0] = accuracy_score(y, y_pred)
        scores[i, 1] = cohen_kappa_score(y, y_pred)

    return (scores[:, 0].mean(), scores[:, 0].std(),
            scores[:, 1].mean(), scores[:, 1].std())
Example 10
Project: sparkonda   Author: moutai   File: test_helper.py    ISC License 5 votes vote down vote up
def d(x):
    """Smoke test: 10-fold CV predictions for linear regression on Boston."""
    from sklearn import datasets
    from sklearn.cross_validation import cross_val_predict
    from sklearn import linear_model

    model = linear_model.LinearRegression()
    boston = datasets.load_boston()
    targets = boston.target

    # cross_val_predict yields an array the same size as the targets,
    # each entry being the out-of-fold prediction for that sample.
    return cross_val_predict(model, boston.data, targets, cv=10)
Example 11
Project: linear_neuron   Author: uglyboxer   File: test_cross_validation.py    MIT License 5 votes vote down vote up
def test_cross_val_predict():
    """cross_val_predict matches a manual fold loop and handles several cv types."""
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    # Default cv still yields one prediction per sample.
    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    # Leave-one-out cv also covers every sample.
    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    # Sparse input: zero out sub-median entries, convert to COO format.
    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    # Unsupervised estimator (no y) still predicts for every sample.
    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    # A cv whose test indices do not partition the data must raise.
    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv()) 
Example 12
Project: Weiss   Author: WangWenjun559   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict():
    """Check cross_val_predict against a hand-rolled fold loop and edge cases."""
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    # Default cv: one out-of-fold prediction per sample.
    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    # Leave-one-out cv must also produce a prediction for every sample.
    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    # Sparse input path: sparsify below-median entries into a COO matrix.
    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    # Works with an unsupervised estimator (y omitted).
    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    # Folds that don't partition the samples should trigger ValueError.
    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv()) 
Example 13
Project: Weiss   Author: WangWenjun559   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict_input_types():
    """cross_val_predict accepts dense, sparse, list, and 3-d feature inputs.

    Uses module-level fixtures X, y, X_sparse (10 samples — not visible here).
    """
    clf = Ridge()
    # Smoke test
    predictions = cval.cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (10,))

    # test with multioutput y
    predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_equal(predictions.shape, (10, 2))

    # Sparse features with 1-d targets.
    predictions = cval.cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (10,))

    # test with multioutput y
    predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_array_equal(predictions.shape, (10, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cval.cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cval.cross_val_predict(clf, X, y.tolist())

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cval.cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (10,)) 
Example 14
Project: Weiss   Author: WangWenjun559   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        # pandas is optional; the mock types above still exercise the path.
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        # The checking classifier asserts the frame/series types survive
        # the round trip through cross_val_predict unchanged.
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser) 
Example 15
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict():
    """Verify cross_val_predict output against an explicit per-fold loop."""
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    # Default cv: one prediction per sample.
    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    # Leave-one-out cv covers every sample too.
    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    # Sparse input: zero sub-median entries and convert to COO.
    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    # Unsupervised estimator (no y) is accepted.
    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    # Non-partitioning folds must raise ValueError.
    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv()) 
Example 16
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict_input_types():
    """cross_val_predict handles dense, sparse, list, multioutput, 3-d inputs.

    Relies on module-level fixtures X, y, X_sparse (10 samples — defined
    elsewhere in the test module).
    """
    clf = Ridge()
    # Smoke test
    predictions = cval.cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (10,))

    # test with multioutput y
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_equal(predictions.shape, (10, 2))

    # Sparse features with 1-d targets.
    predictions = cval.cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (10,))

    # test with multioutput y
    with ignore_warnings(category=ConvergenceWarning):
        predictions = cval.cross_val_predict(clf, X_sparse, X)
    assert_array_equal(predictions.shape, (10, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cval.cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cval.cross_val_predict(clf, X, y.tolist())

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cval.cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (10,)) 
Example 17
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        # Real pandas is optional; mocks alone still cover the code path.
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        # CheckingClassifier asserts the input types are preserved
        # when they reach the estimator inside cross_val_predict.
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser) 
Example 18
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_cross_validation.py    Apache License 2.0 5 votes vote down vote up
def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          return_indicator=True,
                                          random_state=1)
    # Same data in CSR form; the classifier accepts both representations.
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel='linear'))
    preds = cval.cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cval.cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    # Densify the sparse predictions so the two outputs compare elementwise.
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds) 
Example 19
Project: Ambiruptor   Author: Ambiruptor   File: core.py    GNU General Public License v3.0 5 votes vote down vote up
def cross_validation_scores(self, train_data, cv=10):
        """Score the model using scikit-learn cross-validated predictions.

        Parameters
        ----------
        train_data : object exposing `.data` (features) and `.senses` (labels)
        cv : int
            Number of cross-validation folds (default 10).

        Returns
        -------
        The result of ``Learner.scoring(y_predict, y_correct)``.
        """
        y_correct = train_data.senses
        # One out-of-fold prediction per training sample.
        y_predict = cross_validation.cross_val_predict(
            self.model,
            train_data.data,
            train_data.senses,
            cv=cv)
        # (Removed a dead stray string literal that referenced an
        # undefined name `scores` — it was commented-out code.)
        return Learner.scoring(y_predict, y_correct)
Example 20
Project: jamespy_py3   Author: jskDr   File: pdlearn.py    MIT License 5 votes vote down vote up
def cross_val_predict(self, fname_out = None):
		"""Compute CV predictions with the best estimator and save them.

		fname_out : optional output filename passed to `to_csv_excel`.

		Returns the array of out-of-fold predictions.
		"""
		yp = cross_validation.cross_val_predict( self.best_estimator_, self.X, self.y)

		# NOTE(review): self.df['y'].columns implies MultiIndex columns
		# ('X'/'y'/'param' top level) — confirm against the loader.
		idx = pd.MultiIndex.from_product([['yp'], self.df['y'].columns])
		yp_df = pd.DataFrame( yp, index = self.df.index, columns=idx)
		df_out_org = self.df.merge( yp_df, left_index = True, right_index = True)
		self.df_out = DataFrame( df_out_org[["X", "y", "yp", "param"]])
		# df_out = pd.concat([self.df, yp_df], axis = 1)

		# `to_csv_excel` is a project-specific DataFrame method — not stock pandas.
		self.df_out.to_csv_excel( '_out', self.fname, fname_out)		

		return yp 
Example 21
Project: jamespy_py3   Author: jskDr   File: _jgrid_r0.py    MIT License 5 votes vote down vote up
def cv_BIKE_Ridge( A_list, yV, alpha = 0.5, XX = None, n_folds = 5, n_jobs = -1, grid_std = None):
	"""Shuffled K-fold CV predictions for a BIKE_Ridge model."""
	model = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
	n_samples = A_list[0].shape[0]  # number of molecules
	folds = cross_validation.KFold( n_samples, n_folds=n_folds, shuffle=True)

	# BIKE_Ridge consumes row indices, so feed a 0..n-1 column vector.
	AX_idx = np.array([list(range( n_samples))]).T
	yV_pred = cross_validation.cross_val_predict( model, AX_idx, yV, cv = folds, n_jobs = n_jobs)

	print('The prediction output using cross-validation is given by:')
	jutil.cv_show( yV, yV_pred, grid_std = grid_std)

	return yV_pred
Example 22
Project: jamespy_py3   Author: jskDr   File: jgrid (james-90X3A's conflicted copy 2016-04-21).py    MIT License 5 votes vote down vote up
def cv_Ridge_BIKE( A_list, yV, XX = None, alpha = 0.5, n_folds = 5, n_jobs = -1, grid_std = None):
	"""Shuffled K-fold CV predictions for a BIKE_Ridge model."""
	estimator = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
	sample_count = A_list[0].shape[0]  # number of molecules
	splitter = cross_validation.KFold( sample_count, n_folds=n_folds, shuffle=True)

	# The estimator works on row indices; build them as a column vector.
	AX_idx = np.array([list(range( sample_count))]).T
	yV_pred = cross_validation.cross_val_predict( estimator, AX_idx, yV, cv = splitter, n_jobs = n_jobs)

	print('The prediction output using cross-validation is given by:')
	jutil.cv_show( yV, yV_pred, grid_std = grid_std)

	return yV_pred
Example 23
Project: jamespy_py3   Author: jskDr   File: jgrid (james-90X3A's conflicted copy 2016-04-21).py    MIT License 5 votes vote down vote up
def cv_BIKE_Ridge( A_list, yV, alpha = 0.5, XX = None, n_folds = 5, n_jobs = -1, grid_std = None):
	"""Cross-validated predictions for BIKE_Ridge over index-encoded inputs."""
	bike = binary_model.BIKE_Ridge( A_list, XX, alpha = alpha)
	total = A_list[0].shape[0]  # number of molecules
	kfold = cross_validation.KFold( total, n_folds=n_folds, shuffle=True)

	# Row indices 0..n-1 as a single-column array — BIKE_Ridge resolves
	# them against A_list/XX internally.
	AX_idx = np.array([list(range( total))]).T
	yV_pred = cross_validation.cross_val_predict( bike, AX_idx, yV, cv = kfold, n_jobs = n_jobs)

	print('The prediction output using cross-validation is given by:')
	jutil.cv_show( yV, yV_pred, grid_std = grid_std)

	return yV_pred
Example 24
Project: jamespy_py3   Author: jskDr   File: pdlearn.py    MIT License 5 votes vote down vote up
def cross_val_predict(self, fname_out = None):
		"""Compute and persist cross-validated predictions.

		fname_out : optional output filename forwarded to `to_csv_excel`.

		Returns the prediction array `yp`.
		"""
		yp = cross_validation.cross_val_predict( self.best_estimator_, self.X, self.y)

		# NOTE(review): indexing self.df['y'].columns assumes MultiIndex
		# columns with 'X'/'y'/'param' groups — verify the frame layout.
		idx = pd.MultiIndex.from_product([['yp'], self.df['y'].columns])
		yp_df = pd.DataFrame( yp, index = self.df.index, columns=idx)
		df_out_org = self.df.merge( yp_df, left_index = True, right_index = True)
		self.df_out = DataFrame( df_out_org[["X", "y", "yp", "param"]])
		# df_out = pd.concat([self.df, yp_df], axis = 1)

		# `to_csv_excel` is a project-local helper, not a pandas method.
		self.df_out.to_csv_excel( '_out', self.fname, fname_out)		

		return yp