Python sklearn.linear_model.RidgeCV() Examples

The following are 19 code examples of sklearn.linear_model.RidgeCV(), collected from open-source projects; the source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the module sklearn.linear_model.
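Before the project examples, here is a minimal, self-contained sketch of the typical RidgeCV workflow; the data and the alpha grid are synthetic and purely illustrative:

import numpy as np
from sklearn.linear_model import RidgeCV

# Synthetic regression data: 100 samples, 5 features.
rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = X @ rng.randn(5) + 0.1 * rng.randn(100)

# RidgeCV selects the best alpha from the grid by cross-validation
# (efficient leave-one-out GCV when cv is not specified).
model = RidgeCV(alphas=np.logspace(-3, 3, 13))
model.fit(X, y)
print(model.alpha_, model.score(X, y))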
Example #1
Source File: spark-sklearn-app.py    From Data_Analytics_with_Hadoop with MIT License
def build(path):
    """
    Computes a linear regression using Ridge regularization.
    """
    print "Building the linear model using Ridge regression"
    start = time.time()

    # Load the data, the target is the last column.
    data  = np.loadtxt(path, delimiter=',')
    y = data[:,-1]
    X = data[:,0:-1]

    # Instantiate and fit the model.
    model = RidgeCV()
    model.fit(X, y)

    print "Finished training the linear model in {:0.3f} seconds".format(time.time() - start)
    return model 
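A brief usage sketch for build(); the file name and data are illustrative, and the only real assumption is the layout the function expects, features first and the target in the last column:

import numpy as np

# Write a tiny synthetic dataset in the expected layout.
data = np.hstack([np.random.randn(50, 3), np.random.randn(50, 1)])
np.savetxt('demo.csv', data, delimiter=',')

model = build('demo.csv')
print(model.alpha_)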
Example #2
Source File: 02_fit_predict_plot_employee_salaries.py    From dirty_cat with BSD 3-Clause "New" or "Revised" License
def make_pipeline(encoding_method):
    # static transformers from the other columns
    transformers = [(enc + '_' + col, encoders_dict[enc], [col])
                    for col, enc in clean_columns.items()]
    # adding the encoded column
    transformers += [(encoding_method, encoders_dict[encoding_method],
                      [dirty_column])]
    pipeline = Pipeline([
        # Use ColumnTransformer to combine the features
        ('union', ColumnTransformer(
            transformers=transformers,
            remainder='drop')),
        ('scaler', StandardScaler(with_mean=False)),
        ('clf', RidgeCV())
    ])
    return pipeline


#########################################################################
# Fitting each encoding method with a RidgeCV
# --------------------------------------------
# Finally, we loop over the different encoding methods, instantiating
# a new pipeline each time, fitting it, and storing the returned
# cross-validation score (a sketch of this loop follows):
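A minimal sketch of that loop, assuming a feature table X (a DataFrame containing the columns referenced by the transformers), a target y, and illustrative encoder names (the real names come from encoders_dict in the script):

from sklearn.model_selection import cross_val_score

all_scores = {}
for method in ['one-hot', 'target']:  # illustrative encoder names
    pipeline = make_pipeline(method)
    # cross-validate the full pipeline, RidgeCV included
    scores = cross_val_score(pipeline, X, y, cv=5)
    all_scores[method] = scores.mean()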
Example #3
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def plot_reconstruction_for_different_freqs(event_id, electrode, two_electrodes, from_t, to_t, time_split,
        gk_sigma=3, bipolar=True, electrodes_positive=False, electrodes_normalize=False, njobs=4):
    cond = utils.first_key(event_id)
    electrodes = get_all_electrodes_names(bipolar)
    elec_data = load_electrodes_data(event_id, bipolar, electrodes, from_t, to_t,
        subtract_min=electrodes_positive, normalize_data=electrodes_normalize)
    meg_data_dic = load_all_dics(freqs_bin, event_id, bipolar, electrodes, from_t, to_t, gk_sigma, njobs=njobs)
    reconstruct_meg(event_id, [electrode], from_t, to_t, time_split, plot_results=True, all_meg_data=meg_data_dic,
        elec_data=elec_data, title='{}: {}'.format(cond, electrode))
    reconstruct_meg(event_id, two_electrodes, from_t, to_t, time_split, optimization_method='RidgeCV',
        plot_results=True, all_meg_data=meg_data_dic, elec_data=elec_data,
        title='{}: {} and {}'.format(cond, two_electrodes[0], two_electrodes[1]))
    freqs_inds = np.array([2, 6, 9, 10, 11, 15, 16])
    plt.plot(elec_data[electrode][cond])
    plt.plot(meg_data_dic[electrode][freqs_inds, :].T, '--')
    plt.legend([electrode] + np.array(CSD_FREQS)[freqs_inds].tolist())
    # plt.title('{}: {}'.format(cond, electrode))
    plt.show() 
Example #4
Source File: ewa.py    From pycobra with MIT License
def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm']):
        """
        Loads five scikit-learn regressors by default.

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.

        """
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue 
Example #5
Source File: spark-sklearn-app.py    From hadoop-fundamentals with MIT License
def build(path):
    """
    Computes a linear regression using Ridge regularization.
    """
    print "Building the linear model using Ridge regression"
    start = time.time()

    # Load the data, the target is the last column.
    data  = np.loadtxt(path, delimiter=',')
    y = data[:,-1]
    X = data[:,0:-1]

    # Instantiate and fit the model.
    model = RidgeCV()
    model.fit(X, y)

    print "Finished training the linear model in {:0.3f} seconds".format(time.time() - start)
    return model 
Example #6
Source File: rapm_adjust.py    From NBA_Tutorials with MIT License
def lambda_to_alpha(lambda_value, samples):
    return (lambda_value * samples) / 2.0


# Convert RidgeCV alpha back into a lambda value 
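The snippet ends at the comment announcing the inverse conversion; its body is truncated here, but it is presumably just the algebraic inverse of lambda_to_alpha. A sketch under that assumption:

def alpha_to_lambda(alpha_value, samples):
    return (alpha_value * 2.0) / samples

# Round trip: lambda = 2 over 1000 samples gives alpha = 1000.0,
# and converting back recovers 2.0.
assert alpha_to_lambda(lambda_to_alpha(2, 1000), 1000) == 2.0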
Example #7
Source File: poiRegression.py    From python-urbanPlanning with MIT License
def regularization_m(X_re,y_re,predFeat=False):
    n_alphas=200
    alphas=np.logspace(1, 8, n_alphas)
    coefs=[]
    n=0
    for a in alphas:
        n+=1
        ridge=Ridge(alpha=a, fit_intercept=False)
        ridge.fit(X_re,y_re)
        coefs.append(ridge.coef_)
#    print(n,coefs)
    ax = plt.gca()
    ax.plot(alphas, coefs)
    ax.set_xscale('log')
    ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
    plt.xlabel('alpha')
    plt.ylabel('weights')
    plt.title('Ridge coefficients as a function of the regularization')
    plt.axis('tight')
    plt.show()   
        
    ridge=Ridge(alpha=28.6)  # Ridge with a pre-specified alpha value
    ridge.fit(X_re,y_re)
    print(ridge.coef_,ridge.intercept_,ridge.alpha)
    
    ridgecv=RidgeCV(alphas=alphas)  # pass multiple alpha values; the model picks the best one
    ridgecv.fit(X_re,y_re)
    print(ridgecv.coef_,ridgecv.intercept_,ridgecv.alpha_)
    
    lasso=Lasso(alpha=0.01)
    lasso.fit(X_re,y_re)
    print(lasso.coef_,lasso.intercept_ ,lasso.alpha)
    
    elasticnet=ElasticNet(alpha=1.0,l1_ratio=0.5)
    elasticnet.fit(X_re,y_re)
    print(elasticnet.coef_,elasticnet.intercept_ ,elasticnet.alpha)
    
    if type(predFeat).__module__=='numpy':
        return ridgecv.predict(predFeat)
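A short usage sketch (synthetic features; passing a NumPy array as predFeat is what triggers the RidgeCV prediction return). Note the function also shows a coefficient-path plot and prints each fitted model:

import numpy as np

X_re = np.random.randn(80, 6)
y_re = X_re @ np.random.randn(6)

# Returns RidgeCV predictions for the first five rows.
preds = regularization_m(X_re, y_re, predFeat=X_re[:5])
print(preds)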
Example #8
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def find_best_freqs_subset(event_id, bipolar, freqs_bins, from_t, to_t, time_split, combs,
        optimization_method='RidgeCV', optimization_params={}, k=3, gk_sigma=3, njobs=6):
    freqs_bins = sorted(freqs_bins)
    all_electrodes = get_all_electrodes_names(bipolar)
    elec_data = load_electrodes_data(event_id, bipolar, all_electrodes, from_t, to_t,
            subtract_min=False, normalize_data=False)
    meg_data_dic = load_all_dics(freqs_bins, event_id, bipolar, all_electrodes, from_t, to_t, gk_sigma,
        dont_calc_new_csd=True, njobs=njobs)

    uuid = utils.rand_letters(5)
    results_fol = get_results_fol(optimization_method)
    partial_results_fol = os.path.join(results_fol, 'best_freqs_subset_{}'.format(uuid))
    utils.make_dir(results_fol)
    utils.make_dir(partial_results_fol)

    cond = utils.first_key(event_id)
    all_freqs_bins_subsets = list(utils.superset(freqs_bins))
    random.shuffle(all_freqs_bins_subsets)
    N = len(all_freqs_bins_subsets)
    print('There are {} freqs subsets'.format(N))
    all_freqs_bins_subsets_chunks = utils.chunks(all_freqs_bins_subsets, int(len(all_freqs_bins_subsets) / njobs))
    params = [Bunch(event_id=event_id, bipolar=bipolar, freqs_bins_chunks=freqs_bins_subsets_chunk, cond=cond,
            from_t=from_t, to_t=to_t, freqs_bins=freqs_bins, partial_results_fol=partial_results_fol,
            time_split=time_split, only_sig_electrodes=False, only_from_same_lead=True, electrodes_positive=False,
            electrodes_normalize=False, gk_sigma=gk_sigma, k=k, do_plot_results=False, do_save_partial_results=False,
            optimization_params=optimization_params, check_only_pred_score=True, njobs=1, N=int(N / njobs),
            elec_data=elec_data, meg_data_dic=meg_data_dic, all_electrodes=all_electrodes,
            optimization_method=optimization_method, error_calc_method='rol_corr', error_threshold=30, combs=combs) for
            freqs_bins_subsets_chunk in all_freqs_bins_subsets_chunks]
    results = utils.run_parallel(_find_best_freqs_subset_parallel, params, njobs)
    all_results = []
    for chunk_results in results:
        all_results.extend(chunk_results)
    params_suffix = utils.params_suffix(optimization_params)
    output_file = os.path.join(results_fol, 'best_freqs_subset_{}_{}_{}{}.pkl'.format(cond, uuid, k, params_suffix))
    print('saving results to {}'.format(output_file))
    utils.save((all_results, freqs_bins), output_file)
Example #9
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def calc_optimization_features(optimization_method, freqs_bins, cond, meg_data_dic, elec_data, electrodes, from_t, to_t, optimization_params={}):
    # scorer = make_scorer(rol_corr, False)
    cv_parameters = []
    if optimization_method in ['Ridge', 'RidgeCV', 'Lasso', 'LassoCV', 'ElasticNet', 'ElasticNetCV']:
        # stack the meg data of all electrodes, so that X has shape F x (T*n),
        # where n is the number of electrodes; X.T is then (T*n) x F and Y is (T*n,)
        X = np.hstack([meg_data_dic[electrode][:, from_t:to_t] for electrode in electrodes])
        Y = np.hstack([elec_data[electrode][cond][from_t:to_t] for electrode in electrodes])
        funcs_dic = {'Ridge': Ridge(alpha=0.1), 'RidgeCV': RidgeCV(alphas=np.logspace(0, -10, 11)),  # scoring=scorer
            'Lasso': Lasso(alpha=1.0/X.shape[0]), 'LassoCV': LassoCV(alphas=np.logspace(0, -10, 11), max_iter=1000),
            'ElasticNet': ElasticNet(alpha=0.1),  # needed so the 'ElasticNet' option accepted above does not KeyError
            'ElasticNetCV': ElasticNetCV(alphas=np.logspace(0, -10, 11), l1_ratio=np.linspace(0, 1, 11))}
        clf = funcs_dic[optimization_method]
        clf.fit(X.T, Y)
        p = clf.coef_
        if len(p) != len(freqs_bins):
            raise Exception('{} (len(clf.coef_)) != {} (len(freqs_bins))!'.format(len(p), len(freqs_bins)))
        if optimization_method in ['RidgeCV', 'LassoCV']:
            cv_parameters = clf.alpha_
        elif optimization_method == 'ElasticNetCV':
            cv_parameters = [clf.alpha_, clf.l1_ratio_]
        args = [(meg_pred(p, meg_data_dic[electrode][:, from_t:to_t]), elec_data[electrode][cond][from_t:to_t]) for electrode in electrodes]
        p0 = leastsq(post_ridge_err_func, [1], args=args, maxfev=0)[0]
        p = np.hstack((p0, p))
    elif optimization_method in ['leastsq', 'dtw', 'minmax', 'diff_rms', 'rol_corr']:
        args = ([(meg_data_dic[electrode][:, from_t:to_t], elec_data[electrode][cond][from_t:to_t]) for electrode in electrodes], optimization_params)
        p0 = np.ones((1, len(freqs_bins)+1))
        funcs_dic = {'leastsq': partial(leastsq, func=err_func, x0=p0, args=args),
                     'dtw': partial(minimize, fun=dtw_err_func, x0=p0, args=args),
                     'minmax': partial(minimize, fun=minmax_err_func, x0=p0, args=args),
                     'diff_rms': partial(minimize, fun=min_diff_rms_err_func, x0=p0, args=args),
                     'rol_corr': partial(minimize, fun=max_rol_corr, x0=p0, args=args)}
        res = funcs_dic[optimization_method]()
        p = res[0] if optimization_method=='leastsq' else res.x
        cv_parameters = optimization_params
    else:
        raise Exception('Unknown optimization_method! {}'.format(optimization_method))
    return p, cv_parameters 
Example #10
Source File: test_linear_model.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.linear_model.ARDRegression, lm.ARDRegression)
        self.assertIs(df.linear_model.BayesianRidge, lm.BayesianRidge)
        self.assertIs(df.linear_model.ElasticNet, lm.ElasticNet)
        self.assertIs(df.linear_model.ElasticNetCV, lm.ElasticNetCV)

        self.assertIs(df.linear_model.HuberRegressor, lm.HuberRegressor)

        self.assertIs(df.linear_model.Lars, lm.Lars)
        self.assertIs(df.linear_model.LarsCV, lm.LarsCV)
        self.assertIs(df.linear_model.Lasso, lm.Lasso)
        self.assertIs(df.linear_model.LassoCV, lm.LassoCV)
        self.assertIs(df.linear_model.LassoLars, lm.LassoLars)
        self.assertIs(df.linear_model.LassoLarsCV, lm.LassoLarsCV)
        self.assertIs(df.linear_model.LassoLarsIC, lm.LassoLarsIC)

        self.assertIs(df.linear_model.LinearRegression, lm.LinearRegression)
        self.assertIs(df.linear_model.LogisticRegression, lm.LogisticRegression)
        self.assertIs(df.linear_model.LogisticRegressionCV, lm.LogisticRegressionCV)
        self.assertIs(df.linear_model.MultiTaskLasso, lm.MultiTaskLasso)
        self.assertIs(df.linear_model.MultiTaskElasticNet, lm.MultiTaskElasticNet)
        self.assertIs(df.linear_model.MultiTaskLassoCV, lm.MultiTaskLassoCV)
        self.assertIs(df.linear_model.MultiTaskElasticNetCV, lm.MultiTaskElasticNetCV)

        self.assertIs(df.linear_model.OrthogonalMatchingPursuit, lm.OrthogonalMatchingPursuit)
        self.assertIs(df.linear_model.OrthogonalMatchingPursuitCV, lm.OrthogonalMatchingPursuitCV)
        self.assertIs(df.linear_model.PassiveAggressiveClassifier, lm.PassiveAggressiveClassifier)
        self.assertIs(df.linear_model.PassiveAggressiveRegressor, lm.PassiveAggressiveRegressor)

        self.assertIs(df.linear_model.Perceptron, lm.Perceptron)
        self.assertIs(df.linear_model.RandomizedLasso, lm.RandomizedLasso)
        self.assertIs(df.linear_model.RandomizedLogisticRegression, lm.RandomizedLogisticRegression)
        self.assertIs(df.linear_model.RANSACRegressor, lm.RANSACRegressor)
        self.assertIs(df.linear_model.Ridge, lm.Ridge)
        self.assertIs(df.linear_model.RidgeClassifier, lm.RidgeClassifier)
        self.assertIs(df.linear_model.RidgeClassifierCV, lm.RidgeClassifierCV)
        self.assertIs(df.linear_model.RidgeCV, lm.RidgeCV)
        self.assertIs(df.linear_model.SGDClassifier, lm.SGDClassifier)
        self.assertIs(df.linear_model.SGDRegressor, lm.SGDRegressor)
        self.assertIs(df.linear_model.TheilSenRegressor, lm.TheilSenRegressor) 
Example #11
Source File: kernelcobra.py    From pycobra with MIT License
def load_default(self, machine_list='basic'):
        """
        Loads 4 different scikit-learn regressors by default; the 'advanced' list adds more machines.

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.
            Default is 'basic'.
        Returns
        -------
        self : returns an instance of self.
        """
        if machine_list == 'basic':
            machine_list = ['tree', 'ridge', 'random_forest', 'svm']
        if machine_list == 'advanced':
            machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

        self.estimators_ = {}
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = SVR().fit(self.X_k_, self.y_k_)
                if machine == 'sgd':
                    self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'bayesian_ridge':
                    self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self 
Example #12
Source File: cobra.py    From pycobra with MIT License
def load_default(self, machine_list='basic'):
        """
        Loads 4 different scikit-learn regressors by default. The advanced list adds more machines. 

        Parameters
        ----------
        machine_list: optional, list of strings
            List of default machine names to be loaded.
        Returns
        -------
        self : returns an instance of self.
        """

        if machine_list == 'basic':
            machine_list = ['tree', 'ridge', 'random_forest', 'svm']
        if machine_list == 'advanced':
            machine_list=['lasso', 'tree', 'ridge', 'random_forest', 'svm', 'bayesian_ridge', 'sgd']

        self.estimators_ = {}
        for machine in machine_list:
            try:
                if machine == 'lasso':
                    self.estimators_['lasso'] = linear_model.LassoCV(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'tree':
                    self.estimators_['tree'] = DecisionTreeRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'ridge':
                    self.estimators_['ridge'] = linear_model.RidgeCV().fit(self.X_k_, self.y_k_)
                if machine == 'random_forest':
                    self.estimators_['random_forest'] = RandomForestRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'svm':
                    self.estimators_['svm'] = LinearSVR(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'sgd':
                    self.estimators_['sgd'] = linear_model.SGDRegressor(random_state=self.random_state).fit(self.X_k_, self.y_k_)
                if machine == 'bayesian_ridge':
                    self.estimators_['bayesian_ridge'] = linear_model.BayesianRidge().fit(self.X_k_, self.y_k_)
            except ValueError:
                continue
        return self 
Example #13
Source File: rapm_adjust.py    From NBA_Tutorials with MIT License
def calculate_rapm(train_x, train_y, possessions, lambdas, name, players):
    # convert our lambdas to alphas
    alphas = [lambda_to_alpha(l, train_x.shape[0]) for l in lambdas]

    # create a 5-fold cross-validated RidgeCV model. Our target data is not centered at 0, so we fit an intercept.
    clf = RidgeCV(alphas=alphas, cv=5, fit_intercept=True, normalize=False)

    # fit our training data
    model = clf.fit(train_x, train_y, sample_weight=possessions)

    # convert our list of players into an m x 1 matrix
    player_arr = np.transpose(np.array(players).reshape(1, len(players)))

    # extract our coefficients into the offensive and defensive parts
    coef_offensive_array = np.transpose(model.coef_[:, 0:len(players)])
    coef_defensive_array = np.transpose(model.coef_[:, len(players):])

    # concatenate the offensive and defensive values with the player ids into an m x 3 matrix
    player_id_with_coef = np.concatenate([player_arr, coef_offensive_array, coef_defensive_array], axis=1)
    # build a dataframe from our matrix
    players_coef = pd.DataFrame(player_id_with_coef)
    intercept = model.intercept_

    # apply new column names
    players_coef.columns = ['playerId', '{0}__Off'.format(name), '{0}__Def'.format(name)]

    # Add the offensive and defensive components together (we should really be weighting this by the number of offensive and defensive possessions played, as they are often not equal).
    players_coef[name] = players_coef['{0}__Off'.format(name)] + players_coef['{0}__Def'.format(name)]

    # rank the values
    players_coef['{0}_Rank'.format(name)] = players_coef[name].rank(ascending=False)
    players_coef['{0}__Off_Rank'.format(name)] = players_coef['{0}__Off'.format(name)].rank(ascending=False)
    players_coef['{0}__Def_Rank'.format(name)] = players_coef['{0}__Def'.format(name)].rank(ascending=False)

    return players_coef, intercept 
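A hedged usage sketch with synthetic stint data. The shapes are the important part: each row of train_x encodes the offensive players followed by the defensive players, and train_y must be a column vector so that model.coef_ comes back 2-D. (The normalize= argument used above has since been removed from scikit-learn, so this runs as-is only on older releases.)

import numpy as np

players = [101, 102, 103, 104]            # illustrative player ids
n_stints, p = 200, len(players)
rng = np.random.RandomState(0)

# 0/1 design matrix: offensive half, then defensive half.
train_x = rng.randint(0, 2, size=(n_stints, 2 * p))
train_y = rng.randn(n_stints, 1)          # e.g. points per 100 possessions
possessions = rng.randint(1, 50, size=n_stints)

coefs, intercept = calculate_rapm(train_x, train_y, possessions,
                                  lambdas=[0.01, 0.05, 0.1],
                                  name='RAPM', players=players)
print(coefs.head())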
Example #14
Source File: rapm_with_prior.py    From NBA_Tutorials with MIT License
def lambda_to_alpha(lambda_value, samples):
    return (lambda_value * samples) / 2.0


# Convert RidgeCV alpha back into a lambda value 
Example #15
Source File: rapm.py    From NBA_Tutorials with MIT License
def calculate_rapm(train_x, train_y, possessions, lambdas, name, players):
    # convert our lambdas to alphas
    alphas = [lambda_to_alpha(l, train_x.shape[0]) for l in lambdas]

    # create a 5-fold cross-validated RidgeCV model. Our target data is not centered at 0, so we fit an intercept.
    clf = RidgeCV(alphas=alphas, cv=5, fit_intercept=True, normalize=False)

    # fit our training data
    model = clf.fit(train_x, train_y, sample_weight=possessions)

    # convert our list of players into an m x 1 matrix
    player_arr = np.transpose(np.array(players).reshape(1, len(players)))

    # extract our coefficients into the offensive and defensive parts
    coef_offensive_array = np.transpose(model.coef_[:, 0:len(players)])
    coef_defensive_array = np.transpose(model.coef_[:, len(players):])

    # concatenate the offensive and defensive values with the player ids into an m x 3 matrix
    player_id_with_coef = np.concatenate([player_arr, coef_offensive_array, coef_defensive_array], axis=1)
    # build a dataframe from our matrix
    players_coef = pd.DataFrame(player_id_with_coef)
    intercept = model.intercept_

    # apply new column names
    players_coef.columns = ['playerId', '{0}__Off'.format(name), '{0}__Def'.format(name)]

    # Add the offensive and defensive components together (we should really be weighting this by the number of offensive and defensive possessions played, as they are often not equal).
    players_coef[name] = players_coef['{0}__Off'.format(name)] + players_coef['{0}__Def'.format(name)]

    # rank the values
    players_coef['{0}_Rank'.format(name)] = players_coef[name].rank(ascending=False)
    players_coef['{0}__Off_Rank'.format(name)] = players_coef['{0}__Off'.format(name)].rank(ascending=False)
    players_coef['{0}__Def_Rank'.format(name)] = players_coef['{0}__Def'.format(name)].rank(ascending=False)

    # add the intercept for reference
    players_coef['{0}__intercept'.format(name)] = intercept[0]

    return players_coef, intercept


# a list of lambdas for cross validation 
Example #16
Source File: rapm.py    From NBA_Tutorials with MIT License
def lambda_to_alpha(lambda_value, samples):
    return (lambda_value * samples) / 2.0


# Convert RidgeCV alpha back into a lambda value 
Example #17
Source File: scikit_wrapper.py    From CatLearn with GNU General Public License v3.0
def _ridge(self):
        """Function to do ridge regression."""
        # Fit a linear ridge regression model.
        regr = RidgeCV(fit_intercept=True, normalize=True)
        model = regr.fit(X=self.train_matrix, y=self.train_target)
        coeff = regr.coef_

        # Make the linear prediction.
        pred = None
        if self.predict:
            data = model.predict(self.test_matrix)
            pred = get_error(prediction=data,
                             target=self.test_target)['average']

        return coeff, pred 
Example #18
Source File: Booster.py    From Fast-and-Accurate-Least-Mean-Squares-Solvers with MIT License
def get_new_clf(solver, folds=3, alphas=100):
    kf = KFold(n_splits=folds, shuffle=False)
    if "linear" == solver:
        clf = linear_model.LinearRegression(fit_intercept=False)
    elif "ridge" == solver:
        # build an evenly spaced alpha grid from 1/alphas up to 10
        alphas = np.arange(1 / alphas, 10 + 1 / alphas, 10 / alphas)
        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=False, cv=kf)
    elif "lasso" == solver:
        clf = linear_model.LassoCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    elif "elastic" == solver:
        clf = linear_model.ElasticNetCV(n_alphas=alphas, fit_intercept=False, cv=kf)
    return clf
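A brief usage sketch (synthetic data; the solver strings are the ones handled above):

import numpy as np

X = np.random.randn(100, 4)
y = X @ np.array([1.0, -2.0, 0.5, 0.0])

clf = get_new_clf("ridge", folds=3, alphas=100)
clf.fit(X, y)
print(clf.alpha_)  # alpha chosen by 3-fold CV over the grid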
Example #19
Source File: penalty_utils.py    From SparseSC with MIT License
def RidgeCVSolution(M, control_units, controls_as_goals, extra_goals, V, w_pens=None, separate=None):
    import scipy.linalg  # superset of np.linalg, and compiled/optimized
    from sklearn.linear_model import RidgeCV
    #Could return the weights too
    if separate is None:
        separate = (M.shape[1] > 2) #problems if 1, might be unstable with 2.
    if w_pens is None:
        w_pens = np.logspace(start=-5, stop=5, num=40)
    M_c = M[control_units,:]
    features = np.empty((0,0))
    targets = np.empty((0,))
    n_targets = len(control_units) if controls_as_goals else 0
    if extra_goals is not None:
        n_targets = n_targets + len(extra_goals)
    mse = np.empty((n_targets, len(w_pens)))
    if controls_as_goals:
        for i in range(len(control_units)):
            M_c_i = np.delete(M_c, i, axis=0)
            features_i = (M_c_i*np.sqrt(V)).T #K* x (N0-1) 
            targets_i = ((M_c[i,:]-M_c_i.mean(axis=0))*np.sqrt(V)).T #K*1

            if not separate:
                features = scipy.linalg.block_diag(features, features_i) #pylint: disable=no-member
                targets = np.hstack((targets, targets_i))
            else:
                ridgecvfit_i = RidgeCV(alphas=w_pens, fit_intercept=False, store_cv_values=True).fit(features_i, targets_i)
                mse[i,:] = ridgecvfit_i.cv_values_.mean(axis=0) #as n_samples x n_alphas
    if extra_goals is not None:
        i_offset = len(control_units) if controls_as_goals else 0
        for i, extra_goal in enumerate(extra_goals):
            features_i = (M_c*np.sqrt(V)).T #K* x (N0-1) 
            targets_i = ((M[extra_goal,:]-M_c.mean(axis=0))*np.sqrt(V)).T #K*x1

            if not separate:
                features = scipy.linalg.block_diag(features, features_i) #pylint: disable=no-member
                targets = np.hstack((targets, targets_i))
            else:
                ridgecvfit_i = RidgeCV(alphas=w_pens, fit_intercept=False, store_cv_values=True).fit(features_i, targets_i)
                mse[i+i_offset,:] = ridgecvfit_i.cv_values_.mean(axis=0) #as n_samples x n_alphas

    if not separate:
        ridgecvfit = RidgeCV(alphas=w_pens, fit_intercept=False).fit(features, targets) #Use the generalized cross-validation
        best_w_pen = ridgecvfit.alpha_
    else:
        best_w_pen = w_pens[mse.mean(axis=0).argmin()]
    #print("joint: " + str(joint_best_w_pen) + ". separate: " + str(sep_best_w_pen))
    return best_w_pen
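A hedged usage sketch with synthetic inputs; dimensions are illustrative. With more than two columns in M the function takes the per-target separate path, which relies on store_cv_values= from older scikit-learn releases (renamed store_cv_results= in 1.5):

import numpy as np

rng = np.random.RandomState(0)
M = rng.randn(10, 5)           # units x features
control_units = np.arange(8)   # the first 8 rows are controls
V = np.ones(5)                 # feature weights

best_w_pen = RidgeCVSolution(M, control_units,
                             controls_as_goals=True, extra_goals=None, V=V)
print(best_w_pen)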