from __future__ import print_function from __future__ import division import numpy as np from scipy import sparse from scipy.optimize import OptimizeResult, nnls from sklearn.utils import check_X_y from sklearn.base import RegressorMixin from sklearn.exceptions import ConvergenceWarning from sklearn.utils.extmath import safe_sparse_dot from sklearn.linear_model.base import LinearModel def _rescale_data(X, y, sample_weight): """Rescale data so as to support sample_weight""" n_samples = X.shape[0] sample_weight = sample_weight * np.ones(n_samples) sample_weight = np.sqrt(sample_weight) sw_matrix = sparse.dia_matrix((sample_weight, 0), shape=(n_samples, n_samples)) X = safe_sparse_dot(sw_matrix, X) y = safe_sparse_dot(sw_matrix, y) return X, y class NonNegativeLinearRegression(LinearModel, RegressorMixin): """Non-negative least squares linear regression. Parameters ---------- fit_intercept : boolean, optional whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). normalize : boolean, optional, default False If True, the regressors X will be normalized before regression. This parameter is ignored when `fit_intercept` is set to False. When the regressors are normalized, note that this makes the estimated coefficients more robust and almost independent of the number of samples. The same property is not valid for standardized data. However, if you wish to standardize, please use `preprocessing.StandardScaler` before calling `fit` on an estimator with `normalize=False`. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten. Attributes ---------- coef_ : array, shape (n_features, ) Estimated coefficients for the linear regression problem. intercept_ : array Independent term in the linear model. opt_result_ : OptimizeResult Result of non-negative least squares optimization """ def __init__(self, fit_intercept=True, normalize=False, copy_X=True): self.fit_intercept = fit_intercept self.normalize = normalize self.copy_X = copy_X def fit(self, X, y, sample_weight=None): """Fit non-negative linear model. Parameters ---------- X : numpy array or sparse matrix of shape [n_samples, n_features] Training data y : numpy array of shape [n_samples,] Target values sample_weight : numpy array of shape [n_samples] Individual weights for each sample Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, y_numeric=True, multi_output=False) if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1: raise ValueError("Sample weights must be 1D array or scalar") X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) self.coef_, result = nnls(X, y.squeeze()) if np.all(self.coef_ == 0): raise ConvergenceWarning("All coefficients estimated to be zero in" " the non-negative least squares fit.") self._set_intercept(X_offset, y_offset, X_scale) self.opt_result_ = OptimizeResult(success=True, status=0, x=self.coef_, fun=result) return self