Python sklearn.metrics Examples
The following are 30 code examples of the sklearn.metrics module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
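Before the project examples, here is a minimal, self-contained sketch (toy data, not taken from any project below) showing how sklearn.metrics functions are typically called: a classification metric and a regression metric, each taking true values first and predictions second.

import numpy as np
from sklearn import metrics

# Classification: fraction of correctly predicted labels.
y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
print(metrics.accuracy_score(y_true, y_pred))      # 0.8

# Regression: average squared difference between targets and predictions.
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
print(metrics.mean_squared_error(y_true, y_pred))  # 0.375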

Example #1
Source File: __init__.py From deepchem with MIT License | 6 votes |
def accuracy_score(y, y_pred):
  """Compute accuracy score

  Computes accuracy score for classification tasks. Works for both
  binary and multiclass classification.

  Parameters
  ----------
  y: np.ndarray
    Of shape `(N_samples,)`
  y_pred: np.ndarray
    Of shape `(N_samples,)`

  Returns
  -------
  score: float
    The fraction of correctly classified samples. A number between 0 and 1.
  """
  y = _ensure_class_labels(y)
  y_pred = _ensure_class_labels(y_pred)
  return sklearn.metrics.accuracy_score(y, y_pred)
Example #2
Source File: model_eval.py From healthcareai-py with MIT License | 6 votes |
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true target values)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result
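A usage sketch for a helper of this shape, with a hypothetical toy dataset and a LinearRegression standing in for any fitted estimator; `skmetrics` is assumed to alias `sklearn.metrics` as in the example above.

import numpy as np
import sklearn.metrics as skmetrics
from sklearn.linear_model import LinearRegression

x_test = np.array([[1.0], [2.0], [3.0], [4.0]])
y_test = np.array([1.1, 1.9, 3.2, 3.9])

# Toy fit; in practice the estimator is fitted on a separate training set.
model = LinearRegression().fit(x_test, y_test)
predictions = model.predict(x_test)

result = {
    'mean_squared_error': skmetrics.mean_squared_error(y_test, predictions),
    'mean_absolute_error': skmetrics.mean_absolute_error(y_test, predictions),
}
print(result)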
Example #3
Source File: auxiliaries.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 6 votes |
def log(self, main_keys, metric_keys, values):
    """
    Actually log new values in csv and Progress Saver dict internally.

    Args:
        main_keys:   Main key in which data will be stored. Normally is either 'train' for
                     training metrics or 'val' for validation metrics.
        metric_keys: Needs to follow the list length of self.progress_saver[main_key(s)].
                     List of metric keys that are extended with new values.
        values:      Needs to be a list of the same structure as metric_keys. Actual values
                     that are appended.
    """
    if not isinstance(main_keys, list):   main_keys = [main_keys]
    if not isinstance(metric_keys, list): metric_keys = [metric_keys]
    if not isinstance(values, list):      values = [values]

    # Log data to progress saver dict.
    for main_key in main_keys:
        for value, metric_key in zip(values, metric_keys):
            self.progress_saver[main_key][metric_key].append(value)

    # Append data to csv.
    self.csv_loggers[main_key].log(values)
Example #4
Source File: auxiliaries_nofaiss.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 6 votes |
def log(self, main_keys, metric_keys, values):
    """
    Actually log new values in csv and Progress Saver dict internally.

    Args:
        main_keys:   Main key in which data will be stored. Normally is either 'train' for
                     training metrics or 'val' for validation metrics.
        metric_keys: Needs to follow the list length of self.progress_saver[main_key(s)].
                     List of metric keys that are extended with new values.
        values:      Needs to be a list of the same structure as metric_keys. Actual values
                     that are appended.
    """
    if not isinstance(main_keys, list):   main_keys = [main_keys]
    if not isinstance(metric_keys, list): metric_keys = [metric_keys]
    if not isinstance(values, list):      values = [values]

    # Log data to progress saver dict.
    for main_key in main_keys:
        for value, metric_key in zip(values, metric_keys):
            self.progress_saver[main_key][metric_key].append(value)

    # Append data to csv.
    self.csv_loggers[main_key].log(values)
Example #5
Source File: utils.py From redshells with MIT License | 6 votes |
def optimize_model(task, param_name, test_size: float, binary=False) -> None:
    x, y = task.create_train_data()

    def objective(trial):
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=test_size)
        param = redshells.factory.get_optuna_param(param_name, trial)
        model = task.create_model()
        model.set_params(**param)
        model.fit(train_x, train_y)
        predictions = model.predict(test_x)

        if binary:
            predictions = np.rint(predictions)

        return 1.0 - sklearn.metrics.accuracy_score(test_y, predictions)

    study = optuna.create_study()
    study.optimize(objective, n_trials=100)
    task.dump(dict(best_params=study.best_params, best_value=study.best_value))
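The objective above turns accuracy into a minimization target. A minimal sketch of that scoring step in isolation, with hypothetical continuous predictions rounded to class labels before scoring:

import numpy as np
import sklearn.metrics

# Hypothetical continuous outputs used for a binary task.
test_y = np.array([0, 1, 1, 0, 1])
raw_predictions = np.array([0.1, 0.8, 0.6, 0.4, 0.3])

# Round to the nearest class label before scoring, as in the objective above.
predictions = np.rint(raw_predictions)
print(1.0 - sklearn.metrics.accuracy_score(test_y, predictions))  # 0.2: one of five labels is wrong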
Example #6
Source File: perf_data.py From AMPL with MIT License | 6 votes |
def compute_perf_metrics(self, per_task=False):
    """Returns the ROC_AUC metrics for each task based on the accumulated predictions.
    If per_task is False, returns the average ROC AUC over tasks.

    Args:
        per_task (bool): Whether to return individual ROC AUC scores for each task

    Returns:
        A tuple (roc_auc, std):
            roc_auc: A numpy array of ROC AUC scores, if per_task is True. Otherwise,
            a float giving the mean ROC AUC score over tasks.

            std: Placeholder for an array of standard deviations. Always None for this class.
    """
    roc_auc_scores = self.perf_metrics[0]
    if per_task or self.num_tasks == 1:
        return (roc_auc_scores, None)
    else:
        return (roc_auc_scores.mean(), None)
Example #7
Source File: catalogueconstructor.py From tridesclous with MIT License | 6 votes |
def trash_small_cluster(self, **kargs):
    cleancluster.trash_small_cluster(self, **kargs)

#~ def compute_spike_waveforms_similarity(self, method='cosine_similarity', size_max = 1e7):
    #~ """This compute the similarity spike by spike.
    #~ """
    #~ spike_waveforms_similarity = None
    #~ if self.some_waveforms is not None:
        #~ wf = self.some_waveforms
        #~ wf = wf.reshape(wf.shape[0], -1)
        #~ if wf.size<size_max:
            #~ spike_waveforms_similarity = metrics.compute_similarity(wf, method)

    #~ if spike_waveforms_similarity is None:
        #~ self.arrays.detach_array('spike_waveforms_similarity')
        #~ self.spike_waveforms_similarity = None
    #~ else:
        #~ self.arrays.add_array('spike_waveforms_similarity', spike_waveforms_similarity.astype('float32'), self.memory_mode)

    #~ return self.spike_waveforms_similarity
Example #8
Source File: catalogueconstructor.py From tridesclous with MIT License | 6 votes |
def compute_cluster_similarity(self, method='cosine_similarity_with_max'):
    if self.centroids_median is None:
        self.compute_all_centroid()

    #~ t1 = time.perf_counter()

    labels = self.cluster_labels
    mask = labels>=0

    wfs = self.centroids_median[mask, :, :]
    wfs = wfs.reshape(wfs.shape[0], -1)

    if wfs.size == 0:
        cluster_similarity = None
    else:
        cluster_similarity = metrics.cosine_similarity_with_max(wfs)

    if cluster_similarity is None:
        self.arrays.detach_array('cluster_similarity')
        self.cluster_similarity = None
    else:
        self.arrays.add_array('cluster_similarity', cluster_similarity.astype('float32'), self.memory_mode)

    #~ t2 = time.perf_counter()
    #~ print('compute_cluster_similarity', t2-t1)
Example #9
Source File: catalogueconstructor.py From tridesclous with MIT License | 6 votes |
def compute_spike_silhouette(self, size_max=1e7):
    #~ t1 = time.perf_counter()

    spike_silhouette = None
    #~ wf = self.some_waveforms
    if self.some_peaks_index is not None:
        wf = self.get_some_waveforms(peaks_index=self.some_peaks_index)
        wf = wf.reshape(wf.shape[0], -1)
        labels = self.all_peaks['cluster_label'][self.some_peaks_index]
        if wf.size<size_max:
            spike_silhouette = metrics.compute_silhouette(wf, labels, metric='euclidean')

    if spike_silhouette is None:
        self.arrays.detach_array('spike_silhouette')
        self.spike_silhouette = None
    else:
        self.arrays.add_array('spike_silhouette', spike_silhouette.astype('float32'), self.memory_mode)

    #~ t2 = time.perf_counter()
    #~ print('compute_spike_silhouette', t2-t1)
Example #10
Source File: scoring.py From skorch with BSD 3-Clause "New" or "Revised" License | 6 votes |
def convert_sklearn_metric_function(scoring):
    """If ``scoring`` is a sklearn metric function, convert it to a
    sklearn scorer and return it. Otherwise, return ``scoring`` unchanged."""
    if callable(scoring):
        module = getattr(scoring, '__module__', None)

        # those are scoring objects returned by make_scorer starting
        # from sklearn 0.22
        scorer_names = ('_PredictScorer', '_ProbaScorer', '_ThresholdScorer')
        if (
                hasattr(module, 'startswith') and
                module.startswith('sklearn.metrics.') and
                not module.startswith('sklearn.metrics.scorer') and
                not module.startswith('sklearn.metrics.tests.') and
                not scoring.__class__.__name__ in scorer_names
        ):
            return make_scorer(scoring)
    return scoring
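For context, a small sketch of the difference this helper bridges: a plain metric function takes (y_true, y_pred), while the scorer produced by make_scorer takes (estimator, X, y). The dataset and classifier here are arbitrary placeholders.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, make_scorer

X, y = make_classification(n_samples=200, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)

scorer = make_scorer(f1_score)        # wraps the metric into the scorer interface
print(scorer(clf, X, y))              # scorer: called with (estimator, X, y)
print(f1_score(y, clf.predict(X)))    # metric: called with (y_true, y_pred)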
Example #11
Source File: lens.py From sakmapper with MIT License | 6 votes |
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function
    """
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    return df_lens
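A self-contained sketch of the two main lens computations on toy data. Note that this sketch passes dissimilarity='precomputed' to MDS so the distance matrix is treated as distances; the project code above passes the matrix without that flag, so behaviour may differ.

import numpy as np
import pandas as pd
from sklearn import decomposition, manifold, metrics

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.rand(50, 6))  # 50 observations, 6 features

# PCA lens: project the raw features onto the first two principal components.
pca_lens = pd.DataFrame(decomposition.PCA(n_components=2).fit_transform(df),
                        index=df.index)

# MDS lens: embed a precomputed pairwise-distance matrix in two dimensions.
D = metrics.pairwise.pairwise_distances(df, metric='euclidean')
mds_lens = pd.DataFrame(manifold.MDS(n_components=2, dissimilarity='precomputed',
                                     random_state=0).fit_transform(D),
                        index=df.index)

print(pca_lens.shape, mds_lens.shape)  # (50, 2) (50, 2)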
Example #12
Source File: metric_utils.py From neuqe with GNU General Public License v3.0 | 6 votes |
def score(metrics, pred, ref):
    """ Function to score and print custom metrics """
    score_dict = OrderedDict()
    if metrics:
        for metric in metrics:
            if metric == 'pc':
                score_dict[metric] = pearson_correlation(pred, ref)
            elif metric == 'mae':
                score_dict[metric] = mean_absolute_error(pred, ref)
            elif metric == 'mse':
                score_dict[metric] = mean_squared_error(pred, ref)
            elif metric == 'rmse':
                score_dict[metric] = root_mean_squared_error(pred, ref)
            else:
                logger.error('Invalid metric: %s', metric)

    return score_dict
Example #13
Source File: sklearns.py From pytorch-lightning with Apache License 2.0 | 6 votes |
def __init__(
        self,
        metric_name: str,
        reduce_group: Any = group.WORLD,
        reduce_op: Any = ReduceOp.SUM,
        **kwargs,
):
    """
    Args:
        metric_name: the metric name to import and compute from scikit-learn.metrics
        reduce_group: the process group for DDP reduces (only needed for DDP training).
            Defaults to all processes (world)
        reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
            Defaults to sum.
        **kwargs: additional keyword arguments (will be forwarded to metric call)
    """
    super().__init__(name=metric_name, reduce_group=reduce_group, reduce_op=reduce_op)

    self.metric_kwargs = kwargs

    lightning_logger.debug(
        f'Metric {self.__class__.__name__} is using Sklearn as backend, meaning that'
        ' every metric call will cause a GPU synchronization, which may slow down your code'
    )
Example #14
Source File: scoring.py From professional-services with Apache License 2.0 | 6 votes |
def analyze(probas, target):
    """Analyzes predictions and returns results.

    Computes different metrics (specified by `constants.METRICS`) comparing
    predictions to true labels.

    Args:
        probas: `np.array` with predicted probabilities.
        target: `np.array` of `int` with true labels.

    Returns:
        Dictionary of `str` to `float` mapping metric names to the
        corresponding scores.
    """
    results = {}
    for metric_type, sub_metrics in _METRICS.iteritems():
        for metric_name in sub_metrics:
            metric = getattr(metrics, metric_name)
            results[metric_name] = metric(
                target,
                (probas if metric_type == _CONTINUOUS_TYPE
                 else probas > _ACCURACY_THRESHOLD))
    return results
Example #15
Source File: model.py From FeatureHub with MIT License | 6 votes |
def compute_metrics_cv(self, X, Y):
    """Compute cross-validated metrics.

    Trains this model on data X with labels Y.

    Returns a MetricList with the name, scoring type, and value for each
    Metric. Note that these values may be numpy floating points, and should
    be converted prior to insertion in a database.

    Parameters
    ----------
    X : numpy array-like or pd.DataFrame
        data
    Y : numpy array-like or pd.DataFrame or pd.Series
        labels
    """

    scorings, scorings_ = self._get_scorings()

    # compute scores
    scores = self.cv_score_mean(X, Y, scorings_)

    # unpack into MetricList
    metric_list = self.scores_to_metriclist(scorings, scores)

    return metric_list
Example #16
Source File: model.py From FeatureHub with MIT License | 6 votes |
def compute_metrics_train_test(self, X, Y, n):
    """Compute metrics on test set.
    """

    X, Y = Model._format_matrices(X, Y)

    X_train, Y_train = X[:n], Y[:n]
    X_test, Y_test = X[n:], Y[n:]

    scorings, scorings_ = self._get_scorings()

    # Determine binary/multiclass classification
    classes = np.unique(Y)
    params = self._get_params(classes)

    # fit model on entire training set
    self.model.fit(X_train, Y_train)

    scores = {}
    for scoring in scorings_:
        scores[scoring] = self._do_scoring(scoring, params, self.model,
                                           X_test, Y_test)

    metric_list = self.scores_to_metriclist(scorings, scores)
    return metric_list
Example #17
Source File: baselines.py From AirBnbPricePrediction with MIT License | 6 votes |
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' ---------\n')
    predicted_values = trained_model.predict(X_test)

    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))

    plt.scatter(y_test, predicted_values, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt.title(trained_model_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}/y_{test}$')
    plt.savefig('%s.png' % trained_model_name, bbox_inches='tight')
    print("---------------------------------------\n")
Example #18
Source File: baselines.py From AirBnbPricePrediction with MIT License | 6 votes |
def print_evaluation_metrics2(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' --------- (Train Data)\n')
    predicted_values = trained_model.predict(X_test)

    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))

    plt.scatter(y_test, predicted_values/y_test, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt_name = trained_model_name + " (Train Data)"
    plt.title(plt_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}/y_{test}$')
    plt.savefig('%s.png' % plt_name, bbox_inches='tight')
    print("---------------------------------------\n")
Example #19
Source File: baselines.py From AirBnbPricePrediction with MIT License | 6 votes |
def LinearModel(X_train, y_train, X_val, y_val):
    regr = linear_model.LinearRegression(n_jobs=int(0.8*n_cores)).fit(X_train, y_train)
    y_pred = regr.predict(X_val)

    # print('--------- For Model: LinearRegression --------- \n')
    # print('Coefficients: \n', regr.coef_)
    print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))
    print("R2: ", sklearn.metrics.r2_score(y_val, y_pred))

    # =============================================================================
    # plt.scatter(y_val, y_pred/y_val, color='black')
    # # plt.plot(x, y_pred, color='blue', linewidth=3)
    # plt.title('Linear Model Baseline')
    # plt.xlabel('$y_{test}$')
    # plt.ylabel('$y_{predicted}/y_{test}$')
    # plt.savefig('Linear Model Baseline.png', bbox_inches='tight')
    # =============================================================================
    return
Example #20
Source File: __init__.py From deepchem with MIT License | 5 votes |
def roc_auc_score(y, y_pred):
  """Area under the receiver operating characteristic curve."""
  if y.shape != y_pred.shape:
    y = _ensure_one_hot(y)
  return sklearn.metrics.roc_auc_score(y, y_pred)
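A minimal direct call to sklearn.metrics.roc_auc_score on toy binary labels and scores (the `_ensure_one_hot` helper above is deepchem-specific and is not needed for the 1-D case):

import numpy as np
import sklearn.metrics

# Toy binary labels and predicted probabilities of the positive class.
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
print(sklearn.metrics.roc_auc_score(y_true, y_score))  # 0.75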
Example #21
Source File: __init__.py From deepchem with MIT License | 5 votes |
def balanced_accuracy_score(y, y_pred):
  """Computes balanced accuracy score."""
  num_positive = float(np.count_nonzero(y))
  num_negative = float(len(y) - num_positive)
  pos_weight = num_negative / num_positive
  weights = np.ones_like(y)
  weights[y != 0] = pos_weight
  return sklearn.metrics.balanced_accuracy_score(
      y, y_pred, sample_weight=weights)
Example #22
Source File: model_eval.py From healthcareai-py with MIT License | 5 votes |
def calculate_binary_classification_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true labels)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Squeeze down y_test to 1D
    y_test = np.squeeze(y_test)

    _validate_predictions_and_labels_are_equal_length(x_test, y_test)

    # Get binary and probability classification predictions
    binary_predictions = np.squeeze(trained_sklearn_estimator.predict(x_test))
    probability_predictions = np.squeeze(trained_sklearn_estimator.predict_proba(x_test)[:, 1])

    # Calculate accuracy
    accuracy = skmetrics.accuracy_score(y_test, binary_predictions)
    roc = compute_roc(y_test, probability_predictions)
    pr = compute_pr(y_test, probability_predictions)

    # Unpack the roc and pr dictionaries so the metric lookup is easier for plot and ensemble methods
    return {'accuracy': accuracy, **roc, **pr}
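compute_roc and compute_pr are healthcareai helpers not shown here; the following is a hedged sketch of the kind of scikit-learn calls such helpers typically build on, using toy labels and probabilities:

import numpy as np
from sklearn import metrics as skmetrics

y_test = np.array([0, 0, 1, 1])
probability_predictions = np.array([0.1, 0.4, 0.35, 0.8])

# ROC curve and its area.
fpr, tpr, _ = skmetrics.roc_curve(y_test, probability_predictions)
roc_auc = skmetrics.auc(fpr, tpr)

# Precision-recall curve and its area.
precision, recall, _ = skmetrics.precision_recall_curve(y_test, probability_predictions)
pr_auc = skmetrics.auc(recall, precision)

print(roc_auc, pr_auc)  # roc_auc is 0.75 for this toy data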
Example #23
Source File: auxiliaries.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 5 votes |
def set_checkpoint(model, opt, progress_saver, savepath):
    """
    Store relevant parameters (model and progress saver, as well as parameter-namespace).
    Can be easily extended for other stuff.

    Args:
        model:          PyTorch network, network whose parameters are to be saved.
        opt:            argparse.Namespace, includes all training-specific parameters
        progress_saver: subclass of LOGGER-class, contains a running memory of all training metrics.
        savepath:       str, where to save checkpoint.
    Returns:
        Nothing!
    """
    torch.save({'state_dict': model.state_dict(), 'opt': opt, 'progress': progress_saver}, savepath)
Example #24
Source File: auxiliaries.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 5 votes |
def __init__(self, save_path, columns):
    """
    Args:
        save_path: str, where to store the csv file
        columns:   list of str, name of csv columns under which the resp. metrics are stored.
    Returns:
        Nothing!
    """
    self.save_path = save_path
    self.columns = columns

    with open(self.save_path, "a") as csv_file:
        writer = csv.writer(csv_file, delimiter=",")
        writer.writerow(self.columns)
Example #25
Source File: auxiliaries_nofaiss.py From Deep-Metric-Learning-Baselines with Apache License 2.0 | 5 votes |
def set_checkpoint(model, opt, progress_saver, savepath):
    """
    Store relevant parameters (model and progress saver, as well as parameter-namespace).
    Can be easily extended for other stuff.

    Args:
        model:          PyTorch network, network whose parameters are to be saved.
        opt:            argparse.Namespace, includes all training-specific parameters
        progress_saver: subclass of LOGGER-class, contains a running memory of all training metrics.
        savepath:       str, where to save checkpoint.
    Returns:
        Nothing!
    """
    torch.save({'state_dict': model.state_dict(), 'opt': opt, 'progress': progress_saver}, savepath)
Example #26
Source File: grid_graph.py From dgl with Apache License 2.0 | 5 votes |
def distance_sklearn_metrics(z, k=4, metric='euclidean'):
    """Compute pairwise distances"""
    #d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=-2)
    d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=1)
    # k-NN
    idx = np.argsort(d)[:, 1:k+1]
    d.sort()
    d = d[:, 1:k+1]
    return d, idx
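The same k-nearest-neighbour pattern written out as a self-contained sketch on toy data, using np.sort instead of the in-place d.sort() so the original distance matrix is left untouched:

import numpy as np
import sklearn.metrics

rng = np.random.RandomState(0)
z = rng.rand(10, 3)  # 10 points in 3 dimensions

d = sklearn.metrics.pairwise.pairwise_distances(z, metric='euclidean')
k = 4
# Column 0 after sorting is each point's zero distance to itself,
# so the k nearest neighbours are columns 1..k.
idx = np.argsort(d)[:, 1:k + 1]   # neighbour indices per point
dist = np.sort(d)[:, 1:k + 1]     # corresponding distances
print(idx.shape, dist.shape)      # (10, 4) (10, 4)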
Example #27
Source File: textpro.py From comparable-text-miner with Apache License 2.0 | 5 votes |
def build_features(doc_feat_grams, corpus_feat_grams):
    doc_grams = set(doc_feat_grams)
    feats = dict([(word, True) for word in doc_grams if word in corpus_feat_grams])
    return feats

###################################################################################
# evaluate predicted results using true values.
# evaluation metrics are accuracy, precision, recall and f-measure.
Example #28
Source File: build_model.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def metrics_from_list(metric_list: Optional[List[str]] = None) -> List[Callable]:
    """
    Given a list of metric function paths, i.e. sklearn.metrics.r2_score, or
    simple function names which are expected to be in the ``sklearn.metrics``
    module, this will return a list of those loaded functions.

    Parameters
    ----------
    metric_list: Optional[List[str]]
        List of function paths to use as metrics for the model. Defaults to
        those specified in :class:`gordo.workflow.config_components.NormalizedConfig`:
        sklearn.metrics.explained_variance_score,
        sklearn.metrics.r2_score,
        sklearn.metrics.mean_squared_error,
        sklearn.metrics.mean_absolute_error

    Returns
    -------
    List[Callable]
        A list of the functions loaded

    Raises
    ------
    AttributeError:
        If the function cannot be loaded.
    """
    defaults = NormalizedConfig.DEFAULT_CONFIG_GLOBALS["evaluation"]["metrics"]
    funcs = list()
    for func_path in metric_list or defaults:
        func = pydoc.locate(func_path)
        if func is None:
            # Final attempt, load function from sklearn.metrics module.
            funcs.append(getattr(metrics, func_path))
        else:
            funcs.append(func)
    return funcs
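A small sketch of the lookup pattern used above: pydoc.locate resolves a fully qualified path, and a bare name falls back to an attribute lookup on the sklearn.metrics module.

import pydoc
from sklearn import metrics

# Fully qualified path: resolved directly by pydoc.
func = pydoc.locate("sklearn.metrics.r2_score")
print(func([3.0, 2.0, 1.0], [2.5, 2.0, 1.5]))

# Bare name: pydoc.locate returns None, so fall back to the sklearn.metrics module.
name = "mean_absolute_error"
func = pydoc.locate(name) or getattr(metrics, name)
print(func([3.0, 2.0, 1.0], [2.5, 2.0, 1.5]))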
Example #29
Source File: perf_data.py From AMPL with MIT License | 5 votes |
def model_choice_score(self, score_type='r2'):
    """Computes a score function based on the accumulated predicted values, to be used
    for selecting the best training epoch and other hyperparameters.

    Args:
        score_type (str): The name of the scoring metric to be used, e.g. 'r2',
            'neg_mean_squared_error', 'neg_mean_absolute_error', etc.; see
            https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
            and sklearn.metrics.SCORERS.keys() for a complete list of options.
            Larger values of the score function indicate better models.

    Returns:
        score (float): A score function value. For multitask models, this will be
            averaged over tasks.
    """
    ids, pred_vals, stds = self.get_pred_values()
    real_vals = self.get_real_values(ids)
    weights = self.get_weights(ids)
    scores = []
    for i in range(self.num_tasks):
        nzrows = np.where(weights[:,i] != 0)[0]
        task_real_vals = np.squeeze(real_vals[nzrows,i])
        task_pred_vals = np.squeeze(pred_vals[nzrows,i])
        scores.append(regr_score_func[score_type](task_real_vals, task_pred_vals))
    self.model_score = float(np.mean(scores))
    if score_type in loss_funcs:
        self.model_score = -self.model_score
    return self.model_score

# ****************************************************************************************
# class RegressionPerfData
Example #30
Source File: perf_data.py From AMPL with MIT License | 5 votes |
def compute_perf_metrics(self, per_task=False):
    """Computes the R-squared metrics for each task based on the accumulated values,
    averaged over training folds, along with standard deviations of the scores. If
    per_task is False, the scores are averaged over tasks and the overall standard
    deviation is reported instead.

    Args:
        per_task (bool): True if calculating per-task metrics, False otherwise.

    Returns:
        A tuple (r2_mean, r2_std):
            r2_mean: A numpy array of mean R^2 scores for each task, averaged over folds,
            if per_task is True. Otherwise, a float giving the R^2 score averaged over
            both folds and tasks.

            r2_std: A numpy array of standard deviations over folds of R^2 values, if
            per_task is True. Otherwise, a float giving the overall standard deviation.
    """
    r2_scores = np.stack(self.perf_metrics)
    if per_task:
        r2_mean = np.mean(r2_scores, axis=0)
        r2_std = np.std(r2_scores, axis=0)
    else:
        r2_mean = np.mean(r2_scores.flatten())
        r2_std = np.std(r2_scores.flatten())
    return (r2_mean, r2_std)

# ****************************************************************************************
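A toy illustration of the aggregation performed above, with made-up per-fold, per-task R^2 scores (3 folds, 2 tasks):

import numpy as np

# Made-up per-fold, per-task R^2 scores: 3 folds x 2 tasks.
perf_metrics = [np.array([0.81, 0.74]),
                np.array([0.79, 0.70]),
                np.array([0.83, 0.76])]

r2_scores = np.stack(perf_metrics)    # shape (3, 2): folds x tasks
print(np.mean(r2_scores, axis=0))     # per-task mean over folds
print(np.mean(r2_scores.flatten()),   # overall mean over folds and tasks
      np.std(r2_scores.flatten()))    # overall standard deviation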