Python sklearn.metrics Examples

The following are 30 code examples of the sklearn.metrics module. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
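Most of the examples below reach sklearn.metrics either through the full module path (sklearn.metrics.r2_score), an alias (metrics, skmetrics), or a direct import of individual functions. As a point of reference, here is a minimal, self-contained sketch of the common pattern on made-up toy data, using only standard scikit-learn calls:

import numpy as np
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Classification metrics compare true labels against predicted labels.
y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
print("accuracy:", accuracy_score(y_true, y_pred))

# Regression metrics compare true values against predicted values.
y_val = np.array([2.0, 3.5, 4.0])
y_hat = np.array([2.1, 3.0, 4.2])
print("MSE:", mean_squared_error(y_val, y_hat))
print("R2:", r2_score(y_val, y_hat))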
Example #1
Source File: baselines.py    From AirBnbPricePrediction with MIT License
def LinearModel(X_train, y_train, X_val, y_val):
    regr = linear_model.LinearRegression(n_jobs=int(0.8*n_cores)).fit(X_train, y_train)
    y_pred = regr.predict(X_val)

    # print('--------- For Model: LinearRegression --------- \n')
    # print('Coefficients: \n', regr.coef_)
    print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))
    print("R2: ", sklearn.metrics.r2_score(y_val, y_pred))

# =============================================================================
#     plt.scatter(y_val, y_pred/y_val, color='black')
#     # plt.plot(x, y_pred, color='blue', linewidth=3)
#     plt.title('Linear Model Baseline')
#     plt.xlabel('$y_{test}$')
#     plt.ylabel('$y_{predicted}/y_{test}$')
#     plt.savefig('Linear Model Baseline.png', bbox_inches='tight')
# =============================================================================
    
    return 
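The excerpt above is not self-contained: it relies on module-level imports and an n_cores constant from the original baselines.py. A minimal harness to run it might look like the following sketch; the imports mirror what the function needs, and the data and the n_cores value are made-up assumptions.

import numpy as np
import sklearn.metrics
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

n_cores = 4  # assumed value; in the original file this comes from the environment

rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = X @ np.array([1.0, 2.0, 0.5, -1.0, 3.0]) + 0.05 * rng.randn(200)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=0)

LinearModel(X_train, y_train, X_val, y_val)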
Example #2
Source File: auxiliaries.py    From Deep-Metric-Learning-Baselines with Apache License 2.0
def log(self, main_keys, metric_keys, values):
        """
        Log new values to the CSV file and to the internal progress-saver dict.
        Args:
            main_keys:      Main key(s) under which the data is stored; normally 'train' for training metrics or 'val' for validation metrics.
            metric_keys:    List of metric keys to extend with new values; must match the keys tracked under self.progress_saver[main_key].
            values:         List with the same structure as metric_keys, holding the actual values to append.
        """
        if not isinstance(main_keys, list):   main_keys = [main_keys]
        if not isinstance(metric_keys, list): metric_keys = [metric_keys]
        if not isinstance(values, list):      values = [values]

        #Log data to progress saver dict.
        for main_key in main_keys:
            for value, metric_key in zip(values, metric_keys):
                self.progress_saver[main_key][metric_key].append(value)

        #Append data to csv.
        self.csv_loggers[main_key].log(values) 
Example #3
Source File: auxiliaries_nofaiss.py    From Deep-Metric-Learning-Baselines with Apache License 2.0
def log(self, main_keys, metric_keys, values):
        """
        Log new values to the CSV file and to the internal progress-saver dict.
        Args:
            main_keys:      Main key(s) under which the data is stored; normally 'train' for training metrics or 'val' for validation metrics.
            metric_keys:    List of metric keys to extend with new values; must match the keys tracked under self.progress_saver[main_key].
            values:         List with the same structure as metric_keys, holding the actual values to append.
        """
        if not isinstance(main_keys, list):   main_keys = [main_keys]
        if not isinstance(metric_keys, list): metric_keys = [metric_keys]
        if not isinstance(values, list):      values = [values]

        #Log data to progress saver dict.
        for main_key in main_keys:
            for value, metric_key in zip(values, metric_keys):
                self.progress_saver[main_key][metric_key].append(value)

        #Append data to csv.
        self.csv_loggers[main_key].log(values) 
Example #4
Source File: model_eval.py    From healthcareai-py with MIT License
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true target values)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}

    return result 
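A hedged usage sketch: assuming the function above is defined in the same session and that skmetrics aliases sklearn.metrics (as in the original module's imports), the helper simply returns a plain dict of scores. The data and estimator below are made up.

import numpy as np
import sklearn.metrics as skmetrics
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
x_train, x_test = rng.rand(80, 3), rng.rand(20, 3)
coef = np.array([1.0, 2.0, 3.0])
y_train = x_train @ coef + 0.1 * rng.randn(80)
y_test = x_test @ coef + 0.1 * rng.randn(20)

estimator = LinearRegression().fit(x_train, y_train)
print(calculate_regression_metrics(estimator, x_test, y_test))
# -> {'mean_squared_error': ..., 'mean_absolute_error': ...}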
Example #5
Source File: utils.py    From redshells with MIT License
def optimize_model(task, param_name, test_size: float, binary=False) -> None:
    x, y = task.create_train_data()

    def objective(trial):
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=test_size)
        param = redshells.factory.get_optuna_param(param_name, trial)
        model = task.create_model()
        model.set_params(**param)
        model.fit(train_x, train_y)
        predictions = model.predict(test_x)

        if binary:
            predictions = np.rint(predictions)

        return 1.0 - sklearn.metrics.accuracy_score(test_y, predictions)

    study = optuna.create_study()
    study.optimize(objective, n_trials=100)
    task.dump(dict(best_params=study.best_params, best_value=study.best_value)) 
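The same pattern without the redshells task machinery, as a minimal sketch: Optuna minimizes the value returned by the objective, so the objective reports 1 - accuracy computed with sklearn.metrics.accuracy_score. The classifier, dataset, and search space here are placeholders, not the original project's.

import optuna
import sklearn.metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

x, y = load_iris(return_X_y=True)

def objective(trial):
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2)
    model = RandomForestClassifier(
        n_estimators=trial.suggest_int('n_estimators', 10, 100),
        max_depth=trial.suggest_int('max_depth', 2, 8),
    )
    model.fit(train_x, train_y)
    # Optuna minimizes, so return the error rate rather than the accuracy.
    return 1.0 - sklearn.metrics.accuracy_score(test_y, model.predict(test_x))

study = optuna.create_study()
study.optimize(objective, n_trials=20)
print(study.best_params, study.best_value)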
Example #6
Source File: perf_data.py    From AMPL with MIT License
def compute_perf_metrics(self, per_task=False):
        """Returns the ROC_AUC metrics for each task based on the accumulated predictions. If
        per_task is False, returns the average ROC AUC over tasks.
        
        Args:
            per_task (bool): Whether to return individual ROC AUC scores for each task

        Returns:
            A tuple (roc_auc, std):
                roc_auc: A numpy array of ROC AUC scores, if per_task is True. Otherwise,
                         a float giving the mean ROC AUC score over tasks.

                std:     Placeholder for an array of standard deviations. Always None for this class.

        """
        roc_auc_scores = self.perf_metrics[0]
        if per_task or self.num_tasks == 1:
            return (roc_auc_scores, None)
        else:
            return (roc_auc_scores.mean(), None) 
Example #7
Source File: catalogueconstructor.py    From tridesclous with MIT License
def trash_small_cluster(self, **kargs):
        cleancluster.trash_small_cluster(self, **kargs)

    #~ def compute_spike_waveforms_similarity(self, method='cosine_similarity', size_max = 1e7):
        #~ """This compute the similarity spike by spike.
        #~ """
        #~ spike_waveforms_similarity = None
        #~ if self.some_waveforms is not None:
            #~ wf = self.some_waveforms
            #~ wf = wf.reshape(wf.shape[0], -1)
            #~ if wf.size<size_max:
                #~ spike_waveforms_similarity = metrics.compute_similarity(wf, method)
        
        #~ if spike_waveforms_similarity is None:
            #~ self.arrays.detach_array('spike_waveforms_similarity')
            #~ self.spike_waveforms_similarity = None
        #~ else:
            #~ self.arrays.add_array('spike_waveforms_similarity', spike_waveforms_similarity.astype('float32'), self.memory_mode)

        #~ return self.spike_waveforms_similarity 
Example #8
Source File: catalogueconstructor.py    From tridesclous with MIT License
def compute_cluster_similarity(self, method='cosine_similarity_with_max'):
        if self.centroids_median is None:
            self.compute_all_centroid()
        
        #~ t1 = time.perf_counter()
        
        labels = self.cluster_labels
        mask = labels>=0
        
        wfs = self.centroids_median[mask, :,  :]
        wfs = wfs.reshape(wfs.shape[0], -1)
        
        if wfs.size == 0:
            cluster_similarity = None
        else:
            cluster_similarity = metrics.cosine_similarity_with_max(wfs)

        if cluster_similarity is None:
            self.arrays.detach_array('cluster_similarity')
            self.cluster_similarity = None
        else:
            self.arrays.add_array('cluster_similarity', cluster_similarity.astype('float32'), self.memory_mode)

        #~ t2 = time.perf_counter()
        #~ print('compute_cluster_similarity', t2-t1) 
Example #9
Source File: catalogueconstructor.py    From tridesclous with MIT License
def compute_spike_silhouette(self, size_max=1e7):
        #~ t1 = time.perf_counter()
        
        spike_silhouette = None
        #~ wf = self.some_waveforms
        if self.some_peaks_index is not None:
            wf = self.get_some_waveforms(peaks_index=self.some_peaks_index)
            wf = wf.reshape(wf.shape[0], -1)
            labels = self.all_peaks['cluster_label'][self.some_peaks_index]
            if wf.size<size_max:
                spike_silhouette = metrics.compute_silhouette(wf, labels, metric='euclidean')

        if spike_silhouette is None:
            self.arrays.detach_array('spike_silhouette')
            self.spike_silhouette = None
        else:
            self.arrays.add_array('spike_silhouette', spike_silhouette.astype('float32'), self.memory_mode)


        #~ t2 = time.perf_counter()
        #~ print('compute_spike_silhouette', t2-t1) 
Example #10
Source File: scoring.py    From skorch with BSD 3-Clause "New" or "Revised" License
def convert_sklearn_metric_function(scoring):
    """If ``scoring`` is a sklearn metric function, convert it to a
    sklearn scorer and return it. Otherwise, return ``scoring`` unchanged."""
    if callable(scoring):
        module = getattr(scoring, '__module__', None)

        # those are scoring objects returned by make_scorer starting
        # from sklearn 0.22
        scorer_names = ('_PredictScorer', '_ProbaScorer', '_ThresholdScorer')
        if (
                hasattr(module, 'startswith') and
                module.startswith('sklearn.metrics.') and
                not module.startswith('sklearn.metrics.scorer') and
                not module.startswith('sklearn.metrics.tests.') and
                not scoring.__class__.__name__ in scorer_names
        ):
            return make_scorer(scoring)
    return scoring 
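Why the conversion matters, in a hedged sketch: a bare sklearn.metrics function has the signature metric(y_true, y_pred), while scikit-learn's scoring machinery expects a scorer called as scorer(estimator, X, y); make_scorer bridges the two. The metric and estimator below are arbitrary choices for illustration.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import balanced_accuracy_score, make_scorer
from sklearn.model_selection import cross_val_score

x, y = load_iris(return_X_y=True)

# Wrap the plain metric function so cross_val_score can apply it to a fitted estimator.
scorer = make_scorer(balanced_accuracy_score)
print(cross_val_score(LogisticRegression(max_iter=1000), x, y, scoring=scorer))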
Example #11
Source File: metric_utils.py    From neuqe with GNU General Public License v3.0
def score(metrics, pred, ref):
    """ Function to score and print custom metrics """
    score_dict = OrderedDict()
    if metrics:
        for metric in metrics:
            if metric == 'pc':
                score_dict[metric] = pearson_correlation(pred,ref)
            elif metric == 'mae':
                score_dict[metric] = mean_absolute_error(pred, ref)
            elif metric == 'mse':
                score_dict[metric] = mean_squared_error(pred, ref)
            elif metric == 'rmse':
                score_dict[metric] = root_mean_squared_error(pred, ref)
            else:
                logger.error('Invalid metric: %s',metric)

    return score_dict 
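For reference, a standalone sketch of how the four metric names dispatched above map onto sklearn.metrics and NumPy calls (the project's own mean_squared_error and mean_absolute_error helpers appear in Examples #23 and #24); the toy arrays are made up.

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

pred = np.array([2.5, 0.0, 2.1, 7.8])
ref = np.array([3.0, -0.5, 2.0, 7.0])

scores = {
    'pc': np.corrcoef(pred, ref)[0, 1],              # Pearson correlation
    'mae': mean_absolute_error(ref, pred),
    'mse': mean_squared_error(ref, pred),
    'rmse': np.sqrt(mean_squared_error(ref, pred)),  # root mean squared error
}
print(scores)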
Example #12
Source File: sklearns.py    From pytorch-lightning with Apache License 2.0
def __init__(
            self,
            metric_name: str,
            reduce_group: Any = group.WORLD,
            reduce_op: Any = ReduceOp.SUM,
            **kwargs,
    ):
        """
        Args:
            metric_name: the name of the metric to import and compute from sklearn.metrics
            reduce_group: the process group for DDP reduces (only needed for DDP training).
                Defaults to all processes (world)
            reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
                Defaults to sum.
            **kwargs: additional keyword arguments (will be forwarded to the metric call)
        """
        super().__init__(name=metric_name,
                         reduce_group=reduce_group,
                         reduce_op=reduce_op)

        self.metric_kwargs = kwargs
        lightning_logger.debug(
            f'Metric {self.__class__.__name__} is using Sklearn as backend, meaning that'
            ' every metric call will cause a GPU synchronization, which may slow down your code'
        ) 
Example #13
Source File: scoring.py    From professional-services with Apache License 2.0
def analyze(probas, target):
  """Analyzes predictions and returns results.

  Computes different metrics (specified by `constants.METRICS`) comparing
  predictions to true labels.

  Args:
    probas: `np.array` with predicted probabilities.
    target: `np.array` of `int` with true labels.

  Returns:
    Dictionary of `str` to `float` mapping metric names to the corresponding
      scores.
  """

  results = {}
  for metric_type, sub_metrics in _METRICS.items():
    for metric_name in sub_metrics:
      metric = getattr(metrics, metric_name)

      results[metric_name] = metric(
          target,
          (probas if metric_type == _CONTINUOUS_TYPE
           else probas > _ACCURACY_THRESHOLD))
  return results 
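A minimal sketch of the getattr dispatch used above, with a hypothetical metric_names mapping standing in for the project's _METRICS constant: continuous metrics receive the raw probabilities, discrete metrics a thresholded copy. The names and threshold below are illustrative assumptions.

import numpy as np
import sklearn.metrics as metrics

probas = np.array([0.1, 0.4, 0.8, 0.9])
target = np.array([0, 0, 1, 1])
threshold = 0.5

# True marks metrics that consume probabilities, False marks label-based metrics.
metric_names = {'roc_auc_score': True, 'accuracy_score': False, 'f1_score': False}

results = {
    name: getattr(metrics, name)(target, probas if continuous else probas > threshold)
    for name, continuous in metric_names.items()
}
print(results)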
Example #14
Source File: model.py    From FeatureHub with MIT License
def compute_metrics_cv(self, X, Y):
        """Compute cross-validated metrics.

        Trains this model on data X with labels Y.

        Returns a MetricList with the name, scoring type, and value for each
        Metric. Note that these values may be numpy floating points, and should
        be converted prior to insertion in a database.

        Parameters
        ----------
        X : numpy array-like or pd.DataFrame
            data
        Y : numpy array-like or pd.DataFrame or pd.Series
            labels
        """

        scorings, scorings_ = self._get_scorings()

        # compute scores
        scores = self.cv_score_mean(X, Y, scorings_)

        # unpack into MetricList
        metric_list = self.scores_to_metriclist(scorings, scores)
        return metric_list 
Example #15
Source File: model.py    From FeatureHub with MIT License
def compute_metrics_train_test(self, X, Y, n):
        """Compute metrics on test set.
        """

        X, Y = Model._format_matrices(X, Y)

        X_train, Y_train = X[:n], Y[:n]
        X_test, Y_test = X[n:], Y[n:]

        scorings, scorings_ = self._get_scorings()

        # Determine binary/multiclass classification
        classes = np.unique(Y)
        params = self._get_params(classes)

        # fit model on entire training set
        self.model.fit(X_train, Y_train)

        scores = {}
        for scoring in scorings_:
            scores[scoring] = self._do_scoring(scoring, params, self.model,
                    X_test, Y_test)

        metric_list = self.scores_to_metriclist(scorings, scores)
        return metric_list 
Example #16
Source File: lens.py    From sakmapper with MIT License
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function
    """
    if n_dim != 2:
        raise ValueError('image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('PCA requires the use of the euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('only PCA, MDS and neighborhood lenses are supported')
    
    return df_lens 
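A hedged usage sketch that supplies the module-level imports apply_lens expects (pandas, plus sklearn's decomposition, manifold and metrics submodules) and applies the default PCA lens to random data; the DataFrame is made up.

import numpy as np
import pandas as pd
from sklearn import decomposition, manifold, metrics

df = pd.DataFrame(np.random.rand(30, 5))
print(apply_lens(df, lens='pca').head())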
Example #17
Source File: baselines.py    From AirBnbPricePrediction with MIT License
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' ---------\n')
    predicted_values = trained_model.predict(X_test)
    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))
    plt.scatter(y_test, predicted_values, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt.title(trained_model_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}$')
    plt.savefig('%s.png' %trained_model_name, bbox_inches='tight')
    print("---------------------------------------\n") 
Example #18
Source File: baselines.py    From AirBnbPricePrediction with MIT License
def print_evaluation_metrics2(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' --------- (Train Data)\n')
    predicted_values = trained_model.predict(X_test)
    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))
    plt.scatter(y_test, predicted_values/y_test, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt_name = trained_model_name + " (Train Data)"
    plt.title(plt_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}/y_{test}$')
    plt.savefig('%s.png' %plt_name, bbox_inches='tight')
    print("---------------------------------------\n") 
Example #19
Source File: __init__.py    From deepchem with MIT License
def accuracy_score(y, y_pred):
  """Compute accuracy score

  Computes accuracy score for classification tasks. Works for both
  binary and multiclass classification.

  Parameters
  ----------
  y: np.ndarray
    Of shape `(N_samples,)`
  y_pred: np.ndarray
    Of shape `(N_samples,)`

  Returns
  -------
  score: float
    The fraction of correctly classified samples. A number between 0
    and 1.
  """
  y = _ensure_class_labels(y)
  y_pred = _ensure_class_labels(y_pred)
  return sklearn.metrics.accuracy_score(y, y_pred) 
Example #20
Source File: sklearns.py    From pytorch-lightning with Apache License 2.0
def metric_fn(self):
        import sklearn.metrics
        return getattr(sklearn.metrics, self.name) 
Example #21
Source File: sklearns.py    From pytorch-lightning with Apache License 2.0
def __init__(
            self,
            average: Optional[str] = 'macro',
            reduce_group: Any = group.WORLD,
            reduce_op: Any = ReduceOp.SUM,
    ):
        """
        Args:
            average: If None, the scores for each class are returned. Otherwise, this determines the type of
                averaging performed on the data:

                * If 'micro': Calculate metrics globally by considering each element of the label indicator
                  matrix as a label.
                * If 'macro': Calculate metrics for each label, and find their unweighted mean.
                  This does not take label imbalance into account.
                * If 'weighted': Calculate metrics for each label, and find their average, weighted by
                  support (the number of true instances for each label).
                * If 'samples': Calculate metrics for each instance, and find their average.

            reduce_group: the process group for DDP reduces (only needed for DDP training).
                Defaults to all processes (world)
            reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
                Defaults to sum.
        """
        super().__init__('roc_auc_score',
                         reduce_group=reduce_group,
                         reduce_op=reduce_op,
                         average=average) 
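What the average argument changes, in a standalone sketch on a toy multilabel problem; this calls sklearn.metrics.roc_auc_score directly rather than the Lightning wrapper above, and the data is made up.

import numpy as np
from sklearn.metrics import roc_auc_score

# Four samples, two labels, multilabel-indicator format.
y_true = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])
y_score = np.array([[0.8, 0.3], [0.2, 0.7], [0.6, 0.4], [0.4, 0.9]])

for average in (None, 'micro', 'macro', 'weighted', 'samples'):
    print(average, roc_auc_score(y_true, y_score, average=average))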
Example #22
Source File: predict.py    From Azimuth with BSD 3-Clause "New" or "Revised" License
def extract_NDCG_for_fold(metrics, fold, i, predictions, truth, y_ground_truth, test, y_pred, learn_options):
    NDCG_fold = ranking_metrics.ndcg_at_k_ties(y_ground_truth[test].flatten(), y_pred.flatten(), learn_options["NDGC_k"])
    metrics.append(NDCG_fold) 
Example #23
Source File: metric_utils.py    From neuqe with GNU General Public License v3.0
def mean_squared_error(pred,ref):
    """ Computes mean squared error """
    import sklearn.metrics
    mse = sklearn.metrics.mean_squared_error(pred, ref)
    return mse 
Example #24
Source File: metric_utils.py    From neuqe with GNU General Public License v3.0
def mean_absolute_error(pred, ref):
    """ Computes mean absolute error  """
    import sklearn.metrics
    mae = sklearn.metrics.mean_absolute_error(pred, ref)
    return mae 
Example #25
Source File: textpro.py    From comparable-text-miner with Apache License 2.0
def build_features(doc_feat_grams, corpus_feat_grams):
	doc_grams = set(doc_feat_grams)
	feats = dict([(word, True) for word in doc_grams if word in corpus_feat_grams])
	return feats
###################################################################################

# evaluate predicted results using true values.
# evaluation metrics are accuracy, precision, recall and F-measure.
Example #26
Source File: vgg_fine_tune.py    From grammar-activity-prediction with MIT License
def subactivity_baseline_svm(metadata_root):
    def get_f1_score(precision, recall):
        return 2 * (precision * recall) / (precision + recall)

    train_path = metadata_root + 'data/train'
    val_path = metadata_root + 'data/val'
    x_sk_sq_train_path = train_path + '/sk_sq_train.npy'
    x_sk_sq_val_path = val_path + '/sk_sq_val.npy'
    y_train_path = train_path + '/subactivity_train.npy'
    y_val_path = val_path + '/subactivity_val.npy'
    y_train = np.load(y_train_path)
    y_train_one = np.zeros(y_train.shape[0])
    y_val = np.load(y_val_path)
    gt_val = np.zeros(y_val.shape[0])
    rand_index = np.random.permutation(len(y_train))
    for i in range(y_train.shape[0]):
        y_train_one[i] = np.argmax(y_train[i, :])
    for i in range(y_val.shape[0]):
        gt_val[i] = np.argmax(y_val[i, :])
    x_sk_sq_train = np.load(x_sk_sq_train_path)
    x_sk_sq_val = np.load(x_sk_sq_val_path)
    clf = SVC(decision_function_shape='ovr')
    clf.fit(x_sk_sq_train[rand_index], y_train_one[rand_index])
    prediction = clf.decision_function(x_sk_sq_val)
    pred = np.zeros(prediction.shape[0])
    for i in range(len(pred)):
        pred[i] = np.argmax(prediction[i, :])
    precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_val, pred, labels=range(10), average='micro')
    print('micro result')
    print(precision, recall, beta_score, support)
    print(get_f1_score(precision, recall))
    precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(gt_val, pred,
                                                                                             labels=range(10),
                                                                                             average='macro')
    print('macro result')
    print(precision, recall, beta_score, support)
    print(get_f1_score(precision, recall))
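The micro/macro distinction used above, isolated into a short standalone sketch on made-up labels: with an average argument, precision_recall_fscore_support returns scalars and the support entry is None.

import numpy as np
import sklearn.metrics

gt = np.array([0, 0, 1, 1, 2, 2])
pred = np.array([0, 1, 1, 1, 2, 0])

for average in ('micro', 'macro'):
    p, r, f, support = sklearn.metrics.precision_recall_fscore_support(
        gt, pred, labels=[0, 1, 2], average=average)
    print(average, p, r, f, support)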
Example #27
Source File: vgg_fine_tune.py    From grammar-activity-prediction with MIT License
def subactivity_train_lstm(metadata_root):
    nb_epoch = 100
    nb_classes = 10
    batch_size = 32
    train_path = metadata_root + 'data/train'
    val_path = metadata_root + 'data/val'
    model_path = metadata_root + 'models/cnn/'
    x_train_path = train_path + '/subactivity_lstm_feature_train.npy'
    x_val_path = val_path + '/subactivity_lstm_feature_val.npy'
    y_train_path = train_path + '/subactivity_lstm_gt_train.npy'
    y_val_path = val_path + '/subactivity_lstm_gt_val.npy'
    model_name = 'subactivity_lstm_epoch_100_sequencelen_50.h5'
    print('loading the data')
    x_train = np.load(x_train_path)
    x_val = np.load(x_val_path)
    y_train = np.load(y_train_path)
    y_val = np.load(y_val_path)
    print('initializing the model')
    final_model = lstm_model(x_train.shape[2], max_len=50)
    optimizer = rmsprop(lr=0.001)
    print('compiling')
    final_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    print('saving the model figure')
    plot(final_model, to_file=model_path + model_name[:-3] + '.png', show_shapes=True)
    print('fitting')
    final_model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
                    validation_data=(x_val, y_val))
    final_model.save(model_path + model_name) 
Example #28
Source File: vgg_fine_tune.py    From grammar-activity-prediction with MIT License 5 votes vote down vote up
def fine_tune(metadata_root):
    nb_classes = 10
    nb_epoch = 50
    train_path = metadata_root + 'data/train'
    val_path = metadata_root + 'data/val'
    model_path = metadata_root + 'models/cnn/'
    x_train_path = train_path + '/img_train.txt'
    x_val_path = val_path + '/img_val.txt'
    y_train_path = train_path + '/subactivity_train.npy'
    y_val_path = val_path + '/subactivity_val.npy'
    bdb_train_path = train_path + '/bdb_train.npy'
    bdb_val_path = val_path + '/bdb_val.npy'
    batch_size = 32
    nb_train = np.load(y_train_path).shape[0]
    nb_val = np.load(y_val_path).shape[0]
    train_generator = img_from_list(batch_size, x_train_path, y_train_path, bdb_train_path, nb_classes)
    val_generator = img_from_list(batch_size, x_val_path, y_val_path, bdb_val_path, nb_classes)
    model = vgg_16(model_path + 'vgg16_weights.h5')
    for layer in model.layers[:25]:
        layer.trainable = False
    model.pop()
    model.add(Dense(nb_classes, activation='softmax'))
    model_name = 'vgg_tune_subactivity_train_134_learning_rate_-5_.h5'
    early_stopping = EarlyStopping(verbose=1, patience=30, monitor='acc')
    model_checkpoint = ModelCheckpoint(
        model_path + model_name, save_best_only=True,
        save_weights_only=True,
        monitor='acc')
    callbacks_list = [early_stopping, model_checkpoint]
    plot(model, to_file=model_path + model_name[:-3] + '.png', show_shapes=True)
    model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=1e-5, decay=1e-6, momentum=0.9, nesterov=True), metrics=['accuracy'])
    model.fit_generator(train_generator, samples_per_epoch=(nb_train//batch_size)*batch_size, nb_epoch=nb_epoch, validation_data=val_generator, nb_val_samples=(nb_val//batch_size)*batch_size, callbacks=callbacks_list)
    model.save(model_path + model_name) 
Example #29
Source File: blstm.py    From semeval2017-scienceie with Apache License 2.0
def build_lstm(output_dim, embeddings):

    loss_function = "categorical_crossentropy"

    # this is the placeholder tensor for the input sequences
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype="int32")

    # this embedding layer will transform the sequences of integers
    embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=MAX_SEQUENCE_LENGTH, weights=[embeddings], trainable=True)(sequence)

    # 4 convolution layers (each 1000 filters)
    cnn = [Convolution1D(filter_length=filters, nb_filter=1000, border_mode="same") for filters in [2, 3, 5, 7]]
    # concatenate
    merged_cnn = merge([cnn(embedded) for cnn in cnn], mode="concat")
    # create the attention vector from the max-pooled convolution output
    maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
    attention_vector = maxpool(merged_cnn)

    forwards = AttentionLSTM(64, attention_vector)(embedded)
    backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)

    # concatenate the outputs of the 2 LSTM layers
    bi_lstm = merge([forwards, backwards], mode="concat", concat_axis=-1)

    after_dropout = Dropout(0.5)(bi_lstm)

    # softmax output layer
    output = Dense(output_dim=output_dim, activation="softmax")(after_dropout)

    # the complete model
    model = Model(input=sequence, output=output)

    # try using different optimizers and different optimizer configs
    model.compile("adagrad", loss_function, metrics=["accuracy"])

    return model 
Example #30
Source File: evaluation.py    From embeddings with Apache License 2.0
def __init__(self, preds, true_vals, ranks, raw_ranks):
		self.preds = preds
		self.ranks = ranks
		self.true_vals = true_vals
		self.raw_ranks = raw_ranks

		#Test that not all the predictions are the same, which sometimes happens with overfitting
		#and leads scikit-learn to output an incorrect average precision (i.e. ap=1)
		if not (preds == preds[0]).all():
			#Due to the use of np.isclose in sklearn.metrics.ranking._binary_clf_curve (called by the following metrics functions),
			#the predictions have to be rescaled if they are too close together:
			preds_rescaled = preds

			diffs = np.diff(np.sort(preds))
			min_diff = min(abs(diffs[np.nonzero(diffs)]))
			if min_diff < 1e-8:  #Default value of the absolute tolerance of np.isclose
				preds_rescaled = (preds * (1e-7 / min_diff)).astype('d')

			self.ap = sklearn.metrics.average_precision_score(true_vals,preds_rescaled)
			self.precision, self.recall, self.thresholds = sklearn.metrics.precision_recall_curve(true_vals,preds_rescaled) 
		else:
			logger.warning("All prediction scores are equal, probable overfitting, replacing scores by random scores")
			self.ap = (true_vals == 1).sum() / float(len(true_vals))
			self.thresholds = preds[0]
			self.precision = (true_vals == 1).sum() / float(len(true_vals))
			self.recall = 0.5
		
		
		self.mrr = -1
		self.raw_mrr = -1

		if ranks is not None:
			self.mrr = np.mean(1.0 / ranks)
			self.raw_mrr = np.mean(1.0 / raw_ranks)
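For context, the two sklearn.metrics calls at the heart of this example, run on made-up scores: average_precision_score summarizes the precision-recall trade-off in a single number, while precision_recall_curve returns the full curve together with the thresholds it was evaluated at.

import numpy as np
import sklearn.metrics

true_vals = np.array([0, 0, 1, 1])
preds = np.array([0.1, 0.4, 0.35, 0.8])

ap = sklearn.metrics.average_precision_score(true_vals, preds)
precision, recall, thresholds = sklearn.metrics.precision_recall_curve(true_vals, preds)
print(ap)
print(precision, recall, thresholds)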