python source code of training

import numpy as np
from tqdm import tqdm
from typing import Tuple, Optional, Dict
import sklearn
import sklearn.svm
import sklearn.pipeline as pipeline
import sklearn.preprocessing as preprocessing

import sigver.wd.metrics as metrics
import sigver.wd.data as data


def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> sklearn.svm.SVC:
    """ Trains an SVM classifier for a user

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel

    Returns
    -------
    sklearn.svm.SVC:
        The learned classifier

    """

    assert svmType in ['linear', 'rbf']

    train_x = training_set[0]
    train_y = training_set[1]

    # Adjust for the skew between positive and negative classes
    n_genuine = len([x for x in train_y if x == 1])
    n_forg = len([x for x in train_y if x == -1])
    skew = n_forg / float(n_genuine)

    # Train the model
    if svmType == 'rbf':
        model = sklearn.svm.SVC(C=C, gamma=gamma, class_weight={1: skew})
    else:
        model = sklearn.svm.SVC(kernel='linear', C=C, class_weight={1: skew})

    model_with_scaler = pipeline.Pipeline([('scaler', preprocessing.StandardScaler(with_mean=False)),
                                           ('classifier', model)])

    model_with_scaler.fit(train_x, train_y)

    return model_with_scaler


def test_user(model: sklearn.svm.SVC,
              genuine_signatures: np.ndarray,
              random_forgeries: np.ndarray,
              skilled_forgeries: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Test the WD classifier of an user

    Parameters
    ----------
    model: sklearn.svm.SVC
        The learned classifier
    genuine_signatures: np.ndarray
        Genuine signatures for test
    random_forgeries: np.ndarray
        Random forgeries for test (signatures from other users)
    skilled_forgeries: np.ndarray
        Skilled forgeries for test

    Returns
    -------
    np.ndarray, np.ndarray, np.ndarray
        The predictions(scores) for genuine signatures,
        random forgeries and skilled forgeries

    """
    # Get predictions
    genuinePred = model.decision_function(genuine_signatures)
    randomPred = model.decision_function(random_forgeries)
    skilledPred = model.decision_function(skilled_forgeries)

    return genuinePred, randomPred, skilledPred


def train_all_users(exp_train: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    svm_type: str,
                    C: float,
                    gamma: float,
                    num_forg_from_dev: int,
                    num_forg_from_exp: int,
                    rng: np.random.RandomState) -> Dict[int, sklearn.svm.SVC]:
    """ Train classifiers for all users in the exploitation set

    Parameters
    ----------
    exp_train: tuple of np.ndarray (x, y, yforg)
        The training set split of the exploitation set (system users)
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    rng: np.random.RandomState
        The random number generator (for reproducibility)

    Returns
    -------
    Dict int -> sklearn.svm.SVC
        A dictionary of trained classifiers, where the keys are the users.

    """
    classifiers = {}

    exp_y = exp_train[1]
    users = np.unique(exp_y)

    if num_forg_from_dev > 0:
        other_negatives = data.get_random_forgeries_from_dev(dev_set, num_forg_from_dev, rng)
    else:
        other_negatives = []

    for user in tqdm(users):
        training_set = data.create_training_set_for_user(user, exp_train, num_forg_from_exp, other_negatives, rng)
        classifiers[user] = train_wdclassifier_user(training_set, svm_type, C, gamma)

    return classifiers


def test_all_users(classifier_all_user: Dict[int, sklearn.svm.SVC],
                   exp_test: Tuple[np.ndarray, np.ndarray, np.ndarray],
                   global_threshold: float) -> Dict:
    """ Test classifiers for all users and return the metrics

    Parameters
    ----------
    classifier_all_user: dict (int -> sklearn.svm.SVC)
        The trained classifiers for all users
    exp_test: tuple of np.ndarray (x, y, yforg)
        The testing set split from the exploitation set
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates

    Returns
    -------
    dict
        A dictionary containing a variety of metrics, including
        false acceptance and rejection rates, equal error rates

    """
    xfeatures_test, y_test, yforg_test = exp_test

    genuinePreds = []
    randomPreds = []
    skilledPreds = []

    users = np.unique(y_test)
    for user in users:
        model = classifier_all_user[user]

        # Test the performance for the user without replicates
        skilled_forgeries = xfeatures_test[(y_test == user) & (yforg_test == 1)]
        test_genuine = xfeatures_test[(y_test == user) & (yforg_test == 0)]
        random_forgeries = xfeatures_test[(y_test != user) & (yforg_test == 0)]

        genuinePredUser = model.decision_function(test_genuine)
        skilledPredUser = model.decision_function(skilled_forgeries)
        randomPredUser = model.decision_function(random_forgeries)

        genuinePreds.append(genuinePredUser)
        skilledPreds.append(skilledPredUser)
        randomPreds.append(randomPredUser)

    # Calculate al metrics (EER, FAR, FRR and AUC)
    all_metrics = metrics.compute_metrics(genuinePreds, randomPreds, skilledPreds, global_threshold)

    results = {'all_metrics': all_metrics,
               'predictions': {'genuinePreds': genuinePreds,
                               'randomPreds': randomPreds,
                               'skilledPreds': skilledPreds}}

    print(all_metrics['EER'], all_metrics['EER_userthresholds'])
    return results


def train_test_all_users(exp_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         svm_type: str,
                         C: float,
                         gamma: float,
                         num_gen_train: int,
                         num_forg_from_exp: int,
                         num_forg_from_dev: int,
                         num_gen_test: int,
                         global_threshold: float = 0,
                         rng: np.random.RandomState = np.random.RandomState()) \
        -> Tuple[Dict[int, sklearn.svm.SVC], Dict]:
    """ Train and test classifiers for every user in the exploitation set,
        and returns the metrics.

    Parameters
    ----------
    exp_set: tuple of np.ndarray (x, y, yforg)
        The exploitation set
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_gen_train: int
        Number of genuine signatures available for training
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    num_gen_test: int
        Number of genuine signatures for testing
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates
    rng: np.random.RandomState
        The random number generator (for reproducibility)

    Returns
    -------
    dict (int -> sklearn.svm.SVC)
        The classifiers for all users

    dict
        A dictionary containing a variety of metrics, including
        false acceptance and rejection rates, equal error rates

    """
    exp_train, exp_test = data.split_train_test(exp_set, num_gen_train, num_gen_test, rng)

    classifiers = train_all_users(exp_train, dev_set, svm_type, C, gamma,
                                  num_forg_from_dev, num_forg_from_exp, rng)

    results = test_all_users(classifiers, exp_test, global_threshold)

    return classifiers, results