Python sklearn.svm() Examples

The following are 30 code examples of sklearn.svm(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn, or try the search function.
Example #1
Source File: classify_shark.py    From ibeis with Apache License 2.0 6 votes vote down vote up
def fit_new_classifier(problem, train_idx):
        """
        Fit a linear SVM on the rows of the problem's dataset picked by train_idx.

        The rows are selected from ``problem.ds.data`` and ``problem.ds.target``
        with ``take(train_idx, axis=0)``.

        Args:
            problem: object exposing ``ds.data`` and ``ds.target``.
            train_idx: indices of the rows used for training.

        Returns:
            The fitted ``sklearn.svm.SVC`` instance.

        References:
            http://leon.bottou.org/research/stochastic
            http://blog.explainmydata.com/2012/06/ntrain-24853-ntest-25147-ncorrupt.html
            http://scikit-learn.org/stable/modules/svm.html#svm-classification
            http://scikit-learn.org/stable/modules/grid_search.html
        """
        print('[problem] train classifier on %d data points' % (len(train_idx)))

        features = problem.ds.data.take(train_idx, axis=0)
        labels = problem.ds.target.take(train_idx, axis=0)

        # NOTE: a GridSearchCV over C / gamma with an RBF kernel was tried here
        # previously; the fixed linear configuration below is what is used.
        svc_options = dict(
            kernel=str('linear'),
            C=.17,
            class_weight='balanced',
            decision_function_shape='ovr',
        )
        model = sklearn.svm.SVC(**svc_options)
        model.fit(features, labels)
        return model
Example #2
Source File: sklearn_intent_classifier.py    From rasa_nlu with Apache License 2.0 6 votes vote down vote up
def _create_classifier(self, num_threads, y):
        """Build the unfitted grid search over SVC hyper-parameters.

        The grid (C, gamma, kernel) and the scoring function are read from
        ``self.component_config``; the number of CV folds comes from
        ``self._num_cv_splits(y)``.

        Args:
            num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
            y: labels, only passed to ``self._num_cv_splits``.

        Returns:
            A ``GridSearchCV`` wrapping a probability-enabled, class-balanced SVC.
        """
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV

        config = self.component_config
        c_values = config["C"]
        kernel_names = config["kernels"]
        gamma_values = config["gamma"]

        # sklearn expects plain ``str`` kernel names, hence the explicit cast
        search_space = [{
            "C": c_values,
            "gamma": gamma_values,
            "kernel": [str(name) for name in kernel_names],
        }]

        # aim for 5 examples in each fold
        n_folds = self._num_cv_splits(y)

        base_estimator = SVC(C=1, probability=True, class_weight='balanced')
        return GridSearchCV(
            base_estimator,
            param_grid=search_space,
            n_jobs=num_threads,
            cv=n_folds,
            scoring=self.component_config['scoring_function'],
            verbose=1,
        )
Example #3
Source File: test_monkeypatch.py    From daal4py with Apache License 2.0 6 votes vote down vote up
def test_monkey_patching(self):
        """Check that re-patching sklearn makes the listed classes resolve to daal4py.

        Every patch name is first unpatched and then patched again; afterwards
        the ``__module__`` of each checked class must start with 'daal4py'.
        """
        patch_names = daal4py.sklearn.sklearn_patch_names()
        self.assertTrue(isinstance(patch_names, list) and len(patch_names) > 0)

        # Undo everything first, then apply every patch again
        for name in patch_names:
            daal4py.sklearn.unpatch_sklearn(name)
        for name in patch_names:
            daal4py.sklearn.patch_sklearn(name)

        import sklearn
        patched_targets = [
            (sklearn.decomposition, 'PCA'),
            (sklearn.linear_model, 'Ridge'),
            (sklearn.linear_model, 'LinearRegression'),
            (sklearn.cluster, 'KMeans'),
            (sklearn.svm, 'SVC'),
        ]
        for module, class_name in patched_targets:
            defining_module = getattr(module, class_name).__module__
            self.assertTrue(defining_module.startswith('daal4py'))
Example #4
Source File: sklearn_intent_classifier.py    From Rasa_NLU_Chi with Apache License 2.0 6 votes vote down vote up
def _create_classifier(self, num_threads, y):
        """Build the unfitted grid search over SVC hyper-parameters.

        The grid (C, kernel) is read from ``self.component_config``; the number
        of CV folds comes from ``self._num_cv_splits(y)``. Candidates are scored
        with 'f1_weighted'.

        Args:
            num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
            y: labels, only passed to ``self._num_cv_splits``.

        Returns:
            A ``GridSearchCV`` wrapping a probability-enabled, class-balanced SVC.
        """
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV

        config = self.component_config
        c_values = config["C"]
        kernel_names = config["kernels"]

        # sklearn expects plain ``str`` kernel names, hence the explicit cast
        search_space = [{
            "C": c_values,
            "kernel": [str(name) for name in kernel_names],
        }]

        # aim for 5 examples in each fold
        n_folds = self._num_cv_splits(y)

        base_estimator = SVC(C=1, probability=True, class_weight='balanced')
        return GridSearchCV(
            base_estimator,
            param_grid=search_space,
            n_jobs=num_threads,
            cv=n_folds,
            scoring='f1_weighted',
            verbose=1,
        )
Example #5
Source File: test.py    From object_centric_VAD with MIT License 5 votes vote down vote up
def arg_parse():
    """Parse the command-line options of the test script.

    The boolean options (--bn, --class_add, --test_CAE, --matlab) take an
    explicit value such as ``true``/``false``, ``yes``/``no`` or ``1``/``0``.
    They are converted with a dedicated parser because ``type=bool`` would
    turn every non-empty string, including "False", into True.

    Returns:
        argparse.Namespace: the parsed arguments (read from ``sys.argv``).
    """
    def _str2bool(value):
        """Convert a command-line string to a bool; reject unknown spellings."""
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays False, as it was with ``bool('')``
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % (value,))

    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--gpu', type=str, default='0', help='Use which gpu?')
    parser.add_argument('-d', '--dataset', type=str, help='Train on which dataset')
    parser.add_argument('-b','--bn',type=_str2bool,default=False,help='whether to use BN layer')
    parser.add_argument('--model_path',type=str,help='Path to saved tensorflow CAE model')
    parser.add_argument('--graph_path',type=str,help='Path to saved object detection frozen graph model')
    parser.add_argument('--svm_model',type=str,help='Path to saved svm model')
    parser.add_argument('--dataset_folder',type=str,help='Dataset Fodlder Path')
    parser.add_argument('-c','--class_add',type=_str2bool,default=False,help='Whether to add class one-hot embedding to the featrue')
    parser.add_argument('-n','--norm',type=int,default=0,help='Whether to use Normalization to the Feature and the normalization level')
    parser.add_argument('--test_CAE',type=_str2bool,default=False,help='Whether to test CAE')
    parser.add_argument('--matlab',type=_str2bool,default=False,help='Whether to use matlab weights and biases to test')
    args = parser.parse_args()
    return args
Example #6
Source File: classify_shark.py    From ibeis with Apache License 2.0 5 votes vote down vote up
def fit_new_linear_svm(problem, train_idx):
        """
        Fit a linear SVM on the rows of the problem's dataset picked by train_idx.

        Args:
            problem: object exposing ``ds.data`` and ``ds.target``.
            train_idx: indices of the rows used for training.

        Returns:
            The fitted ``sklearn.svm.SVC`` instance. Previously the classifier
            was fitted and then dropped, so callers had no way to use it.
        """
        print('[problem] train classifier on %d data points' % (len(train_idx)))
        data = problem.ds.data
        target = problem.ds.target
        x_train = data.take(train_idx, axis=0)
        y_train = target.take(train_idx, axis=0)
        clf = sklearn.svm.SVC(kernel=str('linear'), C=.17, class_weight='balanced',
                              decision_function_shape='ovr')
        clf.fit(x_train, y_train)
        return clf
Example #7
Source File: helpers.py    From MachineLearningSamples-ImageClassificationUsingCntk with MIT License 5 votes vote down vote up
def getModelNode(classifier):
    """Return the model node name for the given classifier type.

    Classifier names starting with "svm" map to "poolingLayer"; every other
    name yields an empty list.
    """
    return "poolingLayer" if classifier.startswith("svm") else []
Example #8
Source File: helpers.py    From MachineLearningSamples-ImageClassificationUsingCntk with MIT License 5 votes vote down vote up
def runClassifier(classifier, dnnOutput, imgDict = [],  lutLabel2Id = [], svmPath = [], svm_boL2Normalize = []):
    """Compute classification scores for a set of images.

    Parameters left at their ``[]`` default are treated as "not specified".

    Args:
        classifier: classifier name; names starting with 'svm' use the pickled
            SVM at ``svmPath``, anything else uses the scores in ``dnnOutput``.
        dnnOutput: nested mapping label -> image filename -> score vector.
        imgDict: mapping label -> list of image filenames to evaluate;
            defaults to every image in ``dnnOutput``.
        lutLabel2Id: optional mapping label -> id used for the returned labels
            (DNN branch; also forwarded to ``getSvmInput``).
        svmPath: path of the pickled SVM (SVM branch only).
        svm_boL2Normalize: forwarded to ``getSvmInput`` (SVM branch only).

    Returns:
        Tuple ``(scoresMatrix, imgFilenames, gtLabels)``.
    """
    # Run classifier on all known images, if not otherwise specified
    if imgDict == []:
        imgDict = {label: list(dnnOutput[label].keys()) for label in list(dnnOutput.keys())}

    # Get DNN classification scores
    if not classifier.startswith('svm'):
        gtLabels, imgFilenames, scoreRows = [], [], []
        for label in list(imgDict.keys()):
            for imgFilename in imgDict[label]:
                scoreRows.append(dnnOutput[label][imgFilename])
                gtLabels.append(label if lutLabel2Id == [] else int(lutLabel2Id[label]))
                imgFilenames.append(imgFilename)
        return np.vstack(scoreRows), imgFilenames, gtLabels

    # Compute SVM classification scores
    learner = readPickle(svmPath)
    feats, gtLabels, imgFilenames = getSvmInput(imgDict, dnnOutput, svm_boL2Normalize, lutLabel2Id)
    print("Evaluate SVM...")
    scoresMatrix = learner.decision_function(feats)

    # A 1-D result means a binary problem: build the two-column
    # (nrImages x nrClasses) matrix by hand as [-score, score]
    if len(scoresMatrix.shape) == 1:
        scoresMatrix = np.array([[-score, score] for score in scoresMatrix])
    return scoresMatrix, imgFilenames, gtLabels
Example #9
Source File: svm-bagofWords.py    From TBBTCorpus with Apache License 2.0 5 votes vote down vote up
def __init__(self, path):
        """Set up empty data containers and the RBF-kernel SVM classifier.

        Args:
            path: stored unchanged as ``self.corpus_path``.
        """
        # Corpus location and the structures derived from it (filled later)
        self.corpus_path = path
        self.corpus = {}
        self.vocab = []

        # Train / test samples with their labels, plus the classification results
        self.train_data, self.train_labels = [], []
        self.test_data, self.test_labels = [], []
        self.classification = []

        self.svm_classifier = svm.SVC(
            kernel='rbf',
            C=50,
            gamma=0.001,
            decision_function_shape='ovr',
        )
Example #10
Source File: svmClassifier.py    From TBBTCorpus with Apache License 2.0 5 votes vote down vote up
def start_program():
    """Train an RBF SVM on episode features and report per-speaker scores.

    Training data: seasons 1-4, episodes ``1 .. Season_Episode_Mapping[s] - 5``.
    Test data: seasons 5-7, episodes ``Season_Episode_Mapping[s] - 4 ..
    Season_Episode_Mapping[s]``. A precision / recall / F1 table for the
    speaker ids 1-6 is written to "output.txt" and the overall accuracy is
    printed.
    """
    clf = svm.SVC(gamma=0.001, C=50, kernel='rbf')

    train_features = []
    train_labels = []
    for season in range(1,5):
        for episode in range(1,Season_Episode_Mapping[season]-4):
            features, labels = episode2feature(season,episode)
            train_features.extend(features)
            train_labels.extend(labels)

    test_features = []
    test_labels = []
    for season in range(5,8):
        for episode in range(Season_Episode_Mapping[season]-4,Season_Episode_Mapping[season]+1):
            features, labels = episode2feature(season,episode)
            test_features.extend(features)
            test_labels.extend(labels)

    clf.fit(train_features,train_labels)
    result = clf.predict(test_features)

    Total_correct = 0
    Total_labelled = 0
    txt = "\n Speaker\tPrecision\tRecall\t\tF1\n"
    for i in range(1,7):
        # `result` holds predictions for the TEST set, so it has to be scored
        # against the test labels (it was compared with train_labels before).
        # NOTE(review): presumably get_stats takes (predictions, gold labels,
        # speaker id) - confirm against its definition.
        precision, recall,f1_score,correct,total = get_stats(result, test_labels,i)
        Total_correct += correct
        Total_labelled += total
        txt += speaker_rev_enum[i]+"\t\t"+ str(format(precision,'.2f'))+"\t\t"+str(format(recall,'.2f'))+"\t\t"+str(format(f1_score,'.2f'))+"\n"
    with open("output.txt","w") as fh:
        fh.write(txt)
    # float() keeps the ratio from being truncated by Python 2 integer division
    print("Accuracy of the system is : "+str(float(Total_correct)/Total_labelled))
Example #11
Source File: field_based_ml_field_detection.py    From lexpredict-contraxsuite with GNU Affero General Public License v3.0 5 votes vote down vote up
def init_classifier_impl(field_code: str, init_script: str):
    """Create the classifier for a field, optionally from an init script.

    When ``init_script`` is None or blank a default ``DecisionTreeClassifier``
    is returned. Otherwise the stripped script is handed to ``eval_script``
    together with a set of sklearn sub-modules it may refer to by name.

    Args:
        field_code: field identifier, only used in the script's title.
        init_script: initialization script, or None / blank for the default.

    Returns:
        The default decision tree, or whatever ``eval_script`` returns.
    """
    script = None if init_script is None else init_script.strip()

    from sklearn import tree as sklearn_tree

    if not script:
        return sklearn_tree.DecisionTreeClassifier()

    from sklearn import neural_network as sklearn_neural_network
    from sklearn import neighbors as sklearn_neighbors
    from sklearn import svm as sklearn_svm
    from sklearn import gaussian_process as sklearn_gaussian_process
    from sklearn.gaussian_process import kernels as sklearn_gaussian_process_kernels
    from sklearn import ensemble as sklearn_ensemble
    from sklearn import naive_bayes as sklearn_naive_bayes
    from sklearn import discriminant_analysis as sklearn_discriminant_analysis
    from sklearn import linear_model as sklearn_linear_model

    # Names made available to the script
    script_namespace = dict(
        sklearn_linear_model=sklearn_linear_model,
        sklearn_tree=sklearn_tree,
        sklearn_neural_network=sklearn_neural_network,
        sklearn_neighbors=sklearn_neighbors,
        sklearn_svm=sklearn_svm,
        sklearn_gaussian_process=sklearn_gaussian_process,
        sklearn_gaussian_process_kernels=sklearn_gaussian_process_kernels,
        sklearn_ensemble=sklearn_ensemble,
        sklearn_naive_bayes=sklearn_naive_bayes,
        sklearn_discriminant_analysis=sklearn_discriminant_analysis,
    )
    title = 'classifier init script of field {0}'.format(field_code)
    return eval_script(title, script, script_namespace)
Example #12
Source File: unit_tests.py    From pynisher with MIT License 5 votes vote down vote up
def svm_example(n_samples = 10000, n_features = 100):
    """Fit a default SVR on a synthetic regression problem.

    Args:
        n_samples: number of samples passed to ``make_regression``.
        n_features: number of features passed to ``make_regression``.
    """
    from sklearn.datasets import make_regression
    from sklearn.svm import SVR

    features, targets = make_regression(n_samples, n_features)
    regressor = SVR()
    regressor.fit(features, targets)
Example #13
Source File: unit_tests.py    From pynisher with MIT License 5 votes vote down vote up
def svc_example(n_samples = 10000, n_features = 4):
    """Fit a default LinearSVC on a synthetic classification problem.

    Args:
        n_samples: number of samples passed to ``make_classification``.
        n_features: number of features passed to ``make_classification``.
    """
    from sklearn.datasets import make_classification
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.svm import LinearSVC

    # NOTE: a degree-3 PolynomialFeatures expansion of the inputs is currently
    # disabled; the raw features are used directly.
    features, labels = make_classification(n_samples, n_features)
    classifier = LinearSVC()
    classifier.fit(features, labels)
Example #14
Source File: example.py    From d6tflow with MIT License 5 votes vote down vote up
def run(self):
        """Train the model selected by ``self.model`` and save it.

        The training frame is loaded from the task input; every column except
        the last is used as a feature and column 'y' as the target.

        Raises:
            ValueError: if ``self.model`` is neither 'ols' nor 'svm'.
        """
        df_train = self.input().load()

        # NOTE: 'ols' selects a LogisticRegression estimator
        if self.model == 'ols':
            estimator = sklearn.linear_model.LogisticRegression()
        elif self.model == 'svm':
            estimator = sklearn.svm.SVC()
        else:
            raise ValueError('invalid model selection')

        features = df_train.iloc[:, :-1]
        target = df_train['y']
        estimator.fit(features, target)
        self.save(estimator)

# Check task dependencies and their execution status 
Example #15
Source File: tools.py    From neural-tangent-kernel-UCI with Apache License 2.0 5 votes vote down vote up
def svm(K1, K2, y1, y2, C, c):
    """Fit a precomputed-kernel SVC and return its accuracy on a second set.

    Args:
        K1: kernel matrix used for fitting.
        K2: 2-D kernel matrix used for prediction; its first dimension is the
            number of evaluated samples.
        y1: labels for fitting.
        y2: labels the predictions are compared with.
        C: regularization parameter of the SVC.
        c: unused.

    Returns:
        Fraction of predictions on ``K2`` that equal ``y2``.
    """
    n_val, _ = K2.shape
    classifier = SVC(kernel = "precomputed", C = C, cache_size = 100000)
    classifier.fit(K1, y1)
    predictions = classifier.predict(K2)
    n_correct = np.sum(predictions == y2)
    return 1.0 * n_correct / n_val
Example #16
Source File: sklearn_intent_classifier.py    From rasa-for-botfront with Apache License 2.0 5 votes vote down vote up
def _create_classifier(
        self, num_threads: int, y
    ) -> "sklearn.model_selection.GridSearchCV":
        """Build the unfitted grid search over SVC hyper-parameters.

        The grid (C, gamma, kernel) and the scoring function are read from
        ``self.component_config``; the number of CV folds comes from
        ``self._num_cv_splits(y)``.

        Args:
            num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
            y: labels, only passed to ``self._num_cv_splits``.

        Returns:
            A ``GridSearchCV`` wrapping a probability-enabled, class-balanced SVC.
        """
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV

        config = self.component_config
        c_values = config["C"]
        kernel_names = config["kernels"]
        gamma_values = config["gamma"]

        # sklearn expects plain ``str`` kernel names, hence the explicit cast
        search_space = [
            {
                "C": c_values,
                "gamma": gamma_values,
                "kernel": [str(name) for name in kernel_names],
            }
        ]

        # aim for 5 examples in each fold
        n_folds = self._num_cv_splits(y)

        base_estimator = SVC(C=1, probability=True, class_weight="balanced")
        # NOTE(review): `iid` is not accepted by recent scikit-learn releases -
        # confirm the pinned version before upgrading.
        return GridSearchCV(
            base_estimator,
            param_grid=search_space,
            n_jobs=num_threads,
            cv=n_folds,
            scoring=self.component_config["scoring_function"],
            verbose=1,
            iid=False,
        )
Example #17
Source File: run_models.py    From AirBnbPricePrediction with MIT License 5 votes vote down vote up
def svm(X_train, y_train, X_val, y_val):
    """Fit an SVR and print its metrics on the validation and training sets.

    Args:
        X_train, y_train: training features and targets.
        X_val, y_val: validation features and targets.

    ``y_train`` and ``y_val`` must expose ``.values`` (they are flattened with
    ``.values.ravel()`` before being passed to the metric helpers).
    """
    # gamma=0.05 is noted as the best value found (earlier tries: default, 0.1)
    regressor = SVR(gamma = 0.05, verbose = True)
    regressor.fit(X_train, y_train)

    val_targets = y_val.values.ravel()
    print_evaluation_metrics(regressor, "svm", X_val, val_targets)

    train_targets = y_train.values.ravel()
    print_evaluation_metrics2(regressor, "svm", X_train, train_targets)
Example #18
Source File: svm.py    From ibench with MIT License 5 votes vote down vote up
def _make_args(self, n):
        """Prepare the dataset and the linear SVC for problem size ``n``.

        ``n`` is 1-based: entry ``n - 1`` of the module-level ``features`` and
        ``vectors`` sequences is passed to ``self._gen_datasets``.
        """
        size_index = n - 1
        dataset = self._gen_datasets(features[size_index], vectors[size_index], 2)
        self._X, self._y = dataset
        self._clf = svm.SVC(
            kernel='linear',
            C=0.01,
            tol=1e-16,
            max_iter=10000,
            shrinking=True,
        )
Example #19
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_user(model: sklearn.svm.SVC,
              genuine_signatures: np.ndarray,
              random_forgeries: np.ndarray,
              skilled_forgeries: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Score the test signatures of one user with that user's WD classifier

    Parameters
    ----------
    model: sklearn.svm.SVC
        The learned classifier
    genuine_signatures: np.ndarray
        Genuine signatures for test
    random_forgeries: np.ndarray
        Random forgeries for test (signatures from other users)
    skilled_forgeries: np.ndarray
        Skilled forgeries for test

    Returns
    -------
    np.ndarray, np.ndarray, np.ndarray
        The ``decision_function`` scores for the genuine signatures,
        the random forgeries and the skilled forgeries, in that order

    """
    # One decision_function call per group, in the order they are returned
    signature_groups = (genuine_signatures, random_forgeries, skilled_forgeries)
    return tuple(model.decision_function(group) for group in signature_groups)
Example #20
Source File: transfer.py    From rmnist with MIT License 5 votes vote down vote up
def transfer(n):
    """Fit the enabled classifiers on abstract features and print their accuracy.

    Data is loaded with ``data_loader.load_data(n, abstract=True,
    expanded=expanded)`` (``expanded`` is a module-level name). Each classifier
    is fitted on the training split and its validation score is printed as a
    percentage rounded to two decimals.
    """
    td, vd, ts = data_loader.load_data(n, abstract=True, expanded=expanded)
    classifiers = [
        #sklearn.svm.SVC(),
        #sklearn.svm.SVC(kernel="linear", C=0.1),
        #sklearn.neighbors.KNeighborsClassifier(1),
        #sklearn.tree.DecisionTreeClassifier(),
        #sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1.0, hidden_layer_sizes=(300,), max_iter=500)
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        accuracy = round(clf.score(vd[0], vd[1])*100, 2)
        # Single-argument print(...) call: valid on Python 3 and prints the
        # same text as the former Python 2 print statement.
        print("\n{}: {}".format(type(clf).__name__, accuracy))
Example #21
Source File: baselines.py    From rmnist with MIT License 5 votes vote down vote up
def baselines(n):
    """Fit a set of baseline classifiers and print their validation accuracy.

    Data is loaded with ``data_loader.load_data(n)``. Each classifier is fitted
    on the training split and its validation score is printed as a percentage
    rounded to two decimals.
    """
    td, vd, ts = data_loader.load_data(n)
    classifiers = [
        sklearn.svm.SVC(C=1000),
        sklearn.svm.SVC(kernel="linear", C=0.1),
        sklearn.neighbors.KNeighborsClassifier(1),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1, hidden_layer_sizes=(500, 100))
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        accuracy = round(clf.score(vd[0], vd[1])*100, 2)
        # Single-argument print(...) call: valid on Python 3 and prints the
        # same text as the former Python 2 print statement.
        print("\n{}: {}".format(type(clf).__name__, accuracy))
Example #22
Source File: tracklet_utils_3c.py    From TNT with GNU General Public License v3.0 4 votes vote down vote up
def get_tracklet_scores():
    """Fill the SVM, height and y-position score matrices of ``track_struct``.

    Works in place on the global ``track_struct['tracklet_mat']`` and writes
    three matrices shaped like ``xmin_mat``, all initialised to -1:

    * ``svm_score_mat``: decision-function output of the classifier loaded from
      ``svm_model_path`` for each row of ``appearance_fea_mat`` (columns 0 and 1
      of that matrix are the 1-based target row / column, the remaining columns
      are the features).
    * ``h_score_mat`` / ``y_score_mat``: ``exp(-err**2 / std(err)**2)`` where
      ``err`` is the residual of the linear models returned by
      ``track_lib.estimate_h_y`` (relative height residual for h, absolute
      y residual for y), for every cell whose ``xmin_mat`` entry is not -1.
    """
    global track_struct

    tracklets = track_struct['tracklet_mat']
    score_shape = (tracklets['xmin_mat'].shape[0], tracklets['xmin_mat'].shape[1])
    num_rows = score_shape[0]

    # svm score
    tracklets['svm_score_mat'] = -1*np.ones(score_shape)
    fea = tracklets['appearance_fea_mat']
    num_det = fea.shape[0]
    clf = joblib.load(svm_model_path)
    pred_s = np.zeros((num_det,1))
    pred_s[:,0] = clf.decision_function(fea[:,2:])
    for n in range(num_det):
        row = int(fea[n,0])-1
        col = int(fea[n,1])-1
        tracklets['svm_score_mat'][row, col] = pred_s[n,0]

    # h_score and y_score
    tracklets['h_score_mat'] = -1*np.ones(score_shape)
    tracklets['y_score_mat'] = -1*np.ones(score_shape)

    # Collect box height (ymax - ymin) and bottom position (ymax) of every
    # valid cell, row by row, remembering which columns were valid per row
    hloc = np.zeros(num_det)
    yloc = np.zeros(num_det)
    valid_cols = []
    offset = 0
    for n in range(num_rows):
        idx = np.where(tracklets['xmin_mat'][n,:]!=-1)[0]
        stop = offset+len(idx)
        hloc[offset:stop] = tracklets['ymax_mat'][n,idx]-tracklets['ymin_mat'][n,idx]
        yloc[offset:stop] = tracklets['ymax_mat'][n,idx]
        valid_cols.append(idx)
        offset = stop

    ph, py = track_lib.estimate_h_y(hloc, yloc)

    def _with_bias(values):
        # Two-column design matrix [values, 1]
        design = np.ones((hloc.shape[0],2))
        design[:,0] = values
        return design

    def _gaussian_score(err):
        # Column vector exp(-err^2 / std(err)^2)
        spread = np.std(err)
        column = np.zeros((err.shape[0],1))
        column[:,0] = np.exp(-np.power(err,2)/(spread*spread))
        return column

    h_score = _gaussian_score((np.matmul(_with_bias(yloc),ph)-hloc)/hloc)
    y_score = _gaussian_score(np.matmul(_with_bias(hloc),py)-yloc)

    # Scatter the flat score vectors back into the per-row valid cells
    offset = 0
    for n, idx in enumerate(valid_cols):
        stop = offset+len(idx)
        tracklets['h_score_mat'][n,idx] = h_score[offset:stop,0]
        tracklets['y_score_mat'][n,idx] = y_score[offset:stop,0]
        offset = stop
    return
Example #23
Source File: tracklet_utils_3c.py    From TNT with GNU General Public License v3.0 4 votes vote down vote up
def get_tracklet_scores():
    """Fill the SVM, height and y-position score matrices of ``track_struct``.

    Works in place on the global ``track_struct['tracklet_mat']`` and writes
    three matrices shaped like ``xmin_mat``, all initialised to -1:

    * ``svm_score_mat``: decision-function output of the classifier loaded from
      ``svm_model_path`` for each row of ``appearance_fea_mat`` (columns 0 and 1
      of that matrix are the 1-based target row / column, the remaining columns
      are the features).
    * ``h_score_mat`` / ``y_score_mat``: ``exp(-err**2 / std(err)**2)`` where
      ``err`` is the residual of the linear models returned by
      ``track_lib.estimate_h_y`` (relative height residual for h, absolute
      y residual for y), for every cell whose ``xmin_mat`` entry is not -1.
    """
    global track_struct

    tracklets = track_struct['tracklet_mat']
    score_shape = (tracklets['xmin_mat'].shape[0], tracklets['xmin_mat'].shape[1])
    num_rows = score_shape[0]

    # svm score
    tracklets['svm_score_mat'] = -1*np.ones(score_shape)
    fea = tracklets['appearance_fea_mat']
    num_det = fea.shape[0]
    clf = joblib.load(svm_model_path)
    pred_s = np.zeros((num_det,1))
    pred_s[:,0] = clf.decision_function(fea[:,2:])
    for n in range(num_det):
        row = int(fea[n,0])-1
        col = int(fea[n,1])-1
        tracklets['svm_score_mat'][row, col] = pred_s[n,0]

    # h_score and y_score
    tracklets['h_score_mat'] = -1*np.ones(score_shape)
    tracklets['y_score_mat'] = -1*np.ones(score_shape)

    # Collect box height (ymax - ymin) and bottom position (ymax) of every
    # valid cell, row by row, remembering which columns were valid per row
    hloc = np.zeros(num_det)
    yloc = np.zeros(num_det)
    valid_cols = []
    offset = 0
    for n in range(num_rows):
        idx = np.where(tracklets['xmin_mat'][n,:]!=-1)[0]
        stop = offset+len(idx)
        hloc[offset:stop] = tracklets['ymax_mat'][n,idx]-tracklets['ymin_mat'][n,idx]
        yloc[offset:stop] = tracklets['ymax_mat'][n,idx]
        valid_cols.append(idx)
        offset = stop

    ph, py = track_lib.estimate_h_y(hloc, yloc)

    def _with_bias(values):
        # Two-column design matrix [values, 1]
        design = np.ones((hloc.shape[0],2))
        design[:,0] = values
        return design

    def _gaussian_score(err):
        # Column vector exp(-err^2 / std(err)^2)
        spread = np.std(err)
        column = np.zeros((err.shape[0],1))
        column[:,0] = np.exp(-np.power(err,2)/(spread*spread))
        return column

    h_score = _gaussian_score((np.matmul(_with_bias(yloc),ph)-hloc)/hloc)
    y_score = _gaussian_score(np.matmul(_with_bias(hloc),py)-yloc)

    # Scatter the flat score vectors back into the per-row valid cells
    offset = 0
    for n, idx in enumerate(valid_cols):
        stop = offset+len(idx)
        tracklets['h_score_mat'][n,idx] = h_score[offset:stop,0]
        tracklets['y_score_mat'][n,idx] = y_score[offset:stop,0]
        offset = stop
    return
Example #24
Source File: estimateVote.py    From anomaly-event-detection with MIT License 4 votes vote down vote up
def train_svm_classifer(features, labels, model_output_path):
    """
    train_svm_classifer will train a SVM, save the trained SVM model and
    report the classification performance

    features: array of input features
    labels: array of labels associated with the input features
    model_output_path: path for storing the trained svm model; the model is
        written when the directory part of the path exists
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)

    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]

    # request probability estimation
    svm = SVC(probability=True)

    # 10-fold cross validation, use 4 thread as each fold and each parameter set can be train in parallel
    clf = grid_search.GridSearchCV(svm, param,
            cv=10, n_jobs=4, verbose=3)

    clf.fit(X_train, y_train)

    # A new model file does not exist before it is written, so check that the
    # target directory is there instead of the file itself (the old check only
    # ever overwrote an already existing file).
    output_dir = os.path.dirname(model_output_path) or os.curdir
    if os.path.isdir(output_dir):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict=clf.predict(X_test)

    # labels=sorted(list(set(labels)))
    labels = [0,1]
    print("\nConfusion matrix:")
    # The labels are ints, so they must be converted before joining
    print("Labels: {0}\n".format(",".join(str(label) for label in labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
Example #25
Source File: sklearn_svm.py    From android-malware-analysis with GNU General Public License v3.0 4 votes vote down vote up
def train_svm_classifer(features, labels, model_output_path):
    """
    train_svm_classifer will train a SVM, save the trained SVM model and
    report the classification performance

    features: 2D array of each input feature for each sample
    labels: array of string labels classifying each sample
    model_output_path: path for storing the trained svm model; the model is
        written when the directory part of the path exists
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)

    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]

    # request probability estimation
    svm = SVC(probability=True)

    # 10-fold cross validation; up to 20 parallel jobs, as each fold and each
    # parameter set can be trained in parallel
    clf = grid_search.GridSearchCV(svm, param,
            cv=10, n_jobs=20, verbose=3)

    clf.fit(X_train, y_train)

    # A new model file does not exist before it is written, so check that the
    # target directory is there instead of the file itself (the old check only
    # ever overwrote an already existing file).
    output_dir = os.path.dirname(model_output_path) or os.curdir
    if os.path.isdir(output_dir):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict=clf.predict(X_test)

    # Unique labels in sorted order, used for the confusion matrix axes
    labels=sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
Example #26
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> sklearn.svm.SVC:
    """ Fit a scaled SVM classifier on one user's training set

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel (only used when svmType is 'rbf')

    Returns
    -------
    sklearn.svm.SVC:
        The learned classifier. The returned object is a pipeline made of a
        StandardScaler (without mean centering) followed by the SVC.

    """

    assert svmType in ['linear', 'rbf']

    train_x, train_y = training_set[0], training_set[1]

    # Weight the positive class by the negative/positive ratio to
    # compensate for the skew between the two classes
    n_genuine = sum(1 for label in train_y if label == 1)
    n_forg = sum(1 for label in train_y if label == -1)
    skew = n_forg / float(n_genuine)

    # Only the kernel-related arguments differ between the two SVM types
    kernel_options = {'gamma': gamma} if svmType == 'rbf' else {'kernel': 'linear'}
    model = sklearn.svm.SVC(C=C, class_weight={1: skew}, **kernel_options)

    scaler = preprocessing.StandardScaler(with_mean=False)
    model_with_scaler = pipeline.Pipeline([('scaler', scaler),
                                           ('classifier', model)])
    model_with_scaler.fit(train_x, train_y)

    return model_with_scaler
Example #27
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def train_all_users(exp_train: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    svm_type: str,
                    C: float,
                    gamma: float,
                    num_forg_from_dev: int,
                    num_forg_from_exp: int,
                    rng: np.random.RandomState) -> Dict[int, sklearn.svm.SVC]:
    """ Fit one classifier per user of the exploitation set

    Parameters
    ----------
    exp_train: tuple of np.ndarray (x, y, yforg)
        The training set split of the exploitation set (system users)
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    rng: np.random.RandomState
        The random number generator (for reproducibility)

    Returns
    -------
    Dict int -> sklearn.svm.SVC
        A dictionary of trained classifiers, where the keys are the users.

    """
    users = np.unique(exp_train[1])

    # Negatives drawn from the development set are sampled once and
    # shared by all users
    other_negatives = (data.get_random_forgeries_from_dev(dev_set, num_forg_from_dev, rng)
                       if num_forg_from_dev > 0 else [])

    return {
        user: train_wdclassifier_user(
            data.create_training_set_for_user(user, exp_train, num_forg_from_exp, other_negatives, rng),
            svm_type, C, gamma)
        for user in tqdm(users)
    }
Example #28
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test_all_users(classifier_all_user: Dict[int, sklearn.svm.SVC],
                   exp_test: Tuple[np.ndarray, np.ndarray, np.ndarray],
                   global_threshold: float) -> Dict:
    """ Score the test set with every user's classifier and compute the metrics

    Parameters
    ----------
    classifier_all_user: dict (int -> sklearn.svm.SVC)
        The trained classifiers for all users
    exp_test: tuple of np.ndarray (x, y, yforg)
        The testing set split from the exploitation set
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates

    Returns
    -------
    dict
        A dictionary with the computed metrics under 'all_metrics' and the
        per-user score lists under 'predictions'

    """
    xfeatures_test, y_test, yforg_test = exp_test

    # These two masks do not depend on the user
    is_genuine = (yforg_test == 0)
    is_forgery = (yforg_test == 1)

    genuinePreds, randomPreds, skilledPreds = [], [], []
    for user in np.unique(y_test):
        model = classifier_all_user[user]
        is_user = (y_test == user)

        # Test the performance for the user without replicates
        skilled_forgeries = xfeatures_test[is_user & is_forgery]
        test_genuine = xfeatures_test[is_user & is_genuine]
        # Random forgeries: genuine signatures of the other users
        random_forgeries = xfeatures_test[(y_test != user) & is_genuine]

        genuinePreds.append(model.decision_function(test_genuine))
        skilledPreds.append(model.decision_function(skilled_forgeries))
        randomPreds.append(model.decision_function(random_forgeries))

    # Calculate all metrics (EER, FAR, FRR and AUC)
    all_metrics = metrics.compute_metrics(genuinePreds, randomPreds, skilledPreds, global_threshold)

    predictions = {'genuinePreds': genuinePreds,
                   'randomPreds': randomPreds,
                   'skilledPreds': skilledPreds}
    results = {'all_metrics': all_metrics, 'predictions': predictions}

    print(all_metrics['EER'], all_metrics['EER_userthresholds'])
    return results
Example #29
Source File: training.py    From sigver with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def train_test_all_users(exp_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         svm_type: str,
                         C: float,
                         gamma: float,
                         num_gen_train: int,
                         num_forg_from_exp: int,
                         num_forg_from_dev: int,
                         num_gen_test: int,
                         global_threshold: float = 0,
                         rng: np.random.RandomState = None) \
        -> Tuple[Dict[int, sklearn.svm.SVC], Dict]:
    """ Train and test classifiers for every user in the exploitation set,
        and return the classifiers together with the metrics.

    The exploitation set is first split into a training and a testing part,
    the classifiers are trained on the training part (plus the development
    set), and finally evaluated on the testing part.

    Parameters
    ----------
    exp_set: tuple of np.ndarray (x, y, yforg)
        The exploitation set
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_gen_train: int
        Number of genuine signatures available for training
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples
    num_gen_test: int
        Number of genuine signatures for testing
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates
    rng: np.random.RandomState or None
        The random number generator (for reproducibility). When omitted
        (None), a new, unseeded generator is created for this call.

    Returns
    -------
    dict (int -> sklearn.svm.SVC)
        The classifiers for all users

    dict
        A dictionary containing a variety of metrics, including
        false acceptance and rejection rates, equal error rates

    """
    if rng is None:
        # Build the fallback generator per call. A RandomState created in the
        # signature is evaluated once, at definition time, and would then be
        # shared (and mutated) by every call that omits ``rng`` - including
        # forked worker processes, which would all inherit the same state.
        rng = np.random.RandomState()

    exp_train, exp_test = data.split_train_test(exp_set, num_gen_train, num_gen_test, rng)

    # The same generator is handed on to training, so passing a seeded
    # ``rng`` fixes both the split and the training-time sampling.
    classifiers = train_all_users(exp_train, dev_set, svm_type, C, gamma,
                                  num_forg_from_dev, num_forg_from_exp, rng)

    results = test_all_users(classifiers, exp_test, global_threshold)

    return classifiers, results
Example #30
Source File: test_frame.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test_predict_proba(self):
        """Compare SVC outputs obtained through a ModelFrame with a plain estimator.

        An estimator created via ``df.svm`` and one created via ``svm`` are
        fitted on the iris data; ``predict``, ``predict_proba``,
        ``predict_log_proba`` and ``decision_function`` must then agree.
        Afterwards the test checks that the frame's private result
        attributes survive a re-fit with the same estimator and are ``None``
        after fitting a different estimator instance.
        """
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        def build(source, name):
            # ``self.random_state`` is looked up anew for every estimator,
            # exactly as in a spelled-out constructor call.
            return getattr(source, name)(probability=True,
                                         random_state=self.random_state)

        # (method available on both frame and estimator,
        #  frame attribute expected to hold the same values afterwards)
        frame_outputs = (('predict_proba', 'proba'),
                         ('predict_log_proba', 'log_proba'),
                         ('decision_function', 'decision'))
        private_results = ('_predicted', '_proba', '_log_proba', '_decision')

        for name in ('SVC',):
            wrapped = build(df.svm, name)
            plain = build(svm, name)

            df.fit(wrapped)
            plain.fit(iris.data, iris.target)

            # Class predictions come back as a series, not a frame.
            predicted = df.predict(wrapped)
            reference = plain.predict(iris.data)
            self.assertIsInstance(predicted, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(predicted.values, reference)

            for method, attribute in frame_outputs:
                result = getattr(df, method)(wrapped)
                expected = getattr(plain, method)(iris.data)

                self.assertIsInstance(result, pdml.ModelFrame)
                tm.assert_index_equal(result.index, df.index)
                self.assert_numpy_array_almost_equal(result.values, expected)
                self.assert_numpy_array_almost_equal(
                    getattr(df, attribute).values, expected)

            # not reset if estimator is identical
            df.fit(wrapped)
            for slot in private_results:
                self.assertFalse(getattr(df, slot) is None)

            # reset estimator
            replacement = build(df.svm, name)
            df.fit(replacement)
            for slot in private_results:
                self.assertTrue(getattr(df, slot) is None)