Python sklearn.svm() Examples
The following are 30 code examples of sklearn.svm().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn, or try the search function.
Example #1
Source File: classify_shark.py From ibeis with Apache License 2.0 | 6 votes |
def fit_new_classifier(problem, train_idx):
    """
    Fit a linear, class-balanced SVC on a subset of the problem's dataset.

    The rows selected by ``train_idx`` are taken from ``problem.ds.data`` and
    ``problem.ds.target`` (along axis 0) and used to train the classifier.

    Args:
        problem: object exposing ``ds.data`` and ``ds.target``, both of which
            must support ``.take(indices, axis=0)``.
        train_idx: indices of the training rows.

    Returns:
        The fitted ``sklearn.svm.SVC`` instance.

    References:
        http://leon.bottou.org/research/stochastic
        http://blog.explainmydata.com/2012/06/ntrain-24853-ntest-25147-ncorrupt.html
        http://scikit-learn.org/stable/modules/svm.html#svm-classification
        http://scikit-learn.org/stable/modules/grid_search.html
    """
    print('[problem] train classifier on %d data points' % (len(train_idx)))
    features = problem.ds.data
    labels = problem.ds.target
    x_train = features.take(train_idx, axis=0)
    y_train = labels.take(train_idx, axis=0)

    # C, penalty, loss
    svc_options = dict(
        kernel=str('linear'),
        C=.17,
        class_weight='balanced',
        decision_function_shape='ovr',
    )
    clf = sklearn.svm.SVC(**svc_options)

    # Disabled alternative kept for reference: RBF grid search.
    #param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    #clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    #clf = clf.fit(X_train_pca, y_train)
    clf.fit(x_train, y_train)
    return clf
Example #2
Source File: sklearn_intent_classifier.py From Rasa_NLU_Chi with Apache License 2.0 | 6 votes |
def _create_classifier(self, num_threads, y):
    """Build the (unfitted) grid search used to train the intent classifier.

    Args:
        num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
        y: training labels; only used to derive the number of CV folds via
            ``self._num_cv_splits``.

    Returns:
        A ``GridSearchCV`` wrapping a probability-enabled, class-balanced
        ``SVC``, searching over the ``C`` values and kernels found in
        ``self.component_config`` and scored with ``f1_weighted``.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    c_values = self.component_config["C"]
    kernel_values = self.component_config["kernels"]

    # dirty str fix because sklearn is expecting
    # str not instance of basestr...
    kernel_names = [str(kernel) for kernel in kernel_values]
    search_space = [{"C": c_values, "kernel": kernel_names}]

    # aim for 5 examples in each fold
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight='balanced')
    return GridSearchCV(
        base_estimator,
        param_grid=search_space,
        n_jobs=num_threads,
        cv=n_folds,
        scoring='f1_weighted',
        verbose=1,
    )
Example #3
Source File: test_monkeypatch.py From daal4py with Apache License 2.0 | 6 votes |
def test_monkey_patching(self):
    """Un-patch and re-patch every daal4py token, then check that the
    patched scikit-learn classes are served from a ``daal4py`` module."""
    _tokens = daal4py.sklearn.sklearn_patch_names()
    self.assertTrue(isinstance(_tokens, list) and len(_tokens) > 0)

    # Round trip: remove every patch first, then apply all of them again.
    for token in _tokens:
        daal4py.sklearn.unpatch_sklearn(token)
    for token in _tokens:
        daal4py.sklearn.patch_sklearn(token)

    import sklearn
    patched_targets = [
        (sklearn.decomposition, 'PCA'),
        (sklearn.linear_model, 'Ridge'),
        (sklearn.linear_model, 'LinearRegression'),
        (sklearn.cluster, 'KMeans'),
        (sklearn.svm, 'SVC'),
    ]
    for owner, class_name in patched_targets:
        class_module = getattr(owner, class_name).__module__
        self.assertTrue(class_module.startswith('daal4py'))
Example #4
Source File: sklearn_intent_classifier.py From rasa_nlu with Apache License 2.0 | 6 votes |
def _create_classifier(self, num_threads, y):
    """Build the (unfitted) grid search used to train the intent classifier.

    Args:
        num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
        y: training labels; only used to derive the number of CV folds via
            ``self._num_cv_splits``.

    Returns:
        A ``GridSearchCV`` wrapping a probability-enabled, class-balanced
        ``SVC``. The search grid (``C``, ``gamma``, ``kernels``) and the
        scoring function are read from ``self.component_config``.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    c_values = self.component_config["C"]
    kernel_values = self.component_config["kernels"]
    gamma_values = self.component_config["gamma"]

    # dirty str fix because sklearn is expecting
    # str not instance of basestr...
    kernel_names = [str(kernel) for kernel in kernel_values]
    search_space = [{"C": c_values,
                     "gamma": gamma_values,
                     "kernel": kernel_names}]

    # aim for 5 examples in each fold
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight='balanced')
    return GridSearchCV(
        base_estimator,
        param_grid=search_space,
        n_jobs=num_threads,
        cv=n_folds,
        scoring=self.component_config['scoring_function'],
        verbose=1,
    )
Example #5
Source File: baselines.py From rmnist with MIT License | 5 votes |
def baselines(n):
    """Fit a fixed set of baseline classifiers and print their scores.

    Data is obtained from ``data_loader.load_data(n)``, which returns three
    splits; the first is used for fitting and the second for scoring (the
    third is not used here). For every classifier the class name and the
    score, as a percentage rounded to two decimals, are printed.

    Args:
        n: passed straight through to ``data_loader.load_data``.
    """
    td, vd, ts = data_loader.load_data(n)
    classifiers = [
        sklearn.svm.SVC(C=1000),
        sklearn.svm.SVC(kernel="linear", C=0.1),
        sklearn.neighbors.KNeighborsClassifier(1),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1, hidden_layer_sizes=(500, 100))
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print("\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2)))
Example #6
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_user(model: sklearn.svm.SVC,
              genuine_signatures: np.ndarray,
              random_forgeries: np.ndarray,
              skilled_forgeries: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Score one user's WD classifier on three groups of test signatures.

    Parameters
    ----------
    model: sklearn.svm.SVC
        The learned classifier (anything exposing ``decision_function``)
    genuine_signatures: np.ndarray
        Genuine signatures for test
    random_forgeries: np.ndarray
        Random forgeries for test (signatures from other users)
    skilled_forgeries: np.ndarray
        Skilled forgeries for test

    Returns
    -------
    np.ndarray, np.ndarray, np.ndarray
        The decision-function scores for the genuine signatures, the random
        forgeries and the skilled forgeries, in that order
    """
    # Evaluate the groups in a fixed order: genuine, random, skilled.
    score = model.decision_function
    genuine_scores = score(genuine_signatures)
    random_scores = score(random_forgeries)
    skilled_scores = score(skilled_forgeries)
    return genuine_scores, random_scores, skilled_scores
Example #7
Source File: svm.py From ibench with MIT License | 5 votes |
def _make_args(self, n):
    """Prepare the dataset and the linear SVC for problem size ``n``.

    ``n`` is 1-based: entry ``n - 1`` of the module-level ``features`` and
    ``vectors`` sequences is handed to ``self._gen_datasets``. The result is
    stored on ``self._X`` / ``self._y`` and the unfitted classifier on
    ``self._clf``.
    """
    slot = n - 1
    # The literal 2 is the third argument of _gen_datasets
    # (presumably the number of classes -- TODO confirm).
    dataset = self._gen_datasets(features[slot], vectors[slot], 2)
    self._X, self._y = dataset
    self._clf = svm.SVC(
        C=0.01,
        kernel='linear',
        max_iter=10000,
        tol=1e-16,
        shrinking=True,
    )
Example #8
Source File: run_models.py From AirBnbPricePrediction with MIT License | 5 votes |
def svm(X_train, y_train, X_val, y_val):
    """Fit an ``SVR`` on the training split and report evaluation metrics.

    Metrics are printed first for the validation split (via
    ``print_evaluation_metrics``) and then for the training split (via
    ``print_evaluation_metrics2``). ``y_train`` and ``y_val`` must expose
    ``.values`` (e.g. pandas objects), which is flattened with ``ravel()``.
    """
    # Author's tuning notes: gamma was left unset, then 0.1;
    # best reported so far: gamma 0.05, c=0.5.
    regressor = SVR(gamma=0.05, verbose=True)
    regressor.fit(X_train, y_train)

    val_targets = y_val.values.ravel()
    print_evaluation_metrics(regressor, "svm", X_val, val_targets)
    train_targets = y_train.values.ravel()
    print_evaluation_metrics2(regressor, "svm", X_train, train_targets)
Example #9
Source File: sklearn_intent_classifier.py From rasa-for-botfront with Apache License 2.0 | 5 votes |
def _create_classifier(
    self, num_threads: int, y
) -> "sklearn.model_selection.GridSearchCV":
    """Build the (unfitted) grid search used to train the intent classifier.

    Args:
        num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
        y: training labels; only used to derive the number of CV folds via
            ``self._num_cv_splits``.

    Returns:
        A ``GridSearchCV`` wrapping a probability-enabled, class-balanced
        ``SVC``. The search grid (``C``, ``gamma``, ``kernels``) and the
        scoring function are read from ``self.component_config``.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    c_values = self.component_config["C"]
    kernel_values = self.component_config["kernels"]
    gamma_values = self.component_config["gamma"]

    # dirty str fix because sklearn is expecting
    # str not instance of basestr...
    kernel_names = [str(kernel) for kernel in kernel_values]
    search_space = [
        {"C": c_values, "gamma": gamma_values, "kernel": kernel_names}
    ]

    # aim for 5 examples in each fold
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight="balanced")
    # NOTE(review): ``iid`` is only accepted by older scikit-learn releases;
    # confirm the pinned version before upgrading.
    return GridSearchCV(
        base_estimator,
        param_grid=search_space,
        n_jobs=num_threads,
        cv=n_folds,
        scoring=self.component_config["scoring_function"],
        verbose=1,
        iid=False,
    )
Example #10
Source File: tools.py From neural-tangent-kernel-UCI with Apache License 2.0 | 5 votes |
def svm(K1, K2, y1, y2, C, c):
    """Train an SVC on a precomputed kernel and return its accuracy.

    Args:
        K1: kernel matrix passed to ``fit`` together with ``y1``.
        K2: 2-D kernel matrix passed to ``predict``; its first dimension is
            the number of evaluated samples.
        y1: labels used for fitting.
        y2: labels the predictions on ``K2`` are compared against.
        C: regularisation parameter of the SVC.
        c: accepted but not used by this function.

    Returns:
        Fraction of predictions on ``K2`` that equal ``y2``.
    """
    # Unpacking also enforces that K2 is two-dimensional.
    n_val, _n_train = K2.shape
    classifier = SVC(kernel="precomputed", C=C, cache_size=100000)
    classifier.fit(K1, y1)
    predictions = classifier.predict(K2)
    n_correct = np.sum(predictions == y2)
    return 1.0 * n_correct / n_val
Example #11
Source File: example.py From d6tflow with MIT License | 5 votes |
def run(self):
    """Load the training frame, fit the selected model and save it.

    ``self.model`` selects the estimator: ``'ols'`` builds a
    ``LogisticRegression`` and ``'svm'`` builds an ``SVC``. All columns but
    the last are used as features; the ``'y'`` column is the target.

    Raises:
        ValueError: if ``self.model`` is neither ``'ols'`` nor ``'svm'``.
    """
    df_train = self.input().load()

    if self.model == 'ols':
        estimator = sklearn.linear_model.LogisticRegression()
    elif self.model == 'svm':
        estimator = sklearn.svm.SVC()
    else:
        raise ValueError('invalid model selection')

    feature_columns = df_train.iloc[:, :-1]
    estimator.fit(feature_columns, df_train['y'])
    self.save(estimator)

# Check task dependencies and their execution status
Example #12
Source File: unit_tests.py From pynisher with MIT License | 5 votes |
def svc_example(n_samples = 10000, n_features = 4):
    """Generate a synthetic classification problem and fit a ``LinearSVC``.

    Nothing is returned; the function only performs the fit.
    """
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.datasets import make_classification

    X, Y = make_classification(n_samples, n_features)
    # Optional polynomial feature expansion, currently disabled:
    #pp = PolynomialFeatures(degree=3)
    #X = pp.fit_transform(X)
    classifier = LinearSVC()
    classifier.fit(X, Y)
Example #13
Source File: unit_tests.py From pynisher with MIT License | 5 votes |
def svm_example(n_samples = 10000, n_features = 100):
    """Generate a synthetic regression problem and fit an ``SVR`` on it.

    Nothing is returned; the function only performs the fit.
    """
    from sklearn.svm import SVR
    from sklearn.datasets import make_regression

    X, Y = make_regression(n_samples, n_features)
    regressor = SVR()
    regressor.fit(X, Y)
Example #14
Source File: field_based_ml_field_detection.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0 | 5 votes |
def init_classifier_impl(field_code: str, init_script: str):
    """Create the classifier for a field, optionally from an init script.

    When ``init_script`` is ``None`` or blank after stripping, a default
    ``DecisionTreeClassifier`` is returned. Otherwise the stripped script is
    passed to ``eval_script`` together with a set of scikit-learn modules
    exposed under ``sklearn_*`` names, and that call's result is returned.

    Args:
        field_code: field identifier, used only in the script's title.
        init_script: classifier init script, or ``None``.
    """
    from sklearn import tree as sklearn_tree

    script = init_script.strip() if init_script is not None else init_script
    if not script:
        return sklearn_tree.DecisionTreeClassifier()

    from sklearn import neural_network as sklearn_neural_network
    from sklearn import neighbors as sklearn_neighbors
    from sklearn import svm as sklearn_svm
    from sklearn import gaussian_process as sklearn_gaussian_process
    from sklearn.gaussian_process import kernels as sklearn_gaussian_process_kernels
    from sklearn import ensemble as sklearn_ensemble
    from sklearn import naive_bayes as sklearn_naive_bayes
    from sklearn import discriminant_analysis as sklearn_discriminant_analysis
    from sklearn import linear_model as sklearn_linear_model

    # NOTE(review): if eval_script evaluates the text as Python (its name
    # suggests so; not visible here), init_script must come from a trusted
    # source.
    exposed_modules = {
        'sklearn_linear_model': sklearn_linear_model,
        'sklearn_tree': sklearn_tree,
        'sklearn_neural_network': sklearn_neural_network,
        'sklearn_neighbors': sklearn_neighbors,
        'sklearn_svm': sklearn_svm,
        'sklearn_gaussian_process': sklearn_gaussian_process,
        'sklearn_gaussian_process_kernels': sklearn_gaussian_process_kernels,
        'sklearn_ensemble': sklearn_ensemble,
        'sklearn_naive_bayes': sklearn_naive_bayes,
        'sklearn_discriminant_analysis': sklearn_discriminant_analysis
    }
    script_title = 'classifier init script of field {0}'.format(field_code)
    return eval_script(script_title, script, exposed_modules)
Example #15
Source File: transfer.py From rmnist with MIT License | 5 votes |
def transfer(n):
    """Fit the transfer-learning classifier(s) and print their scores.

    Data is obtained from ``data_loader.load_data(n, abstract=True,
    expanded=expanded)``, where ``expanded`` is a name defined outside this
    function. The first returned split is used for fitting and the second
    for scoring (the third is not used here). For every classifier the
    class name and the score, as a percentage rounded to two decimals, are
    printed.

    Args:
        n: passed straight through to ``data_loader.load_data``.
    """
    td, vd, ts = data_loader.load_data(n, abstract=True, expanded=expanded)
    classifiers = [
        #sklearn.svm.SVC(),
        #sklearn.svm.SVC(kernel="linear", C=0.1),
        #sklearn.neighbors.KNeighborsClassifier(1),
        #sklearn.tree.DecisionTreeClassifier(),
        #sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1.0, hidden_layer_sizes=(300,), max_iter=500)
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print("\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2)))
Example #16
Source File: svmClassifier.py From TBBTCorpus with Apache License 2.0 | 5 votes |
def start_program():
    """Train an RBF SVC on seasons 1-4, evaluate on seasons 5-7, and report.

    Training uses episodes ``1 .. Season_Episode_Mapping[season] - 5`` of
    seasons 1-4; testing uses the last five episodes of seasons 5-7.
    Per-speaker precision/recall/F1 (speakers 1-6) are written to
    ``output.txt`` and the overall accuracy is printed.

    Relies on ``Season_Episode_Mapping``, ``episode2feature``, ``get_stats``
    and ``speaker_rev_enum`` defined elsewhere in the module.
    """
    Total_correct = 0
    Total_labelled = 0
    clf = svm.SVC(gamma=0.001, C=50, kernel='rbf')
    train_features = []
    train_labels = []
    test_features = []
    test_labels = []

    for season in range(1, 5):
        for episode in range(1, Season_Episode_Mapping[season] - 4):
            features, labels = episode2feature(season, episode)
            train_features.extend(features)
            train_labels.extend(labels)

    for season in range(5, 8):
        for episode in range(Season_Episode_Mapping[season] - 4, Season_Episode_Mapping[season] + 1):
            features, labels = episode2feature(season, episode)
            test_features.extend(features)
            test_labels.extend(labels)

    clf.fit(train_features, train_labels)
    result = clf.predict(test_features)

    txt = "\n Speaker\tPrecision\tRecall\t\tF1\n"
    for i in range(1, 7):
        # Predictions were made on the test features, so they must be scored
        # against the test labels (the original passed train_labels here).
        precision, recall, f1_score, correct, total = get_stats(result, test_labels, i)
        Total_correct += correct
        Total_labelled += total
        txt += (speaker_rev_enum[i] + "\t\t" + format(precision, '.2f') + "\t\t"
                + format(recall, '.2f') + "\t\t" + format(f1_score, '.2f') + "\n")

    with open("output.txt", "w") as fh:
        fh.write(txt)
    # float() keeps this a true division on Python 2 as well.
    print("Accuracy of the system is : " + str(float(Total_correct) / Total_labelled))
Example #17
Source File: svm-bagofWords.py From TBBTCorpus with Apache License 2.0 | 5 votes |
def __init__(self, path):
    """Set up empty data containers, the RBF SVC and the corpus location.

    Args:
        path: stored unchanged as ``self.corpus_path``.
    """
    # Train/test samples and their labels, filled in later.
    self.train_data, self.test_data = [], []
    self.train_labels, self.test_labels = [], []
    self.classification = []

    self.svm_classifier = svm.SVC(
        gamma=0.001,
        C=50,
        decision_function_shape='ovr',
        kernel='rbf',
    )

    # Corpus bookkeeping.
    self.corpus_path = path
    self.corpus = {}
    self.vocab = []
Example #18
Source File: helpers.py From MachineLearningSamples-ImageClassificationUsingCntk with MIT License | 5 votes |
def runClassifier(classifier, dnnOutput, imgDict = [], lutLabel2Id = [], svmPath = [], svm_boL2Normalize = []):
    """Compute classification scores for a set of images.

    Args:
        classifier: classifier name; names starting with ``'svm'`` use the
            pickled SVM at ``svmPath``, anything else uses the DNN scores
            already stored in ``dnnOutput``.
        dnnOutput: mapping ``label -> {image filename -> DNN output}``.
        imgDict: mapping ``label -> list of image filenames`` to evaluate;
            when left as ``[]`` every image in ``dnnOutput`` is used.
        lutLabel2Id: optional mapping from label to id; when left as ``[]``
            the DNN branch reports the labels themselves, otherwise
            ``int(lutLabel2Id[label])``.
        svmPath: location handed to ``readPickle`` (SVM branch only).
        svm_boL2Normalize: forwarded to ``getSvmInput`` (SVM branch only).

    Returns:
        ``(scoresMatrix, imgFilenames, gtLabels)``.
    """
    # Run classifier on all known images, if not otherwise specified
    if imgDict == []:
        imgDict = {label: list(dnnOutput[label].keys())
                   for label in list(dnnOutput.keys())}

    if not classifier.startswith('svm'):
        # Get DNN classification scores
        gtLabels, scoreRows, imgFilenames = [], [], []
        for label in list(imgDict.keys()):
            for imgFilename in imgDict[label]:
                scoreRows.append(dnnOutput[label][imgFilename])
                gtLabels.append(label if lutLabel2Id == [] else int(lutLabel2Id[label]))
                imgFilenames.append(imgFilename)
        scoresMatrix = np.vstack(scoreRows)
        return scoresMatrix, imgFilenames, gtLabels

    # Compute SVM classification scores
    learner = readPickle(svmPath)
    feats, gtLabels, imgFilenames = getSvmInput(imgDict, dnnOutput, svm_boL2Normalize, lutLabel2Id)
    print("Evaluate SVM...")
    scoresMatrix = learner.decision_function(feats)

    # If binary classification problem then manually create 2nd column
    # Note: scoresMatrix is of size nrImages x nrClasses
    if len(scoresMatrix.shape) == 1:
        scoresMatrix = np.array([[-score, score] for score in scoresMatrix])
    return scoresMatrix, imgFilenames, gtLabels
Example #19
Source File: helpers.py From MachineLearningSamples-ImageClassificationUsingCntk with MIT License | 5 votes |
def getModelNode(classifier):
    """Return the model node name to use for the given classifier.

    Classifier names starting with ``"svm"`` map to ``"poolingLayer"``;
    every other name yields an empty list.
    """
    return "poolingLayer" if classifier.startswith("svm") else []
Example #20
Source File: classify_shark.py From ibeis with Apache License 2.0 | 5 votes |
def fit_new_linear_svm(problem, train_idx):
    """
    Fit a linear, class-balanced SVC on a subset of the problem's dataset.

    The rows selected by ``train_idx`` are taken from ``problem.ds.data`` and
    ``problem.ds.target`` (along axis 0) and used to train the classifier.

    Args:
        problem: object exposing ``ds.data`` and ``ds.target``, both of which
            must support ``.take(indices, axis=0)``.
        train_idx: indices of the training rows.

    Returns:
        The fitted ``sklearn.svm.SVC`` instance.
    """
    print('[problem] train classifier on %d data points' % (len(train_idx)))
    data = problem.ds.data
    target = problem.ds.target
    x_train = data.take(train_idx, axis=0)
    y_train = target.take(train_idx, axis=0)
    clf = sklearn.svm.SVC(kernel=str('linear'), C=.17, class_weight='balanced',
                          decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    # Hand the fitted model back; without this the training result was
    # unreachable for the caller.
    return clf
Example #21
Source File: test.py From object_centric_VAD with MIT License | 5 votes |
def _str2bool(value):
    """Convert a command-line string to a bool.

    ``bool("False")`` is ``True``, so ``type=bool`` cannot switch a flag off.
    Here the empty string and the usual "false" spellings map to ``False``;
    any other value maps to ``True``, as before.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() not in ('', 'false', 'f', '0', 'no', 'n', 'off')


def arg_parse():
    """Parse the command-line options of the test script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. Boolean
        options (``--bn``, ``--class_add``, ``--test_CAE``, ``--matlab``)
        take an explicit value such as ``True``/``False`` and default to
        ``False``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--gpu', type=str, default='0', help='Use which gpu?')
    parser.add_argument('-d', '--dataset', type=str, help='Train on which dataset')
    parser.add_argument('-b', '--bn', type=_str2bool, default=False, help='whether to use BN layer')
    parser.add_argument('--model_path', type=str, help='Path to saved tensorflow CAE model')
    parser.add_argument('--graph_path', type=str, help='Path to saved object detection frozen graph model')
    parser.add_argument('--svm_model', type=str, help='Path to saved svm model')
    parser.add_argument('--dataset_folder', type=str, help='Dataset Fodlder Path')
    parser.add_argument('-c', '--class_add', type=_str2bool, default=False, help='Whether to add class one-hot embedding to the featrue')
    parser.add_argument('-n', '--norm', type=int, default=0, help='Whether to use Normalization to the Feature and the normalization level')
    parser.add_argument('--test_CAE', type=_str2bool, default=False, help='Whether to test CAE')
    parser.add_argument('--matlab', type=_str2bool, default=False, help='Whether to use matlab weights and biases to test')
    args = parser.parse_args()
    return args
Example #22
Source File: test_frame.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_predict_automatic(self):
    """Check that the lazy ``ModelFrame`` result attributes match sklearn.

    After fitting an SVC through the frame, ``df.predicted``, ``df.proba``,
    ``df.log_proba`` and ``df.decision`` are read without an explicit
    predict call and compared against a plain sklearn SVC fitted on the
    same iris data.

    NOTE(review): the nesting below was reconstructed from flattened text;
    confirm the extent of each ``with`` block and the placement of the final
    ``simplefilter("default")`` against the upstream file.
    """
    with warnings.catch_warnings():
        # Make sure every UserWarning is emitted, not only the first one.
        warnings.simplefilter("always", UserWarning)
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)
        model = 'SVC'
        df = pdml.ModelFrame(iris)
        # mod1 is created through the frame accessor, mod2 through sklearn.
        mod1 = getattr(df.svm, model)(probability=True, random_state=self.random_state)
        mod2 = getattr(svm, model)(probability=True, random_state=self.random_state)
        df.fit(mod1)
        mod2.fit(iris.data, iris.target)

        # test automatically calls related methods
        with tm.assert_produces_warning(UserWarning):
            result = df.predicted
        expected = mod2.predict(iris.data)
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # with tm.assert_produces_warning(UserWarning):
        result = df.proba
        expected = mod2.predict_proba(iris.data)
        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.index, df.index)
        self.assert_numpy_array_almost_equal(result.values, expected)

        with tm.assert_produces_warning(UserWarning):
            result = df.log_proba
        expected = mod2.predict_log_proba(iris.data)
        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.index, df.index)
        self.assert_numpy_array_almost_equal(result.values, expected)

        # with tm.assert_produces_warning(UserWarning):
        result = df.decision
        expected = mod2.decision_function(iris.data)
        self.assertIsInstance(result, pdml.ModelFrame)
        tm.assert_index_equal(result.index, df.index)
        self.assert_numpy_array_almost_equal(result.values, expected)
        warnings.simplefilter("default")
Example #23
Source File: test_frame.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_predict_proba(self):
    """Compare explicit ``ModelFrame`` predictions with plain sklearn.

    For each model name, the frame's ``predict``, ``predict_proba``,
    ``predict_log_proba`` and ``decision_function`` results (and the cached
    attributes ``proba``, ``log_proba``, ``decision``) are checked against an
    sklearn estimator fitted on the same iris data. Afterwards the test
    checks that re-fitting the same estimator keeps the cached results while
    fitting a new estimator clears them.
    """
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    cache_names = ('_predicted', '_proba', '_log_proba', '_decision')
    # (method called on frame and estimator, cached attribute on the frame)
    frame_checks = (
        ('predict_proba', 'proba'),
        ('predict_log_proba', 'log_proba'),
        ('decision_function', 'decision'),
    )

    for model in ['SVC']:
        mod1 = getattr(df.svm, model)(probability=True, random_state=self.random_state)
        mod2 = getattr(svm, model)(probability=True, random_state=self.random_state)
        df.fit(mod1)
        mod2.fit(iris.data, iris.target)

        result = df.predict(mod1)
        expected = mod2.predict(iris.data)
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

        for method_name, cached_attr in frame_checks:
            result = getattr(df, method_name)(mod1)
            expected = getattr(mod2, method_name)(iris.data)
            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_index_equal(result.index, df.index)
            self.assert_numpy_array_almost_equal(result.values, expected)
            self.assert_numpy_array_almost_equal(getattr(df, cached_attr).values, expected)

        # not reset if estimator is identical
        df.fit(mod1)
        for cache_name in cache_names:
            self.assertFalse(getattr(df, cache_name) is None)

        # reset estimator
        mod3 = getattr(df.svm, model)(probability=True, random_state=self.random_state)
        df.fit(mod3)
        for cache_name in cache_names:
            self.assertTrue(getattr(df, cache_name) is None)
Example #24
Source File: sklearn_svm.py From android-malware-analysis with GNU General Public License v3.0 | 4 votes |
def train_svm_classifer(features, labels, model_output_path):
    """
    Train an SVM with a grid search, save the best model and print a
    classification performance report.

    features: 2D array of each input feature for each sample
    labels: array of string labels classifying each sample
    model_output_path: path for storing the trained svm model; the model is
        written when the path's parent directory exists, otherwise a message
        is printed and nothing is saved
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)

    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]

    # request probability estimation
    estimator = SVC(probability=True)

    # 10-fold cross validation, run with 20 parallel jobs as each fold and
    # each parameter set can be trained in parallel
    clf = grid_search.GridSearchCV(estimator, param, cv=10, n_jobs=20, verbose=3)
    clf.fit(X_train, y_train)

    # Only the target directory has to exist. The original tested the output
    # file itself, so a model was never saved to a new path.
    output_dir = os.path.dirname(os.path.abspath(model_output_path))
    if os.path.isdir(output_dir):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict = clf.predict(X_test)

    labels = sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))
Example #25
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train_test_all_users(exp_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                         svm_type: str,
                         C: float,
                         gamma: float,
                         num_gen_train: int,
                         num_forg_from_exp: int,
                         num_forg_from_dev: int,
                         num_gen_test: int,
                         global_threshold: float = 0,
                         rng: np.random.RandomState = np.random.RandomState()) \
        -> Tuple[Dict[int, sklearn.svm.SVC], Dict]:
    """ Split the exploitation set, train one classifier per user and
        evaluate all of them.

    Parameters
    ----------
    exp_set: tuple of np.ndarray (x, y, yforg)
        The exploitation set
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_gen_train: int
        Number of genuine signatures available for training
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples
    num_gen_test: int
        Number of genuine signatures for testing
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates
    rng: np.random.RandomState
        The random number generator (for reproducibility). The default
        instance is created once, when the function is defined.

    Returns
    -------
    dict (int -> sklearn.svm.SVC)
        The classifiers for all users
    dict
        A dictionary containing a variety of metrics, including false
        acceptance and rejection rates, equal error rates
    """
    train_split, test_split = data.split_train_test(exp_set, num_gen_train, num_gen_test, rng)

    # train_all_users takes the dev count before the exp count.
    per_user_models = train_all_users(train_split, dev_set, svm_type, C, gamma,
                                      num_forg_from_dev, num_forg_from_exp, rng)
    evaluation = test_all_users(per_user_models, test_split, global_threshold)
    return per_user_models, evaluation
Example #26
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_all_users(classifier_all_user: Dict[int, sklearn.svm.SVC],
                   exp_test: Tuple[np.ndarray, np.ndarray, np.ndarray],
                   global_threshold: float) -> Dict:
    """ Score every user's classifier on the test split and compute metrics.

    Parameters
    ----------
    classifier_all_user: dict (int -> sklearn.svm.SVC)
        The trained classifiers for all users
    exp_test: tuple of np.ndarray (x, y, yforg)
        The testing set split from the exploitation set
    global_threshold: float
        The threshold used to compute false acceptance and
        false rejection rates

    Returns
    -------
    dict
        ``'all_metrics'``: the result of ``metrics.compute_metrics``;
        ``'predictions'``: the per-user score lists for genuine signatures,
        random forgeries and skilled forgeries
    """
    xfeatures_test, y_test, yforg_test = exp_test

    genuinePreds, randomPreds, skilledPreds = [], [], []

    # These masks do not depend on the user, so build them once.
    is_forgery = (yforg_test == 1)
    is_genuine = (yforg_test == 0)

    for user in np.unique(y_test):
        model = classifier_all_user[user]
        owned_by_user = (y_test == user)

        # Test the performance for the user without replicates
        skilled_forgeries = xfeatures_test[owned_by_user & is_forgery]
        test_genuine = xfeatures_test[owned_by_user & is_genuine]
        # Random forgeries: genuine signatures of every other user.
        random_forgeries = xfeatures_test[(y_test != user) & is_genuine]

        genuine_scores = model.decision_function(test_genuine)
        skilled_scores = model.decision_function(skilled_forgeries)
        random_scores = model.decision_function(random_forgeries)

        genuinePreds.append(genuine_scores)
        skilledPreds.append(skilled_scores)
        randomPreds.append(random_scores)

    # Calculate all metrics (EER, FAR, FRR and AUC)
    all_metrics = metrics.compute_metrics(genuinePreds, randomPreds, skilledPreds, global_threshold)

    predictions = {'genuinePreds': genuinePreds,
                   'randomPreds': randomPreds,
                   'skilledPreds': skilledPreds}
    results = {'all_metrics': all_metrics, 'predictions': predictions}

    print(all_metrics['EER'], all_metrics['EER_userthresholds'])
    return results
Example #27
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train_all_users(exp_train: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    dev_set: Tuple[np.ndarray, np.ndarray, np.ndarray],
                    svm_type: str,
                    C: float,
                    gamma: float,
                    num_forg_from_dev: int,
                    num_forg_from_exp: int,
                    rng: np.random.RandomState) -> Dict[int, sklearn.svm.SVC]:
    """ Train one classifier per user found in the exploitation training split.

    Parameters
    ----------
    exp_train: tuple of np.ndarray (x, y, yforg)
        The training set split of the exploitation set (system users)
    dev_set: tuple of np.ndarray (x, y, yforg)
        The development set
    svm_type: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel
    num_forg_from_dev: int
        Number of forgeries from each user in the development set to
        consider as negative samples; when not positive, no development
        samples are drawn
    num_forg_from_exp: int
        Number of forgeries from each user in the exploitation set (other
        than the current user) to consider as negative sample.
    rng: np.random.RandomState
        The random number generator (for reproducibility)

    Returns
    -------
    Dict int -> sklearn.svm.SVC
        A dictionary of trained classifiers, where the keys are the users.
    """
    users = np.unique(exp_train[1])

    # Negatives from the development set are shared by all users.
    other_negatives = (data.get_random_forgeries_from_dev(dev_set, num_forg_from_dev, rng)
                       if num_forg_from_dev > 0 else [])

    classifiers = {}
    for user in tqdm(users):
        user_training_set = data.create_training_set_for_user(
            user, exp_train, num_forg_from_exp, other_negatives, rng)
        classifiers[user] = train_wdclassifier_user(user_training_set, svm_type, C, gamma)
    return classifiers
Example #28
Source File: training.py From sigver with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> sklearn.svm.SVC:
    """ Trains an SVM classifier for a user

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel (ignored for 'linear')

    Returns
    -------
    The fitted model. Note that the returned object is a ``Pipeline``
    (a ``StandardScaler(with_mean=False)`` followed by the
    ``sklearn.svm.SVC``), not the bare SVC.
    """
    assert svmType in ['linear', 'rbf']

    train_x = training_set[0]
    train_y = training_set[1]

    # Adjust for the skew between positive and negative classes:
    # the positive class is weighted by (#forgeries / #genuine).
    n_genuine = sum(1 for label in train_y if label == 1)
    n_forg = sum(1 for label in train_y if label == -1)
    skew = n_forg / float(n_genuine)

    # Train the model
    if svmType == 'rbf':
        svc_options = dict(C=C, gamma=gamma, class_weight={1: skew})
    else:
        svc_options = dict(kernel='linear', C=C, class_weight={1: skew})
    model = sklearn.svm.SVC(**svc_options)

    steps = [('scaler', preprocessing.StandardScaler(with_mean=False)),
             ('classifier', model)]
    model_with_scaler = pipeline.Pipeline(steps)
    model_with_scaler.fit(train_x, train_y)
    return model_with_scaler
Example #29
Source File: tracklet_utils_3c.py From TNT with GNU General Public License v3.0 | 4 votes |
def get_tracklet_scores():
    """Fill the per-detection score matrices of ``track_struct['tracklet_mat']``.

    Writes three matrices shaped like ``xmin_mat``, each initialised to -1:
    ``svm_score_mat`` (decision values of the classifier loaded from
    ``svm_model_path``), ``h_score_mat`` and ``y_score_mat`` (Gaussian-style
    scores of the residuals of the fit returned by
    ``track_lib.estimate_h_y``). Operates on the module-level
    ``track_struct`` and returns ``None``.
    """
    global track_struct

    # svm score
    track_struct['tracklet_mat']['svm_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \
        track_struct['tracklet_mat']['xmin_mat'].shape[1]))
    num_det = track_struct['tracklet_mat']['appearance_fea_mat'].shape[0]
    clf = joblib.load(svm_model_path)
    pred_s = np.zeros((num_det,1))
    # Columns 2.. of appearance_fea_mat are the classifier inputs.
    pred_s[:,0] = clf.decision_function(track_struct['tracklet_mat']['appearance_fea_mat'][:,2:])
    for n in range(num_det):
        # Columns 0 and 1 hold 1-based row/column indices into svm_score_mat
        # (presumably tracklet id and frame id -- TODO confirm).
        track_struct['tracklet_mat']['svm_score_mat'][int(track_struct['tracklet_mat']['appearance_fea_mat'][n,0])-1, \
            int(track_struct['tracklet_mat']['appearance_fea_mat'][n,1])-1] = pred_s[n,0]

    # h_score and y_score
    track_struct['tracklet_mat']['h_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \
        track_struct['tracklet_mat']['xmin_mat'].shape[1]))
    track_struct['tracklet_mat']['y_score_mat'] = -1*np.ones((track_struct['tracklet_mat']['xmin_mat'].shape[0], \
        track_struct['tracklet_mat']['xmin_mat'].shape[1]))
    hloc = np.zeros(num_det)
    yloc = np.zeros(num_det)
    cnt = 0
    # Flatten valid entries (xmin != -1) row by row: hloc is ymax - ymin,
    # yloc is ymax.
    for n in range(track_struct['tracklet_mat']['xmin_mat'].shape[0]):
        idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0]
        hloc[cnt:cnt+len(idx)] = track_struct['tracklet_mat']['ymax_mat'][n,idx]-track_struct['tracklet_mat']['ymin_mat'][n,idx]
        yloc[cnt:cnt+len(idx)] = track_struct['tracklet_mat']['ymax_mat'][n,idx]
        cnt = cnt+len(idx)

    ph, py = track_lib.estimate_h_y(hloc, yloc)

    # h score: relative error of the height predicted from y via [yloc, 1] @ ph.
    A = np.ones((hloc.shape[0],2))
    A[:,0] = yloc
    y_err = (np.matmul(A,ph)-hloc)/hloc
    err_std = np.std(y_err)
    h_score = np.zeros((y_err.shape[0],1))
    h_score[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std))

    # y score: absolute error of y predicted from the height via [hloc, 1] @ py.
    A = np.ones((hloc.shape[0],2))
    A[:,0] = hloc
    y_err = np.matmul(A,py)-yloc
    err_std = np.std(y_err)
    y_score = np.zeros((y_err.shape[0],1))
    y_score[:,0] = np.exp(-np.power(y_err,2)/(err_std*err_std))

    #import pdb; pdb.set_trace()
    cnt = 0
    # Scatter the flat scores back, using the same row order as above.
    for n in range(track_struct['tracklet_mat']['xmin_mat'].shape[0]):
        idx = np.where(track_struct['tracklet_mat']['xmin_mat'][n,:]!=-1)[0]
        track_struct['tracklet_mat']['h_score_mat'][n,idx] = h_score[cnt:cnt+len(idx),0]
        track_struct['tracklet_mat']['y_score_mat'][n,idx] = y_score[cnt:cnt+len(idx),0]
        cnt = cnt+len(idx)
    return
Example #30
Source File: estimateVote.py From anomaly-event-detection with MIT License | 4 votes |
def train_svm_classifer(features, labels, model_output_path):
    """
    Train an SVM with a grid search, save the best model and print a
    classification performance report.

    features: array of input features
    labels: array of labels associated with the input features
    model_output_path: path for storing the trained svm model; the model is
        written when the path's parent directory exists, otherwise a message
        is printed and nothing is saved
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)

    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]

    # request probability estimation
    estimator = SVC(probability=True)

    # 10-fold cross validation, use 4 thread as each fold and each parameter set can be train in parallel
    clf = grid_search.GridSearchCV(estimator, param, cv=10, n_jobs=4, verbose=3)
    clf.fit(X_train, y_train)

    # Only the target directory has to exist. The original tested the output
    # file itself, so a model was never saved to a new path.
    output_dir = os.path.dirname(os.path.abspath(model_output_path))
    if os.path.isdir(output_dir):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))

    print("\nBest parameters set:")
    print(clf.best_params_)

    y_predict = clf.predict(X_test)

    # labels=sorted(list(set(labels)))
    labels = [0, 1]
    print("\nConfusion matrix:")
    # The labels are integers, so convert them before joining; joining the
    # raw ints raised TypeError.
    print("Labels: {0}\n".format(",".join(str(label) for label in labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))

    print("\nClassification report:")
    print(classification_report(y_test, y_predict))