Python sklearn.neighbors.KNeighborsClassifier() Examples

The following are code examples showing how to use sklearn.neighbors.KNeighborsClassifier(), drawn from open source Python projects.
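Before the project snippets, here is a minimal, self-contained sketch of the basic fit/score cycle (the iris data below is illustrative and not taken from any of the projects):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# toy data: the classic iris dataset, split into train and test portions
X_train, X_test, y_train, y_test = train_test_split(*load_iris(return_X_y=True), random_state=0)

clf = KNeighborsClassifier(n_neighbors=5)  # 5 is sklearn's default
clf.fit(X_train, y_train)                  # k-NN simply memorizes the training set
print(clf.score(X_test, y_test))           # mean accuracy on the held-out split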

Example 1
Project: Stock_Market_Forecast   Author: cuevas1208   File: sklearn_main.py    MIT License
def train(x, y, valid_sample, ticker='stock'):
    logging.debug("X sample: \ {} ".format(len(x.shape)))
    logging.debug("y sample: \ {} ".format(len(y.shape)))

    # split into train and test sets (note: the data is not shuffled here)
    test_size = int(len(y)*0.2)
    x_train, x_test, y_train, y_test = x[test_size:], x[:test_size], y[test_size:], y[:test_size]

    # combine the predictions of several base estimators
    clf = VotingClassifier([('lsvc', svm.LinearSVC()),
                            ('knn', neighbors.KNeighborsClassifier()),
                            ('rfor', RandomForestClassifier())])

    clf.fit(x_train, y_train)

    # test data prediction
    np.set_printoptions(precision=2)
    confidence = clf.score(x_test, y_test)
    print('accuracy:', confidence)

    from visualization import matplot_graphs
    y_pred = clf.predict(valid_sample['x'])
    matplot_graphs.plot_histogram(y_pred, valid_sample['y'], valid_sample['dates'], ticker, confidence, PREDICTION_DAYS)

    return confidence, clf 
Example 2
Project: transferlearning   Author: jindongwang   File: main.py    MIT License
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN
    Inputs: data_train, data_test: train and test csv file path
    Outputs: yprediction and accuracy
    '''
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(data_train, delimiter=','),
            'tar': np.loadtxt(data_test, delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -1], data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc 
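A hypothetical smoke test for classify_1nn (the file names and data are placeholders; each CSV holds feature columns with the label last, and numpy is assumed to be imported as np at module level):

import numpy as np

rng = np.random.RandomState(0)
src = np.hstack([rng.randn(20, 3), rng.randint(0, 2, (20, 1))])  # 3 features + label column
tar = np.hstack([rng.randn(10, 3), rng.randint(0, 2, (10, 1))])
np.savetxt('src_placeholder.csv', src, delimiter=',')
np.savetxt('tar_placeholder.csv', tar, delimiter=',')

ypred, acc = classify_1nn('src_placeholder.csv', 'tar_placeholder.csv')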
Example 3
Project: Attention-Based-Siamese-Text-CNN-for-Stance-Detection   Author: Yikai-Wang   File: naive_ml.py    MIT License
def __init__(self, train_set, valid_set,multi_classification=False):
        self.train_set = train_set
        self.valid_set = valid_set
        self.multi_classification = multi_classification
        self.my_LR = sklearn.linear_model.LogisticRegression()
        self.my_RF = RandomForestClassifier(criterion='entropy',
                                            max_depth= 50,
                                            min_samples_leaf= 1,
                                            min_samples_split= 3,
                                            n_estimators= 50)
        self.my_P = Perceptron(max_iter=10000,tol=0.1)
        self.my_SVM_rbf = SVC(kernel='rbf', gamma=0.03, C=30,max_iter=10000)
        self.my_SVM_linear = SVC(kernel='linear', gamma=0.03, C=30,max_iter=10000)
        self.my_DT = DecisionTreeClassifier()
        self.my_NB = GaussianNB()
        self.my_KNN = KNeighborsClassifier(n_neighbors=3) 
Example 4
Project: associative_compression_networks   Author: jalexvig   File: acn.py    MIT License
def __init__(self, k=5):
        """
        Args:
            k: Number of neighbors to choose from when picking code to condition prior.
        """

        super().__init__()

        self.fc1 = nn.Linear(CODE_LEN, 512)
        self.fc2_u = nn.Linear(512, CODE_LEN)
        self.fc2_s = nn.Linear(512, CODE_LEN)

        self.k = k
        self.knn = KNeighborsClassifier(n_neighbors=2 * k)

        codes = torch.randn((len(data_loader.dataset), CODE_LEN)).numpy()

        self.fit_knn(codes) 
Example 5
Project: AudioEmotionDetection   Author: DefinitelyNotTim   File: scikit_network.py    MIT License
def compare_new(new_metrics, user_profile):
    # Changed the emotion data to use user profile data
    # Tim - 11/24
    emotion_data = user_profile.path
    df = pd.read_csv(emotion_data, header=None, sep=',', names=['Pitch', 'Tone', 'SPL', 'wordGap', 'WordGapLen', 'Emotion'])

    data = df.values
    y = df['Emotion']
    X = df[['Pitch', 'Tone', 'SPL' , 'wordGap' , 'WordGapLen']]

    knn = KNeighborsClassifier(n_neighbors=1) #uses k nearest neighbor to find closest example

    knn.fit(X,y)

    new_metrics = new_metrics.reshape(1,-1)
    return(knn.predict(new_metrics)) 
Example 6
Project: Attention-Based-Siamese-Text-CNN-for-Stance-Detection   Author: Yikai-Wang   File: naive_ml.py    MIT License
def method_KNeighborsClassifier(self):
#        pipeline = Pipeline([('clf', KNeighborsClassifier())])
#        parameters = {'clf__n_neighbors': (5, 10, 3, 50)}
#        grid_search = GridSearchCV(pipeline, 
#                                   parameters, 
#                                   verbose=1,
#                                   scoring='accuracy')
#        grid_search.fit(self.train_set[0], self.train_set[1])
#        print('Best score: %0.3f' % grid_search.best_score_)
#        print('Best parameters; ')
#        best_parameters = grid_search.best_estimator_.get_params()
#        for param_name in sorted(best_parameters.keys()):
#            print('\t%s: %r' % (param_name, best_parameters[param_name]))
        self.my_KNN.fit(self.train_set[0], self.train_set[1])
        self.my_KNN_pred = self.my_KNN.predict(self.valid_set[0])
        self.my_KNN_acc = accuracy_score(self.my_KNN_pred, self.valid_set[1])
        print('KNeighborsClassifier accuracy is: ' + str(self.my_KNN_acc))
        if self.multi_classification:
            print('KNeighborsClassifier penalized accuracy is: ' + self.penalized_accuracy(self.my_KNN_pred,self.valid_set[1])) 
Example 7
Project: DAML   Author: anonymous1computervision   File: evaluation.py    MIT License
def evaluate_cluster(feats,labels,n_clusters):

    kmeans = cluster.KMeans(n_clusters=n_clusters, random_state=0).fit(feats)
    centers = kmeans.cluster_centers_

    ### k-nearest neighbors
    neigh = neighbors.KNeighborsClassifier(n_neighbors=1)
    neigh.fit(centers,range(len(centers)))

    idx_in_centers = neigh.predict(feats)
    num = len(feats)
    d = np.zeros(num)
    for i in range(num):
        d[i] = np.linalg.norm(feats[i,:] - centers[idx_in_centers[i],:])  

    labels_pred = np.zeros(num)
    for i in range(n_clusters):
        index = np.where(idx_in_centers == i)[0]
        ind = np.argmin(d[index])
        cid = index[ind]
        labels_pred[index] = cid

    nmi, f1 = compute_clutering_metric(labels, labels_pred)
    return nmi,f1 
Example 8
Project: stock-price-prediction   Author: chinuy   File: classifier.py    MIT License
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown']) 
Example 9
Project: Generative-ConvACs   Author: HUJI-Deep   File: knn_missing_data.py    MIT License
def knn_masked_data(trX,trY,missing_data_dir, input_shape, k):
    
    raw_im_data = np.loadtxt(join(script_dir,missing_data_dir,'index.txt'),delimiter=' ',dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir,missing_data_dir,'index_mask.txt'),delimiter=' ',dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute',n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0],int(np.max(trY)+1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()], maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im=load_image(join(script_dir,missing_data_dir,raw_mask_data[i][0]), input_shape,1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps) # since mask is 1 at missing locations
        v_im=load_image(join(script_dir,missing_data_dir,raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask,(trX.shape[0],1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)        
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i,:] = knn_m.predict_proba(v_im.reshape(1,-1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat 
Example 10
Project: NJU-Machine-Learning   Author: bushiwoshifeng   File: sklearn_KNN.py    Mozilla Public License 2.0
def classification(train_feature, train_label, test_feature):
    '''
    Classify the wine samples in test_feature.
    :param train_feature: training data, ndarray
    :param train_label: training labels, ndarray
    :param test_feature: test data, ndarray
    :return: predicted classes for the test data
    '''
    scaler = StandardScaler()
    train_feature = scaler.fit_transform(train_feature)
    # reuse the statistics fitted on the training set so both sets are scaled consistently
    test_feature = scaler.transform(test_feature)
    clf = KNeighborsClassifier(algorithm='auto',
                               leaf_size=30,
                               metric='minkowski',
                               metric_params=None,
                               n_jobs=1,
                               n_neighbors=1,
                               p=2,
                               weights='uniform')
    clf.fit(train_feature, train_label)
    predict = clf.predict(test_feature)
    return predict 
Example 11
Project: ml-parameter-optimization   Author: arnaudvl   File: sklearn_tune.py    MIT License
def tune_params(self):
        """
        tune specified (and default) parameters
        """
        self._start_time = time.time()
        self.default_params() # set default parameters
        self.score_init() # set initial score
        if self.dim_reduction is not None:
            knn = Pipeline([('dimred', self.dim_reduction_method()),
                            ('knn', KNeighborsClassifier(**self._params))])
            self._pipeline = True
        else:
            knn = KNeighborsClassifier(**self._params)
        self.apply_gridsearch(knn)
        self.print_progress(self._start_time)
        return self 
Example 12
Project: Localization_via_WiFi_Fingerprinting   Author: ryanmclark   File: models.py    MIT License
def load_KNN():
    '''
    Loads K-Nearest Neighbor and gives a name for the output files.
    
    Parameters : None
    
    Returns    : model_name : (str) Name of the model for output file.
                       clf  : (Classifier) Building and Floor Classifier
                       regr : (Regressor) Longitude and Latitude Regressor
    '''
    model_name = "K-Nearest Neighbors"
    clf = KNeighborsClassifier(n_neighbors=1, algorithm='kd_tree',
                                leaf_size=50, p=1)
    regr = KNeighborsRegressor(n_neighbors=1, algorithm='kd_tree',
                                leaf_size=50, p=1)
    
    return model_name, clf, regr 
Example 13
Project: transferlearning   Author: jindongwang   File: digit_deep_feature.py    MIT License
def classify_1nn():
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(args.source + '_' + args.source + '.csv', delimiter=','),
            'tar': np.loadtxt(args.source + '_' + args.target + '.csv', delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -1], data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('{} - {}: acc: {:.4f}'.format(args.source, args.target, acc)) 
Example 14
Project: transferlearning   Author: jindongwang   File: TCA.py    MIT License
def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform Xs and Xt, then make predictions on target using 1NN
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: Accuracy and predicted_labels on the target domain
        '''
        Xs_new, Xt_new = self.fit(Xs, Xt)
        clf = KNeighborsClassifier(n_neighbors=1)
        clf.fit(Xs_new, Ys.ravel())
        y_pred = clf.predict(Xt_new)
        acc = sklearn.metrics.accuracy_score(Yt, y_pred)
        return acc, y_pred 
Example 15
Project: stockMarketPrediction   Author: hmn21   File: machineLearning.py    MIT License
def KNN(X_train, y_train, X_test, y_test):
	clf = neighbors.KNeighborsClassifier()
	clf.fit(X_train, y_train)
	accuracy = clf.score(X_test, y_test)
	return accuracy 
Example 16
Project: Sentiment-analysis   Author: renjunxiang   File: sklearn_supervised.py    MIT License
def sklearn_supervised(data=None,
                       label=None,
                       model_savepath=DIR + '/sentence_transform/classify.model',
                       model_name='SVM',
                       **sklearn_param):
    '''
    :param data: training texts
    :param label: labels of the training texts
    :param model_savepath: path to save the trained model
    :param model_name: machine learning classifier: SVM, KNN or Logistic
    :param return: the trained model
    '''

    if model_name == 'KNN':
        # KNN classifier, n_neighbors=5 by default
        model = KNeighborsClassifier(**sklearn_param)
        model.fit(data, label)  # fit here too, so the saved KNN model is trained
    elif model_name == 'SVM':
        # linear kernel, penalty C=1.0 by default
        model = SVC(**sklearn_param)
        model.fit(data, label)
    elif model_name == 'Logistic':
        model = LogisticRegression(**sklearn_param)  # linear model, penalty C=1 by default
        model.fit(data, label)

    if model_savepath is not None:
        joblib.dump(model, model_savepath)  # save the model

    return model 
Example 17
Project: MCS_DTW   Author: CGuichardMasterDL   File: kppv.py    MIT License
def pretraitement_acp(base):
    """
        Construire la base de l'espace vectoriel correspondant aux vecteurs propres
        associés aux trois plus grandes valeurs propres de la matrice de covariance
        de la base d'apprentissage

        NB:

        PCA(n_components=3) indique qu'on souhaite réduire les données en 3 dimmensions

        acp.components_ contient alors la matrice 3*12 correspondant aux n_components
        vecteurs propres

        ---> (12,1)*(3,12) ---> (1,3) un point de l'espace
    """
    learning_base = []
    for sound in base:
        learning_base.append(mean_mfcc(sound))

    scaler = StandardScaler()
    learning_base = scaler.fit_transform(learning_base)
    acp = PCA(n_components=3)
    acp.fit_transform(learning_base)

    for sound in base:
        update_composantes_principales(
            sound, np.transpose(acp.components_), scaler)

    kppv = KNeighborsClassifier(n_neighbors=1)
    data = [sound.get_composantes_principales() for sound in base]
    classes = range(len(base))
    kppv.fit(data, classes)

    return (kppv, scaler, np.transpose(acp.components_)) 
Example 18
Project: MCS_DTW   Author: CGuichardMasterDL   File: kppv.py    MIT License
def pretraitement_acp_dual(base):
    """
        Préparer l'analyse kppv sur les ordres et sur les locuteurs
    """
    learning_base = []
    for sound in base:
        learning_base.append(mean_mfcc(sound))

    scaler = StandardScaler()
    learning_base = scaler.fit_transform(learning_base)
    acp = PCA(n_components=3)
    acp.fit_transform(learning_base)

    for sound in base:
        update_composantes_principales(
            sound, np.transpose(acp.components_), scaler)


    kppv_ordre = KNeighborsClassifier(n_neighbors=k_ordre)
    kppv_locuteur = KNeighborsClassifier(n_neighbors=k_locuteur)

    data = [sound.get_composantes_principales() for sound in base]
    classes_ordre = [sound.get_ordre() for sound in base]
    classes_locuteur = [sound.get_locuteur() for sound in base]

    kppv_ordre.fit(data, classes_ordre)
    kppv_locuteur.fit(data, classes_locuteur)
    return (kppv_ordre, kppv_locuteur, scaler, np.transpose(acp.components_)) 
Example 19
Project: Github-Classifier   Author: Ichaelus   File: knnreadmeonly.py    MIT License
def __init__(self, text_corpus):
        ClassificationModule.__init__(self, "Readme Only KNN", "A K-Nearest Neighbor Classifier trained and used on Readme-Vec")
        # Create vectorizer and fit on all available Descriptions
        self.vectorizer = getTextVectorizer(10000) # Maximum of different columns
        corpus = []
        for description in text_corpus:
            corpus.append(process_text(description))
        self.vectorizer.fit(corpus)

        self.clf = KNeighborsClassifier(n_neighbors=10, algorithm='auto')
        print("\t-", self.name)
Example 20
Project: Jtyoui   Author: jtyoui   File: neighbor.py    MIT License
def knn_class_fit(train, label):
    """训练数据模型"""
    binary = LabelBinarizer()  # 二值化
    y_ = binary.fit_transform(label)
    clf = KNeighborsClassifier()
    clf.fit(train, np.ravel(y_))
    return clf, binary 
Example 21
Project: smote_variants   Author: gykovacs   File: _smote_variants.py    MIT License
def remove_noise(self, X, y):
        """
        Removes noise from dataset
        
        Args:
            X (np.matrix): features
            y (np.array): target labels
            
        Returns:
            np.matrix, np.array: dataset after noise removal
        """
        _logger.info("%s: Running noise removal via %s" % (self.__class__.__name__, self.__class__.__name__))
        self.class_label_statistics(X, y)

        # Initial result set consists of all minority samples and 1 majority sample
        X_maj = X[y == self.majority_label]
        X_hat = np.vstack([X[y == self.minority_label], X_maj[0]])
        y_hat = np.hstack([np.repeat(self.minority_label, len(X_hat)-1), [self.majority_label]])
        X_maj = X_maj[1:]

        # Adding misclassified majority elements repeatedly
        while True:
            knn = KNeighborsClassifier(n_neighbors=1, n_jobs=self.n_jobs)
            knn.fit(X_hat, y_hat)
            pred = knn.predict(X_maj)

            if np.all(pred == self.majority_label):
                break
            else:
                X_hat = np.vstack([X_hat, X_maj[pred != self.majority_label]])
                y_hat = np.hstack([y_hat, np.repeat(self.majority_label, len(X_hat) - len(y_hat))])
                X_maj = np.delete(X_maj, np.where(pred != self.majority_label)[0], axis=0)
                if len(X_maj) == 0:
                    break

        return X_hat, y_hat 
Example 22
Project: GoldTrackML   Author: andri27-ts   File: detector_geometry.py    MIT License
def get_layer_classifier(train_path):
    '''
    Return a classifier to predict the volume and layer id of a hit
    '''

    # Create the dataset
    X,y = acquire_vlm_dataset(50,10, train_path)
    # Create the model
    classifier = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
    classifier.fit(X, y)

    return classifier 
Example 23
Project: fake-news-detection   Author: aldengolab   File: model_loop.py    MIT License
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # max_depth must be a single int; a list of depths belongs in a parameter grid
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             } 
Example 24
Project: Mussy-Robot   Author: arnomoonens   File: training.py    MIT License
def training(data):

    
    svc_1 = SVC(kernel='linear')
    
    
    #we create the target vector of -1 for sad images, 0 for normal,
    #and 1 for happy images; the data consists of 15 sad images, then 15 happy images, then 15 normal images
    zero=[int(i) for i in numpy.zeros(15)]
    one=[int(i) for i in numpy.ones(15)]
    minus1=[int(i) for i in numpy.repeat(-1,15)]
    target=numpy.concatenate((minus1,one,zero,),axis=0)
   
    #we test whether the classifier works correctly with CROSS-VALIDATION
    #5 fold cross validation
    from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed; model_selection replaces it

    
    
    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.20, random_state=0)
    
    from sklearn import neighbors
    n_neighbors = 3
    for weights in ['uniform', 'distance']:
        # we create an instance of Neighbours Classifier and fit the data.
        KNeigh = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
        KNeigh.fit(X_train,y_train)
        print(KNeigh.predict(X_test))
        
    print(y_test)
    #evaluate_cross_validation(KNeigh, X_train, y_train, 10)
    #svc is better!!!
    svc_1.fit(X_train,y_train)
    evaluate_cross_validation(svc_1, X_train, y_train, 10)
    joblib.dump(svc_1,'svc_1.pkl') 
Example 25
Project: highdimensional-decision-boundary-plot   Author: tmadl   File: decisionboundaryplot.py    MIT License
def setclassifier(self, estimator=KNeighborsClassifier(n_neighbors=10)):
        """Assign classifier for which decision boundary should be plotted.

        Parameters
        ----------
        estimator : BaseEstimator instance, optional (default=KNeighborsClassifier(n_neighbors=10)).
            Classifier for which the decision boundary should be plotted. Must have
            probability estimates enabled (i.e. estimator.predict_proba must work).
            Make sure it is possible for probability estimates to get close to 0.5
            (more specifically, as close as specified by acceptance_threshold).
        """
        self.classifier = estimator 
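A hypothetical usage sketch ("DBPlot" stands in for the surrounding plot class, whose actual name this excerpt does not show); any estimator that exposes predict_proba qualifies:

from sklearn.linear_model import LogisticRegression

plot = DBPlot()  # hypothetical instantiation of the surrounding class
plot.setclassifier(LogisticRegression())  # LogisticRegression provides predict_proba, as required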
Example 26
Project: MMA-Odds   Author: gilmanjo   File: mma_analyzer.py    MIT License
def knn_analysis(X_train, y_train, X_test, y_test, verbose=False, K=5):
	# Uses K-Nearest Neighbors to analyze fight data
	print("Performing K-NN analysis (K = {})...".format(K))

	clf = neighbors.KNeighborsClassifier(K)
	clf.fit(X_train, y_train)

	# K-NN makes predictions!
	if not verbose:
		score = clf.score(X_test.astype("float64"), y_test.astype("float64"))
		print(score)
		return score

	# if verbose, print out probabilities for each prediction
	else:

		prediction_threshold = 0.775
		thresh_pred = []
		estimates = clf.predict(X_test), clf.predict_proba(X_test)

		for x in range(len(X_test)):
			print("Predicted: {}\tProbabilities: {}, {} \tActual: {}".\
				format(estimates[0][x], estimates[1][x][0], estimates[1][x][1],
					y_test[x]))

			# prediction accuracy with given choice threshold
			prediction = estimates[0][x]
			if estimates[1][x][prediction] >= prediction_threshold:

				if prediction == y_test[x]:
					thresh_pred.append(1)

				else:
					thresh_pred.append(0)

		print("\nPrediction Accuracy (with threshold = {}: {}; n = {})".format(
			prediction_threshhold, sum(thresh_pred)/len(thresh_pred),
			len(thresh_pred)))

		return estimates 
Example 27
Project: ml-helper-funcs   Author: numb3r33   File: custom_metrics_knn.py    MIT License
def train_knn():
	X, y = create_examples()
	X_train, X_test, y_train, y_test = split_dataset(X, y)
	
	# pass in the custom metric in the metric parameter
	clf = KNeighborsClassifier(n_neighbors=5, metric=euclidean_distance)
	
	# train the classifier on the dataset
	clf.fit(X_train, y_train)
	print('Accuracy Score: %f'%(accuracy_score(y_test, clf.predict(X_test)))) 
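The helpers create_examples and split_dataset, as well as euclidean_distance itself, are defined elsewhere in that project; as a sketch under that assumption, a custom metric handed to KNeighborsClassifier only needs to map two 1-D feature vectors to a non-negative distance:

import numpy as np

def euclidean_distance(a, b):
    # plain Euclidean distance between two feature vectors; sklearn calls
    # this once per pair of points it needs to compare
    return np.sqrt(np.sum((a - b) ** 2))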
Example 28
Project: Audio-classification-using-Bag-of-Frames-approach   Author: amogh3892   File: test.py    Apache License 2.0
def knn_predict(training_samples, training_labels, test_samples, test_labels, k_neighbours=5, weights="uniform", algorithm="auto"):
	from sklearn.neighbors import KNeighborsClassifier

	clf = KNeighborsClassifier(n_neighbors = k_neighbours, weights =weights, algorithm = algorithm)

	t0 = time()
	clf.fit(training_samples,training_labels)
	training_time = round(time()-t0, 3)

	t0 = time()
	pred = clf.predict(test_samples)
	test_time = round(time()-t0, 3)

	from sklearn.metrics import accuracy_score

	acc = accuracy_score(test_labels, pred)

	no_features = np.array(training_samples).shape[1]
	training_samples = np.array(training_samples).shape[0]
	test_samples = np.array(test_samples).shape[0]

	with open("Temp\\results.txt","w") as outfile:
		outfile.write("Alogirthm : {}\n".format("KNN"))
		outfile.write("K  = {}\n".format(k_neighbours))
		outfile.write("weight = {}\n".format(weights))
		outfile.write("algorithm = {}\n".format(algorithm))
		outfile.write("No of features : {}\n".format(no_features))
		outfile.write("No of training samples : {}\n".format(training_samples))
		outfile.write("No of test samples : {}\n".format(test_samples))
		outfile.write("Training time : {}\n".format(training_time))
		outfile.write("Test time : {}\n".format(test_time))
		outfile.write("Accuracy : {}\n".format(acc))

	with open("Temp\\result_labels.csv","wb") as outfile:
		np.savetxt(outfile,pred) 
Example 29
Project: lightwood   Author: mindsdb   File: feature.py    MIT License
def get_model_class(self, classifier_class, regression_class):
        """

        :param classifier_class: 
        :param regression_class: 
        :return:  model which will be used to fit the data
        """
        return classifier_class(KNeighborsClassifier(3), n_jobs=-1) 
Example 30
Project: healthcareai-py   Author: HealthCatalyst   File: advanced_supvervised_model_trainer.py    MIT License
def knn(self,
            scoring_metric='roc_auc',
            hyperparameter_grid=None,
            randomized_search=True,
            number_iteration_samples=10):
        """
        A light wrapper for Sklearn's knn classifier that performs randomized 
        search over an overridable default
        hyperparameter grid.
        
        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for classification
            hyperparameter_grid (dict): hyperparameters by name
            randomized_search (bool): True for randomized search (default)

            number_iteration_samples (int): Number of models to train during the 
            randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

        Returns:
            TrainedSupervisedModel: 
        """
        self.validate_classification('KNN')
        if hyperparameter_grid is None:
            neighbors = list(range(5, 26))
            hyperparameter_grid = {'n_neighbors': neighbors, 'weights': ['uniform', 'distance']}
            number_iteration_samples = 10

            print('KNN Grid: {}'.format(hyperparameter_grid))
        algorithm = get_algorithm(KNeighborsClassifier,
                                  scoring_metric,
                                  hyperparameter_grid,
                                  randomized_search,
                                  number_iteration_samples=number_iteration_samples)

        trained_supervised_model = self._create_trained_supervised_model(algorithm)

        return trained_supervised_model 
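get_algorithm is a healthcareai-specific helper; assuming it wraps sklearn's randomized search, a minimal equivalent sketch over the same KNN grid would be:

from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

# sample 10 settings from the grid and keep the best by ROC AUC
hyperparameter_grid = {'n_neighbors': list(range(5, 26)),
                       'weights': ['uniform', 'distance']}
algorithm = RandomizedSearchCV(KNeighborsClassifier(),
                               param_distributions=hyperparameter_grid,
                               n_iter=10,
                               scoring='roc_auc')
# algorithm.fit(X, y) would then expose algorithm.best_estimator_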
Example 31
Project: libact   Author: ntucllab   File: test_sklearn_adapter.py    BSD 2-Clause "Simplified" License
def test_adapt_knn(self):
        adapter = SklearnAdapter(KNeighborsClassifier())
        clf = KNeighborsClassifier()
        self.check_functions(adapter, clf) 
Example 32
Project: pennai   Author: EpistasisLab   File: learn_tests.py    GNU General Public License v3.0
def test_compute_imp_score_4():
    """Test compute_imp_score function returns 'Permutation Feature Importance' with KNeighborsClassifier on multiclass dataset."""
    model = KNeighborsClassifier()
    model.fit(training_features_1, training_classes_1)
    coefs, imp_score_type = compute_imp_score(model,
                                        'accuracy',
                                        training_features_1,
                                        training_classes_1,
                                        42)
    assert imp_score_type == "Permutation Feature Importance" 
Example 33
Project: cactus-maml   Author: kylehkhsu   File: baselines.py    MIT License
def embedding_nearest_neighbour(n_neighbors=FLAGS.n_neighbours, num_classes=FLAGS.way, num_shots=FLAGS.shot, num_tasks=FLAGS.num_tasks,
                                num_encoding_dims=FLAGS.num_encoding_dims, test_set=FLAGS.test_set,
                                dataset=FLAGS.dataset):
    print('{}-way {}-shot embedding nearest neighbour'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots, num_samples_per_class=num_shots+5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes, num_train_samples_per_class=num_shots, num_samples_per_class=num_shots+5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)

    accuracies = []

    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, accuracy {:.5}'.format(i_task + 1, np.mean(accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
        knn.fit(Z_train_few, Y_train_few)
        accuracy = knn.score(Z_test_few, Y_test_few)

        accuracies.append(accuracy)

    print('{}-way {}-shot embedding nearest neighbour: {:.5} with 95% CI {:.5} over {} tests'.format(num_classes, num_shots, np.mean(accuracies), 1.96*np.std(accuracies)/np.sqrt(num_tasks), num_tasks)) 
Example 34
Project: MNIST-baselines   Author: cxy1997   File: KNN.py    MIT License
def KNN():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = KNeighborsClassifier(n_neighbors=5)

    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)

    return res 
Example 35
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def get_accuracy(data):
    accuracy = []
    print "Plotting K..."
    is_missing = np.random.uniform(0, 1, len(data)) > 0.8
    train = data[is_missing == False]
    test = data[is_missing == True]
    for n in range(1, 20, 1):
        clf = KNeighborsClassifier(n_neighbors=n)
        clf.fit(train[['x', 'y']], train['color'])
        preds = clf.predict(test[['x', 'y']])
        k_accuracy = np.where(preds==test['color'], 1, 0).sum() / float(len(test))
        print "Neighbors: %d, Accuracy: %3f" % (n, k_accuracy)

        accuracy.append([n, k_accuracy])
    return accuracy 
Example 36
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0
def run(image_name):
    im = load_image(image_name)
    filename, file_extension = os.path.splitext(image_name)
    data, all_cols = convert_to_dataframe(im)
    df = pd.DataFrame(data, columns=['x', 'y', 'color'])


    # generate missing data (in our case this is the same as our training/test sets)
    is_missing = np.random.uniform(0, 1, len(df)) > 0.7
    print("Total number of pixels:", len(df), "Pixels available:", np.where(is_missing==False)[0].size, "Pixels missing:", np.where(is_missing==True)[0].size)
    train = df[is_missing==False]
    test = df[is_missing==True]
    save_to_file('{}_missing{}'.format(filename, file_extension), [train], (im.width, im.height))

    # Uncomment the following line to run the k-finding function
    # Caution: This will likely take a long time!
    # plot_k(train)

    clf = KNeighborsClassifier(n_neighbors=3)
    clf.fit(train[['x', 'y']], train['color'])

    save_to_file('{}_test{}'.format(filename, file_extension), [test], (im.width, im.height))
    preds = clf.predict(test[['x', 'y']])
    test.color = preds

    save_to_file('{}_predicted{}'.format(filename, file_extension), [test], (im.width, im.height))
    save_to_file('{}_combined{}'.format(filename, file_extension), [train, test], (im.width, im.height)) 
Example 37
Project: text_classification   Author: lpq29743   File: knn.py    MIT License
def run(self):
        X_train, X_test, y_train, y_test = data_utils.load_train_test_data(self.data_fname)
        train_features, test_features = self.vectorizer.feature_extraction(X_train, X_test)
        neigh = KNeighborsClassifier(n_neighbors=self.n_neighbors)
        neigh.fit(train_features, y_train)
        print(neigh.score(test_features, y_test)) 
Example 38
Project: DeepBugHunter   Author: sed-inf-u-szeged   File: knn.py    Apache License 2.0
def learn(train, dev, test, args, sargs_str):
    sargs = util.parse(parser, sargs_str.split())
    return util.sklearn_wrapper(train, dev, test, KNeighborsClassifier(**sargs)) 
Example 39
Project: stock-price-prediction   Author: chinuy   File: classifier.py    MIT License
def performKNNClass(X_train, y_train, X_test, y_test, parameters, savemodel):
    """
    KNN binary classification
    """
    clf = neighbors.KNeighborsClassifier(parameters[0])
    clf.fit(X_train, y_train)

    accuracy = clf.score(X_test, y_test)

    return accuracy 
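A hypothetical smoke test for performKNNClass (the neighbors module is imported at the project's module level; parameters[0] is forwarded to KNeighborsClassifier as n_neighbors, its first positional argument, and savemodel is unused in this excerpt):

import numpy as np

X = np.random.randn(40, 3)
y = np.random.randint(0, 2, 40)
print(performKNNClass(X[:30], y[:30], X[30:], y[30:], [5], False))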
Example 40
Project: pybalu   Author: mbucchi   File: sfs.py    GNU General Public License v3.0
def performance_for_features(feat_idxs):
    # train classifier
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(f_train_norm[:, feat_idxs], c_train)

    # predict and evaluate performance
    prediction = knn.predict(f_test_norm[:, feat_idxs])
    return performance(prediction, c_test) 
Example 41
Project: ASR-for-chinese-number   Author: edmundwsy   File: main.py    MIT License
def train_a_classifier(self, data, label, num=1000):
        '''
        Train a classifier selected by self.method.
        Other classifiers should still be added.
        :param data: training features
        :param label: training labels
        :param num: number of fit iterations
        :return: the trained classifier
        '''
        if self.method == 'lsvm':
            clf = sklearn.svm.LinearSVC()
        elif self.method == 'ksvm':
            clf = sklearn.svm.SVC(kernel='sigmoid', gamma='scale', max_iter=100000)
            # kernel: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
            # gamma: 'auto' or 'scale'
        elif self.method == 'dctree':
            clf = sklearn.tree.DecisionTreeClassifier()
        elif self.method == 'sgd':
            clf = sklearn.linear_model.SGDClassifier(
                loss="modified_huber", penalty="l2")
        elif self.method == 'bayes':
            clf = sklearn.naive_bayes.GaussianNB()
        elif self.method == 'ada_boost':
            clf = AdaBoostClassifier(n_estimators=100)
        elif self.method == 'knn':
            clf = KNeighborsClassifier()
        else:
            clf = []
            assert 0, "ERROR"

        # note: these estimators are not incremental, so each loop iteration
        # simply refits the same classifier on the same data
        for i in range(num):
            clf.fit(data, label.squeeze())
        return clf 
Example 42
Project: abnormal-traffic   Author: chenxu93   File: tradition_2.py    Apache License 2.0
def knn_classifier(feature, label):
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier()
    model.fit(feature, label)
    return model 
Example 43
Project: abnormal-traffic   Author: chenxu93   File: tradition_11.py    Apache License 2.0
def knn_classifier(feature, label):
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier()
    model.fit(feature, label)
    return model 
Example 44
Project: BuildingMachineLearning   Author: ademyanchuk   File: figure4_5_sklearn.py    MIT License
def plot_decision(features, labels, num_neighbors=1):
    '''Plots decision boundary for KNN

    Parameters
    ----------
    features : ndarray
    labels : sequence

    Returns
    -------
    fig : Matplotlib Figure
    ax  : Matplotlib Axes
    '''
    y0, y1 = features[:, 2].min() * .9, features[:, 2].max() * 1.1
    x0, x1 = features[:, 0].min() * .9, features[:, 0].max() * 1.1
    X = np.linspace(x0, x1, 1000)
    Y = np.linspace(y0, y1, 1000)
    X, Y = np.meshgrid(X, Y)

    model = KNeighborsClassifier(num_neighbors)
    model.fit(features[:, (0,2)], labels)
    C = model.predict(np.vstack([X.ravel(), Y.ravel()]).T).reshape(X.shape)
    if COLOUR_FIGURE:
        cmap = ListedColormap([(1., .7, .7), (.7, 1., .7), (.7, .7, 1.)])
    else:
        cmap = ListedColormap([(1., 1., 1.), (.2, .2, .2), (.6, .6, .6)])
    fig,ax = plt.subplots()
    ax.set_xlim(x0, x1)
    ax.set_ylim(y0, y1)
    ax.set_xlabel(feature_names[0])
    ax.set_ylabel(feature_names[2])
    ax.pcolormesh(X, Y, C, cmap=cmap)
    if COLOUR_FIGURE:
        cmap = ListedColormap([(1., .0, .0), (.1, .6, .1), (.0, .0, 1.)])
        ax.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
    else:
        for lab, ma in zip(range(3), "Do^"):
            ax.plot(features[labels == lab, 0],
                    features[labels == lab, 2], ma, c=(1., 1., 1.), ms=6)
    return fig,ax 
Example 45
Project: motion-classification   Author: matthiasplappert   File: decision.py    MIT License
def _init_model(self):
        return KNeighborsClassifier() 
Example 46
Project: CS259D_Notes_HW   Author: LeoTsui   File: touch.py    MIT License
def f1_knn(data, label_np):
    k_f1 = []
    for k in range(1, 8):
        f1_list = []
        for u in np.sort(list(set(label_np))):
            label = [int(l == u) for l in label_np]
            neigh = KNeighborsClassifier(k)
            f1 = np.mean(cross_val_score(neigh, data, label, cv=10, scoring='f1'))
            f1_list.append(f1)
        k_f1.append([k, np.mean(f1_list)])
        print(k, np.mean(f1_list))

    best_idx = np.argmax(k_f1, axis=0)[1]
    k_best, f1_best = k_f1[best_idx][0], k_f1[best_idx][1]
    return k_best, f1_best 
Example 47
Project: Phishing-Website-Detection   Author: fatihhcelik   File: main.py    MIT License
def knn():
    knn = KNeighborsClassifier(n_neighbors=5)  
    knn.fit(train_inputs,train_outputs.values.ravel())
    predict=knn.predict(test_inputs)
    accuracy=100.0*accuracy_score(test_outputs,predict)
    return accuracy
    #91.91820837390458 
Example 48
Project: practicalDataAnalysisCookbook   Author: drabastomek   File: reduce_kNN.py    GNU General Public License v2.0
def fit_kNN_classifier(data):
    '''
        Build the kNN classifier
    '''
    # create the classifier object
    knn = nb.KNeighborsClassifier()

    # fit the data
    knn.fit(data[0],data[1])

    #return the classifier
    return knn 
Example 49
Project: lucifers-screwdriver   Author: dwysocki   File: ml.py    MIT License
def train(object_params, PHAs, **kwargs):
    X = features(object_params)

    clf = KNeighborsClassifier(**kwargs)
    clf.fit(X, PHAs)

    return clf 
Example 50
Project: Building-Machine-Learning-Systems-With-Python-Second-Edition   Author: PacktPublishing   File: figure4_5_sklearn.py    MIT License
def plot_decision(features, labels, num_neighbors=1):
    '''Plots decision boundary for KNN

    Parameters
    ----------
    features : ndarray
    labels : sequence

    Returns
    -------
    fig : Matplotlib Figure
    ax  : Matplotlib Axes
    '''
    y0, y1 = features[:, 2].min() * .9, features[:, 2].max() * 1.1
    x0, x1 = features[:, 0].min() * .9, features[:, 0].max() * 1.1
    X = np.linspace(x0, x1, 1000)
    Y = np.linspace(y0, y1, 1000)
    X, Y = np.meshgrid(X, Y)

    model = KNeighborsClassifier(num_neighbors)
    model.fit(features[:, (0,2)], labels)
    C = model.predict(np.vstack([X.ravel(), Y.ravel()]).T).reshape(X.shape)
    if COLOUR_FIGURE:
        cmap = ListedColormap([(1., .7, .7), (.7, 1., .7), (.7, .7, 1.)])
    else:
        cmap = ListedColormap([(1., 1., 1.), (.2, .2, .2), (.6, .6, .6)])
    fig,ax = plt.subplots()
    ax.set_xlim(x0, x1)
    ax.set_ylim(y0, y1)
    ax.set_xlabel(feature_names[0])
    ax.set_ylabel(feature_names[2])
    ax.pcolormesh(X, Y, C, cmap=cmap)
    if COLOUR_FIGURE:
        cmap = ListedColormap([(1., .0, .0), (.1, .6, .1), (.0, .0, 1.)])
        ax.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
    else:
        for lab, ma in zip(range(3), "Do^"):
            ax.plot(features[labels == lab, 0],
                    features[labels == lab, 2], ma, c=(1., 1., 1.), ms=6)
    return fig,ax 
Example 51
Project: dvb.datascience   Author: devolksbank   File: score_test.py    MIT License
def test_binaryclass_kneighbors(self):
        p = self.get_pipeline(
            "breast_cancer",
            ds.predictor.SklearnClassifier(clf=KNeighborsClassifier, n_neighbors=3),
        )

        params = {"split": {"split": ds.transform.RandomTrainTestSplit.TRAIN}}

        p.fit_transform(transform_params=params, fit_params=params)
        assert round(abs(p.get_pipe("score").accuracy()-0.947), 3) == 0
        numpy.testing.assert_array_equal(
            p.get_pipe("score").confusion_matrix(), np.array([[133, 16], [5, 244]])
        )
        assert round(abs(p.get_pipe("score").auc()-0.992), 3) == 0
        assert round(abs(p.get_pipe("score").mcc()-0.887), 3) == 0
        assert round(abs(p.get_pipe("score").log_loss()-1.822), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["f1-score"]-0.959), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["precision"]-0.938), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["recall"]-0.980), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["support"]-249.0), 3) == 0

        params = {"split": {"split": ds.transform.RandomTrainTestSplit.TEST}}
        p.transform(transform_params=params)
        assert round(abs(p.get_pipe("score").accuracy()-0.942), 3) == 0
        numpy.testing.assert_array_equal(
            p.get_pipe("score").confusion_matrix(), np.array([[57, 6], [4, 104]])
        )
        assert round(abs(p.get_pipe("score").auc()-0.991), 3) == 0
        assert round(abs(p.get_pipe("score").mcc()-0.874), 3) == 0
        assert round(abs(p.get_pipe("score").log_loss()-2.020), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["f1-score"]-0.954), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["precision"]-0.945), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["recall"]-0.963), 3) == 0
        assert round(abs(p.get_pipe("score").classification_report().iloc[1]["support"]-108.0), 3) == 0 
Example 52
Project: dvb.datascience   Author: devolksbank   File: predictor_test.py    MIT License
def get_pipeline(self):
        p = ds.Pipeline()
        p.addPipe("read", ds.data.DataPipe())
        p.addPipe("metadata", ds.data.DataPipe(data={"y_true_label": "y"}))
        p.addPipe(
            "clf",
            ds.predictor.SklearnClassifier(clf=KNeighborsClassifier, n_neighbors=3),
            [("read", "data", "df"), ("metadata", "data", "df_metadata")],
        )

        return p 
Example 53
Project: dvb.datascience   Author: devolksbank   File: init_test_script.py    MIT License
def run():
    p = ds.Pipeline()
    p.addPipe("read", ds.data.SampleData(dataset_name="breast_cancer"))
    p.addPipe(
        "split",
        ds.transform.RandomTrainTestSplit(test_size=0.3, random_state=42),
        [("read", "df", "df")],
    )
    p.addPipe(
        "clf",
        ds.predictor.SklearnClassifier(clf=KNeighborsClassifier, n_neighbors=3),
        [("split", "df", "df"), ("read", "df_metadata", "df_metadata")],
    )
    p.addPipe(
        "score",
        ds.score.ClassificationScore(["accuracy", "confusion_matrix"]),
        [
            ("clf", "predict", "predict"),
            ("clf", "predict_metadata", "predict_metadata"),
        ],
    )

    params = {"split": {"train": True}}

    p.fit_transform(transform_params=params, fit_params=params)

    params = {"split": {"train": False}}
    p.transform(transform_params=params) 
Example 54
Project: nldrp   Author: etzinis   File: fuse_call_level_loso_opt_feats_nonlinear_first.py    MIT License
def configure_models():
    # class ELMWrapper(object):
    #     def __init__(self, **kwargs):
    #         self.kernel = elm.ELMKernel()
    #     def predict(self, x):
    #         return self.kernel.test(x)
    #     def fit(self, x_tr, y_tr):
    #         self.le = LabelEncoder()
    #         self.le.fit(y_tr)
    #         int_labels = self.le.transform(y_tr)
    #         labels_col = np.asarray(int_labels)
    #         labels_col = np.reshape(labels_col, (-1,1))
    #         new_data = np.concatenate([labels_col, x_tr], axis=1)
    #
    #         new_data = elm.read('/home/thymios/Desktop/iris.data')
    #         print new_data.shape
    #
    #         self.kernel.search_param(new_data,
    #                                  of="accuracy",
    #                                  eval=10)
    #         # self.kernel.train(new_data)
    #         exit()

    models = []
    # models.append(('ELM', ELMWrapper()))
    models.append(('LR', LogisticRegression()))
    #models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    # models.append(('CART', DecisionTreeClassifier()))
    # models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    # models.append(('RF', RandomForestClassifier()))
    # models.append(('ADAb', AdaBoostClassifier()))
    # models.append(('GRADb', GradientBoostingClassifier()))
    # models.append(('QDA', QuadraticDiscriminantAnalysis()))
    # models.append(('LinR', LogisticRegression()))


    return dict(models) 
Example 55
Project: nldrp   Author: etzinis   File: call_level_loso.py    MIT License
def configure_models():
    models = []
    models.append(('LR', LogisticRegression()))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('KNN', KNeighborsClassifier()))
    models.append(('CART', DecisionTreeClassifier()))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    models.append(('RF', RandomForestClassifier()))
    models.append(('ADAb', AdaBoostClassifier()))
    models.append(('GRADb', GradientBoostingClassifier()))
    models.append(('QDA', QuadraticDiscriminantAnalysis()))
    models.append(('LinR', LogisticRegression()))
    return dict(models) 
Example 56
Project: nldrp   Author: etzinis   File: fuse_call_level_loso_opt_feats.py    MIT License
def configure_models():
    # class ELMWrapper(object):
    #     def __init__(self, **kwargs):
    #         self.kernel = elm.ELMKernel()
    #     def predict(self, x):
    #         return self.kernel.test(x)
    #     def fit(self, x_tr, y_tr):
    #         self.le = LabelEncoder()
    #         self.le.fit(y_tr)
    #         int_labels = self.le.transform(y_tr)
    #         labels_col = np.asarray(int_labels)
    #         labels_col = np.reshape(labels_col, (-1,1))
    #         new_data = np.concatenate([labels_col, x_tr], axis=1)
    #
    #         new_data = elm.read('/home/thymios/Desktop/iris.data')
    #         print new_data.shape
    #
    #         self.kernel.search_param(new_data,
    #                                  of="accuracy",
    #                                  eval=10)
    #         # self.kernel.train(new_data)
    #         exit()

    models = []
    # models.append(('ELM', ELMWrapper()))
    models.append(('LR', LogisticRegression()))
    #models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    # models.append(('CART', DecisionTreeClassifier()))
    # models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    # models.append(('RF', RandomForestClassifier()))
    # models.append(('ADAb', AdaBoostClassifier()))
    # models.append(('GRADb', GradientBoostingClassifier()))
    # models.append(('QDA', QuadraticDiscriminantAnalysis()))
    # models.append(('LinR', LogisticRegression()))


    return dict(models) 
Example 57
Project: nldrp   Author: etzinis   File: fuse_call_level_loso.py    MIT License
def configure_models():
    # class ELMWrapper(object):
    #     def __init__(self, **kwargs):
    #         self.kernel = elm.ELMKernel()
    #     def predict(self, x):
    #         return self.kernel.test(x)
    #     def fit(self, x_tr, y_tr):
    #         self.le = LabelEncoder()
    #         self.le.fit(y_tr)
    #         int_labels = self.le.transform(y_tr)
    #         labels_col = np.asarray(int_labels)
    #         labels_col = np.reshape(labels_col, (-1,1))
    #         new_data = np.concatenate([labels_col, x_tr], axis=1)
    #
    #         new_data = elm.read('/home/thymios/Desktop/iris.data')
    #         print new_data.shape
    #
    #         self.kernel.search_param(new_data,
    #                                  of="accuracy",
    #                                  eval=10)
    #         # self.kernel.train(new_data)
    #         exit()

    models = []
    # models.append(('ELM', ELMWrapper()))
    models.append(('LR', LogisticRegression()))
    #models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    # models.append(('CART', DecisionTreeClassifier()))
    # models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    # models.append(('RF', RandomForestClassifier()))
    # models.append(('ADAb', AdaBoostClassifier()))
    # models.append(('GRADb', GradientBoostingClassifier()))
    # models.append(('QDA', QuadraticDiscriminantAnalysis()))
    # models.append(('LinR', LogisticRegression()))


    return dict(models) 
Example 58
Project: nldrp   Author: etzinis   File: fuse_call_level_loso_opt_feats_KL.py    MIT License
def configure_models():
    # class ELMWrapper(object):
    #     def __init__(self, **kwargs):
    #         self.kernel = elm.ELMKernel()
    #     def predict(self, x):
    #         return self.kernel.test(x)
    #     def fit(self, x_tr, y_tr):
    #         self.le = LabelEncoder()
    #         self.le.fit(y_tr)
    #         int_labels = self.le.transform(y_tr)
    #         labels_col = np.asarray(int_labels)
    #         labels_col = np.reshape(labels_col, (-1,1))
    #         new_data = np.concatenate([labels_col, x_tr], axis=1)
    #
    #         new_data = elm.read('/home/thymios/Desktop/iris.data')
    #         print new_data.shape
    #
    #         self.kernel.search_param(new_data,
    #                                  of="accuracy",
    #                                  eval=10)
    #         # self.kernel.train(new_data)
    #         exit()

    models = []
    # models.append(('ELM', ELMWrapper()))
    models.append(('LR', LogisticRegression()))
    #models.append(('LDA', LinearDiscriminantAnalysis()))
    #models.append(('KNN', KNeighborsClassifier()))
    # models.append(('CART', DecisionTreeClassifier()))
    # models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    # models.append(('RF', RandomForestClassifier()))
    # models.append(('ADAb', AdaBoostClassifier()))
    # models.append(('GRADb', GradientBoostingClassifier()))
    # models.append(('QDA', QuadraticDiscriminantAnalysis()))
    # models.append(('LinR', LogisticRegression()))


    return dict(models) 
Example 59
Project: mixture_density_VAEs   Author: enalisnick   File: knn_mnist.py    MIT License
def get_knn_predictions(train_data, test_data, model, param_file_path, vae_hyperParams, nSamples=50):
    import scipy as scp
    import scipy.special
    import sklearn as skl
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.linear_model import LogisticRegression


    train_embeddings = get_embedding_samples(train_data, model, param_file_path, nSamples=1)

    # then fit knn to samples
    print("fitting classifier...")
    lr = KNeighborsClassifier(n_neighbors=3)  # LogisticRegression(multi_class='multinomial', solver='newton-cg')
    lr.fit(train_embeddings, train_set_y.ravel())  # train_set_y comes from elsewhere in the project

    # then get samples for test
    test_embeddings = get_embedding_samples(test_data, model, param_file_path, nSamples=1)

    # then classify test based on knn classifier
    y_hats = lr.predict(test_embeddings)

    return y_hats 
Example 60
Project: ISM2017   Author: ybayle   File: identify_singing_voice_gender.py    MIT License 5 votes vote down vote up
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions 
Example 61
Project: nn-toolbox   Author: nhatsmrt   File: classifier.py    Apache License 2.0 5 votes vote down vote up
def __init__(
            self, database: DataLoader, model: Module, n_neighbors: int=5,
            tta_transform=None, tta_beta: float=0.4, weights: Union[str, Callable]='distance',
            device=get_device(), threshold=0.0
    ):
        self._knn = KNeighborsClassifier(
            n_neighbors=n_neighbors, weights=weights
        )
        self._model = model.to(device)
        self._model.eval()

        embeddings = []
        labels = []

        for image, label in database:
            embeddings.append(self._model(image.to(device)).cpu().detach().numpy())
            labels.append(label.cpu().detach().numpy())

        embeddings = np.concatenate(embeddings, axis=0)

        labels = np.concatenate(labels, axis=0)

        self._knn.fit(embeddings, labels.ravel())
        self._labels_sort = np.unique(labels.ravel())
        self._n_class = np.max(labels) + 1
        self._tta_transform = tta_transform
        self._tta_beta = tta_beta
        self._device = device
        self._threshold = threshold 
Example 62
Project: driverlessai-recipes   Author: h2oai   File: knearestneighbour.py    Apache License 2.0 5 votes vote down vote up
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        X = dt.Frame(X)

        orig_cols = list(X.names)
        feature_model = Ridge(alpha=1., random_state=self.random_state)

        self.params['n_neighbors'] = min(self.params['n_neighbors'], X.shape[0])

        if self.num_classes >= 2:

            model = KNeighborsClassifier(n_neighbors=self.params['n_neighbors'], metric=self.params['metric'],
                                         weights=self.params['weights'], n_jobs=self.params['n_jobs'])
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
        else:
            model = KNeighborsRegressor(n_neighbors=self.params['n_neighbors'], metric=self.params['metric'],
                                        weights=self.params['weights'], n_jobs=self.params['n_jobs'])
        self.means = dict()
        self.standard_scaler = StandardScaler()
        for col in X.names:
            XX = X[:, col]
            self.means[col] = XX.mean1()
            if self.means[col] is None:
                self.means[col] = 0
            XX.replace(None, self.means[col])
            X[:, col] = XX
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()
        X = self.standard_scaler.fit_transform(X)
        feature_model.fit(X, y)
        model.fit(X, y)
        importances = np.array(abs(feature_model.coef_))

        self.set_model_properties(model=model,
                                  features=orig_cols,
                                  importances=importances.tolist(),  # abs(model.coef_[0])
                                  iterations=0) 
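The recipe above borrows feature importances from a Ridge surrogate because KNeighborsClassifier exposes no coef_ of its own. An alternative sketch using scikit-learn's model-agnostic permutation_importance (available in scikit-learn 0.22+; the data here is illustrative, not the recipe's own):

from sklearn.datasets import make_classification
from sklearn.inspection import permutation_importance
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=300, n_features=8, random_state=0)
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)
result = permutation_importance(knn, X, y, n_repeats=10, random_state=0)
print(result.importances_mean)  # one mean importance estimate per feature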
Example 63
Project: website-fingerprinting   Author: wisepythagoras   File: utils.py    MIT License 5 votes vote down vote up
def train(streams, labels):
    """ This function trains the classifier with the data. """

    # Shuffle the arrays.
    streams, labels = shuffle(streams, labels)

    stream_amount = len(streams)
    training_size = int(stream_amount * 0.9)

    # Get 90% of the streams for training purposes.
    training_x = streams[:training_size]
    training_y = labels[:training_size]

    # Get the remaining 10% of the streams for testing purposes.
    testing_x = streams[training_size:]
    testing_y = labels[training_size:]

    print("Training size: {}".format(training_size))
    print("Testing size:  {}".format(stream_amount - training_size))

    # Initialize the classifier.
    clf = KNeighborsClassifier()

    # Now lets train our KNN classifier.
    clf = clf.fit(training_x, training_y)

    # Save a snapshot of this classifier.
    joblib.dump(clf, "./classifier-nb.dmp", compress=9)

    # Get the prediction.
    predictions = clf.predict(testing_x)

    print("Accuracy: %s%%" % (accuracy_score(testing_y, predictions) * 100,)) 
Example 64
Project: text-search   Author: szakrewsky   File: classifier.py    MIT License 5 votes vote down vote up
def load_classifier(f='data/classifier.pkl', test=True, save=True):
    if os.path.isfile(f):
        print('loading %s' % (f,))
        classifier = joblib.load(f)
    else:
        # `n` is a sample-count constant defined elsewhere in this module.
        raw_data, raw_labels = trainingdata.load_raw(n=62*n)
        raw_data = [cv2.cvtColor(i, cv2.COLOR_BGR2GRAY) for i in raw_data]
        raw_data = [cv2.resize(i, (24, 24)) for i in raw_data]

        raw_non_char_data = trainingdata.load_raw_non_char(n=n)
        raw_non_char_data = [cv2.cvtColor(i, cv2.COLOR_BGR2GRAY) for i in raw_non_char_data]
        raw_non_char_data = [cv2.resize(i, (24, 24)) for i in raw_non_char_data]

        raw_data = np.concatenate([raw_data, raw_non_char_data])
        raw_labels = np.concatenate([raw_labels, [0]*n])

        mix = np.random.permutation(len(raw_labels))
        raw_data = raw_data[mix]
        raw_labels = raw_labels[mix]

        i = int(len(raw_data)*0.75)
        train_data = hogdesc(raw_data[0:i])
        test_data = hogdesc(raw_data[i:])
        train_labels = raw_labels[0:i]
        test_labels = raw_labels[i:]

        print('training %d samples' % (len(train_data),))
        classifier = KNeighborsClassifier(n_neighbors=1)
        classifier.fit(train_data, train_labels)
        if test:
            print('testing with %d samples' % (len(test_data),))
            print('testing score %f' % (classifier.score(test_data, test_labels),))
        if save:
            joblib.dump(classifier, f)
            print('classifier saved as %s' % (f,))
    return classifier 
Example 65
Project: NSE_Sentiment_Analysis   Author: kwoshvick   File: knn_accuracy.py    MIT License 5 votes vote down vote up
def knn_accuracy(X, y):
    """Different Classifiers"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
    knn = Pipeline([('vect', CountVectorizer()), ('tfidf', TfidfTransformer()), ('knn', KNeighborsClassifier())])
    knn = knn.fit(X_train, y_train)
    ypredknn = knn.predict(X_test)
    print("KNN evaluation")
    print(metrics.accuracy_score(y_test, ypredknn))
    print(metrics.classification_report(y_test, ypredknn))
    drawrocKNN(y_test, ypredknn) 
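Because the Pipeline above names its steps, its hyperparameters can be grid-searched under step-prefixed names; a short sketch reusing the `knn` pipeline and the split from knn_accuracy (the neighbor grid is illustrative):

from sklearn.model_selection import GridSearchCV

param_grid = {'knn__n_neighbors': [1, 3, 5, 7, 9]}
search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
search.fit(X_train, y_train)
print(search.best_params_, search.best_score_)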
Example 66
Project: prediction-constrained-topic-models   Author: dtak   File: train_and_eval_sklearn_binary_classifier.py    MIT License 4 votes vote down vote up
def __init__(self, clf=None, proba_thr_for_class1=0.5, classes=[0,1]):
        """ Make hard predictions at custom-tuned thresholds

        Args
        ----
        clf : sklearn ClassifierMixin
        proba_thr_for_class1 : float within (0.0, 1.0)
            Provides value at which we call labels of the second category
        classes : list
            Provides numeric/string values of class labels

        Examples
        --------
        # Create toy dataset with 80% label=0, 20% label=1
        >>> prng = np.random.RandomState(0)
        >>> x_N = prng.randn(100, 1)
        >>> y_N = np.asarray(prng.rand(100) > 0.8, dtype=np.int32)

        # 'Train' neighbor classifier
        >>> clf = KNeighborsClassifier(n_neighbors=100);
        >>> clf = clf.fit(x_N, y_N)
        >>> clf.classes_
        array([0, 1], dtype=int32)

        # A classifier with 0.5 threshold calls all 0
        >>> thr050 = ThresholdClassifier(clf, 0.5)
        >>> thr050.predict(x_N).min()
        0

        # A classifier with 0.15 threshold calls all 1 
        >>> thr015 = ThresholdClassifier(clf, 0.15)
        >>> thr015.predict(x_N).min()
        1

        # A classifier with 0.95 threshold calls all 0 
        >>> thr095 = ThresholdClassifier(clf, 0.95)
        >>> thr095.predict(x_N).min()
        0
        """
        self.clf = clf
        self.proba_thr_for_class1 = proba_thr_for_class1
        try:
            self.classes_ = clf.classes_
        except AttributeError:
            self.classes_ = classes
        assert len(self.classes_) == 2 
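Only the constructor is excerpted here; the doctests imply a predict method that thresholds predict_proba. A plausible sketch of that method, assuming the attributes set above (an editor's reconstruction, not the project's verbatim code):

def predict(self, x):
    # Probability of the second class, cut at the tuned threshold.
    proba1 = self.clf.predict_proba(x)[:, 1]
    idx = (proba1 >= self.proba_thr_for_class1).astype(int)
    return np.asarray(self.classes_)[idx]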
Example 67
Project: transferlearning   Author: jindongwang   File: JDA.py    MIT License 4 votes vote down vote up
def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform and Predict using 1NN as JDA paper did
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: acc, y_pred, list_acc
        '''
        list_acc = []
        X = np.hstack((Xs.T, Xt.T))
        X /= np.linalg.norm(X, axis=0)
        m, n = X.shape
        ns, nt = len(Xs), len(Xt)
        e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
        C = len(np.unique(Ys))
        H = np.eye(n) - 1 / n * np.ones((n, n))

        M = 0
        Y_tar_pseudo = None
        for t in range(self.T):
            N = 0
            M0 = e * e.T * C
            if Y_tar_pseudo is not None and len(Y_tar_pseudo) == nt:
                for c in range(1, C + 1):
                    e = np.zeros((n, 1))
                    tt = Ys == c
                    e[np.where(tt == True)] = 1 / len(Ys[np.where(Ys == c)])
                    yy = Y_tar_pseudo == c
                    ind = np.where(yy == True)
                    inds = [item + ns for item in ind]
                    e[tuple(inds)] = -1 / len(Y_tar_pseudo[np.where(Y_tar_pseudo == c)])
                    e[np.isinf(e)] = 0
                    N = N + np.dot(e, e.T)
            M = M0 + N
            M = M / np.linalg.norm(M, 'fro')
            K = kernel(self.kernel_type, X, None, gamma=self.gamma)
            n_eye = m if self.kernel_type == 'primal' else n
            a, b = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot([K, H, K.T])
            w, V = scipy.linalg.eig(a, b)
            ind = np.argsort(w)
            A = V[:, ind[:self.dim]]
            Z = np.dot(A.T, K)
            Z /= np.linalg.norm(Z, axis=0)
            Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T

            clf = KNeighborsClassifier(n_neighbors=1)
            clf.fit(Xs_new, Ys.ravel())
            Y_tar_pseudo = clf.predict(Xt_new)
            acc = sklearn.metrics.accuracy_score(Yt, Y_tar_pseudo)
            list_acc.append(acc)
            print('JDA iteration [{}/{}]: Acc: {:.4f}'.format(t + 1, self.T, acc))
        return acc, Y_tar_pseudo, list_acc 
Example 68
Project: transferlearning   Author: jindongwang   File: MEDA.py    MIT License 4 votes vote down vote up
def fit_predict(self, Xs, Ys, Xt, Yt):
        '''
        Transform and Predict
        :param Xs: ns * n_feature, source feature
        :param Ys: ns * 1, source label
        :param Xt: nt * n_feature, target feature
        :param Yt: nt * 1, target label
        :return: acc, y_pred, list_acc
        '''
        gfk = GFK.GFK(dim=self.dim)
        _, Xs_new, Xt_new = gfk.fit(Xs, Xt)
        Xs_new, Xt_new = Xs_new.T, Xt_new.T
        X = np.hstack((Xs_new, Xt_new))
        n, m = Xs_new.shape[1], Xt_new.shape[1]
        C = len(np.unique(Ys))
        list_acc = []
        YY = np.zeros((n, C))
        for c in range(1, C + 1):
            ind = np.where(Ys == c)
            YY[ind, c - 1] = 1
        YY = np.vstack((YY, np.zeros((m, C))))
        YY[0, 1:] = 0

        X /= np.linalg.norm(X, axis=0)
        L = 0  # Graph Laplacian is on the way...
        knn_clf = KNeighborsClassifier(n_neighbors=1)
        knn_clf.fit(X[:, :n].T, Ys.ravel())
        Cls = knn_clf.predict(X[:, n:].T)
        K = kernel(self.kernel_type, X, X2=None, gamma=self.gamma)
        E = np.diagflat(np.vstack((np.ones((n, 1)), np.zeros((m, 1)))))
        for t in range(1, self.T + 1):
            mu = self.estimate_mu(Xs_new.T, Ys, Xt_new.T, Cls)
            e = np.vstack((1 / n * np.ones((n, 1)), -1 / m * np.ones((m, 1))))
            M = e * e.T * C
            N = 0
            for c in range(1, C + 1):
                e = np.zeros((n + m, 1))
                tt = Ys == c
                e[np.where(tt == True)] = 1 / len(Ys[np.where(Ys == c)])
                yy = Cls == c
                ind = np.where(yy == True)
                inds = [item + n for item in ind]
                e[tuple(inds)] = -1 / len(Cls[np.where(Cls == c)])
                e[np.isinf(e)] = 0
                N += np.dot(e, e.T)
            M = (1 - mu) * M + mu * N
            M /= np.linalg.norm(M, 'fro')
            left = np.dot(E + self.lamb * M + self.rho * L, K) + self.eta * np.eye(n + m, n + m)
            Beta = np.dot(np.linalg.inv(left), np.dot(E, YY))
            F = np.dot(K, Beta)
            Cls = np.argmax(F, axis=1) + 1
            Cls = Cls[n:]
            acc = np.mean(Cls == Yt.ravel())
            list_acc.append(acc)
            print('MEDA iteration [{}/{}]: mu={:.2f}, Acc={:.4f}'.format(t, self.T, mu, acc))
        return acc, Cls, list_acc 
Example 69
Project: Recession-Predictor   Author: tzhangwps   File: knn.py    MIT License 4 votes vote down vote up
def run_knn_cv(self):
        """
        Runs cross-validation by grid-searching through neighbor values.
        """
        from sklearn.neighbors import KNeighborsClassifier
        
        for neighbors in self.neighbors_range:
            all_predicted_probs = pd.DataFrame()
            all_testing_y = pd.Series()
            dates = []
            self.log_loss_weights = []
            for test_name in range(1, self.test_name + 1):
                self.cv_start = self.cv_params[test_name]['cv_start']
                self.cv_end = self.cv_params[test_name]['cv_end']
                self.get_cv_indices()
                training_x = self.full_df.loc[: (self.cv_indices[0] - 1),
                                              self.feature_names]
                self.training_y = self.full_df.loc[: (self.cv_indices[0] - 1),
                                                   self.output_name]
                scaler = StandardScaler()
                scaler.fit(training_x)
                training_x_scaled = scaler.transform(training_x)
                knn = KNeighborsClassifier(n_neighbors=neighbors, weights='distance',
                                           algorithm='auto', p=2, metric='minkowski')
                knn.fit(X=training_x_scaled, y=self.training_y)
            
                testing_x = self.full_df[self.feature_names].loc[self.cv_indices]
                testing_x_scaled = scaler.transform(testing_x)
                self.testing_y = self.full_df[self.output_name].loc[self.cv_indices]
                self.calculate_log_loss_weights()
                predicted_probs = pd.DataFrame(knn.predict_proba(X=testing_x_scaled))
                all_predicted_probs = all_predicted_probs.append(predicted_probs,
                                                                 ignore_index=True)
                all_testing_y = all_testing_y.append(self.testing_y)
                dates.extend(self.full_df['Dates'].loc[self.cv_indices])
                
            log_loss_score = log_loss(y_true=all_testing_y,
                                      y_pred=all_predicted_probs,
                                      sample_weight=self.log_loss_weights)
            if log_loss_score < self.best_cv_score:
                self.best_cv_score = log_loss_score
                self.optimal_neighbors = neighbors
                self.knn_cv_predictions['Dates'] = dates
                self.knn_cv_predictions['True'] = all_testing_y.to_list()
                self.knn_cv_predictions['Predicted'] = all_predicted_probs[1].to_list()
        
        self.knn_optimal_params['Neighbors'] = self.optimal_neighbors
        self.knn_optimal_params['Best CV Score'] = self.best_cv_score 
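The scale-then-fit pattern repeated per fold above maps directly onto a scikit-learn Pipeline, which re-fits the scaler inside each split automatically. A compact, self-contained sketch (synthetic data and the neighbor grid are illustrative, and TimeSeriesSplit only approximates the expanding windows this project builds by hand):

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=400, random_state=0)
pipe = Pipeline([('scale', StandardScaler()),
                 ('knn', KNeighborsClassifier(weights='distance'))])
grid = {'knn__n_neighbors': list(range(5, 55, 5))}
search = GridSearchCV(pipe, grid, scoring='neg_log_loss', cv=TimeSeriesSplit(n_splits=5))
search.fit(X, y)
print(search.best_params_, -search.best_score_)  # best neighbor count, log loss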
Example 70
Project: Recession-Predictor   Author: tzhangwps   File: knn.py    MIT License 4 votes vote down vote up
def run_knn_prediction(self):
        """
        Performs prediction on the hold-out sample.
        """
        from sklearn.neighbors import KNeighborsClassifier
        
        self.optimal_neighbors = self.knn_optimal_params['Neighbors']
        all_predicted_probs = pd.DataFrame()
        all_testing_y = pd.Series()
        dates = []
        self.log_loss_weights = []
        training_x = self.full_df.loc[: (self.pred_indices[0] - 1),
                                      self.feature_names]
        self.training_y = self.full_df.loc[: (self.pred_indices[0] - 1),
                                           self.output_name]
        scaler = StandardScaler()
        scaler.fit(training_x)
        training_x_scaled = scaler.transform(training_x)
        knn = KNeighborsClassifier(n_neighbors=self.optimal_neighbors,
                                   weights='distance',
                                   algorithm='auto', p=2, metric='minkowski')
        knn.fit(X=training_x_scaled, y=self.training_y)

        testing_x = self.full_df[self.feature_names].loc[self.pred_indices]
        testing_x_scaled = scaler.transform(testing_x)
        self.testing_y = self.full_df[self.output_name].loc[self.pred_indices]
        self.calculate_log_loss_weights()
        predicted_probs = pd.DataFrame(knn.predict_proba(X=testing_x_scaled))
        all_predicted_probs = all_predicted_probs.append(predicted_probs,
                                                         ignore_index=True)
        all_testing_y = all_testing_y.append(self.testing_y)
        dates.extend(self.full_df['Dates'].loc[self.pred_indices])
            
        self.knn_pred_error = log_loss(y_true=all_testing_y,
                                       y_pred=all_predicted_probs,
                                       sample_weight=self.log_loss_weights)
        self.knn_predictions['Dates'] = dates
        self.knn_predictions['True'] = all_testing_y.to_list()
        self.knn_predictions['Predicted'] = all_predicted_probs[1].to_list()

Example 71
Project: TPs   Author: DataMiningP7   File: tp3_solutions.py    MIT License 4 votes vote down vote up
def test_kneighbors_k1_3(X, y):
    """ Test the KNeighborsClassifier on X and y with k=1 and k=3 and return
     the best value.

    Args:
        X: the TF-IDF matrix where each line represents a document and each
           column represents a word, typically obtained by running
           transform_text() from the TP2.
        y: a binary vector where the i-th value indicates whether the i-th
           document is a spam or a ham.
    Returns:
        An int indicating the best value for k.
    """
    X_train, X_test, y_train, y_test = get_train_test_sets(X, y)

    knn_k1 = KNeighborsClassifier(n_neighbors=1)
    knn_k1.fit(X_train, y_train)

    # score = accuracy = % good observations
    score_k1 = knn_k1.score(X_test, y_test)
    print("KNeighbors with k=1:", score_k1)

    # Ex4.3
    knn_k3 = KNeighborsClassifier(n_neighbors=3)
    knn_k3.fit(X_train, y_train)

    # score = accuracy = % good observations
    score_k3 = knn_k3.score(X_test, y_test)
    print("KNeighbors with k=3:", score_k3)

    if score_k1 > score_k3:
        print("The best K is 1")
        return 1

    print("The best K is 3")
    return 3

# Ex4.4
# We can't modify our algorithm based on its results on our test set, otherwise
# we'll overfit. We need to use cross-validation to find the best K.
# See the 6th lecture, slides 29 and 44-45.

# Ex4.5 
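A minimal sketch of the cross-validated search for K that the Ex4.4 note recommends (synthetic data for illustration; any small grid of odd k values works):

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=300, random_state=0)
for k in (1, 3, 5, 7):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X, y, cv=5)
    print('k={}: mean accuracy {:.3f}'.format(k, scores.mean()))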
Example 72
Project: qsar-tools   Author: dkoes   File: trainclassifier.py    Apache License 2.0 4 votes vote down vote up
def trainmodels(m, x, y):
    '''For the model type m, train a classifier on x->y using built-in CV to
    parameterize.  Return both this model and an unfit model that can be used for CV.
    Note for PLS we cheat a little bit since there isn't a built-in CV trainer.
    '''
    
    if m == 'knn':
        #have to manually cross-validate to choose number of components
        kf = KFold(n_splits=3)
        bestscore = -10000
        besti = 0
        for i in range(1, 10):
            #try larger number of components until average CV perf decreases
            knn = KNeighborsClassifier(i)
            scores = []
            #TODO: parallelize below
            for train,test in kf.split(x):
                xtrain = x[train]
                ytrain = y[train]
                xtest = x[test]
                ytest = y[test]            
                knn.fit(xtrain,ytrain)
                score = scoremodel(knn,xtest,ytest)
                scores.append(score)
                
            ave = np.mean(scores)
            if ave > bestscore:
                bestscore = ave
                besti = i
        
        model = KNeighborsClassifier(besti) 
        model.fit(x,y)
        print "Best k = %d"%besti
        unfit = KNeighborsClassifier(besti)  #choose number of components using full data - iffy
    elif m == 'svm':
        C_range = np.logspace(-2, 3, 6)
        gamma_range = np.logspace(-9, 3, 7)
        param_grid = dict(gamma=gamma_range, C=C_range)
        grid = GridSearchCV(SVC(), param_grid=param_grid,n_jobs=-1)
        grid.fit(x,y)
        print "svm params",grid.best_params_
        model = grid.best_estimator_
        unfit = SVC(**grid.best_params_)
    elif m == 'logistic':
        model = LogisticRegressionCV(n_jobs=-1)
        model.fit(x,y)
        unfit = LogisticRegressionCV(n_jobs=-1)
    elif m == 'rf':
        #evalute different max depths
        parameters = {'max_depth': range(2,int(np.log2(len(x[0])))+1)}
        clf = GridSearchCV(RandomForestClassifier(), parameters, 'roc_auc',n_jobs=-1)
        clf.fit(x,y)
        model = clf.best_estimator_
        print "max_depth =",clf.best_params_['max_depth']
        unfit = RandomForestClassifier(**clf.best_params_)


    return (model,unfit) 
Example 73
Project: highdimensional-decision-boundary-plot   Author: tmadl   File: decisionboundaryplot.py    MIT License 4 votes vote down vote up
def __init__(
        self,
        estimator=KNeighborsClassifier(n_neighbors=10),
        dimensionality_reduction=PCA(n_components=2),
        acceptance_threshold=0.03,
        n_decision_boundary_keypoints=60,
        n_connecting_keypoints=None,
        n_interpolated_keypoints=None,
        n_generated_testpoints_per_keypoint=15,
        linear_iteration_budget=100,
        hypersphere_iteration_budget=300,
        verbose=True,
    ):
        if acceptance_threshold == 0:
            raise Warning(
                "A nonzero acceptance threshold is strongly recommended so the optimizer can finish in finite time"
            )
        if linear_iteration_budget < 2 or hypersphere_iteration_budget < 2:
            raise Exception("Invalid iteration budget")

        self.classifier = estimator
        self.dimensionality_reduction = dimensionality_reduction
        self.acceptance_threshold = acceptance_threshold

        if (
            n_decision_boundary_keypoints
            and n_connecting_keypoints
            and n_interpolated_keypoints
            and n_connecting_keypoints + n_interpolated_keypoints
            != n_decision_boundary_keypoints
        ):
            raise Exception(
                "n_connecting_keypoints and n_interpolated_keypoints must sum to n_decision_boundary_keypoints (set them to None to use calculated suggestions)"
            )

        self.n_connecting_keypoints = (
            n_connecting_keypoints
            if n_connecting_keypoints is not None
            else n_decision_boundary_keypoints // 3
        )
        self.n_interpolated_keypoints = (
            n_interpolated_keypoints
            if n_interpolated_keypoints is not None
            else n_decision_boundary_keypoints * 2 // 3
        )

        self.linear_iteration_budget = linear_iteration_budget
        self.n_generated_testpoints_per_keypoint = n_generated_testpoints_per_keypoint
        self.hypersphere_iteration_budget = hypersphere_iteration_budget
        self.verbose = verbose

        self.decision_boundary_points = []
        self.decision_boundary_points_2d = []
        self.X_testpoints = []
        self.y_testpoints = []
        self.background = []
        self.steps = 3

        self.hypersphere_max_retry_budget = 20
        self.penalties_enabled = True
        self.random_gap_selection = False 
Example 74
Project: Jetson-Nano-FaceRecognition   Author: step305   File: face_train.py    MIT License 4 votes vote down vote up
def train(train_dir, model_save_path=None, n_neighbors=None, knn_algo='ball_tree', verbose=False):
    """
    Trains a k-nearest neighbors classifier for face recognition.
    :param train_dir: directory that contains a sub-directory for each known person, with its name.
     (View in source code to see train_dir example tree structure)
     Structure:
        <train_dir>/
        ├── <person1>/
        │   ├── <somename1>.jpeg
        │   ├── <somename2>.jpeg
        │   ├── ...
        ├── <person2>/
        │   ├── <somename1>.jpeg
        │   └── <somename2>.jpeg
        └── ...
    :param model_save_path: (optional) path to save model on disk
    :param n_neighbors: (optional) number of neighbors to weigh in classification. Chosen automatically if not specified
    :param knn_algo: (optional) underlying data structure to support knn. Default is ball_tree
    :param verbose: verbosity of training
    :return: returns knn classifier that was trained on the given data.
    """
    X = []
    y = []

    # Loop through each person in the training set
    for class_dir in os.listdir(train_dir):
        if not os.path.isdir(os.path.join(train_dir, class_dir)):
            continue

        # Loop through each training image for the current person
        for img_path in image_files_in_folder(os.path.join(train_dir, class_dir)):
            image = face_recognition.load_image_file(img_path)
            face_bounding_boxes = face_recognition.face_locations(image)

            if len(face_bounding_boxes) != 1:
                # If there are no people (or too many people) in a training image, skip the image.
                if verbose:
                    print("Image {} not suitable for training: {}".format(img_path, "Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
            else:
                # Add face encoding for current image to the training set
                X.append(face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0])
                y.append(class_dir)

    # Determine how many neighbors to use for weighting in the KNN classifier
    if n_neighbors is None:
        n_neighbors = int(round(math.sqrt(len(X))))
        if verbose:
            print("Chose n_neighbors automatically:", n_neighbors)

    # Create and train the KNN classifier
    knn_clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, algorithm=knn_algo, weights='distance')
    knn_clf.fit(X, y)

    # Save the trained KNN classifier
    if model_save_path is not None:
        with open(model_save_path, 'wb') as f:
            pickle.dump(knn_clf, f)

    return knn_clf 
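The matching prediction step would unpickle this classifier and query it with fresh face encodings; a hedged sketch (the model and image paths are placeholders, and the face_recognition calls mirror the training loop above):

import pickle

import face_recognition

with open('trained_knn_model.clf', 'rb') as f:  # placeholder path
    knn_clf = pickle.load(f)

image = face_recognition.load_image_file('unknown_person.jpg')  # placeholder image
boxes = face_recognition.face_locations(image)
encodings = face_recognition.face_encodings(image, known_face_locations=boxes)
if encodings:
    print(knn_clf.predict(encodings))  # one predicted name per detected face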
Example 77
Project: techjam_online_audition   Author: supachaic   File: models.py    MIT License 4 votes vote down vote up
def train_stack(self, n_features, n_classes, 
                    estimators, tr_stack, y_train, 
                    val_stack, y_val, test_stack, score_name):

        tr_pred = []
        val_pred = []
        test_pred = []
        
        for estimator_name in estimators:        
            estimator, params = select_model(estimator_name, n_features, n_classes)
            
            # Train 2nd and 3rd layer with val_stack and evaluate with tr_stack
            train_kwargs = {
                            'estimator': estimator,
                            'params': params,
                            'X_train': val_stack,
                            'y_train': y_val,
                            'X_val': tr_stack,
                            'y_val': y_train,
                            'n_iter': 100,
                            'score_name': score_name,
                            'report': False,
                            'cv': 3,
                            'random_state': 42,
            }
        
            # Random train with stacked data and get best_params
            params, _, _ = random_model(**train_kwargs)
        
            if estimator_name == 'xgb':
                clf = XGBClassifier(**params)
            elif estimator_name == 'lgb':
                clf = LGBMClassifier(**params)
            elif estimator_name == 'rfo':
                clf = RandomForestClassifier(**params)
            elif estimator_name == 'log':
                clf = LogisticRegression(**params)
            elif estimator_name == 'svc':
                clf = SVC(**params)
            elif estimator_name == 'knn':
                clf = KNeighborsClassifier(**params)
            elif estimator_name == 'ada':
                clf = AdaBoostClassifier(**params)
            elif estimator_name == 'ext':
                clf = ExtraTreesClassifier(**params)

            clf.fit(val_stack, y_val)
            tr_prob = clf.predict_proba(tr_stack)
            val_prob = clf.predict_proba(val_stack)
            test_prob = clf.predict_proba(test_stack)
            
            tr_pred.append(tr_prob)
            val_pred.append(val_prob)
            test_pred.append(test_prob)
        return tr_pred, val_pred, test_pred 
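The if/elif ladder above can be collapsed into a dict of constructors, which also fails loudly on an unknown name; a stylistic sketch assuming the same classes the surrounding module already imports:

ESTIMATORS = {
    'xgb': XGBClassifier, 'lgb': LGBMClassifier,
    'rfo': RandomForestClassifier, 'log': LogisticRegression,
    'svc': SVC, 'knn': KNeighborsClassifier,
    'ada': AdaBoostClassifier, 'ext': ExtraTreesClassifier,
}
clf = ESTIMATORS[estimator_name](**params)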
Example 78
Project: dvb.datascience   Author: devolksbank   File: example.py    MIT License 4 votes vote down vote up
def run():
    display(HTML("<h1>Experiment started</h1>"))

    display(HTML("<h2>Running pipeline 1</h2>"))
    p = ds.Pipeline()
    p.addPipe("read", ds.data.CSVDataImportPipe())
    p.addPipe(
        "read2",
        ds.data.CSVDataImportPipe(),
        comment="Very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, long comment",
    )
    p.addPipe("numeric", ds.transform.FilterTypeFeatures(), [("read", "df", "df")])
    p.addPipe(
        "numeric2",
        ds.transform.FilterTypeFeatures(),
        [("read2", "df", "df")],
        comment="Very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, long comment",
    )
    p.addPipe(
        "boxplot", ds.eda.BoxPlot(), [("numeric", "df", "df"), ("numeric2", "df", "df")]
    )
    p.draw_design()

    display(HTML("<h2>Running pipeline 2</h2>"))
    from sklearn.neighbors import KNeighborsClassifier

    p = ds.Pipeline()
    p.addPipe(
        name="read",
        pipe=ds.data.SampleData("iris"),
        comment="Default sklearn iris dataset",
    )
    p.addPipe(
        "clf",
        ds.predictor.SklearnClassifier(KNeighborsClassifier, n_neighbors=3),
        [("read", "df", "df"), ("read", "df_metadata", "df_metadata")],
        comment="Short com.",
    )
    p.addPipe(
        "score",
        ds.score.ClassificationScore(),
        [
            ("clf", "predict", "predict"),
            ("clf", "predict_metadata", "predict_metadata"),
        ],
        comment="Very long comment to describe some very important stuff regarding this processing step",
    )
    p.fit_transform()

    display(HTML("<h1>Experiment done</h1>"))

    return p 
Example 79
Project: freesound-classification   Author: ex4sperans   File: utils.py    Apache License 2.0 4 votes vote down vote up
def plot_projection(vectors, labels, frames_per_example=3, newline=False):

    representations = []
    classes = []
    for sample, label in zip(vectors, labels):
        if sum(label) > 1:
            continue
        choices = np.random.choice(
            np.arange(len(sample)), replace=False,
            size=min(frames_per_example, len(sample)))
        representations.extend(sample[choices])
        classes.extend([label.tolist().index(1)] * len(choices))

    representations = np.array(representations)

    # fit a simple model to estimate the quality of the learned representations
    X_train, X_valid, y_train, y_valid = train_test_split(
        representations, classes, shuffle=False, test_size=0.2)

    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_valid = scaler.transform(X_valid)
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, y_train)

    score = accuracy_score(y_valid, model.predict(X_valid))
    if newline:
        print()
    print("Classification accuracy: {:.4f}".format(score))

    # plot projection
    embeddings = TSNE().fit_transform(representations)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    ax.scatter(embeddings[:, 0], embeddings[:, 1], c=classes, s=10)

    fig.canvas.draw()

    image = np.array(fig.canvas.renderer._renderer)

    plt.close()

    return image 
Example 80
Project: ISM2017   Author: ybayle   File: classify.py    MIT License 4 votes vote down vote up
def train_test(train, test, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if train is not None and test is not None:
        train_features = []
        test_features = []
        train_groundtruths = []
        test_groundtruths = []
        for elem in train:
            train_groundtruths.append(elem)
            train_features.append(train[elem])
        for elem in test:
            test_groundtruths.append(elem)
            test_features.append(test[elem])
    else:
        utils.print_error("No valid data provided.")
    res_dir = utils.create_dir(res_dir)
    classifiers = {
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        "KNeighbors":KNeighborsClassifier(1),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)

        print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average='weighted')))
        print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average='weighted')))
        print("F1 weighted\t" + str(f1_score(test_groundtruths, predictions, average='weighted')))
        # print("Precision weighted\t" + str(precision_score(test_groundtruths, predictions, average=None)))
        # print("Recall weighted\t" + str(recall_score(test_groundtruths, predictions, average=None)))
        # print("f1 weighted\t" + str(f1_score(test_groundtruths, predictions, average=None)))