Python sklearn.metrics.pairwise.linear_kernel() Examples

The following are 28 code examples of sklearn.metrics.pairwise.linear_kernel(), drawn from open-source projects. The originating project and source file are listed above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
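Before the project examples, a quick orientation: linear_kernel(X, Y) returns the matrix of pairwise dot products between the rows of X and the rows of Y, i.e. the same values as X @ Y.T. A minimal sketch with made-up numbers:

import numpy as np
from sklearn.metrics.pairwise import linear_kernel

X = np.array([[1.0, 2.0], [3.0, 4.0]])  # shape (n_samples_X, n_features)
Y = np.array([[0.5, 1.0]])              # shape (n_samples_Y, n_features)
K = linear_kernel(X, Y)                 # shape (2, 1): [[2.5], [5.5]]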
Example #1
Source File: TfidfRetriever.py    From SOQAL with MIT License
def get_topk_docs_scores(self, query):
        """
        :param query: question as string
        :return: the top k articles with each of their paragraphs separated by '###' as python list of strings
        """
        query = self.stem_string(query)
        query_tfidf = self.vectorizer.transform([query])
        similarities_raw = linear_kernel(self.tfidf_matrix, query_tfidf)
        similarities = []
        for s in similarities_raw:
            similarities.append(s[0])
        indices_sorted = np.argsort(similarities)[::-1]  # reverse order
        top_docs = []
        docs_scores = []
        i = 0
        while i < min(self.k, len(self.docs)):
            doc = self.docs[indices_sorted[i]]
            top_docs.append(doc)
            docs_scores.append(similarities[indices_sorted[i]])
            i += 1
        norm_cst = np.sum(np.asarray(docs_scores))
        docs_scores = np.asarray(docs_scores)
        docs_scores = docs_scores / norm_cst
        return top_docs, docs_scores 
Example #2
Source File: word2vec.py    From vec4ir with MIT License
def query(self, query, k=None, indices=None, return_scores=False, sort=True):
        centroids = self.centroids
        if centroids is None:
            raise NotFittedError
        if indices is not None:
            centroids = centroids[indices]
        q = self.vect.transform([query])
        q = normalize(q, copy=False)
        D = linear_kernel(q, centroids)  # l2 normalized, so linear kernel
        # ind = np.argsort(D[0, :])[::-1]  # similarity metric, so reverse
        # if k is not None:  # we could use our argtopk in the first place
        #     ind = ind[:k]
        # print(ind)
        ind = argtopk(D[0], k) if sort else np.arange(D.shape[1])
        if return_scores:
            return ind, D[0, ind]
        else:
            return ind 
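Note that argtopk is a vec4ir helper, not part of scikit-learn. Assuming it simply returns the indices of the k largest scores in descending order (all indices when k is None), a plain NumPy stand-in for that step would be:

import numpy as np

def topk_indices(scores, k=None):
    # indices of the k largest entries of a 1-D score array, best first
    # (hypothetical stand-in for vec4ir's argtopk)
    order = np.argsort(scores)[::-1]
    return order if k is None else order[:k]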
Example #3
Source File: test_utils.py    From numpy-ml with GNU General Public License v3.0
def test_linear_kernel(N=1):
    np.random.seed(12345)
    i = 0
    while i < N:
        # draw random shapes under new names so the trial count N is not clobbered
        n_rows = np.random.randint(1, 100)
        m_rows = np.random.randint(1, 100)
        n_feats = np.random.randint(1, 1000)

        X = np.random.rand(n_rows, n_feats)
        Y = np.random.rand(m_rows, n_feats)

        mine = LinearKernel()(X, Y)
        gold = sk_linear(X, Y)

        np.testing.assert_almost_equal(mine, gold)
        print("PASSED")
        i += 1 
Example #4
Source File: rank_verbs.py    From StrepHit with GNU General Public License v3.0
def get_similarity_scores(verb_token, vectorizer, tf_idf_matrix):
    """ Compute the cosine similarity score of a given verb token against the input corpus TF/IDF matrix.

        :param str verb_token: Surface form of a verb, e.g., *born*
        :param sklearn.feature_extraction.text.TfidfVectorizer vectorizer: Vectorizer
         used to transform verbs into vectors
        :param tf_idf_matrix: TF/IDF matrix of the input corpus
        :return: cosine similarity scores
        :rtype: ndarray
    """
    verb_token_vector = vectorizer.transform([verb_token])
    # Here the linear kernel is the same as the cosine similarity, but faster
    # cf. http://scikit-learn.org/stable/modules/metrics.html#cosine-similarity
    scores = linear_kernel(verb_token_vector, tf_idf_matrix)
    logger.debug("Corpus-wide TF/IDF scores for '%s': %s" % (verb_token, scores))
    logger.debug("Average TF/IDF score for '%s': %f" % (verb_token, average(scores)))
    return scores 
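The comment above leans on a property worth spelling out: TfidfVectorizer l2-normalizes its rows by default (norm='l2'), and on l2-normalized vectors the plain dot product already equals the cosine similarity, so linear_kernel gives the same matrix as cosine_similarity without re-normalizing. A small self-contained check with a made-up corpus:

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

corpus = ["the cat sat on the mat", "the dog chased the cat", "dogs and cats"]
tfidf = TfidfVectorizer().fit_transform(corpus)  # rows are l2-normalized by default
np.testing.assert_allclose(linear_kernel(tfidf, tfidf), cosine_similarity(tfidf, tfidf))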
Example #5
Source File: TfidfRetriever.py    From SOQAL with MIT License
def get_topk_docs(self, query):
        """
        :param query: question as string
        :return: the top k articles with each of their paragraphs separated by '###' as python list of strings
        """
        query_tfidf = self.vectorizer.transform([query])
        similarities_raw = linear_kernel(self.tfidf_matrix, query_tfidf)
        similarities = []
        for s in similarities_raw:
            similarities.append(s[0])
        indices_sorted = np.argsort(similarities)[::-1]  # reverse order
        top_docs = []
        i = 0
        while i < min(self.k, len(self.docs)):
            doc = self.docs[indices_sorted[i]]
            top_docs.append(doc)
            i += 1
        return top_docs 
Example #6
Source File: TfidfRetriever.py    From SOQAL with MIT License
def get_topk_docs(self, query):
        """
        :param query: question as string
        :return: the top k articles with each of their paragraphs separated by '###' as python list of strings
        """
        query = self.stem_string(query)
        query_tfidf = self.vectorizer.transform([query])
        similarities_raw = linear_kernel(self.tfidf_matrix, query_tfidf)
        similarities = []
        for s in similarities_raw:
            similarities.append(s[0])
        indices_sorted = np.argsort(similarities)[::-1]  # reverse order
        top_docs = []
        i = 0
        while i < min(self.k, len(self.docs)):
            doc = self.docs[indices_sorted[i]]
            top_docs.append(doc)
            i += 1
        return top_docs 
Example #7
Source File: tfidf.py    From moviebox with MIT License
def getSimilarities(id, recommendations, plotsTFIDF, verbose):
    start = time.time()
    # Generate cosine similarities
    cosineSimilarities = linear_kernel(plotsTFIDF, plotsTFIDF)
    # Get similarity scores for the input movie
    scores = list(enumerate(cosineSimilarities[id]))
    # Sort the scores in descending order
    sortedScores = sorted(scores, key=lambda x: x[1], reverse=True)
    # Get the number of the recommendations asked
    movieRecommendations = sortedScores[1:recommendations + 1]
    # Get the indices of the recommendation movies
    movieIndices = [i[0] for i in movieRecommendations]
    if (verbose):
        printGreen(
            '✔ Found Similarities\t{0:.1f}s'.format(time.time() - start))
    return movieIndices 
Example #8
Source File: WGGraph.py    From AbTextSumm with Mozilla Public License 2.0
def removeSimilarSentences(generatedSentences, originalSentences, stopwords, threshold=0.80):
    docs=[]
    for sent, sim in generatedSentences:
        docs.append(sent)
    docs.extend(originalSentences)
    
    bow_matrix = StemmedTfidfVectorizer(stop_words=stopwords).fit_transform(docs)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    #simMatrix = (normalized[0:] * normalized[0:].T).A
    simindices=[]
    #print 'Num original, ', len(originalSentences)
    for i in xrange(len(generatedSentences)):
        simGeneratedScores = linear_kernel(normalized[i], normalized[len(generatedSentences):]).flatten()
        if(max(simGeneratedScores) >= threshold):
            simindices.append(i)
    
    #print simindices
    finalGen=[sentence for k,sentence in enumerate(generatedSentences) if k not in simindices]
    #print len(generatedSentences), len(finalGen)
    return finalGen 
Example #9
Source File: komd.py    From MKLpy with GNU General Public License v3.0
def __kernel_definition__(self):
        """Select the kernel function
        
        Returns
        -------
        kernel : a callable relative to selected kernel
        """
        if hasattr(self.kernel, '__call__'):
            return self.kernel
        if self.kernel == 'rbf' or self.kernel is None:
            return lambda X, Y: rbf_kernel(X, Y, self.rbf_gamma)
        if self.kernel == 'poly':
            return lambda X, Y: polynomial_kernel(X, Y, degree=self.degree, gamma=self.rbf_gamma, coef0=self.coef0)
        if self.kernel == 'linear':
            return lambda X, Y: linear_kernel(X, Y)
        if self.kernel == 'precomputed':
            return lambda X, Y: X 
Example #10
Source File: kernel.py    From intro_ds with Apache License 2.0
def trainModel(data):
    """
    在模型里使用不同的核函数
    """
    kernel = [linear_kernel, polynomial_kernel, rbf_kernel, laplacian_kernel]
    res = []
    for i in kernel:
        model = SVC(kernel=i, coef0=1)
        model.fit(data[["x1", "x2"]], data["y"])
        res.append({"name": i.__name__, "result": model})
    return res 
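When a callable is passed as kernel, SVC calls it on the data to build the Gram matrix, which is why the pairwise functions can be dropped in directly here. A hypothetical invocation on synthetic data (the column names x1, x2, y follow the function's own expectations; trainModel is the function above):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
data = pd.DataFrame({"x1": rng.randn(40), "x2": rng.randn(40)})
data["y"] = (data["x1"] * data["x2"] > 0).astype(int)  # toy labels
for item in trainModel(data):
    print(item["name"], item["result"].score(data[["x1", "x2"]], data["y"]))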
Example #11
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X, y, train_idx, **kwargs):
        # K: kernel matrix
        #
        X = np.asarray(X)[train_idx]
        y = np.asarray(y)[train_idx]
        self._train_idx = np.asarray(train_idx)

        self.y = np.array(y)
        self.lmbda = kwargs.pop('lambda', 1.)
        self.kernel = kwargs.pop('kernel', 'rbf')
        if self.kernel == 'rbf':
            self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma', 1.))
        elif self.kernel == 'poly':
            self.K = polynomial_kernel(X=X,
                                       Y=X,
                                       coef0=kwargs.pop('coef0', 1),
                                       degree=kwargs.pop('degree', 3),
                                       gamma=kwargs.pop('gamma', 1.))
        elif self.kernel == 'linear':
            self.K = linear_kernel(X=X, Y=X)
        elif hasattr(self.kernel, '__call__'):
            self.K = self.kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        if not isinstance(self.K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self.K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))
        self.L = np.linalg.inv(self.K + self.lmbda * np.eye(len(X))) 
Example #12
Source File: test_pairwise.py    From twitter-stock-recommendation with MIT License
def test_kernel_symmetry():
    # Valid kernels should be symmetric
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
                   laplacian_kernel, sigmoid_kernel, cosine_similarity):
        K = kernel(X, X)
        assert_array_almost_equal(K, K.T, 15) 
Example #13
Source File: test_pairwise.py    From twitter-stock-recommendation with MIT License
def test_kernel_sparse():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    X_sparse = csr_matrix(X)
    for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
                   laplacian_kernel, sigmoid_kernel, cosine_similarity):
        K = kernel(X, X)
        K2 = kernel(X_sparse, X_sparse)
        assert_array_almost_equal(K, K2) 
Example #14
Source File: scorer.py    From entity2vec with Apache License 2.0
def similarity_function(vec1,vec2, similarity):
    
    #compute cosine similarity or other similarities

    v1 = np.array(vec1)

    v2 = np.array(vec2)

    if len(v1) * len(v2) == 0:  # either of the two vectors is empty
        global count
        count += 1

        return 0

    else:

        if similarity == 'cosine':

            return cosine_similarity([v1],[v2])[0][0] #returns a double array [[sim]]

        elif similarity == 'softmax':

            return np.exp(np.dot(v1,v2)) #normalization is useless for relative comparisons

        elif similarity == 'linear_kernel':
            return linear_kernel(v1,v2)[0][0]

        elif similarity == 'euclidean':
            return euclidean_distances(v1,v2)[0][0]
        else:
            raise NameError('Choose a valid similarity function') 
Example #15
Source File: doc2vec.py    From vec4ir with MIT License
def query(self, query, k=None, indices=None):
        if indices is not None:
            dvs = self.inferred_docvecs[indices]
        else:
            dvs = self.inferred_docvecs

        analyzed_query = self.analyzer(query)
        qv = self.model.infer_vector(analyzed_query).reshape(1, -1)
        qv = normalize(qv, copy=False)

        dists = linear_kernel(qv, dvs)[0]

        ind = argtopk(dists)

        return ind 
Example #16
Source File: base.py    From vec4ir with MIT License
def query(self, query, k=None, indices=None, return_scores=False, sort=True):
        if self._fit_X is None:
            raise NotFittedError
        q = super().transform([query])
        if indices is not None:
            fit_X = self._fit_X[indices]
        else:
            fit_X = self._fit_X
        # both fit_X and q are l2-normalized
        D = linear_kernel(q, fit_X)
        ind = argtopk(D[0], k) if sort else np.arange(D.shape[1])
        if return_scores:
            return ind, D[0,ind]
        else:
            return ind 
Example #17
Source File: test_pairwise.py    From twitter-stock-recommendation with MIT License
def test_linear_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = linear_kernel(X, X)
    # the diagonal of the 5x5 kernel matrix (K.flat[::6]) holds the squared norm of each row
    assert_array_almost_equal(K.flat[::6], [linalg.norm(x) ** 2 for x in X]) 
Example #18
Source File: boolean.py    From MKLpy with GNU General Public License v3.0
def monotone_conjunctive_kernel(X,Z=None,c=2):
    L = linear_kernel(X,Z)
    return binom(L,c) 
Example #19
Source File: multi_label.py    From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X, y, **kwargs):
        # K: kernel matrix
        super(QueryMultiLabelQUIRE, self).__init__(X, y)
        self.lmbda = kwargs.pop('lambda', 1.)
        self.kernel = kwargs.pop('kernel', 'rbf')
        if self.kernel == 'rbf':
            self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma', 1.))
        elif self.kernel == 'poly':
            self.K = polynomial_kernel(X=X,
                                       Y=X,
                                       coef0=kwargs.pop('coef0', 1),
                                       degree=kwargs.pop('degree', 3),
                                       gamma=kwargs.pop('gamma', 1.))
        elif self.kernel == 'linear':
            self.K = linear_kernel(X=X, Y=X)
        elif hasattr(self.kernel, '__call__'):
            self.K = self.kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        if not isinstance(self.K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self.K.shape != (len(X), len(X)):
            raise ValueError(
                'Kernel should have size (%d, %d)' % (len(X), len(X)))
        self._nsamples, self._nclass = self.y.shape
        self.L = np.linalg.pinv(self.K + self.lmbda * np.eye(len(X))) 
Example #20
Source File: test_pairwise.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_linear_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = linear_kernel(X, X)
    # the diagonal of the 5x5 kernel matrix (K.flat[::6]) holds the squared norm of each row
    assert_array_almost_equal(K.flat[::6], [linalg.norm(x) ** 2 for x in X]) 
Example #21
Source File: unit_tests.py    From MKLpy with GNU General Public License v3.0
def test_lambda(self):
		funcs = [pairwise_mk.linear_kernel, lambda X,Z : (X @ Z.T)**2]
		KLtr = [pairwise_mk.linear_kernel(self.Xtr), pairwise_mk.homogeneous_polynomial_kernel(self.Xtr)]
		KLte = [pairwise_mk.linear_kernel(self.Xte, self.Xtr), pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr)]
		KLtr_g = Lambda_generator(self.Xtr, kernels=funcs)
		KLte_g = Lambda_generator(self.Xte, self.Xtr, kernels=funcs)
		self.assertTrue(matNear(average(KLtr), average(KLtr_g)))
		self.assertTrue(matNear(average(KLte), average(KLte_g)))
		self.assertTrue(matNear(KLtr[1], KLtr_g[1]))
		self.assertTrue(matNear(KLte[0], KLte_g[0])) 
Example #22
Source File: unit_tests.py    From MKLpy with GNU General Public License v3.0
def test_kernel_normalization(self):
		K = self.X @ self.X.T
		Kn_torch = preprocessing.kernel_normalization(K)
		Kn_numpy = preprocessing.kernel_normalization(K.numpy())
		self.assertAlmostEqual(Kn_torch.max().item(), 1., places=6)
		self.assertAlmostEqual(Kn_torch.diag().min().item(), 1., places=6)
		self.assertEqual(Kn_torch.shape, (5,5))
		self.assertTrue(matNear(Kn_torch, Kn_numpy))
		self.assertEqual(type(Kn_torch), torch.Tensor)
		self.assertEqual(type(Kn_numpy), torch.Tensor)
		linear = pairwise_mk.linear_kernel(preprocessing.normalization(self.X))
		self.assertTrue(matNear(Kn_torch, linear, eps=1e-7)) 
Example #23
Source File: unit_tests.py    From MKLpy with GNU General Public License v3.0
def test_numpy(self):
		Xtr = self.Xtr.numpy()
		self.assertTrue(matNear(
			pairwise_mk.polynomial_kernel(Xtr, degree=4, gamma=0.1, coef0=2),
			pairwise_sk.polynomial_kernel(Xtr, degree=4, gamma=0.1, coef0=2)))
		self.assertTrue(matNear(
			pairwise_mk.linear_kernel(Xtr),
			pairwise_sk.linear_kernel(Xtr))) 
Example #24
Source File: unit_tests.py    From MKLpy with GNU General Public License v3.0
def test_HPK_train(self):
		Ktr = self.Xtr @ self.Xtr.T
		self.assertTrue(matNear(Ktr,pairwise_sk.linear_kernel(self.Xtr)))
		self.assertTrue(matNear(
			pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=4),
			pairwise_sk.polynomial_kernel(self.Xtr, degree=4, gamma=1, coef0=0)))
		self.assertTrue(matNear(
			pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=5),
			pairwise_sk.polynomial_kernel(self.Xtr, degree=5, gamma=1, coef0=0)))
		self.assertTrue(matNear(Ktr**3, pairwise_sk.polynomial_kernel(self.Xtr, degree=3, gamma=1, coef0=0)))
		self.assertTrue(matNear(
			pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, self.Xtr, degree=3),
			pairwise_sk.polynomial_kernel(self.Xtr, self.Xtr, degree=3, gamma=1, coef0=0))) 
Example #25
Source File: boolean.py    From MKLpy with GNU General Public License v3.0
def tanimoto_kernel(X, Z=None):
    if Z is None:
        Z = X
    L = linear_kernel(X, Z)
    xx = np.linalg.norm(X, axis=1) ** 2
    zz = np.linalg.norm(Z, axis=1) ** 2
    # standard Tanimoto similarity: <x, z> / (||x||^2 + ||z||^2 - <x, z>)
    return L / (xx[:, None] + zz[None, :] - L) 
Example #26
Source File: boolean.py    From MKLpy with GNU General Public License v3.0
def monotone_disjunctive_kernel(X, Z=None, d=2):
    if Z is None:
        Z = X
    L = linear_kernel(X, Z)
    n = X.shape[1]

    XX = np.dot(X.sum(axis=1).reshape(X.shape[0], 1), np.ones((1, Z.shape[0])))
    TT = np.dot(Z.sum(axis=1).reshape(Z.shape[0],1), np.ones((1,X.shape[0])))
    N_x = n - XX
    N_t = n - TT
    N_xz = N_x - TT.T + L

    N_d = binom(n, d)
    N_x = binom(N_x,d)
    N_t = binom(N_t,d)
    N_xz = binom(N_xz,d)
    return (N_d - N_x - N_t.T + N_xz) 
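Both boolean kernels are built on linear_kernel: for binary vectors the dot product counts the features active in both inputs, so binom(<x, z>, c) counts the monotone conjunctions of c features satisfied by both, and the inclusion-exclusion expression above counts the monotone disjunctions of d features satisfied by both. A small hand-checkable sketch, assuming binom is scipy.special.binom as the snippets imply:

import numpy as np

X = np.array([[1., 0., 1., 1.],
              [0., 1., 1., 0.]])
Z = np.array([[1., 1., 1., 0.]])
K_conj = monotone_conjunctive_kernel(X, Z, c=2)  # [[1.], [1.]] -- 2-feature conjunctions true in both
K_disj = monotone_disjunctive_kernel(X, Z, d=2)  # [[6.], [5.]] -- 2-feature disjunctions true in both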
Example #27
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X, y, mu=0.1, gamma=0.1, rho=1, lambda_init=0.1, lambda_pace=0.01, **kwargs):
        try:
            import cvxpy
            self._cvxpy = cvxpy
        except ImportError:
            raise ImportError("This method needs cvxpy to solve the QP problem. "
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "and install cvxpy manually before using it.")

        # K: kernel matrix
        super(QueryInstanceSPAL, self).__init__(X, y)
        ul = unique_labels(self.y)
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        if len(ul) == 2 and {1, -1} != set(ul):
            y_temp = np.array(copy.deepcopy(self.y))
            y_temp[y_temp == ul[0]] = 1
            y_temp[y_temp == ul[1]] = -1
            self.y = y_temp

        self._mu = mu
        self._gamma = gamma
        self._rho = rho
        self._lambda_init = lambda_init
        self._lambda_pace = lambda_pace
        self._lambda = lambda_init

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X))) 
Example #28
Source File: query_labels.py    From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X, y, beta=1000, gamma=0.1, rho=1, **kwargs):
        try:
            import cvxpy
            self._cvxpy = cvxpy
        except ImportError:
            raise ImportError("This method needs cvxpy to solve the QP problem. "
                              "Please refer to https://www.cvxpy.org/install/index.html "
                              "and install cvxpy manually before using it.")

        # K: kernel matrix
        super(QueryInstanceBMDR, self).__init__(X, y)
        ul = unique_labels(self.y)
        if len(ul) != 2:
            warnings.warn("This query strategy is implemented for binary classification only.",
                          category=FunctionWarning)
        if len(ul) == 2 and {1, -1} != set(ul):
            y_temp = np.array(copy.deepcopy(self.y))
            y_temp[y_temp == ul[0]] = 1
            y_temp[y_temp == ul[1]] = -1
            self.y = y_temp

        self._beta = beta
        self._gamma = gamma
        self._rho = rho

        # calc kernel
        self._kernel = kwargs.pop('kernel', 'rbf')
        if self._kernel == 'rbf':
            self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'poly':
            self._K = polynomial_kernel(X=X,
                                        Y=X,
                                        coef0=kwargs.pop('coef0', 1),
                                        degree=kwargs.pop('degree', 3),
                                        gamma=kwargs.pop('gamma_ker', 1.))
        elif self._kernel == 'linear':
            self._K = linear_kernel(X=X, Y=X)
        elif hasattr(self._kernel, '__call__'):
            self._K = self._kernel(X=np.array(X), Y=np.array(X))
        else:
            raise NotImplementedError

        if not isinstance(self._K, np.ndarray):
            raise TypeError('K should be an ndarray')
        if self._K.shape != (len(X), len(X)):
            raise ValueError(
                'kernel should have size (%d, %d)' % (len(X), len(X)))