Python sklearn.metrics.pairwise.linear_kernel() Examples
The following are 28
code examples of sklearn.metrics.pairwise.linear_kernel().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source File: TfidfRetriever.py From SOQAL with MIT License | 6 votes |
def get_topk_docs_scores(self, query):
    """Return the top-k documents for *query* with normalized scores.

    :param query: question as string
    :return: tuple ``(top_docs, docs_scores)`` — the top k articles (each
        with its paragraphs separated by '###') and their TF-IDF cosine
        similarities normalized to sum to 1
    """
    # BUG FIX: the stemmed string used to be assigned to a misspelled
    # variable ("qeury") and then discarded, so the raw, unstemmed query
    # was vectorized instead.
    query = self.stem_string(query)
    query_tfidf = self.vectorizer.transform([query])
    similarities_raw = linear_kernel(self.tfidf_matrix, query_tfidf)
    # Flatten the (n_docs, 1) similarity column into a plain list.
    similarities = [row[0] for row in similarities_raw]
    indices_sorted = np.argsort(similarities)[::-1]  # best match first
    top_docs = []
    docs_scores = []
    for idx in indices_sorted[:min(self.k, len(self.docs))]:
        top_docs.append(self.docs[idx])
        docs_scores.append(similarities[idx])
    docs_scores = np.asarray(docs_scores)
    # Normalize so the returned scores sum to 1.
    # NOTE(review): divides by zero if every similarity is 0 — confirm
    # callers guard against fully out-of-vocabulary queries.
    docs_scores = docs_scores / np.sum(docs_scores)
    return top_docs, docs_scores
Example #2
Source File: word2vec.py From vec4ir with MIT License | 6 votes |
def query(self, query, k=None, indices=None, return_scores=False, sort=True):
    """Rank the fitted centroids against *query*.

    :param query: raw query string
    :param k: number of results to keep when ``sort`` is True
    :param indices: optional subset of centroid rows to consider
    :param return_scores: also return the similarity of each hit
    :param sort: rank by similarity; otherwise return all indices in order
    """
    cents = self.centroids
    if cents is None:
        raise NotFittedError
    if indices is not None:
        cents = cents[indices]
    qvec = self.vect.transform([query])
    qvec = normalize(qvec, copy=False)
    # Rows are l2-normalized, so the linear kernel is cosine similarity.
    sims = linear_kernel(qvec, cents)
    if sort:
        ranking = argtopk(sims[0], k)
    else:
        ranking = np.arange(sims.shape[1])
    if return_scores:
        return ranking, sims[0, ranking]
    return ranking
Example #3
Source File: test_utils.py From numpy-ml with GNU General Public License v3.0 | 6 votes |
def test_linear_kernel(N=1):
    """Check LinearKernel against sklearn's linear kernel on N random cases.

    :param N: number of random trials to run

    BUG FIX: the original reassigned the loop bound ``N`` to a random
    matrix dimension inside the loop (``N = np.random.randint(...)``), so
    the number of trials was random instead of the requested ``N``.
    """
    np.random.seed(12345)
    for _ in range(N):
        n_rows = np.random.randint(1, 100)
        m_rows = np.random.randint(1, 100)
        n_cols = np.random.randint(1, 1000)
        X = np.random.rand(n_rows, n_cols)
        Y = np.random.rand(m_rows, n_cols)
        mine = LinearKernel()(X, Y)
        gold = sk_linear(X, Y)
        np.testing.assert_almost_equal(mine, gold)
        print("PASSED")
Example #4
Source File: rank_verbs.py From StrepHit with GNU General Public License v3.0 | 6 votes |
def get_similarity_scores(verb_token, vectorizer, tf_idf_matrix):
    """Compute the cosine similarity score of a given verb token against
    the input corpus TF/IDF matrix.

    :param str verb_token: Surface form of a verb, e.g., *born*
    :param sklearn.feature_extraction.text.TfidfVectorizer vectorizer:
        Vectorizer used to transform verbs into vectors
    :param tf_idf_matrix: corpus TF/IDF matrix to score against
    :return: cosine similarity score
    :rtype: ndarray
    """
    token_vector = vectorizer.transform([verb_token])
    # On l2-normalized TF/IDF rows the linear kernel equals cosine
    # similarity, but is faster
    # cf. http://scikit-learn.org/stable/modules/metrics.html#cosine-similarity
    scores = linear_kernel(token_vector, tf_idf_matrix)
    logger.debug("Corpus-wide TF/IDF scores for '%s': %s" % (verb_token, scores))
    logger.debug("Average TF/IDF score for '%s': %f" % (verb_token, average(scores)))
    return scores
Example #5
Source File: TfidfRetriever.py From SOQAL with MIT License | 6 votes |
def get_topk_docs(self, query):
    """Retrieve the k documents most similar to *query*.

    :param query: question as string
    :return: the top k articles, each with its paragraphs separated by
        '###', as a python list of strings
    """
    query_tfidf = self.vectorizer.transform([query])
    # Flatten the (n_docs, 1) similarity column into a plain list.
    sims = [row[0] for row in linear_kernel(self.tfidf_matrix, query_tfidf)]
    order = np.argsort(sims)[::-1]  # best match first
    n_top = min(self.k, len(self.docs))
    return [self.docs[order[j]] for j in range(n_top)]
Example #6
Source File: TfidfRetriever.py From SOQAL with MIT License | 6 votes |
def get_topk_docs(self, query):
    """Retrieve the k documents most similar to the (stemmed) query.

    :param query: question as string
    :return: the top k articles, each with its paragraphs separated by
        '###', as a python list of strings
    """
    # BUG FIX: the stemmed string used to be assigned to a misspelled
    # variable ("qeury") and discarded, so the raw query was vectorized.
    query = self.stem_string(query)
    query_tfidf = self.vectorizer.transform([query])
    similarities = [row[0] for row in linear_kernel(self.tfidf_matrix, query_tfidf)]
    indices_sorted = np.argsort(similarities)[::-1]  # best match first
    # Dead code removed: a never-filled `scores` list and an unused
    # normalization constant computed from it.
    top_docs = []
    for i in range(min(self.k, len(self.docs))):
        top_docs.append(self.docs[indices_sorted[i]])
    return top_docs
Example #7
Source File: tfidf.py From moviebox with MIT License | 6 votes |
def getSimilarities(id, recommendations, plotsTFIDF, verbose):
    """Return the indices of the movies most similar to movie *id*.

    :param id: row index of the input movie in the TF-IDF matrix
    :param recommendations: how many similar movies to return
    :param plotsTFIDF: TF-IDF matrix of all movie plots
    :param verbose: print timing information when truthy
    """
    start = time.time()
    # Cosine similarities between every pair of plot TF-IDF vectors.
    similarityMatrix = linear_kernel(plotsTFIDF, plotsTFIDF)
    # Pair each movie index with its similarity to the input movie.
    indexedScores = list(enumerate(similarityMatrix[id]))
    indexedScores.sort(key=lambda pair: pair[1], reverse=True)
    # Skip position 0 (the movie itself) and keep the requested amount.
    topMatches = indexedScores[1:recommendations + 1]
    movieIndices = [pair[0] for pair in topMatches]
    if (verbose):
        printGreen(
            '✔ Found Similarities\t{0:.1f}s'.format(time.time() - start))
    return movieIndices
Example #8
Source File: WGGraph.py From AbTextSumm with Mozilla Public License 2.0 | 6 votes |
def removeSimilarSentences(generatedSentences, originalSentences, stopwords,threshold=0.80,):
    """Drop generated sentences that are near-duplicates of the originals.

    Builds a TF-IDF representation over generated + original sentences and
    removes every generated pair whose best cosine similarity against the
    original set reaches ``threshold``.

    NOTE(review): uses ``xrange``, so this is Python 2 code — confirm the
    target interpreter before porting.
    """
    docs=[]
    # generatedSentences is a sequence of (sentence, score) pairs; only the
    # sentence text is indexed.
    for sent, sim in generatedSentences:
        docs.append(sent)
    docs.extend(originalSentences)
    bow_matrix = StemmedTfidfVectorizer(stop_words=stopwords).fit_transform(docs)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    #simMatrix = (normalized[0:] * normalized[0:].T).A
    simindices=[]
    #print 'Num original, ', len(originalSentences)
    for i in xrange(len(generatedSentences)):
        # Similarity of generated sentence i against the original sentences
        # (rows from len(generatedSentences) onward belong to originals).
        simGeneratedScores = linear_kernel(normalized[i], normalized[len(generatedSentences):]).flatten()
        if(max(simGeneratedScores) >= threshold):
            simindices.append(i)
    #print simindices
    # Keep only the generated (sentence, score) pairs not flagged as similar.
    finalGen=[sentence for k,sentence in enumerate(generatedSentences) if k not in simindices]
    #print len(generatedSentences), len(finalGen)
    return finalGen
Example #9
Source File: komd.py From MKLpy with GNU General Public License v3.0 | 6 votes |
def __kernel_definition__(self): """Select the kernel function Returns ------- kernel : a callable relative to selected kernel """ if hasattr(self.kernel, '__call__'): return self.kernel if self.kernel == 'rbf' or self.kernel == None: return lambda X,Y : rbf_kernel(X,Y,self.rbf_gamma) if self.kernel == 'poly': return lambda X,Y : polynomial_kernel(X, Y, degree=self.degree, gamma=self.rbf_gamma, coef0=self.coef0) if self.kernel == 'linear': return lambda X,Y : linear_kernel(X,Y) if self.kernel == 'precomputed': return lambda X,Y : X
Example #10
Source File: kernel.py From intro_ds with Apache License 2.0 | 5 votes |
def trainModel(data):
    """Fit SVC models using several different kernel functions.

    :param data: frame-like object with feature columns "x1", "x2" and
        label column "y"
    :return: list of dicts with the kernel's name and the fitted model
    """
    kernels = [linear_kernel, polynomial_kernel, rbf_kernel, laplacian_kernel]
    models = []
    for kernelFunc in kernels:
        svc = SVC(kernel=kernelFunc, coef0=1)
        svc.fit(data[["x1", "x2"]], data["y"])
        models.append({"name": kernelFunc.__name__, "result": svc})
    return models
Example #11
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, X, y, train_idx, **kwargs):
    """Precompute the kernel matrix K and the regularized inverse L.

    :param X: feature matrix; only the rows in ``train_idx`` are used
    :param y: labels aligned with ``X``
    :param train_idx: indices of the training instances
    :param kwargs: 'lambda' (regularizer, default 1.), 'kernel'
        ('rbf' | 'poly' | 'linear' | callable, default 'rbf'), plus the
        kernel hyper-parameters 'gamma', 'degree', 'coef0'
    """
    # K: kernel matrix
    X = np.asarray(X)[train_idx]
    y = np.asarray(y)[train_idx]
    self._train_idx = np.asarray(train_idx)
    self.y = np.array(y)
    self.lmbda = kwargs.pop('lambda', 1.)
    self.kernel = kwargs.pop('kernel', 'rbf')
    if self.kernel == 'rbf':
        self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma', 1.))
    elif self.kernel == 'poly':
        self.K = polynomial_kernel(X=X, Y=X,
                                   coef0=kwargs.pop('coef0', 1),
                                   degree=kwargs.pop('degree', 3),
                                   gamma=kwargs.pop('gamma', 1.))
    elif self.kernel == 'linear':
        self.K = linear_kernel(X=X, Y=X)
    elif hasattr(self.kernel, '__call__'):
        # Custom kernel callables receive the data as keyword arguments.
        self.K = self.kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError
    if not isinstance(self.K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self.K.shape != (len(X), len(X)):
        raise ValueError(
            'kernel should have size (%d, %d)' % (len(X), len(X)))
    # L = (K + lambda * I)^-1, used later by the query criterion.
    self.L = np.linalg.inv(self.K + self.lmbda * np.eye(len(X)))
Example #12
Source File: test_pairwise.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_kernel_symmetry():
    """Every valid kernel must produce a symmetric Gram matrix."""
    rng = np.random.RandomState(0)
    data = rng.random_sample((5, 4))
    kernels = (linear_kernel, polynomial_kernel, rbf_kernel,
               laplacian_kernel, sigmoid_kernel, cosine_similarity)
    for kern in kernels:
        gram = kern(data, data)
        assert_array_almost_equal(gram, gram.T, 15)
Example #13
Source File: test_pairwise.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_kernel_sparse():
    """Kernels must give identical results for dense and sparse inputs."""
    rng = np.random.RandomState(0)
    dense = rng.random_sample((5, 4))
    sparse = csr_matrix(dense)
    kernels = (linear_kernel, polynomial_kernel, rbf_kernel,
               laplacian_kernel, sigmoid_kernel, cosine_similarity)
    for kern in kernels:
        from_dense = kern(dense, dense)
        from_sparse = kern(sparse, sparse)
        assert_array_almost_equal(from_dense, from_sparse)
Example #14
Source File: scorer.py From entity2vec with Apache License 2.0 | 5 votes |
def similarity_function(vec1, vec2, similarity):
    """Compute the similarity between two vectors.

    :param vec1: first vector (sequence of floats)
    :param vec2: second vector (sequence of floats)
    :param similarity: one of 'cosine', 'softmax', 'linear_kernel',
        'euclidean'
    :return: similarity value as a float, or 0 when either vector is empty
    :raises NameError: for an unrecognized similarity name
    """
    v1 = np.array(vec1)
    v2 = np.array(vec2)
    if len(v1) * len(v2) == 0:  # any of the two is empty
        # NOTE(review): `count` must already exist at module level before
        # the first empty-vector call, otherwise this raises NameError —
        # confirm where it is initialized.
        global count
        count += 1
        return 0
    else:
        if similarity == 'cosine':
            return cosine_similarity([v1], [v2])[0][0]  # returns a double array [[sim]]
        elif similarity == 'softmax':
            return np.exp(np.dot(v1, v2))  # normalization is useless for relative comparisons
        elif similarity == 'linear_kernel':
            # BUG FIX: sklearn pairwise functions require 2-D inputs; the
            # 1-D arrays were passed directly (unlike the cosine branch).
            return linear_kernel([v1], [v2])[0][0]
        elif similarity == 'euclidean':
            # BUG FIX: same 2-D wrapping as above.
            return euclidean_distances([v1], [v2])[0][0]
        else:
            raise NameError('Choose a valid similarity function')
Example #15
Source File: doc2vec.py From vec4ir with MIT License | 5 votes |
def query(self, query, k=None, indices=None):
    """Rank the inferred document vectors against *query*.

    :param query: raw query string
    :param k: number of top documents to return
    :param indices: optional subset of document rows to consider
    :return: indices of the best-matching documents
    """
    if indices is not None:
        dvs = self.inferred_docvecs[indices]
    else:
        dvs = self.inferred_docvecs
    analyzed_query = self.analyzer(query)
    qv = self.model.infer_vector(analyzed_query).reshape(1, -1)
    qv = normalize(qv, copy=False)
    # Vectors are l2-normalized, so linear kernel == cosine similarity.
    dists = linear_kernel(qv, dvs)[0]
    # BUG FIX: `k` was accepted but never forwarded, so the full ranking
    # was always returned (cf. the sibling retrievers, which call
    # argtopk(D[0], k)).
    ind = argtopk(dists, k)
    return ind
Example #16
Source File: base.py From vec4ir with MIT License | 5 votes |
def query(self, query, k=None, indices=None, return_scores=False, sort=True):
    """Rank the fitted documents against *query*.

    :param query: raw query string
    :param k: number of results to keep when ``sort`` is True
    :param indices: optional subset of fitted rows to consider
    :param return_scores: also return the similarity of each hit
    :param sort: rank by similarity; otherwise return all indices in order
    """
    if self._fit_X is None:
        raise NotFittedError
    qvec = super().transform([query])
    docs = self._fit_X if indices is None else self._fit_X[indices]
    # Both qvec and the fitted rows are l2-normalized, so the linear
    # kernel is cosine similarity.
    sims = linear_kernel(qvec, docs)
    if sort:
        ranking = argtopk(sims[0], k)
    else:
        ranking = np.arange(sims.shape[1])
    if return_scores:
        return ranking, sims[0, ranking]
    return ranking
Example #17
Source File: test_pairwise.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_linear_kernel():
    """Diagonal of a linear-kernel Gram matrix holds each row's squared norm."""
    rng = np.random.RandomState(0)
    data = rng.random_sample((5, 4))
    gram = linear_kernel(data, data)
    expected = [linalg.norm(row) ** 2 for row in data]
    # gram.flat[::6] walks the main diagonal of the 5x5 matrix.
    assert_array_almost_equal(gram.flat[::6], expected)
Example #18
Source File: boolean.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def monotone_conjunctive_kernel(X, Z=None, c=2):
    """Monotone conjunctive kernel over binary data: counts, via binomial
    coefficients, the monotone conjunctions of ``c`` variables shared by
    each pair of examples."""
    dot_products = linear_kernel(X, Z)
    return binom(dot_products, c)
Example #19
Source File: multi_label.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, X, y, **kwargs):
    """Precompute the kernel matrix K and its regularized pseudo-inverse L.

    :param X: feature matrix
    :param y: multi-label matrix of shape (n_samples, n_classes)
    :param kwargs: 'lambda' (regularizer, default 1.), 'kernel'
        ('rbf' | 'poly' | 'linear' | callable, default 'rbf'), plus the
        kernel hyper-parameters 'gamma', 'degree', 'coef0'
    """
    # K: kernel matrix
    super(QueryMultiLabelQUIRE, self).__init__(X, y)
    self.lmbda = kwargs.pop('lambda', 1.)
    self.kernel = kwargs.pop('kernel', 'rbf')
    if self.kernel == 'rbf':
        self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma', 1.))
    elif self.kernel == 'poly':
        self.K = polynomial_kernel(X=X, Y=X,
                                   coef0=kwargs.pop('coef0', 1),
                                   degree=kwargs.pop('degree', 3),
                                   gamma=kwargs.pop('gamma', 1.))
    elif self.kernel == 'linear':
        self.K = linear_kernel(X=X, Y=X)
    elif hasattr(self.kernel, '__call__'):
        # Custom kernel callables receive the data as keyword arguments.
        self.K = self.kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError
    if not isinstance(self.K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self.K.shape != (len(X), len(X)):
        raise ValueError(
            'Kernel should have size (%d, %d)' % (len(X), len(X)))
    self._nsamples, self._nclass = self.y.shape
    # pinv (not inv) tolerates a singular K + lambda * I.
    self.L = np.linalg.pinv(self.K + self.lmbda * np.eye(len(X)))
Example #20
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_linear_kernel():
    """Each diagonal entry of linear_kernel(X, X) equals ||x_i||^2."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = linear_kernel(X, X)
    squared_norms = [linalg.norm(x) ** 2 for x in X]
    # Stride 6 over the flattened 5x5 matrix selects the diagonal.
    assert_array_almost_equal(K.flat[::6], squared_norms)
Example #21
Source File: unit_tests.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def test_lambda(self):
    """Lambda_generator must reproduce explicitly computed kernel lists."""
    funcs = [pairwise_mk.linear_kernel, lambda X, Z: (X @ Z.T) ** 2]
    KLtr = [pairwise_mk.linear_kernel(self.Xtr),
            pairwise_mk.homogeneous_polynomial_kernel(self.Xtr)]
    KLte = [pairwise_mk.linear_kernel(self.Xte, self.Xtr),
            pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr)]
    KLtr_g = Lambda_generator(self.Xtr, kernels=funcs)
    KLte_g = Lambda_generator(self.Xte, self.Xtr, kernels=funcs)
    # The averaged kernels must match for both train and test lists.
    for explicit, generated in ((KLtr, KLtr_g), (KLte, KLte_g)):
        self.assertTrue(matNear(average(explicit), average(generated)))
    # Spot-check individual entries of each list.
    self.assertTrue(matNear(KLtr[1], KLtr_g[1]))
    self.assertTrue(matNear(KLte[0], KLte_g[0]))
Example #22
Source File: unit_tests.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def test_kernel_normalization(self):
    """kernel_normalization must yield a unit-diagonal Gram matrix and
    agree with normalizing the data first."""
    gram = self.X @ self.X.T
    Kn_torch = preprocessing.kernel_normalization(gram)
    Kn_numpy = preprocessing.kernel_normalization(gram.numpy())
    # Normalized kernel: maximum value 1, diagonal all ones.
    self.assertAlmostEqual(Kn_torch.max().item(), 1., places=6)
    self.assertAlmostEqual(Kn_torch.diag().min().item(), 1., places=6)
    self.assertEqual(Kn_torch.shape, (5, 5))
    self.assertTrue(matNear(Kn_torch, Kn_numpy))
    # Both input types are expected to come back as torch tensors.
    self.assertEqual(type(Kn_torch), torch.Tensor)
    self.assertEqual(type(Kn_numpy), torch.Tensor)
    # Normalizing the rows first and taking the linear kernel must match.
    linear = pairwise_mk.linear_kernel(preprocessing.normalization(self.X))
    self.assertTrue(matNear(Kn_torch, linear, eps=1e-7))
Example #23
Source File: unit_tests.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def test_numpy(self):
    """MKLpy kernels must agree with sklearn's on numpy inputs."""
    data = self.Xtr.numpy()
    poly_mk = pairwise_mk.polynomial_kernel(data, degree=4, gamma=0.1, coef0=2)
    poly_sk = pairwise_sk.polynomial_kernel(data, degree=4, gamma=0.1, coef0=2)
    self.assertTrue(matNear(poly_mk, poly_sk))
    self.assertTrue(matNear(pairwise_mk.linear_kernel(data),
                            pairwise_sk.linear_kernel(data)))
Example #24
Source File: unit_tests.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def test_HPK_train(self):
    """Homogeneous polynomial kernels must equal sklearn's polynomial
    kernel with gamma=1, coef0=0."""
    gram = self.Xtr @ self.Xtr.T
    # Degree-1 case: the plain Gram matrix is the linear kernel.
    self.assertTrue(matNear(gram, pairwise_sk.linear_kernel(self.Xtr)))
    for deg in (4, 5):
        self.assertTrue(matNear(
            pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=deg),
            pairwise_sk.polynomial_kernel(self.Xtr, degree=deg, gamma=1, coef0=0)))
    # Element-wise cube of the Gram matrix is the degree-3 HPK.
    self.assertTrue(matNear(
        gram ** 3,
        pairwise_sk.polynomial_kernel(self.Xtr, degree=3, gamma=1, coef0=0)))
    # Explicit two-argument form must match as well.
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, self.Xtr, degree=3),
        pairwise_sk.polynomial_kernel(self.Xtr, self.Xtr, degree=3, gamma=1, coef0=0)))
Example #25
Source File: boolean.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def tanimoto_kernel(X, Z=None):
    """Tanimoto (Jaccard) kernel for binary data:

        K(x, z) = <x, z> / (||x||^2 + ||z||^2 - <x, z>)

    BUG FIX: the original body referenced an undefined name ``T`` and
    ended in ``pass``, so it always returned None.

    :param X: binary data matrix of shape (n, m)
    :param Z: optional second matrix of shape (p, m); defaults to X
    :return: kernel matrix of shape (n, p)
    """
    if Z is None:
        Z = X
    L = linear_kernel(X, Z)
    # Squared norms as a column (X) and a row (Z) for broadcasting.
    xx = (np.linalg.norm(X, axis=1) ** 2).reshape(-1, 1)
    zz = (np.linalg.norm(Z, axis=1) ** 2).reshape(1, -1)
    return L / (xx + zz - L)
Example #26
Source File: boolean.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def monotone_disjunctive_kernel(X, Z=None, d=2):
    """Monotone disjunctive kernel over binary data: counts, by
    inclusion-exclusion on binomial coefficients, the monotone
    disjunctions of ``d`` variables satisfied by both examples.

    BUG FIX: the declared default ``Z=None`` crashed on ``Z.shape``;
    mirror linear_kernel's convention by defaulting Z to X.

    :param X: binary data matrix of shape (n, m)
    :param Z: optional second matrix of shape (p, m); defaults to X
    :param d: arity of the disjunctions
    :return: kernel matrix of shape (n, p)
    """
    if Z is None:
        Z = X
    L = linear_kernel(X, Z)
    n = X.shape[1]
    # Row sums (number of ones) broadcast against the other matrix's rows.
    XX = np.dot(X.sum(axis=1).reshape(X.shape[0], 1), np.ones((1, Z.shape[0])))
    TT = np.dot(Z.sum(axis=1).reshape(Z.shape[0], 1), np.ones((1, X.shape[0])))
    N_x = n - XX            # zeros per X row
    N_t = n - TT            # zeros per Z row
    N_xz = N_x - TT.T + L   # joint zeros per pair
    N_d = binom(n, d)
    N_x = binom(N_x, d)
    N_t = binom(N_t, d)
    N_xz = binom(N_xz, d)
    # Inclusion-exclusion over the disjunctions falsified by each side.
    return (N_d - N_x - N_t.T + N_xz)
Example #27
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, X, y, mu=0.1, gamma=0.1, rho=1, lambda_init=0.1, lambda_pace=0.01, **kwargs):
    """Set up the SPAL query strategy: map labels to {1, -1}, store the
    trade-off parameters, and precompute the kernel matrix.

    :param X: feature matrix
    :param y: binary labels (remapped to {1, -1} if needed)
    :param mu, gamma, rho: strategy trade-off parameters
    :param lambda_init: initial self-paced regularizer
    :param lambda_pace: per-round increment of the regularizer
    :param kwargs: 'kernel' ('rbf' | 'poly' | 'linear' | callable,
        default 'rbf') plus 'gamma_ker', 'degree', 'coef0'
    """
    # cvxpy is an optional dependency used to solve the QP subproblem.
    try:
        import cvxpy
        self._cvxpy = cvxpy
    except:
        raise ImportError("This method need cvxpy to solve the QP problem."
                          "Please refer to https://www.cvxpy.org/install/index.html "
                          "install cvxpy manually before using.")
    # K: kernel matrix
    super(QueryInstanceSPAL, self).__init__(X, y)
    ul = unique_labels(self.y)
    # NOTE(review): recomputes unique_labels instead of reusing `ul`
    # (the sibling BMDR __init__ uses len(ul)) — same result, redundant work.
    if len(unique_labels(self.y)) != 2:
        warnings.warn("This query strategy is implemented for binary classification only.",
                      category=FunctionWarning)
    # Remap arbitrary binary labels onto {1, -1}.
    if len(ul) == 2 and {1, -1} != set(ul):
        y_temp = np.array(copy.deepcopy(self.y))
        y_temp[y_temp == ul[0]] = 1
        y_temp[y_temp == ul[1]] = -1
        self.y = y_temp
    self._mu = mu
    self._gamma = gamma
    self._rho = rho
    self._lambda_init = lambda_init
    self._lambda_pace = lambda_pace
    self._lambda = lambda_init
    # calc kernel
    self._kernel = kwargs.pop('kernel', 'rbf')
    if self._kernel == 'rbf':
        self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'poly':
        self._K = polynomial_kernel(X=X, Y=X,
                                    coef0=kwargs.pop('coef0', 1),
                                    degree=kwargs.pop('degree', 3),
                                    gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'linear':
        self._K = linear_kernel(X=X, Y=X)
    elif hasattr(self._kernel, '__call__'):
        # Custom kernel callables receive the data as keyword arguments.
        self._K = self._kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError
    if not isinstance(self._K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self._K.shape != (len(X), len(X)):
        raise ValueError(
            'kernel should have size (%d, %d)' % (len(X), len(X)))
Example #28
Source File: query_labels.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def __init__(self, X, y, beta=1000, gamma=0.1, rho=1, **kwargs):
    """Set up the BMDR query strategy: map labels to {1, -1}, store the
    trade-off parameters, and precompute the kernel matrix.

    :param X: feature matrix
    :param y: binary labels (remapped to {1, -1} if needed)
    :param beta, gamma, rho: strategy trade-off parameters
    :param kwargs: 'kernel' ('rbf' | 'poly' | 'linear' | callable,
        default 'rbf') plus 'gamma_ker', 'degree', 'coef0'
    """
    # cvxpy is an optional dependency used to solve the QP subproblem.
    try:
        import cvxpy
        self._cvxpy = cvxpy
    except:
        raise ImportError("This method need cvxpy to solve the QP problem."
                          "Please refer to https://www.cvxpy.org/install/index.html "
                          "install cvxpy manually before using.")
    # K: kernel matrix
    super(QueryInstanceBMDR, self).__init__(X, y)
    ul = unique_labels(self.y)
    if len(ul) != 2:
        warnings.warn("This query strategy is implemented for binary classification only.",
                      category=FunctionWarning)
    # Remap arbitrary binary labels onto {1, -1}.
    if len(ul) == 2 and {1, -1} != set(ul):
        y_temp = np.array(copy.deepcopy(self.y))
        y_temp[y_temp == ul[0]] = 1
        y_temp[y_temp == ul[1]] = -1
        self.y = y_temp
    self._beta = beta
    self._gamma = gamma
    self._rho = rho
    # calc kernel
    self._kernel = kwargs.pop('kernel', 'rbf')
    if self._kernel == 'rbf':
        self._K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'poly':
        self._K = polynomial_kernel(X=X, Y=X,
                                    coef0=kwargs.pop('coef0', 1),
                                    degree=kwargs.pop('degree', 3),
                                    gamma=kwargs.pop('gamma_ker', 1.))
    elif self._kernel == 'linear':
        self._K = linear_kernel(X=X, Y=X)
    elif hasattr(self._kernel, '__call__'):
        # Custom kernel callables receive the data as keyword arguments.
        self._K = self._kernel(X=np.array(X), Y=np.array(X))
    else:
        raise NotImplementedError
    if not isinstance(self._K, np.ndarray):
        raise TypeError('K should be an ndarray')
    if self._K.shape != (len(X), len(X)):
        raise ValueError(
            'kernel should have size (%d, %d)' % (len(X), len(X)))