Python sklearn.manifold.MDS Examples
The following are 30 code examples of sklearn.manifold.MDS().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.manifold, or try the search function.
Example #1
Source File: utils.py From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License | 7 votes |
def learn_manifold(manifold_type, feats, n_components=2):
    """Embed feature vectors with the selected manifold-learning estimator.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the estimator's ``fit_transform``.
    n_components : int, optional
        Dimension of the embedding (default 2).

    Returns
    -------
    The value returned by the selected estimator's ``fit_transform``.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.
    """
    if manifold_type == 'tsne':
        # Only t-SNE is seeded here; the other estimators keep their defaults.
        model = manifold.TSNE(n_components=n_components, random_state=0)
    elif manifold_type == 'isomap':
        model = manifold.Isomap(n_components=n_components)
    elif manifold_type == 'mds':
        model = manifold.MDS(n_components=n_components)
    elif manifold_type == 'spectral':
        model = manifold.SpectralEmbedding(n_components=n_components)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('wrong manifold type: %r' % (manifold_type,))
    return model.fit_transform(feats)
Example #2
Source File: utils.py From timeception with GNU General Public License v3.0 | 7 votes |
def learn_manifold(manifold_type, feats, n_components=2):
    """Embed feature vectors with the selected manifold-learning estimator.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the estimator's ``fit_transform``.
    n_components : int, optional
        Dimension of the embedding (default 2).

    Returns
    -------
    The value returned by the selected estimator's ``fit_transform``.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.
    """
    if manifold_type == 'tsne':
        # Only t-SNE is seeded here; the other estimators keep their defaults.
        model = manifold.TSNE(n_components=n_components, random_state=0)
    elif manifold_type == 'isomap':
        model = manifold.Isomap(n_components=n_components)
    elif manifold_type == 'mds':
        model = manifold.MDS(n_components=n_components)
    elif manifold_type == 'spectral':
        model = manifold.SpectralEmbedding(n_components=n_components)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('wrong manifold type: %r' % (manifold_type,))
    return model.fit_transform(feats)
Example #3
Source File: embedding.py From DeepDIVA with GNU Lesser General Public License v3.0 | 6 votes |
def mds(features, n_components=2):
    """
    Project feature vectors with multidimensional scaling.

    Parameters
    ----------
    features: numpy.ndarray
        input feature vectors to embed.
    n_components: int
        dimensionality of the embedding (2 by default).

    Returns
    -------
    embedding: numpy.ndarray
        result of ``MDS.fit_transform`` applied to ``features``.
    """
    # n_jobs=-1 is forwarded to MDS unchanged.
    reducer = MDS(n_components=n_components, n_jobs=-1)
    return reducer.fit_transform(features)
Example #4
Source File: compute_polar_coordinates.py From masif with Apache License 2.0 | 6 votes |
def compute_theta_all(D, vertices, faces, normals, idx, radius):
    """Compute the theta values for every row of ``D``.

    For each row ``i``, the non-zero entries of ``D[i]`` that are smaller
    than ``radius`` select a set of neighbours. The pairwise sub-matrix of
    those neighbours is embedded in 2D through ``call_mds`` and the result is
    handed to ``compute_thetas``.

    ``D`` is presumably a scipy sparse distance matrix (it is indexed like one
    and ``.todense()`` is called on its slices) -- confirm against the caller.

    Returns a list with one ``compute_thetas`` result per row of ``D``.
    """
    mds_model = MDS(n_components=2, n_init=1, max_iter=50,
                    dissimilarity='precomputed', n_jobs=10)
    thetas = []
    n_rows = D.shape[0]
    for i in range(n_rows):
        # Progress indicator every 100 rows.
        if i % 100 == 0:
            print(i)
        # Indices of the stored (non-zero) distances of row i.
        nonzero = D[i].nonzero()
        within = np.where(D[i][nonzero] < radius)[1]
        neigh_i = nonzero[1][within]
        # Dense pairwise distances restricted to the neighbours of i.
        pair_dist_i = D[neigh_i, :][:, neigh_i].todense()
        # 2D plane for all neighbours of i.
        plane_i = call_mds(mds_model, pair_dist_i)
        thetas.append(
            compute_thetas(plane_i, i, vertices, faces, normals, neigh_i, idx))
    return thetas
Example #5
Source File: clustering.py From anvio with GNU General Public License v3.0 | 6 votes |
def get_scaled_vectors(vectors, user_seed=None, n_components=12, normalize=True, progress=progress):
    """Scale ``vectors`` down to ``n_components`` dimensions with MDS.

    Parameters
    ----------
    vectors : array-like
        Input vectors; converted with ``np.array`` and, when ``normalize`` is
        true, passed through ``get_normalized_vectors`` first.
    user_seed : int or None
        Seed for the ``RandomState`` given to MDS. ``None`` leaves the
        generator unseeded; any other value, including ``0``, is used as the
        seed.
    n_components : int
        Number of MDS components.
    normalize : bool
        Whether to normalize the vectors before computing distances.
    progress : object
        Object whose ``update(str)`` method is called with status messages.

    Returns
    -------
    The ``embedding_`` attribute of the fitted MDS estimator.
    """
    # ``if user_seed:`` treated a seed of 0 as "no seed"; compare against None
    # so that 0 is honoured as a reproducible seed.
    if user_seed is not None:
        seed = np.random.RandomState(seed=user_seed)
    else:
        seed = np.random.RandomState()

    # FIXME: Make this optional:
    from sklearn.metrics.pairwise import euclidean_distances as d

    vectors = get_normalized_vectors(np.array(vectors)) if normalize else np.array(vectors)

    # Pairwise Euclidean distances, fed to MDS as a precomputed matrix.
    progress.update('Computing similarity matrix')
    similarities = d(vectors)

    progress.update('Scaling using %d components' % n_components)
    mds = manifold.MDS(n_components=n_components, max_iter=300, eps=1e-10,
                       random_state=seed, dissimilarity="precomputed", n_jobs=1)

    progress.update('Fitting')
    scaled_vectors = mds.fit(similarities).embedding_

    return scaled_vectors
Example #6
Source File: lens.py From sakmapper with MIT License | 6 votes |
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function

    ``lens`` is one of 'pca', 'mds' or 'neighbor'; ``dist`` is 'euclidean' or
    'correlation'. Extra keyword arguments are forwarded to the estimator.

    Raises ValueError for an unsupported ``n_dim``, ``dist``, ``lens`` or
    combination of them.
    """
    # Raising a plain string is a TypeError in Python 3; raise a real
    # exception so callers see the intended message.
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        # NOTE(review): D is a distance matrix but MDS is not told
        # dissimilarity='precomputed' unless the caller passes it in kwargs.
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    return df_lens
Example #7
Source File: mds.py From pyrsa with GNU Lesser General Public License v3.0 | 6 votes |
def mds(utv):
    """Embed a dissimilarity structure in two dimensions with MDS.

    ``utv`` is converted with ``scipy.spatial.distance.squareform``
    (presumably from a condensed upper-triangular vector to a square matrix
    -- confirm against the caller) and the result is given to MDS as a
    precomputed dissimilarity matrix. The random state is seeded with 3.

    Returns the array produced by ``MDS.fit_transform``.
    """
    rng = numpy.random.RandomState(seed=3)
    square_rdm = scipy.spatial.distance.squareform(utv)
    embedder = MDS(
        n_components=2,
        max_iter=3000,
        eps=1e-9,
        random_state=rng,
        dissimilarity="precomputed",
        n_jobs=1,
    )
    return embedder.fit_transform(square_rdm)
Example #8
Source File: utils.py From videograph with GNU General Public License v3.0 | 6 votes |
def learn_manifold(manifold_type, feats, n_components=2):
    """Embed feature vectors with the selected manifold-learning estimator.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the estimator's ``fit_transform``.
    n_components : int, optional
        Dimension of the embedding (default 2).

    Returns
    -------
    The value returned by the selected estimator's ``fit_transform``.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.
    """
    if manifold_type == 'tsne':
        # Only t-SNE is seeded here; the other estimators keep their defaults.
        model = manifold.TSNE(n_components=n_components, random_state=0)
    elif manifold_type == 'isomap':
        model = manifold.Isomap(n_components=n_components)
    elif manifold_type == 'mds':
        model = manifold.MDS(n_components=n_components)
    elif manifold_type == 'spectral':
        model = manifold.SpectralEmbedding(n_components=n_components)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('wrong manifold type: %r' % (manifold_type,))
    return model.fit_transform(feats)
Example #9
Source File: generate_qc_plots.py From panaroo with MIT License | 5 votes |
def generate_qc_plot(method, input_files, outdir, n_cpu, ref_db=None):
    """Produce the QC plot(s) selected by ``method``.

    ``method`` may be "mds", "ngenes", "ncontigs", "contam" or "all" (which
    runs every plot). The contamination plot needs ``ref_db``; without it a
    hint is printed and that plot is skipped. Always returns None.
    """

    def selected(name):
        # A plot runs when it is requested by name or when everything is.
        return method in [name, "all"]

    # MDS of the mash distances
    if selected("mds"):
        dist_mat, file_names = get_mash_dist(input_gffs=input_files,
                                             outdir=outdir,
                                             n_cpu=n_cpu,
                                             quiet=True)
        plot_MDS(dist_mat, file_names, outdir)

    # gene counts
    if selected("ngenes"):
        plot_ngenes(input_gffs=input_files, outdir=outdir)

    # contig counts
    if selected("ncontigs"):
        plot_ncontigs(input_gffs=input_files, outdir=outdir)

    # contamination scatter plot
    if not selected("contam"):
        return

    if ref_db is None:
        print(
            "No reference mash database given! Skipping contamination plot..."
        )
        print(("One can be downloaded from https://mash.readthedocs.io"
               "/en/latest/tutorials.html#screening-a-read-set-for"
               "-containment-of-refseq-genomes"))
        return

    mash_contam_file = get_mash_contam(input_gffs=input_files,
                                       mash_ref=ref_db,
                                       n_cpu=n_cpu,
                                       outdir=outdir)
    plot_mash_contam(mash_contam_file=mash_contam_file, outdir=outdir)
    return
Example #10
Source File: RnaseqqcReport.py From CGATPipelines with MIT License | 5 votes |
def __call__(self, track, slice=None):
    """Return the 2D MDS positions of the samples merged with their factors.

    Reads transcript TPM values through ``self.getAll``, pivots them to a
    transcript x sample table, computes Euclidean distances between samples,
    embeds them in two dimensions with MDS and joins the coordinates
    ("MD1", "MD2") with the rows of the ``factors`` table.

    ``track`` and ``slice`` are not used in this method.

    Returns a DataFrame indexed by "factor".
    """
    # remove WHERE when table cleaned up to remove header rows
    statement = (
        "SELECT transcript_id, TPM, sample_id FROM sailfish_transcripts")

    # fetch data
    df = pd.DataFrame.from_dict(self.getAll(statement))

    # pivot's arguments are keyword-only since pandas 2.0; keywords work on
    # older releases too.
    df = df.pivot(index='transcript_id', columns='sample_id')['TPM']

    # calculate dissimilarities (pairwise Euclidean distances between samples)
    similarities = euclidean_distances(df.transpose())

    # run MDS
    mds = manifold.MDS(n_components=2, max_iter=3000,
                       eps=1e-9, dissimilarity="precomputed", n_jobs=1)
    mds = mds.fit(similarities)
    pos = pd.DataFrame(mds.embedding_)

    pos.columns = ["MD1", "MD2"]
    pos['sample'] = df.columns

    factors_df = self.getDataFrame(
        "SELECT * FROM factors WHERE factor != 'genome'")

    merged_df = pd.merge(pos, factors_df,
                         left_on="sample", right_on="sample_id")
    return merged_df.reset_index().set_index("factor")
Example #11
Source File: MDS.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    """Build the MDS estimator from the user-supplied options.

    Parameters found under ``options['params']`` are converted with
    ``convert_params`` (``k`` is an alias for ``n_components``); any of
    ``max_iter``, ``n_init``, ``n_jobs``, ``eps`` and ``metric`` that the
    user did not provide receive the fallback values below.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    out_params = convert_params(
        raw_params,
        ints=['k', 'max_iter', 'n_init', 'n_jobs'],
        floats=['eps'],
        bools=['metric'],
        aliases={'k': 'n_components'}
    )

    # Fallbacks applied only for keys the user left out.
    fallbacks = (
        ('max_iter', 300),
        ('n_init', 4),
        ('n_jobs', 1),
        ('eps', 0.001),
        ('metric', True),
    )
    for key, value in fallbacks:
        out_params.setdefault(key, value)

    self.estimator = _MDS(**out_params)
Example #12
Source File: MDS_tensorflow.py From dimensionality_reduction_alo_codes with Apache License 2.0 | 5 votes |
def sklearn_mds(n_com=2):
    """Embed the digits dataset with MDS and show a scatter plot.

    Parameters
    ----------
    n_com : int
        Number of MDS components. The plot uses the first two columns of the
        embedding, so at least 2 components are needed.
    """
    # Load the dataset once instead of once per attribute.
    digits = load_digits()
    mds = MDS(n_components=n_com)
    data_2d = mds.fit_transform(digits.data)
    # Colour each point by its class label.
    plt.scatter(data_2d[:, 0], data_2d[:, 1], c=digits.target)
    plt.show()
Example #13
Source File: views.py From texta with GNU General Public License v3.0 | 5 votes |
def index(request):
    """Render ``conceptualiser.html`` for the requesting user.

    The template context carries the user's lexicons (each annotated with a
    ``size`` word count), the available projection methods, the datasets the
    user may access and the completed model-training tasks, newest first.
    """
    template = loader.get_template('conceptualiser.html')

    lexicons = []
    for lexicon in Lexicon.objects.all().filter(author=request.user):
        # Attach the number of words in the lexicon for display.
        lexicon.size = Word.objects.all().filter(lexicon=lexicon.id).count()
        lexicons.append(lexicon)

    methods = ["PCA", "TSNE", "MDS"]
    datasets = Datasets().get_allowed_datasets(request.user)

    trained = Task.objects.filter(task_type=TaskTypes.TRAIN_MODEL.value)
    completed = trained.filter(status__iexact=Task.STATUS_COMPLETED)
    language_models = completed.order_by('-pk')

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': lexicons,
        'methods': methods,
        'language_models': language_models,
        'allowed_datasets': datasets,
    }
    return HttpResponse(template.render(context, request))
Example #14
Source File: test_manifold.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    """The ModelFrame ``manifold`` accessor exposes the estimator classes themselves."""
    df = pdml.ModelFrame([])
    estimator_names = (
        'LocallyLinearEmbedding',
        'Isomap',
        'MDS',
        'SpectralEmbedding',
        'TSNE',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.manifold, name), getattr(manifold, name))
Example #15
Source File: static_wordmesh.py From word-mesh with MIT License | 5 votes |
def recreate_wordmesh(self):
    """
    Regenerate the word placement, e.g. when the current layout is not
    suitable. The wordmesh creation involves random steps, so every call
    produces a different-looking result.
    """
    # Mark the clustering as stale so the MDS step is executed again.
    setattr(self, '_flag_clustering_criteria', True)
    self._generate_embeddings()
Example #16
Source File: static_wordmesh.py From word-mesh with MIT License | 5 votes |
def _generate_embeddings(self):
    """Recompute the parts of the word layout whose flags are set.

    Three stages run depending on the ``_flag_*`` attributes: the initial
    embedding (MDS, optionally refined by TSNE), the Plotly visualizer and
    its bounding boxes, and the force-directed model that produces
    ``self.embeddings``. Afterwards the clustering, fontsize and fontcolor
    flags are cleared.
    """
    if self._flag_clustering_criteria:
        # Initial 2D layout from MDS on the precomputed matrix.
        mds = MDS(2, dissimilarity='precomputed').\
            fit_transform(self.similarity_matrix)
        self._initial_embeds = mds

        # TSNE, when selected, starts from the MDS layout.
        if self._clustering_algorithm == 'TSNE':
            self._initial_embeds = TSNE(metric='precomputed', perplexity=3, init=mds).\
                fit_transform(self.similarity_matrix)

    if self._flag_fontsizes or self._flag_fontcolors or self._flag_vis:
        # Rebuild the visualizer; its bounding boxes feed the force model.
        self._visualizer = PlotlyVisualizer(words = self.keywords, fontsizes_norm =self.fontsizes_norm, height = self._resolution[0], width = self._resolution[1], textcolors=self.fontcolors, bg_color = self._bg_color)
        self.bounding_box_width_height = self._visualizer.bounding_box_dimensions

    if self._flag_fontsizes or self._flag_clustering_criteria:
        bbd = self.bounding_box_width_height
        fdm = ForceDirectedModel(self._initial_embeds, bbd, num_iters=NUM_ITERS, apply_delaunay=self._apply_delaunay, delaunay_multiplier=self._delaunay_factor)
        self._force_directed_model = fdm
        self.embeddings = fdm.equilibrium_position()

    #turn off all flags
    # NOTE(review): _flag_vis is read above but not reset here -- confirm
    # whether that is intentional.
    self._flag_clustering_criteria = False
    self._flag_fontsizes = False
    self._flag_fontcolors = False
Example #17
Source File: plot.py From DensityPeakCluster with MIT License | 5 votes |
def plot_cluster(cluster):
    '''
    Plot scatter diagram for final points that using multi-dimensional scaling for data

    The pairwise distances stored in ``cluster.distances`` (keyed by 1-based
    id pairs ``(i, j)`` with ``i < j``) are copied into a symmetric matrix,
    embedded with MDS and plotted. The cluster labels are also written to
    ``./tmp.txt`` and the figure is saved as a JPEG.

    Args:
        cluster : DensityPeakCluster object
    '''
    logger.info("PLOT: cluster result, start multi-dimensional scaling")
    dp = np.zeros((cluster.max_id, cluster.max_id), dtype = np.float32)
    cls = []
    # ``range`` instead of ``xrange`` so the function also runs on Python 3.
    for i in range(1, cluster.max_id):
        for j in range(i + 1, cluster.max_id + 1):
            dp[i - 1, j - 1] = cluster.distances[(i, j)]
            dp[j - 1, i - 1] = cluster.distances[(i, j)]
        cls.append(cluster.cluster[i])
    # The outer loop stops one short of max_id; add the last point's label.
    cls.append(cluster.cluster[cluster.max_id])
    cls = np.array(cls, dtype = np.float32)
    # Context manager closes the file even if the write fails.
    with open(r'./tmp.txt', 'w') as fo:
        fo.write('\n'.join(map(str, cls)))
    mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1, dissimilarity='precomputed')
    dp_mds = mds.fit_transform(dp.astype(np.float64))
    logger.info("PLOT: end mds, start plot")
    plot_scatter_diagram(1, dp_mds[:, 0], dp_mds[:, 1], title='2D Nonclassical Multidimensional Scaling', style_list = cls)
    plt.savefig("2D Nonclassical Multidimensional Scaling.jpg")
Example #18
Source File: test_wren_holliday_sweep.py From VeRyPy with MIT License | 5 votes |
def fill_missing_pts_as_needed(points, D):
    """Return ``points``, or 2D coordinates derived from ``D`` when missing.

    When ``points`` is None, ``D`` is embedded with MDS as a precomputed
    dissimilarity matrix (fixed random_state of 42) and the rows of the
    embedding are returned as a list. Otherwise ``points`` is returned
    untouched.
    """
    if points is not None:
        return points

    # No point coordinates available, but the distance matrix is.
    from sklearn import manifold
    embedder = manifold.MDS(n_components=2,
                            dissimilarity='precomputed',
                            random_state=42)
    fitted = embedder.fit(D)
    return list(fitted.embedding_)
Example #19
Source File: cvrp_ops.py From VeRyPy with MIT License | 5 votes |
def generate_missing_coordinates(for_D):
    """Derive 2D point coordinates from the distance matrix ``for_D``.

    The matrix is embedded with MDS as a precomputed dissimilarity matrix
    (fixed random_state of 42).

    Returns a tuple ``(points, edge_weight_type)`` where ``points`` is the
    list of embedding rows and ``edge_weight_type`` is "EUC_2D" when
    ``_is_all_integer_array(for_D)`` is true and "EXACT_2D" otherwise.
    """
    from sklearn import manifold
    embedder = manifold.MDS(n_components=2,
                            dissimilarity='precomputed',
                            random_state=42)
    fitted = embedder.fit(for_D)
    points = list(fitted.embedding_)
    if _is_all_integer_array(for_D):
        edge_weight_type = "EUC_2D"
    else:
        edge_weight_type = "EXACT_2D"
    return points, edge_weight_type
Example #20
Source File: plot.py From DensityPeakCluster with MIT License | 5 votes |
def plot_cluster(cluster):
    '''
    Plot scatter diagram for final points that using multi-dimensional scaling for data

    The pairwise distances stored in ``cluster.distances`` (keyed by 1-based
    id pairs ``(i, j)`` with ``i < j``) are copied into a symmetric matrix,
    embedded with MDS and plotted. The cluster labels are also written to
    ``./tmp.txt``.

    Args:
        cluster : DensityPeakCluster object
    '''
    logger.info("PLOT: cluster result, start multi-dimensional scaling")
    dp = np.zeros((cluster.max_id, cluster.max_id), dtype=np.float32)
    cls = []
    # ``range`` instead of ``xrange`` so the function also runs on Python 3.
    for i in range(1, cluster.max_id):
        for j in range(i + 1, cluster.max_id + 1):
            dp[i - 1, j - 1] = cluster.distances[(i, j)]
            dp[j - 1, i - 1] = cluster.distances[(i, j)]
        cls.append(cluster.cluster[i])
    # The outer loop stops one short of max_id; add the last point's label.
    cls.append(cluster.cluster[cluster.max_id])
    cls = np.array(cls, dtype=np.float32)
    # Context manager closes the file even if the write fails.
    with open(r'./tmp.txt', 'w') as fo:
        fo.write('\n'.join(map(str, cls)))
    # Keep the whole version tuple: the previous code stored the result of a
    # comparison (a bool) and then subscripted it, which always raised.
    version = versiontuple(sklearn_version)
    if version[0] > 0 or version[1] > 14:
        # Versions after 0.14 get the distance matrix as precomputed input.
        mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1,
                           dissimilarity='precomputed')
    else:
        mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1)
    dp_mds = mds.fit_transform(dp)
    logger.info("PLOT: end mds, start plot")
    plot_scatter_diagram(1, dp_mds[:, 0], dp_mds[:, 1], title='cluster', style_list=cls)
Example #21
Source File: analysis.py From smallrnaseq with GNU General Public License v3.0 | 5 votes |
def do_mds(X):
    """Embed the rows of DataFrame ``X`` in three dimensions with MDS.

    The estimator is seeded with ``RandomState(seed=3)``. Returns a DataFrame
    of the embedding that shares the index of ``X``.
    """
    from sklearn import manifold
    rng = np.random.RandomState(seed=3)
    model = manifold.MDS(
        n_components=3,
        max_iter=3000,
        eps=1e-9,
        random_state=rng,
        n_jobs=1,
    )
    model.fit(X.values)
    return pd.DataFrame(model.embedding_, index=X.index)
Example #22
Source File: demo_mds.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 5 votes |
def plot_demo_1():
    """Plot MDS embeddings of a tiny 3-point example in 3D and 2D.

    The figure is written to ``mds_demo_1.png`` inside ``CHART_DIR``.
    """
    # Three constant rows (all 1s, all 2s, all 10s), one class per row.
    X = np.c_[np.ones(5), 2 * np.ones(5), 10 * np.ones(5)].T
    y = np.array([0, 1, 2])
    classes = np.unique(y)

    fig = pylab.figure(figsize=(10, 4))

    # Left panel: 3-component embedding on 3D axes.
    # NOTE(review): set_axis_bgcolor is unavailable in recent matplotlib
    # releases -- confirm the matplotlib version this file targets.
    ax = fig.add_subplot(121, projection='3d')
    ax.set_axis_bgcolor('white')
    embedded = manifold.MDS(n_components=3).fit_transform(X)
    for cl, color, marker in zip(classes, colors, markers):
        pts = embedded[y == cl]
        ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2],
                   c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 3 dimensions")
    ax.view_init(10, -15)

    # Right panel: 2-component embedding.
    embedded = manifold.MDS(n_components=2).fit_transform(X)
    ax = fig.add_subplot(122)
    for cl, color, marker in zip(classes, colors, markers):
        pts = embedded[y == cl]
        ax.scatter(pts[:, 0], pts[:, 1],
                   c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 2 dimensions")

    target = os.path.join(CHART_DIR, "mds_demo_1.png")
    pylab.savefig(target, bbox_inches="tight")
Example #23
Source File: generate_qc_plots.py From panaroo with MIT License | 4 votes |
def plot_MDS(dist_mat, file_names, outdir):
    """Project ``dist_mat`` to 2D with MDS and write coordinates and plots.

    Three files are created by appending to ``outdir`` as a plain string
    prefix: ``mds_coords.txt`` (tab-separated coordinates),
    ``MDS_mash_plot.png`` (static plot) and ``MDS_mash_plot.html``
    (interactive plot). Returns None.
    """
    # MDS projection of the precomputed distance matrix
    embedder = manifold.MDS(n_components=2, dissimilarity="precomputed")
    coords = embedder.fit(dist_mat).embedding_

    # coordinates table
    with open(outdir + "mds_coords.txt", "w") as contig_out:
        contig_out.write("sample\tcoordx\tcoordy\n")
        for sample, xy in zip(file_names, coords):
            contig_out.write("%s\t%s\t%s\n" % (sample, xy[0], xy[1]))

    # Common axis limits: data range widened by |5% quantile| on both sides.
    margin = abs(np.quantile(coords, 0.05))
    c_min = np.min(coords) - margin
    c_max = np.max(coords) + margin

    # static plot
    plt.style.use('ggplot')
    fig = plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1])
    plt.grid(True)
    plt.xlabel("MDS Dimension 1")
    plt.ylabel("MDS Dimension 2")
    plt.xlim((c_min, c_max))
    plt.ylim((c_min, c_max))
    plt.tight_layout()
    fig.savefig(outdir + "MDS_mash_plot.png")

    def axis_settings():
        # Both axes of the interactive plot share the same configuration.
        return dict(autorange=True,
                    showgrid=True,
                    zeroline=True,
                    showline=False,
                    ticks='',
                    range=[c_min, c_max],
                    type="linear",
                    exponentformat="SI",
                    showexponent='none',
                    showticklabels=True)

    # interactive plot
    trace = go.Scatter(x=coords[:, 0],
                       y=coords[:, 1],
                       text=file_names,
                       mode='markers')
    layout = go.Layout(xaxis=axis_settings(), yaxis=axis_settings())
    fig = go.Figure(data=[trace], layout=layout)
    offline.plot(fig,
                 filename=outdir + "MDS_mash_plot.html",
                 auto_open=False)

    return
Example #24
Source File: compute_polar_coordinates.py From masif with Apache License 2.0 | 4 votes |
def compute_theta_all_fast(D, vertices, faces, normals, idx, radius):
    """
        compute_theta_all_fast: compute the theta coordinate using an approximation.
        The approximation consists of taking only the inner radius/2 for the multidimensional
        scaling. Then, for points farther than radius/2, the shortest line to the center is used.
        This speeds up the method by a factor of about 100.

        Returns a list with one theta array per row of D. Two timing lines
        (time spent in MDS, and total loop time) are printed at the end.
    """
    mymds = MDS(n_components=2, n_init=1, eps=0.1, max_iter=50, dissimilarity='precomputed', n_jobs=1)
    all_theta = []
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for interval timing.
    start_loop = time.perf_counter()
    only_mds = 0.0
    for i in range(D.shape[0]):
        # Get the pairs of geodesic distances.
        neigh = D[i].nonzero()
        # We will run MDS on only a subset of the points.
        ii = np.where(D[i][neigh] < radius/2)[1]
        neigh_i = neigh[1][ii]
        pair_dist_i = D[neigh_i,:][:,neigh_i]
        pair_dist_i = pair_dist_i.todense()

        # Plane_i: the 2D plane for all neighbors of i
        tic = time.perf_counter()
        plane_i = call_mds(mymds, pair_dist_i)
        toc = time.perf_counter()
        only_mds += (toc - tic)

        # Compute the angles on the plane.
        theta = compute_thetas(plane_i, i, vertices, faces, normals, neigh_i, idx)

        # We now must assign angles to all points kk that are between radius/2 and radius from the center.
        kk = np.where(D[i][neigh] >= radius/2)[1]
        neigh_k = neigh[1][kk]
        dist_kk = D[neigh_k,:][:,neigh_i]
        dist_kk = dist_kk.todense()
        # Zero entries are replaced by infinity so argmin never selects them.
        dist_kk[dist_kk == 0] = float('inf')
        closest = np.argmin(dist_kk, axis=1)
        closest = np.squeeze(closest)
        closest = neigh_i[closest]
        # Each outer point inherits the angle of its closest inner point.
        theta[neigh_k] = theta[closest]

        all_theta.append(theta)
    end_loop = time.perf_counter()
    print('Only MDS time: {:.2f}s'.format(only_mds))
    print('Full loop time: {:.2f}s'.format(end_loop-start_loop))
    return all_theta
Example #25
Source File: spectral_outlier.py From ad_examples with MIT License | 4 votes |
def fit_transform(self, x_in):
    """Embed ``x_in`` with MDS on a graph-derived dissimilarity matrix.

    Steps, as implemented below:
      1. normalise the input and compute all pairwise ``euclidean_dist``;
      2. keep the ``self.n_neighbors`` smallest-distance indices per row;
      3. build a symmetric affinity matrix W with Gaussian weights
         ``exp(-dist**2 / self.k2)`` on those neighbour pairs;
      4. form S = D^-1/2 W D^-1/2 (D = row sums of W) and
         A = inv(I - self.alpha * S), rescaled to a unit diagonal;
      5. run MDS on ``|1 - A|`` as a precomputed dissimilarity matrix.

    Returns the array produced by ``MDS.fit_transform``.
    """
    n = nrow(x_in)
    x = normalize_and_center_by_feature_range(x_in)

    # Symmetric matrix of pairwise distances (upper triangle mirrored).
    dists = np.zeros(shape=(n, n), dtype=float)
    for i in range(n):
        for j in range(i, n):
            dists[i, j] = euclidean_dist(x[i, :], x[j, :])
            dists[j, i] = dists[i, j]

    logger.debug(dists[0, 0:10])

    # Per row: indices of the n_neighbors smallest distances. The row's own
    # index (distance 0) can be among them; it is skipped when filling W.
    neighbors = np.zeros(shape=(n, self.n_neighbors), dtype=int)
    for i in range(n):
        neighbors[i, :] = np.argsort(dists[i, :])[0:self.n_neighbors]

    logger.debug(neighbors[0, 0:10])

    W = np.zeros(shape=(n, n))
    for i in range(n):
        for j in neighbors[i, :]:
            # diagonal elements of W will be zeros
            if i != j:
                W[i, j] = np.exp(-(dists[i, j] ** 2) / self.k2)
                W[j, i] = W[i, j]

    D = W.sum(axis=1)
    # logger.debug(str(list(D[0:10])))

    # Symmetric normalisation of W by the inverse square root of the degrees.
    iDroot = np.diag(np.sqrt(D) ** (-1))

    S = iDroot.dot(W.dot(iDroot))
    # logger.debug("S: %s" % str(list(S[0, 0:10])))

    B = np.eye(n) - self.alpha * S
    # logger.debug("B: %s" % str(list(B[0, 0:10])))

    A = np.linalg.inv(B)
    # Rescale A so that its diagonal becomes 1.
    tdA = np.diag(np.sqrt(np.diag(A)) ** (-1))
    A = tdA.dot(A.dot(tdA))
    # logger.debug("A: %s" % str(list(A[0, 0:10])))

    d = 1 - A
    # logger.debug("d: %s" % str(list(d[0, 0:10])))
    # logger.debug("min(d): %f, max(d): %f" % (np.min(d), np.max(d)))

    mds = manifold.MDS(self.n_components, metric=self.metric, dissimilarity='precomputed')
    # using abs below because some zeros are represented as -0; other values are positive.
    embedding = mds.fit_transform(np.abs(d))
    return embedding
Example #26
Source File: visualization.py From TensorFlow_DCIGN with MIT License | 4 votes |
def visualize_encodings(encodings, file_name=None, grid=None, skip_every=999, fast=False, fig=None, interactive=False):
    """Plot several 2D and 3D manifold projections of ``encodings``.

    The encodings first go through ``manual_pca``. If the result has at most
    3 columns it is plotted directly via ``print_data_only`` and that call's
    result is returned. Otherwise the first 720 rows are projected with LLE,
    MDS and TSNE variants, one subplot per projection, followed by
    ``visualize_data_same`` on the last four plot positions. The figure is
    saved with ``save_fig`` unless ``interactive`` is true.

    ``fast`` is not used in this function.
    """
    encodings = manual_pca(encodings)
    if encodings.shape[1] <= 3:
        return print_data_only(encodings, file_name, fig=fig, interactive=interactive)

    encodings = encodings[0:720]
    # Despite the names, these are square pairwise-distance matrices
    # (Euclidean and cosine) used as precomputed input for MDS.
    hessian_euc = dist.squareform(dist.pdist(encodings[0:720], 'euclidean'))
    hessian_cos = dist.squareform(dist.pdist(encodings[0:720], 'cosine'))
    grid = (3, 4) if grid is None else grid
    project_ops = []

    # (title, estimator) pairs; the title encodes the target dimension
    # ("N:2"/"N:3") and, for MDS, the metric -- both are parsed again below.
    n = 2
    project_ops.append(("LLE ltsa N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
    project_ops.append(("LLE modified N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
    project_ops.append(('MDS euclidean N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
    project_ops.append(("TSNE 30/2000 N:%d" % n, TSNE(perplexity=30, n_components=n, init='pca', n_iter=2000)))
    n = 3
    project_ops.append(("LLE ltsa N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
    project_ops.append(("LLE modified N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
    project_ops.append(('MDS euclidean N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
    project_ops.append(('MDS cosine N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))

    # Subplot positions for 12 slots; after every (skip_every - 1) slots one
    # grid position is skipped.
    plot_places = []
    for i in range(12):
        u, v = int(i / (skip_every - 1)), i % (skip_every - 1)
        j = v + u * skip_every + 1
        plot_places.append(j)

    fig = get_figure(fig)
    fig.set_size_inches(fig.get_size_inches()[0] * grid[0] / 1., fig.get_size_inches()[1] * grid[1] / 2.0)

    # NOTE: the loop variable ``manifold`` is a local estimator instance.
    for i, (name, manifold) in enumerate(project_ops):
        is3d = 'N:3' in name
        try:
            if is3d:
                subplot = plt.subplot(grid[0], grid[1], plot_places[i], projection='3d')
            else:
                subplot = plt.subplot(grid[0], grid[1], plot_places[i])
            # Estimators flagged by _needs_hessian get a distance matrix
            # (cosine when the title says so); the others get raw encodings.
            data_source = encodings if not _needs_hessian(manifold) else \
                (hessian_cos if 'cosine' in name else hessian_euc)
            projections = manifold.fit_transform(data_source)
            scatter(subplot, projections, is3d, _build_radial_colors(len(data_source)))
            subplot.set_title(name)
        except:
            # Best effort: a failing projection is reported and skipped.
            # NOTE(review): the bare except also catches KeyboardInterrupt
            # and SystemExit -- confirm that is intended.
            print(name, "Unexpected error: ", sys.exc_info()[0], sys.exc_info()[1] if len(sys.exc_info()) > 1 else '')

    visualize_data_same(encodings, grid=grid, places=plot_places[-4:])
    if not interactive:
        save_fig(file_name, fig)
    ut.print_time('visualization finished')
Example #27
Source File: server.py From Seq2Seq-Vis with Apache License 2.0 | 4 votes |
def get_close_words(**request):
    """Return the words whose embeddings are closest to the requested word.

    Expected request keys:
      * ``loc`` -- "src" (encoder embeddings) or anything else (decoder);
      * ``limit`` -- number of neighbours to return;
      * ``p_method`` -- key into ``P_METHODS`` or "none" for no projection;
      * ``in`` -- the query word.

    Closeness is cosine similarity between the query vector and every row of
    the embedding matrix. The ``limit`` highest-scoring rows are returned in
    ascending score order.

    Returns a dict with ``word`` (tokens), ``score`` (similarities) and
    ``pos`` (projected coordinates as nested lists, or an empty list when
    ``p_method`` is "none").
    """
    current_project = list(projects.values())[0]  # type: S2SProject
    loc = request['loc']  # "src" or "tgt"
    limit = request['limit']
    p_method = request["p_method"]
    t2i = current_project.dicts['t2i'][loc]
    i2t = current_project.dicts['i2t'][loc]

    if loc == 'src':
        embeddings = current_project.embeddings[
            'encoder']  # TODO: change !!
    else:
        embeddings = current_project.embeddings['decoder']

    word = request['in']
    my_vec = embeddings[t2i[word]]

    matrix = embeddings[:]
    matrix_norms = current_project.cached_norm(loc, matrix)

    # Cosine similarity: dot product divided by the product of the norms.
    dotted = matrix.dot(my_vec)
    vector_norm = np.sqrt(np.sum(my_vec * my_vec))
    matrix_vector_norms = np.multiply(matrix_norms, vector_norm)
    neighbors = np.divide(dotted, matrix_vector_norms)

    neighbour_ids = np.argsort(neighbors)[-limit:].tolist()
    names = [i2t[x] for x in neighbour_ids]

    # projection methods: MDS, PCA, tSNE -- all with standard params
    # Convert to a list here: with p_method == "none" there is no array, and
    # calling .tolist() on the empty list used to raise AttributeError.
    positions = []
    if p_method != "none":
        positions = P_METHODS[p_method].fit_transform(
            matrix[neighbour_ids, :]).tolist()

    return {'word': names,
            # 'word_vector': matrix[neighbour_ids, :].tolist(),
            'score': neighbors[neighbour_ids].tolist(),
            'pos': positions
            }
Example #28
Source File: document_clustering.py From text-analytics-with-python with Apache License 2.0 | 4 votes |
def plot_clusters(num_clusters, feature_matrix, cluster_data, movie_data, plot_size=(16,8)):
    """Plot the clustered items in 2D using MDS on cosine distances.

    Parameters
    ----------
    num_clusters :
        Not used in this function.
    feature_matrix :
        Feature matrix handed to ``cosine_similarity``.
    cluster_data : dict
        Maps cluster number to a dict with a ``'key_features'`` list; the
        first five features become the legend label.
    movie_data : DataFrame
        Must provide ``'Cluster'`` and ``'Title'`` columns, row-aligned with
        ``feature_matrix``.
    plot_size : tuple
        Figure size passed to ``plt.subplots``.
    """
    # generate random color for clusters
    def generate_random_color():
        color = '#%06x' % random.randint(0, 0xFFFFFF)
        return color

    # define markers for clusters
    markers = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd']
    # build cosine distance matrix
    cosine_distance = 1 - cosine_similarity(feature_matrix)
    # dimensionality reduction using MDS
    mds = MDS(n_components=2, dissimilarity="precomputed",
              random_state=1)
    # get coordinates of clusters in new low-dimensional space
    plot_positions = mds.fit_transform(cosine_distance)
    x_pos, y_pos = plot_positions[:, 0], plot_positions[:, 1]
    # build cluster plotting data
    cluster_color_map = {}
    cluster_name_map = {}
    for cluster_num, cluster_details in cluster_data.items():
        # assign cluster features to unique label
        cluster_color_map[cluster_num] = generate_random_color()
        cluster_name_map[cluster_num] = ', '.join(cluster_details['key_features'][:5]).strip()
    # map each unique cluster label with its coordinates and movies
    cluster_plot_frame = pd.DataFrame({'x': x_pos,
                                       'y': y_pos,
                                       'label': movie_data['Cluster'].values.tolist(),
                                       'title': movie_data['Title'].values.tolist()
                                       })
    grouped_plot_frame = cluster_plot_frame.groupby('label')
    # set plot figure size and axes
    fig, ax = plt.subplots(figsize=plot_size)
    ax.margins(0.05)
    # plot each cluster using co-ordinates and movie titles
    for cluster_num, cluster_frame in grouped_plot_frame:
        # Clusters beyond the marker list get a randomly chosen marker.
        marker = markers[cluster_num] if cluster_num < len(markers) \
            else np.random.choice(markers, size=1)[0]
        ax.plot(cluster_frame['x'], cluster_frame['y'],
                marker=marker, linestyle='', ms=12,
                label=cluster_name_map[cluster_num],
                color=cluster_color_map[cluster_num], mec='none')
        ax.set_aspect('auto')
        # NOTE(review): recent matplotlib expects booleans rather than the
        # strings 'off' here -- confirm against the targeted version.
        ax.tick_params(axis= 'x', which='both', bottom='off', top='off',
                       labelbottom='off')
        ax.tick_params(axis= 'y', which='both', left='off', top='off',
                       labelleft='off')
    fontP = FontProperties()
    fontP.set_size('small')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.01), fancybox=True,
              shadow=True, ncol=5, numpoints=1, prop=fontP)
    #add labels as the film titles
    # DataFrame.ix was removed in pandas 1.0; iterate the columns directly.
    for x, y, title in zip(cluster_plot_frame['x'],
                           cluster_plot_frame['y'],
                           cluster_plot_frame['title']):
        ax.text(x, y, title, size=8)
    # show the plot
    plt.show()
Example #29
Source File: plot_gromov_barycenter.py From POT with MIT License | 4 votes |
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
    """
    Returns an interpolated point cloud following the dissimilarity matrix C
    using SMACOF multidimensional scaling (MDS) in specific dimensioned
    target space

    Parameters
    ----------
    C : ndarray, shape (ns, ns)
        dissimilarity matrix
    dim : int
          dimension of the targeted space
    max_iter :  int
        Maximum number of iterations of the SMACOF algorithm for a single run
    eps : float
        relative tolerance w.r.t stress to declare converge

    Returns
    -------
    npos : ndarray, shape (R, dim)
           Embedded coordinates of the interpolated point cloud (defined with
           one isometry)
    """

    rng = np.random.RandomState(seed=3)

    # First pass: initial embedding. ``eps`` is now forwarded instead of
    # being silently replaced by a hard-coded 1e-9 (the default is the same).
    # NOTE(review): this first estimator receives no random_state, so its
    # result is not seeded -- confirm whether ``rng`` was meant to be used here.
    mds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity='precomputed',
        n_init=1)
    pos = mds.fit(C).embedding_

    # Second pass: refinement started from ``pos``. The target dimension
    # follows ``dim`` rather than a hard-coded 2, matching the shape of the
    # initial positions.
    nmds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity="precomputed",
        random_state=rng,
        n_init=1)
    npos = nmds.fit_transform(C, init=pos)

    return npos


##############################################################################
# Data preparation
# ----------------
#
# The four distributions are constructed from 4 simple images
Example #30
Source File: demo_mds.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 4 votes |
def plot_iris_mds():
    """Plot MDS and PCA embeddings of the Iris data set in 3D and 2D.

    Two figures are written to ``CHART_DIR``: ``mds_demo_iris.png`` and
    ``pca_demo_iris.png``. Each has a 3D panel on the left and a 2D panel on
    the right, with one colour/marker pair per class.
    """
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    def scatter_by_class(axes, points, ndim):
        # One scatter call per class, using the first ``ndim`` columns.
        for cl, color, marker in zip(np.unique(y), colors, markers):
            selected = points[y == cl]
            columns = [selected[:, k] for k in range(ndim)]
            axes.scatter(*columns, c=color, marker=marker, edgecolor='black')

    # MDS
    # NOTE(review): set_axis_bgcolor is unavailable in recent matplotlib
    # releases -- confirm the matplotlib version this file targets.
    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    ax.set_axis_bgcolor('white')
    embedded = manifold.MDS(n_components=3).fit_transform(X)
    scatter_by_class(ax, embedded, 3)
    pylab.title("MDS on Iris data set in 3 dimensions")
    ax.view_init(10, -15)

    embedded = manifold.MDS(n_components=2).fit_transform(X)
    ax = fig.add_subplot(122)
    scatter_by_class(ax, embedded, 2)
    pylab.title("MDS on Iris data set in 2 dimensions")

    pylab.savefig(os.path.join(CHART_DIR, "mds_demo_iris.png"),
                  bbox_inches="tight")

    # PCA
    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    ax.set_axis_bgcolor('white')
    pca = decomposition.PCA(n_components=3)
    embedded = pca.fit(X).transform(X)
    scatter_by_class(ax, embedded, 3)
    pylab.title("PCA on Iris data set in 3 dimensions")
    ax.view_init(50, -35)

    pca = decomposition.PCA(n_components=2)
    embedded = pca.fit_transform(X)
    ax = fig.add_subplot(122)
    scatter_by_class(ax, embedded, 2)
    pylab.title("PCA on Iris data set in 2 dimensions")

    pylab.savefig(os.path.join(CHART_DIR, "pca_demo_iris.png"),
                  bbox_inches="tight")