Python sklearn.manifold.MDS Examples

The following are 30 code examples of sklearn.manifold.MDS(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.manifold, or try the search function.
Example #1
Source File: utils.py    From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License 7 votes vote down vote up
def learn_manifold(manifold_type, feats, n_components=2):
    """Project ``feats`` to ``n_components`` dimensions with a manifold learner.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the selected estimator's ``fit_transform``.
    n_components : int, optional
        Dimensionality of the embedding (default 2).

    Returns
    -------
    Whatever the selected estimator's ``fit_transform`` returns.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.  ``ValueError``
        is a subclass of ``Exception``, so callers that caught the previous
        bare ``Exception`` keep working.
    """
    # Lazy factories: only the requested estimator is ever constructed.
    factories = {
        'tsne': lambda: manifold.TSNE(n_components=n_components, random_state=0),
        'isomap': lambda: manifold.Isomap(n_components=n_components),
        'mds': lambda: manifold.MDS(n_components=n_components),
        'spectral': lambda: manifold.SpectralEmbedding(n_components=n_components),
    }
    if manifold_type not in factories:
        raise ValueError('unknown manifold type %r; expected one of: %s'
                         % (manifold_type, ', '.join(sorted(factories))))

    return factories[manifold_type]().fit_transform(feats)
Example #2
Source File: utils.py    From timeception with GNU General Public License v3.0 7 votes vote down vote up
def learn_manifold(manifold_type, feats, n_components=2):
    """Project ``feats`` to ``n_components`` dimensions with a manifold learner.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the selected estimator's ``fit_transform``.
    n_components : int, optional
        Dimensionality of the embedding (default 2).

    Returns
    -------
    Whatever the selected estimator's ``fit_transform`` returns.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.  ``ValueError``
        is a subclass of ``Exception``, so callers that caught the previous
        bare ``Exception`` keep working.
    """
    # Lazy factories: only the requested estimator is ever constructed.
    factories = {
        'tsne': lambda: manifold.TSNE(n_components=n_components, random_state=0),
        'isomap': lambda: manifold.Isomap(n_components=n_components),
        'mds': lambda: manifold.MDS(n_components=n_components),
        'spectral': lambda: manifold.SpectralEmbedding(n_components=n_components),
    }
    if manifold_type not in factories:
        raise ValueError('unknown manifold type %r; expected one of: %s'
                         % (manifold_type, ', '.join(sorted(factories))))

    return factories[manifold_type]().fit_transform(feats)
Example #3
Source File: embedding.py    From DeepDIVA with GNU Lesser General Public License v3.0 6 votes vote down vote up
def mds(features, n_components=2):
    """
    Embed feature vectors in a low-dimensional space using MDS.

    Parameters
    ----------
    features: numpy.ndarray
        input feature vectors handed to ``MDS.fit_transform``.
    n_components: int
        dimensionality of the embedding (2 by default)

    Returns
    -------
    embedding: numpy.ndarray
        embedded coordinates, exactly as returned by ``MDS.fit_transform``
    """
    # n_jobs=-1 lets scikit-learn use every available core.
    reducer = MDS(n_components=n_components, n_jobs=-1)
    return reducer.fit_transform(features)
Example #4
Source File: compute_polar_coordinates.py    From masif with Apache License 2.0 6 votes vote down vote up
def compute_theta_all(D, vertices, faces, normals, idx, radius):
    """Compute the theta values of every row's neighbourhood in D.

    For each row index, the non-zero entries of that row that are smaller
    than ``radius`` select a neighbour set.  The pairwise sub-matrix of D for
    those neighbours is embedded in 2D through ``call_mds`` and the embedding
    is handed to ``compute_thetas``.  The function returns the list of
    ``compute_thetas`` results, one per row of D.

    NOTE(review): the ``.nonzero()`` / ``.todense()`` calls suggest D is a
    scipy sparse matrix of geodesic distances -- confirm with the caller.
    """
    embedder = MDS(n_components=2, n_init=1, max_iter=50, dissimilarity='precomputed', n_jobs=10)
    thetas = []
    for center in range(D.shape[0]):
        # Progress trace every 100 rows.
        if center % 100 == 0:
            print(center)

        # Neighbours of `center`: stored (non-zero) entries closer than radius.
        stored = D[center].nonzero()
        within = np.where(D[center][stored] < radius)[1]
        patch = stored[1][within]

        # Dense pairwise distances restricted to the neighbourhood.
        patch_dists = D[patch, :][:, patch].todense()

        # 2D plane for all neighbours of `center`, then the angles on it.
        plane = call_mds(embedder, patch_dists)
        thetas.append(compute_thetas(plane, center, vertices, faces, normals, patch, idx))
    return thetas
Example #5
Source File: clustering.py    From anvio with GNU General Public License v3.0 6 votes vote down vote up
def get_scaled_vectors(vectors, user_seed=None, n_components=12, normalize=True, progress=progress):
    """Embed ``vectors`` into ``n_components`` dimensions with MDS.

    Parameters
    ----------
    vectors : array-like
        Input vectors; converted with ``np.array`` and, when ``normalize`` is
        true, passed through ``get_normalized_vectors`` first.
    user_seed : int or None
        Seed for the random state given to MDS.  ``None`` means "unseeded".
    n_components : int
        Dimensionality of the embedding.
    normalize : bool
        Whether to normalize the vectors before computing distances.
    progress : object
        Progress reporter; only its ``update(str)`` method is used here.

    Returns
    -------
    The ``embedding_`` attribute of the fitted MDS estimator.
    """
    # Compare against None rather than relying on truthiness: a seed of 0 is
    # a legitimate seed and must still yield a reproducible embedding.
    if user_seed is not None:
        seed = np.random.RandomState(seed=user_seed)
    else:
        seed = np.random.RandomState()

    # FIXME: Make this optional:
    from sklearn.metrics.pairwise import euclidean_distances as d

    vectors = get_normalized_vectors(np.array(vectors)) if normalize else np.array(vectors)

    # Pairwise Euclidean distances; MDS below consumes them as a precomputed
    # dissimilarity matrix.
    progress.update('Computing similarity matrix')
    distances = d(vectors)

    progress.update('Scaling using %d components' % n_components)
    mds = manifold.MDS(n_components=n_components, max_iter=300, eps=1e-10, random_state=seed,
                       dissimilarity="precomputed", n_jobs=1)

    progress.update('Fitting')
    scaled_vectors = mds.fit(distances).embedding_

    return scaled_vectors
Example #6
Source File: lens.py    From sakmapper with MIT License 6 votes vote down vote up
def apply_lens(df, lens='pca', dist='euclidean', n_dim=2, **kwargs):
    """
    input: N x F dataframe of observations
    output: N x n_dim image of input data under lens function

    lens is one of 'pca', 'mds' or 'neighbor'; dist is 'euclidean' or
    'correlation' (PCA only supports 'euclidean').  Extra keyword arguments
    are forwarded to the underlying estimator's constructor.

    Raises ValueError for any unsupported argument combination.
    """
    # Raising a plain string is itself a TypeError on Python 3, which hid the
    # intended message; raise a real exception type instead.
    if n_dim != 2:
        raise ValueError('error: image of data set must be two-dimensional')
    if dist not in ['euclidean', 'correlation']:
        raise ValueError('error: only euclidean and correlation distance metrics are supported')
    if lens == 'pca' and dist != 'euclidean':
        raise ValueError('error: PCA requires the use of euclidean distance metric')

    if lens == 'pca':
        df_lens = pd.DataFrame(decomposition.PCA(n_components=n_dim, **kwargs).fit_transform(df), df.index)
    elif lens == 'mds':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        # NOTE(review): D is a distance matrix, but MDS only treats its input
        # as such when dissimilarity='precomputed' is supplied via kwargs --
        # confirm whether that is intended.
        df_lens = pd.DataFrame(manifold.MDS(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    elif lens == 'neighbor':
        D = metrics.pairwise.pairwise_distances(df, metric=dist)
        df_lens = pd.DataFrame(manifold.SpectralEmbedding(n_components=n_dim, **kwargs).fit_transform(D), df.index)
    else:
        raise ValueError('error: only PCA, MDS, neighborhood lenses are supported')

    return df_lens
Example #7
Source File: mds.py    From pyrsa with GNU Lesser General Public License v3.0 6 votes vote down vote up
def mds(utv):
    """Return a 2D MDS embedding of the dissimilarities in ``utv``.

    ``utv`` is converted with ``scipy.spatial.distance.squareform``
    (presumably from an upper-triangular vector to a square matrix -- confirm
    with the caller) and embedded as a precomputed dissimilarity matrix.  The
    random state is seeded with 3 so repeated calls give the same layout.

    Earlier experiments with rescaling, Procrustes alignment and a PCA
    rotation of the result are not applied; the raw MDS positions are
    returned.
    """
    square_rdm = scipy.spatial.distance.squareform(utv)
    rng = numpy.random.RandomState(seed=3)
    embedder = MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=rng,
                   dissimilarity="precomputed", n_jobs=1)
    return embedder.fit_transform(square_rdm)
Example #8
Source File: utils.py    From videograph with GNU General Public License v3.0 6 votes vote down vote up
def learn_manifold(manifold_type, feats, n_components=2):
    """Project ``feats`` to ``n_components`` dimensions with a manifold learner.

    Parameters
    ----------
    manifold_type : str
        One of ``'tsne'``, ``'isomap'``, ``'mds'`` or ``'spectral'``.
    feats : array-like
        Input handed unchanged to the selected estimator's ``fit_transform``.
    n_components : int, optional
        Dimensionality of the embedding (default 2).

    Returns
    -------
    Whatever the selected estimator's ``fit_transform`` returns.

    Raises
    ------
    ValueError
        If ``manifold_type`` is not one of the supported names.  ``ValueError``
        is a subclass of ``Exception``, so callers that caught the previous
        bare ``Exception`` keep working.
    """
    # Lazy factories: only the requested estimator is ever constructed.
    factories = {
        'tsne': lambda: manifold.TSNE(n_components=n_components, random_state=0),
        'isomap': lambda: manifold.Isomap(n_components=n_components),
        'mds': lambda: manifold.MDS(n_components=n_components),
        'spectral': lambda: manifold.SpectralEmbedding(n_components=n_components),
    }
    if manifold_type not in factories:
        raise ValueError('unknown manifold type %r; expected one of: %s'
                         % (manifold_type, ', '.join(sorted(factories))))

    return factories[manifold_type]().fit_transform(feats)
Example #9
Source File: generate_qc_plots.py    From panaroo with MIT License 5 votes vote down vote up
def generate_qc_plot(method, input_files, outdir, n_cpu, ref_db=None):
    """Produce the QC plot(s) selected by ``method``.

    ``method`` may be "mds", "ngenes", "ncontigs", "contam", or "all" to run
    every plot; any other value produces nothing.  The contamination plot
    needs ``ref_db``: when it is None a hint is printed and that plot is
    skipped.  Always returns None.
    """

    def selected(plot_name):
        # A plot runs when named explicitly or when everything was requested.
        return method in (plot_name, "all")

    # MDS of pairwise mash distances
    if selected("mds"):
        dist_mat, file_names = get_mash_dist(input_gffs=input_files,
                                             outdir=outdir,
                                             n_cpu=n_cpu,
                                             quiet=True)
        plot_MDS(dist_mat, file_names, outdir)

    # number of genes
    if selected("ngenes"):
        plot_ngenes(input_gffs=input_files, outdir=outdir)

    # number of contigs
    if selected("ncontigs"):
        plot_ncontigs(input_gffs=input_files, outdir=outdir)

    # contamination scatter plot (last step, so guard clauses can return)
    if not selected("contam"):
        return

    if ref_db is None:
        print("No reference mash database given! Skipping contamination plot...")
        print("One can be downloaded from https://mash.readthedocs.io"
              "/en/latest/tutorials.html#screening-a-read-set-for"
              "-containment-of-refseq-genomes")
        return

    contam_file = get_mash_contam(input_gffs=input_files,
                                  mash_ref=ref_db,
                                  n_cpu=n_cpu,
                                  outdir=outdir)
    plot_mash_contam(mash_contam_file=contam_file, outdir=outdir)
    return
Example #10
Source File: RnaseqqcReport.py    From CGATPipelines with MIT License 5 votes vote down vote up
def __call__(self, track, slice=None):
        """Return 2D MDS coordinates per sample, merged with the factors table.

        Reads transcript TPM values from ``sailfish_transcripts``, pivots them
        to a transcripts x samples table, runs MDS on the Euclidean distances
        between samples, and joins the two resulting coordinates ("MD1",
        "MD2") with every row of ``factors`` whose factor is not 'genome'.
        The result is indexed by "factor".

        ``track`` and ``slice`` are accepted but not used by this method.
        """

        # remove WHERE when table cleaned up to remove header rows
        statement = (
            "SELECT transcript_id, TPM, sample_id FROM sailfish_transcripts")

        # fetch data
        df = pd.DataFrame.from_dict(self.getAll(statement))

        # Keyword arguments: pandas >= 2.0 rejects positional index/columns.
        df = df.pivot(index='transcript_id', columns='sample_id')['TPM']

        # pairwise distances between samples (columns of df)
        distances = euclidean_distances(df.transpose())

        # run MDS on the precomputed distance matrix
        mds = manifold.MDS(n_components=2, max_iter=3000,
                           eps=1e-9, dissimilarity="precomputed", n_jobs=1)
        mds = mds.fit(distances)
        pos = pd.DataFrame(mds.embedding_)

        pos.columns = ["MD1", "MD2"]
        # rows of the embedding follow the column (sample) order of df
        pos['sample'] = df.columns

        factors_df = self.getDataFrame(
            "SELECT * FROM factors WHERE factor != 'genome'")

        merged_df = pd.merge(pos, factors_df,
                             left_on="sample", right_on="sample_id")
        return merged_df.reset_index().set_index("factor")
Example #11
Source File: MDS.py    From mltk-algo-contrib with Apache License 2.0 5 votes vote down vote up
def __init__(self, options):
        """Create the wrapped ``_MDS`` estimator from the user's options.

        The 'params' entry of ``options`` is converted by ``convert_params``
        ('k' is an alias for 'n_components'); any of max_iter, n_init, n_jobs,
        eps and metric that is still missing afterwards gets a fallback value.
        """
        self.handle_options(options)
        params = convert_params(
            options.get('params', {}),
            ints=['k', 'max_iter', 'n_init', 'n_jobs'],
            floats=['eps'],
            bools=['metric'],
            aliases={'k': 'n_components'}
        )

        # Fallbacks applied, in this order, only for keys not supplied.
        fallback_values = (
            ('max_iter', 300),
            ('n_init', 4),
            ('n_jobs', 1),
            ('eps', 0.001),
            ('metric', True),
        )
        for key, value in fallback_values:
            params.setdefault(key, value)

        self.estimator = _MDS(**params)
Example #12
Source File: MDS_tensorflow.py    From dimensionality_reduction_alo_codes with Apache License 2.0 5 votes vote down vote up
def sklearn_mds(n_com=2):
    """Embed the digits dataset with MDS and show a scatter plot.

    ``n_com`` is the number of MDS components.  Only the first two embedding
    columns are plotted, coloured by the digit label, so ``n_com`` must be at
    least 2.
    """
    # Load the dataset once; data and target come from the same object.
    digits = load_digits()
    mds = MDS(n_components=n_com)
    data_2d = mds.fit_transform(digits.data)
    plt.scatter(data_2d[:, 0], data_2d[:, 1], c=digits.target)
    plt.show()
Example #13
Source File: views.py    From texta with GNU General Public License v3.0 5 votes vote down vote up
def index(request):
    """Render the conceptualiser page for the requesting user.

    The template context carries the user's lexicons (each annotated with a
    ``size`` attribute holding its word count), the available projection
    methods, the completed model-training tasks (newest first) and the
    datasets the user may access.
    """
    template = loader.get_template('conceptualiser.html')

    # Annotate every lexicon owned by the user with its number of words.
    lexicons = []
    for lexicon in Lexicon.objects.all().filter(author=request.user):
        lexicon.size = Word.objects.all().filter(lexicon=lexicon.id).count()
        lexicons.append(lexicon)

    methods = ["PCA", "TSNE", "MDS"]

    datasets = Datasets().get_allowed_datasets(request.user)
    completed_training = Task.objects.filter(task_type=TaskTypes.TRAIN_MODEL.value)
    completed_training = completed_training.filter(status__iexact=Task.STATUS_COMPLETED)
    language_models = completed_training.order_by('-pk')

    context = {
        'STATIC_URL': STATIC_URL,
        'lexicons': lexicons,
        'methods': methods,
        'language_models': language_models,
        'allowed_datasets': datasets,
    }
    return HttpResponse(template.render(context, request))
Example #14
Source File: test_manifold.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        """Every estimator exposed on ``df.manifold`` is the sklearn class itself."""
        df = pdml.ModelFrame([])
        estimator_names = (
            'LocallyLinearEmbedding',
            'Isomap',
            'MDS',
            'SpectralEmbedding',
            'TSNE',
        )
        for estimator_name in estimator_names:
            self.assertIs(getattr(df.manifold, estimator_name),
                          getattr(manifold, estimator_name))
Example #15
Source File: static_wordmesh.py    From word-mesh with MIT License 5 votes vote down vote up
def recreate_wordmesh(self):
        """
        Regenerate the word placement, e.g. when the current layout is not
        satisfactory. The creation steps involve randomness, so each call
        is expected to produce a different-looking result.
        """

        # Mark the clustering as stale so the embeddings are rebuilt from
        # scratch (this re-runs the MDS step).
        self._flag_clustering_criteria = True
        self._generate_embeddings()
Example #16
Source File: static_wordmesh.py    From word-mesh with MIT License 5 votes vote down vote up
def _generate_embeddings(self):
        
        if self._flag_clustering_criteria:
            
            mds = MDS(2, dissimilarity='precomputed').\
                                 fit_transform(self.similarity_matrix)
            self._initial_embeds = mds
            
            if self._clustering_algorithm == 'TSNE':
                self._initial_embeds = TSNE(metric='precomputed', 
                                            perplexity=3, init=mds).\
                                            fit_transform(self.similarity_matrix)
            
        if self._flag_fontsizes or self._flag_fontcolors or self._flag_vis:
            self._visualizer = PlotlyVisualizer(words = self.keywords,
                                                fontsizes_norm =self.fontsizes_norm, 
                                                height = self._resolution[0],
                                                width = self._resolution[1], 
                                                textcolors=self.fontcolors,
                                                bg_color = self._bg_color)
            self.bounding_box_width_height = self._visualizer.bounding_box_dimensions
        
        if self._flag_fontsizes or self._flag_clustering_criteria:
            bbd = self.bounding_box_width_height
            fdm = ForceDirectedModel(self._initial_embeds, bbd, num_iters=NUM_ITERS,
                                     apply_delaunay=self._apply_delaunay,
                                     delaunay_multiplier=self._delaunay_factor)
            self._force_directed_model = fdm
            self.embeddings = fdm.equilibrium_position()
            
        #turn off all flags
        self._flag_clustering_criteria = False
        self._flag_fontsizes = False
        self._flag_fontcolors = False 
Example #17
Source File: plot.py    From DensityPeakCluster with MIT License 5 votes vote down vote up
def plot_cluster(cluster):
    '''
    Plot scatter diagram for final points that using multi-dimensional scaling for data

    The pairwise distances in ``cluster.distances`` (keyed by 1-based id
    pairs ``(i, j)`` with ``i < j``) are expanded into a symmetric matrix,
    embedded with MDS and plotted with one style per cluster label.  The
    labels are also dumped to ``./tmp.txt`` and the figure is saved as a JPEG
    in the working directory.

    Args:
        cluster : DensityPeakCluster object
    '''
    logger.info("PLOT: cluster result, start multi-dimensional scaling")
    dp = np.zeros((cluster.max_id, cluster.max_id), dtype=np.float32)
    cls = []
    # `range` rather than `xrange` so the function also runs on Python 3.
    for i in range(1, cluster.max_id):
        for j in range(i + 1, cluster.max_id + 1):
            # ids are 1-based, matrix indices 0-based; fill both triangles
            pair_distance = cluster.distances[(i, j)]
            dp[i - 1, j - 1] = pair_distance
            dp[j - 1, i - 1] = pair_distance
        cls.append(cluster.cluster[i])
    # the outer loop stops one short of max_id, so add the last label here
    cls.append(cluster.cluster[cluster.max_id])
    cls = np.array(cls, dtype=np.float32)
    # Context manager: the file is closed even if the write raises.
    with open(r'./tmp.txt', 'w') as fo:
        fo.write('\n'.join(map(str, cls)))
    mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1, dissimilarity='precomputed')
    dp_mds = mds.fit_transform(dp.astype(np.float64))
    logger.info("PLOT: end mds, start plot")
    plot_scatter_diagram(1, dp_mds[:, 0], dp_mds[:, 1], title='2D Nonclassical Multidimensional Scaling', style_list=cls)
    plt.savefig("2D Nonclassical Multidimensional Scaling.jpg")
Example #18
Source File: test_wren_holliday_sweep.py    From VeRyPy with MIT License 5 votes vote down vote up
def fill_missing_pts_as_needed(points, D):
    """Return ``points`` unchanged, or a 2D MDS layout of ``D`` if it is None.

    When coordinates are missing, the distance matrix ``D`` is embedded with
    MDS (fixed ``random_state=42``) and the rows of the embedding are
    returned as a list.
    """
    if points is not None:
        return points

    # No point coordinates available, but the distance matrix is.
    from sklearn import manifold
    embedder = manifold.MDS(n_components=2, dissimilarity='precomputed',
                            random_state=42)
    fitted = embedder.fit(D)
    return list(fitted.embedding_)
Example #19
Source File: cvrp_ops.py    From VeRyPy with MIT License 5 votes vote down vote up
def generate_missing_coordinates(for_D):
    """Derive 2D point coordinates for the distance matrix ``for_D``.

    Returns a ``(points, edge_weight_type)`` pair: ``points`` is the list of
    rows of a 2D MDS embedding (``random_state=42``), and
    ``edge_weight_type`` is "EUC_2D" when ``_is_all_integer_array(for_D)``
    is true, "EXACT_2D" otherwise.
    """
    from sklearn import manifold
    embedder = manifold.MDS(n_components=2, dissimilarity='precomputed',
                            random_state=42)
    points = list(embedder.fit(for_D).embedding_)

    if _is_all_integer_array(for_D):
        edge_weight_type = "EUC_2D"
    else:
        edge_weight_type = "EXACT_2D"
    return points, edge_weight_type
Example #20
Source File: plot.py    From DensityPeakCluster with MIT License 5 votes vote down vote up
def plot_cluster(cluster):
    '''
    Plot scatter diagram for final points that using multi-dimensional scaling for data

    The pairwise distances in ``cluster.distances`` (keyed by 1-based id
    pairs ``(i, j)`` with ``i < j``) are expanded into a symmetric matrix,
    embedded with MDS and plotted with one style per cluster label.  The
    labels are also dumped to ``./tmp.txt``.

    Args:
        cluster : DensityPeakCluster object
    '''
    logger.info("PLOT: cluster result, start multi-dimensional scaling")
    dp = np.zeros((cluster.max_id, cluster.max_id), dtype=np.float32)
    cls = []
    # `range` rather than `xrange` so the function also runs on Python 3.
    for i in range(1, cluster.max_id):
        for j in range(i + 1, cluster.max_id + 1):
            # ids are 1-based, matrix indices 0-based; fill both triangles
            pair_distance = cluster.distances[(i, j)]
            dp[i - 1, j - 1] = pair_distance
            dp[j - 1, i - 1] = pair_distance
        cls.append(cluster.cluster[i])
    # the outer loop stops one short of max_id, so add the last label here
    cls.append(cluster.cluster[cluster.max_id])
    cls = np.array(cls, dtype=np.float32)
    # Context manager: the file is closed even if the write raises.
    with open(r'./tmp.txt', 'w') as fo:
        fo.write('\n'.join(map(str, cls)))
    # Keep the parsed version itself.  The previous code stored the boolean
    # `versiontuple(...)[1] > 14` and then indexed it, which raises TypeError.
    # NOTE(review): assumes versiontuple() returns a sequence of ints
    # (major, minor, ...) -- confirm against its definition.
    version = versiontuple(sklearn_version)
    if version[0] > 0 or version[1] > 14:
        mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1,
                           dissimilarity='precomputed')
    else:
        # scikit-learn <= 0.14: constructed without `dissimilarity`
        mds = manifold.MDS(max_iter=200, eps=1e-4, n_init=1)
    dp_mds = mds.fit_transform(dp)
    logger.info("PLOT: end mds, start plot")
    plot_scatter_diagram(1, dp_mds[:, 0], dp_mds[:, 1],
                         title='cluster', style_list=cls)
Example #21
Source File: analysis.py    From smallrnaseq with GNU General Public License v3.0 5 votes vote down vote up
def do_mds(X):
    """Embed the rows of DataFrame ``X`` in three dimensions with MDS.

    The random state is seeded with 3.  Returns a DataFrame of the embedding
    coordinates that carries the index of ``X``.
    """

    from sklearn import manifold
    rng = np.random.RandomState(seed=3)
    embedder = manifold.MDS(n_components=3, max_iter=3000, eps=1e-9,
                            random_state=rng, n_jobs=1)
    coords = embedder.fit(X.values).embedding_
    return pd.DataFrame(coords, index=X.index)
Example #22
Source File: demo_mds.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License 5 votes vote down vote up
def plot_demo_1():
    """Plot 3D and 2D MDS embeddings of a tiny example data set.

    The data are three 5-dimensional points (all ones, all twos, all tens),
    one per class.  The left subplot shows a 3-component embedding, the right
    one a 2-component embedding; the figure is saved as ``mds_demo_1.png``
    inside ``CHART_DIR``.
    """
    X = np.c_[np.ones(5), 2 * np.ones(5), 10 * np.ones(5)].T
    y = np.array([0, 1, 2])

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    # Axes.set_axis_bgcolor() was removed from matplotlib in favour of
    # set_facecolor(); fall back to the old name on installations that
    # predate set_facecolor().
    if hasattr(ax, 'set_facecolor'):
        ax.set_facecolor('white')
    else:
        ax.set_axis_bgcolor('white')

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], Xtrans[y == cl][:, 2], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 3 dimensions")
    ax.view_init(10, -15)

    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    for cl, color, marker in zip(np.unique(y), colors, markers):
        ax.scatter(
            Xtrans[y == cl][:, 0], Xtrans[y == cl][:, 1], c=color, marker=marker, edgecolor='black')
    pylab.title("MDS on example data set in 2 dimensions")

    filename = "mds_demo_1.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
Example #23
Source File: generate_qc_plots.py    From panaroo with MIT License 4 votes vote down vote up
def plot_MDS(dist_mat, file_names, outdir):
    """Embed ``dist_mat`` in 2D with MDS and write coordinates plus two plots.

    Three files are written, each named by appending to ``outdir`` directly
    (so ``outdir`` must already end with a path separator):
    ``mds_coords.txt`` (tab-separated sample name and coordinates),
    ``MDS_mash_plot.png`` (static matplotlib scatter) and
    ``MDS_mash_plot.html`` (interactive plotly scatter labelled with
    ``file_names``).
    """

    # MDS projection of the precomputed distance matrix
    fitted = manifold.MDS(n_components=2, dissimilarity="precomputed").fit(dist_mat)
    coords = fitted.embedding_

    # dump the coordinates, one sample per line
    with open(outdir + "mds_coords.txt", "w") as handle:
        handle.write("sample\tcoordx\tcoordy\n")
        for sample, point in zip(file_names, coords):
            handle.write("%s\t%s\t%s\n" % (sample, point[0], point[1]))

    # shared axis limits: data range padded by |5% quantile| on both sides
    padding = abs(np.quantile(coords, 0.05))
    c_min = np.min(coords) - padding
    c_max = np.max(coords) + padding

    # static plot
    plt.style.use('ggplot')
    static_fig = plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1])
    plt.grid(True)
    plt.xlabel("MDS Dimension 1")
    plt.ylabel("MDS Dimension 2")
    plt.xlim((c_min, c_max))
    plt.ylim((c_min, c_max))
    plt.tight_layout()
    static_fig.savefig(outdir + "MDS_mash_plot.png")

    # interactive plot; both axes use the same settings
    def axis_settings():
        return dict(autorange=True,
                    showgrid=True,
                    zeroline=True,
                    showline=False,
                    ticks='',
                    range=[c_min, c_max],
                    type="linear",
                    exponentformat="SI",
                    showexponent='none',
                    showticklabels=True)

    markers = go.Scatter(x=coords[:, 0],
                         y=coords[:, 1],
                         text=file_names,
                         mode='markers')
    layout = go.Layout(xaxis=axis_settings(), yaxis=axis_settings())
    interactive_fig = go.Figure(data=[markers], layout=layout)
    offline.plot(interactive_fig, filename=outdir + "MDS_mash_plot.html", auto_open=False)

    return
Example #24
Source File: compute_polar_coordinates.py    From masif with Apache License 2.0 4 votes vote down vote up
def compute_theta_all_fast(D, vertices, faces, normals, idx, radius):
    """
        compute_theta_all_fast: compute the theta coordinate using an approximation.
        The approximation consists of taking only the inner radius/2 for the multidimensional
        scaling. Then, for points farther than radius/2, the shortest line to the center is used.
        This speeds up the method by a factor of about 100.

        Returns a list with one theta array (the ``compute_thetas`` result, extended to
        the outer neighbours) per row of D. Two timing lines are printed at the end.

        NOTE(review): the ``.nonzero()`` / ``.todense()`` calls suggest D is a scipy
        sparse matrix of geodesic distances -- confirm with the caller.
    """
    mymds = MDS(n_components=2, n_init=1, eps=0.1, max_iter=50, dissimilarity='precomputed', n_jobs=1)
    all_theta = []
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # replacement for measuring elapsed intervals (wall-clock based).
    start_loop = time.perf_counter()
    only_mds = 0.0
    for i in range(D.shape[0]):
        # Get the pairs of geodesic distances.
        neigh = D[i].nonzero()
        # We will run MDS on only a subset of the points.
        ii = np.where(D[i][neigh] < radius/2)[1]
        neigh_i = neigh[1][ii]
        pair_dist_i = D[neigh_i,:][:,neigh_i]
        pair_dist_i = pair_dist_i.todense()

        # Plane_i: the 2D plane for all neighbors of i
        tic = time.perf_counter()
        plane_i = call_mds(mymds, pair_dist_i)
        toc = time.perf_counter()
        only_mds += (toc - tic)

        # Compute the angles on the plane.
        theta = compute_thetas(plane_i, i, vertices, faces, normals, neigh_i, idx)

        # We now must assign angles to all points kk that are between radius/2 and radius from the center.
        kk = np.where(D[i][neigh] >= radius/2)[1]
        neigh_k = neigh[1][kk]
        dist_kk = D[neigh_k,:][:,neigh_i]
        dist_kk = dist_kk.todense()
        # A zero here means "no stored distance", so it must never win the argmin.
        dist_kk[dist_kk == 0] = float('inf')
        # Each outer point inherits the angle of its closest inner neighbour.
        closest = np.argmin(dist_kk, axis=1)
        closest = np.squeeze(closest)
        closest = neigh_i[closest]
        theta[neigh_k] = theta[closest]

        all_theta.append(theta)
    end_loop = time.perf_counter()
    print('Only MDS time: {:.2f}s'.format(only_mds))
    print('Full loop time: {:.2f}s'.format(end_loop-start_loop))
    return all_theta
Example #25
Source File: spectral_outlier.py    From ad_examples with MIT License 4 votes vote down vote up
def fit_transform(self, x_in):
        """Embed ``x_in`` with MDS on a dissimilarity derived from a k-NN affinity graph.

        Steps, as implemented below:
          1. normalize the input and compute all pairwise distances;
          2. for each row keep the ``self.n_neighbors`` closest indices;
          3. build a symmetric affinity matrix W with Gaussian weights
             ``exp(-dist**2 / self.k2)`` on those neighbour pairs;
          4. form ``S = D^-1/2 W D^-1/2`` and ``A = inv(I - self.alpha * S)``,
             then rescale A so that its diagonal is 1;
          5. run MDS (``self.n_components``, ``self.metric``) on ``|1 - A|``
             as a precomputed dissimilarity.

        Returns the embedding produced by ``MDS.fit_transform``.
        """
        n = nrow(x_in)
        x = normalize_and_center_by_feature_range(x_in)
        # Full symmetric pairwise distance matrix (diagonal included).
        dists = np.zeros(shape=(n, n), dtype=float)
        for i in range(n):
            for j in range(i, n):
                dists[i, j] = euclidean_dist(x[i, :], x[j, :])
                dists[j, i] = dists[i, j]

        logger.debug(dists[0, 0:10])

        # Row i holds the indices of the n_neighbors smallest distances from i
        # (argsort is taken over the whole row, so i itself can be among them).
        neighbors = np.zeros(shape=(n, self.n_neighbors), dtype=int)
        for i in range(n):
            neighbors[i, :] = np.argsort(dists[i, :])[0:self.n_neighbors]

        logger.debug(neighbors[0, 0:10])

        # Symmetric affinity matrix: Gaussian weight on every neighbour pair.
        W = np.zeros(shape=(n, n))
        for i in range(n):
            for j in neighbors[i, :]:
                # diagonal elements of W will be zeros
                if i != j:
                    W[i, j] = np.exp(-(dists[i, j] ** 2) / self.k2)
                    W[j, i] = W[i, j]

        # Row sums of W (node degrees).
        D = W.sum(axis=1)
        # logger.debug(str(list(D[0:10])))

        # diag(1 / sqrt(D)); a zero row sum would produce a division by zero here.
        iDroot = np.diag(np.sqrt(D) ** (-1))

        # Symmetrically normalized affinity: D^-1/2 W D^-1/2.
        S = iDroot.dot(W.dot(iDroot))
        # logger.debug("S: %s" % str(list(S[0, 0:10])))

        B = np.eye(n) - self.alpha * S
        # logger.debug("B: %s" % str(list(B[0, 0:10])))

        A = np.linalg.inv(B)
        # Rescale A by 1/sqrt of its diagonal on both sides so diag(A) becomes 1.
        tdA = np.diag(np.sqrt(np.diag(A)) ** (-1))
        A = tdA.dot(A.dot(tdA))
        # logger.debug("A: %s" % str(list(A[0, 0:10])))

        # Turn the normalized matrix into a dissimilarity (0 on the diagonal).
        d = 1 - A
        # logger.debug("d: %s" % str(list(d[0, 0:10])))
        # logger.debug("min(d): %f, max(d): %f" % (np.min(d), np.max(d)))

        mds = manifold.MDS(self.n_components,
                           metric=self.metric, dissimilarity='precomputed')
        # using abs below because some zeros are represented as -0; other values are positive.
        embedding = mds.fit_transform(np.abs(d))

        return embedding 
Example #26
Source File: visualization.py    From TensorFlow_DCIGN with MIT License 4 votes vote down vote up
def visualize_encodings(encodings, file_name=None,
                        grid=None, skip_every=999, fast=False, fig=None, interactive=False):
  """Plot several 2D/3D manifold projections of ``encodings`` on one figure.

  The encodings first go through ``manual_pca``.  If at most three columns
  remain they are plotted directly via ``print_data_only`` and its result is
  returned.  Otherwise the first 720 rows are projected with LLE, MDS and
  TSNE variants, one subplot per projection on a ``grid`` (default 3 x 4),
  and the figure is saved to ``file_name`` unless ``interactive`` is set.

  A projection that fails is reported on stdout and skipped; the remaining
  projections are still drawn.  ``fast`` is accepted but not used here.
  """
  encodings = manual_pca(encodings)
  if encodings.shape[1] <= 3:
    return print_data_only(encodings, file_name, fig=fig, interactive=interactive)

  encodings = encodings[0:720]
  # Precomputed distance matrices for the projections that need them.
  hessian_euc = dist.squareform(dist.pdist(encodings, 'euclidean'))
  hessian_cos = dist.squareform(dist.pdist(encodings, 'cosine'))
  grid = (3, 4) if grid is None else grid
  project_ops = []

  n = 2
  project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
  project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
  project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
  project_ops.append(("TSNE 30/2000   N:%d" % n, TSNE(perplexity=30, n_components=n, init='pca', n_iter=2000)))
  n = 3
  project_ops.append(("LLE ltsa       N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='ltsa')))
  project_ops.append(("LLE modified   N:%d" % n, mn.LocallyLinearEmbedding(10, n, method='modified')))
  project_ops.append(('MDS euclidean  N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))
  project_ops.append(('MDS cosine     N:%d' % n, mn.MDS(n, max_iter=300, n_init=1, dissimilarity='precomputed')))

  # Subplot slots: every `skip_every`-th slot is left out of the sequence.
  stride = skip_every - 1
  plot_places = [(i % stride) + int(i / stride) * skip_every + 1 for i in range(12)]

  fig = get_figure(fig)
  fig.set_size_inches(fig.get_size_inches()[0] * grid[0] / 1.,
                      fig.get_size_inches()[1] * grid[1] / 2.0)

  for i, (name, projector) in enumerate(project_ops):
    # The dimensionality is encoded in the label built above.
    is3d = 'N:3' in name

    try:
      if is3d:
        subplot = plt.subplot(grid[0], grid[1], plot_places[i], projection='3d')
      else:
        subplot = plt.subplot(grid[0], grid[1], plot_places[i])

      if not _needs_hessian(projector):
        data_source = encodings
      elif 'cosine' in name:
        data_source = hessian_cos
      else:
        data_source = hessian_euc
      projections = projector.fit_transform(data_source)
      scatter(subplot, projections, is3d, _build_radial_colors(len(data_source)))
      subplot.set_title(name)
    except Exception as err:
      # Best effort: one failing projection must not abort the others.
      # `Exception` (not a bare except) lets Ctrl-C and SystemExit propagate.
      print(name, "Unexpected error: ", type(err), err)

  visualize_data_same(encodings, grid=grid, places=plot_places[-4:])
  if not interactive:
    save_fig(file_name, fig)
  ut.print_time('visualization finished')
Example #27
Source File: server.py    From Seq2Seq-Vis with Apache License 2.0 4 votes vote down vote up
def get_close_words(**request):
    """Return the words whose embeddings are most similar to a query word.

    Expected keys in ``request``: 'loc' ("src" or "tgt"), 'limit' (number of
    neighbours), 'p_method' (key into ``P_METHODS`` or "none") and 'in' (the
    query word).

    Returns a dict with 'word' (neighbour tokens), 'score' (their similarity
    to the query, ascending, so the best match is last) and 'pos' (projected
    coordinates as nested lists, or an empty list when ``p_method`` is
    "none").
    """
    current_project = list(projects.values())[0]  # type: S2SProject
    loc = request['loc']  # "src" or "tgt"
    limit = request['limit']
    p_method = request["p_method"]
    t2i = current_project.dicts['t2i'][loc]
    i2t = current_project.dicts['i2t'][loc]

    if loc == 'src':
        embeddings = current_project.embeddings['encoder']  # TODO: change !!
    else:
        embeddings = current_project.embeddings['decoder']

    word = request['in']

    my_vec = embeddings[t2i[word]]

    matrix = embeddings[:]
    # NOTE(review): presumably the per-row L2 norms of `matrix` -- confirm
    # against S2SProject.cached_norm.
    matrix_norms = current_project.cached_norm(loc, matrix)

    # dot product divided by the product of norms
    dotted = matrix.dot(my_vec)

    vector_norm = np.sqrt(np.sum(my_vec * my_vec))
    matrix_vector_norms = np.multiply(matrix_norms, vector_norm)
    neighbors = np.divide(dotted, matrix_vector_norms)

    # argsort is ascending: the last `limit` entries are the highest scores
    neighbour_ids = np.argsort(neighbors)[-limit:].tolist()

    names = [i2t[x] for x in neighbour_ids]

    # projection methods: MDS, PCA, tSNE -- all with standard params
    # Convert to a plain list here so the "none" case (which keeps the empty
    # list) no longer fails on a missing `.tolist()`.
    positions = []
    if p_method != "none":
        positions = P_METHODS[p_method].fit_transform(
            matrix[neighbour_ids, :]).tolist()

    return {'word': names,
            # 'word_vector': matrix[neighbour_ids, :].tolist(),
            'score': neighbors[neighbour_ids].tolist(),
            'pos': positions
            }
Example #28
Source File: document_clustering.py    From text-analytics-with-python with Apache License 2.0 4 votes vote down vote up
def plot_clusters(num_clusters, feature_matrix,
                  cluster_data, movie_data,
                  plot_size=(16,8)):
    """Plot clustered items in 2-D using MDS on cosine distances.

    Parameters
    ----------
    num_clusters :
        Not used by this function; kept for interface compatibility.
    feature_matrix :
        Feature vectors, passed to ``cosine_similarity`` to build the
        pairwise distance matrix that MDS embeds.
    cluster_data : dict
        Maps each cluster number to a dict holding a ``'key_features'``
        sequence; the first five entries become the legend label.
    movie_data :
        Table with ``'Cluster'`` and ``'Title'`` columns.
        NOTE(review): rows are assumed to be in the same order as the rows
        of ``feature_matrix`` -- confirm against the caller.
    plot_size : tuple
        Figure size handed to ``plt.subplots``.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # generate random color for clusters
    def generate_random_color():
        return '#%06x' % random.randint(0, 0xFFFFFF)

    # define markers for clusters
    markers = ['o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd']
    # build cosine distance matrix
    cosine_distance = 1 - cosine_similarity(feature_matrix)
    # dimensionality reduction using MDS
    mds = MDS(n_components=2, dissimilarity="precomputed",
              random_state=1)
    # get coordinates of clusters in new low-dimensional space
    plot_positions = mds.fit_transform(cosine_distance)
    x_pos, y_pos = plot_positions[:, 0], plot_positions[:, 1]
    # build cluster plotting data
    cluster_color_map = {}
    cluster_name_map = {}
    for cluster_num, cluster_details in cluster_data.items():
        # assign cluster features to unique label
        cluster_color_map[cluster_num] = generate_random_color()
        cluster_name_map[cluster_num] = ', '.join(cluster_details['key_features'][:5]).strip()
    # map each unique cluster label with its coordinates and movies
    cluster_plot_frame = pd.DataFrame({'x': x_pos,
                                       'y': y_pos,
                                       'label': movie_data['Cluster'].values.tolist(),
                                       'title': movie_data['Title'].values.tolist()
                                        })
    grouped_plot_frame = cluster_plot_frame.groupby('label')
    # set plot figure size and axes
    fig, ax = plt.subplots(figsize=plot_size)
    ax.margins(0.05)
    # Axis styling does not depend on the cluster, so apply it once.
    # Booleans are required here: the legacy 'off' strings are no longer
    # interpreted as False by matplotlib and would leave the ticks visible.
    ax.set_aspect('auto')
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, top=False,
                   labelleft=False)
    # plot each cluster using co-ordinates and movie titles
    for cluster_num, cluster_frame in grouped_plot_frame:
        # fall back to a random marker once the fixed list is exhausted
        marker = markers[cluster_num] if cluster_num < len(markers) \
                 else np.random.choice(markers, size=1)[0]
        ax.plot(cluster_frame['x'], cluster_frame['y'],
                marker=marker, linestyle='', ms=12,
                label=cluster_name_map[cluster_num],
                color=cluster_color_map[cluster_num], mec='none')
    fontP = FontProperties()
    fontP.set_size('small')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.01), fancybox=True,
              shadow=True, ncol=5, numpoints=1, prop=fontP)
    # add labels as the film titles
    # (column-wise iteration replaces the removed ``DataFrame.ix`` indexer)
    for x, y, title in zip(cluster_plot_frame['x'],
                           cluster_plot_frame['y'],
                           cluster_plot_frame['title']):
        ax.text(x, y, title, size=8)
    # show the plot
    plt.show()
Example #29
Source File: plot_gromov_barycenter.py    From POT with MIT License 4 votes vote down vote up
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
    """
    Returns an interpolated point cloud following the dissimilarity matrix C
    using SMACOF multidimensional scaling (MDS) in a target space of the
    specified dimension

    Two MDS runs are chained: the first starts from a random configuration
    drawn from a fixed-seed generator, and the second is initialised with
    the embedding found by the first.

    Parameters
    ----------
    C : ndarray, shape (ns, ns)
        dissimilarity matrix
    dim : int
          dimension of the targeted space
    max_iter :  int
        Maximum number of iterations of the SMACOF algorithm for a single run
    eps : float
        relative tolerance w.r.t stress to declare convergence

    Returns
    -------
    npos : ndarray, shape (ns, dim)
           Embedded coordinates of the interpolated point cloud (defined up
           to an isometry)
    """

    # Fixed seed so repeated calls give the same embedding.
    rng = np.random.RandomState(seed=3)

    mds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity='precomputed',
        random_state=rng,
        n_init=1)
    pos = mds.fit(C).embedding_

    # Second run, started from the first embedding; it must use the same
    # dimension as ``pos``.
    nmds = manifold.MDS(
        dim,
        max_iter=max_iter,
        eps=eps,
        dissimilarity="precomputed",
        random_state=rng,
        n_init=1)
    npos = nmds.fit_transform(C, init=pos)

    return npos


##############################################################################
# Data preparation
# ----------------
#
# The four distributions are constructed from 4 simple images 
Example #30
Source File: demo_mds.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License 4 votes vote down vote up
def _scatter_iris_classes(ax, Xtrans, y):
    """Scatter the rows of ``Xtrans`` on ``ax``, one series per class in ``y``.

    Every column of ``Xtrans`` is passed as one positional coordinate, so a
    three-column array needs a 3-D axes and a two-column array a 2-D axes.
    Colors and markers come from the module-level ``colors`` / ``markers``.
    """
    for cl, color, marker in zip(np.unique(y), colors, markers):
        points = Xtrans[y == cl]
        ax.scatter(*points.T, c=color, marker=marker, edgecolor='black')


def plot_iris_mds():
    """Save side-by-side 3-D and 2-D embeddings of the Iris data set.

    Two figures are written into ``CHART_DIR``: ``mds_demo_iris.png`` (MDS
    embeddings) and ``pca_demo_iris.png`` (PCA projections). Each figure has
    a 3-D subplot on the left and a 2-D subplot on the right.
    """

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # MDS

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    # set_facecolor replaces set_axis_bgcolor, which matplotlib removed.
    ax.set_facecolor('white')

    mds = manifold.MDS(n_components=3)
    Xtrans = mds.fit_transform(X)

    _scatter_iris_classes(ax, Xtrans, y)
    pylab.title("MDS on Iris data set in 3 dimensions")
    ax.view_init(10, -15)

    mds = manifold.MDS(n_components=2)
    Xtrans = mds.fit_transform(X)

    ax = fig.add_subplot(122)
    _scatter_iris_classes(ax, Xtrans, y)
    pylab.title("MDS on Iris data set in 2 dimensions")

    filename = "mds_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")

    # PCA

    fig = pylab.figure(figsize=(10, 4))

    ax = fig.add_subplot(121, projection='3d')
    ax.set_facecolor('white')

    pca = decomposition.PCA(n_components=3)
    Xtrans = pca.fit(X).transform(X)

    _scatter_iris_classes(ax, Xtrans, y)
    pylab.title("PCA on Iris data set in 3 dimensions")
    ax.view_init(50, -35)

    pca = decomposition.PCA(n_components=2)
    Xtrans = pca.fit_transform(X)

    ax = fig.add_subplot(122)
    _scatter_iris_classes(ax, Xtrans, y)
    pylab.title("PCA on Iris data set in 2 dimensions")

    filename = "pca_demo_iris.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")