```import numpy as np
import scipy
from scipy import sparse, linalg
from sklearn import preprocessing
from sklearn.utils.extmath import randomized_svd

from nodevectors.embedders import BaseNodeEmbedder
import csrgraph as cg

class ProNE(BaseNodeEmbedder):
def __init__(self, n_components=32, step=10, mu=0.2, theta=0.5,
exponent=0.75, verbose=True):
"""
Fast first order, global method.

Embeds by doing spectral propagation over an initial SVD embedding.
This can be seen as augmented spectral propagation.

Parameters :
--------------
step : int >= 1
Step of recursion in post processing step.
More means a more refined embedding.
Generally 5-10 is enough
mu : float
Damping factor on optimization post-processing
You rarely have to change it
theta : float
Bessel function parameter in Chebyshev polynomial approximation
You rarely have to change it
exponent : float in [0, 1]
Exponent on negative sampling
You rarely have to change it
References:
--------------
Reference impl: https://github.com/THUDM/ProNE
Reference Paper: https://www.ijcai.org/Proceedings/2019/0594.pdf
"""
self.n_components = n_components
self.step = step
self.mu = mu
self.theta = theta
self.exponent = exponent
self.verbose = verbose

def fit_transform(self, graph):
"""
NOTE: Currently only support str or int as node name for graph
Parameters
----------
nxGraph : graph data
Graph to embed
Can be any graph type that's supported by CSRGraph library
(NetworkX, numpy 2d array, scipy CSR matrix, CSR matrix components)
"""
G = cg.csrgraph(graph)
features_matrix = self.pre_factorization(G.mat,
self.n_components,
self.exponent)
vectors = ProNE.chebyshev_gaussian(
G.mat, features_matrix, self.n_components,
step=self.step, mu=self.mu, theta=self.theta)
self.model = dict(zip(G.nodes(), vectors))
return vectors

def fit(self, graph):
"""
NOTE: Currently only support str or int as node name for graph
Parameters
----------
nxGraph : graph data
Graph to embed
Can be any graph type that's supported by CSRGraph library
(NetworkX, numpy 2d array, scipy CSR matrix, CSR matrix components)
"""
G = cg.csrgraph(graph)
features_matrix = self.pre_factorization(G.mat,
self.n_components,
self.exponent)
vectors = ProNE.chebyshev_gaussian(
G.mat, features_matrix, self.n_components,
step=self.step, mu=self.mu, theta=self.theta)
self.model = dict(zip(G.nodes(), vectors))

@staticmethod
def tsvd_rand(matrix, n_components):
"""
Sparse randomized tSVD for fast embedding
"""
l = matrix.shape
# Is this csc conversion necessary?
smat = sparse.csc_matrix(matrix)
U, Sigma, VT = randomized_svd(smat,
n_components=n_components,
n_iter=5, random_state=None)
U = U * np.sqrt(Sigma)
U = preprocessing.normalize(U, "l2")
return U

@staticmethod
def pre_factorization(G, n_components, exponent):
"""
Network Embedding as Sparse Matrix Factorization
"""
C1 = preprocessing.normalize(G, "l1")
# Prepare negative samples
neg = np.array(C1.sum(axis=0)) ** exponent
neg = neg / neg.sum()
neg = sparse.diags(neg, format="csr")
neg = G.dot(neg)
# Set negative elements to 1 -> 0 when log
C1.data[C1.data <= 0] = 1
neg.data[neg.data <= 0] = 1
C1.data = np.log(C1.data)
neg.data = np.log(neg.data)
C1 -= neg
features_matrix = ProNE.tsvd_rand(C1, n_components=n_components)
return features_matrix

@staticmethod
def svd_dense(matrix, dimension):
"""
dense embedding via linalg SVD
"""
U, s, Vh = linalg.svd(matrix, full_matrices=False,
check_finite=False,
overwrite_a=True)
U = np.array(U)
U = U[:, :dimension]
s = s[:dimension]
s = np.sqrt(s)
U = U * s
U = preprocessing.normalize(U, "l2")
return U

@staticmethod
def chebyshev_gaussian(G, a, n_components=32, step=10,
mu=0.5, theta=0.5):
"""
NE Enhancement via Spectral Propagation

G : Graph (csr graph matrix)
a : features matrix from tSVD
mu : damping factor
theta : bessel function parameter
"""
nnodes = G.shape
if step == 1:
return a
A = sparse.eye(nnodes) + G
DA = preprocessing.normalize(A, norm='l1')
# L is graph laplacian
L = sparse.eye(nnodes) - DA
M = L - mu * sparse.eye(nnodes)
Lx0 = a
Lx1 = M.dot(a)
Lx1 = 0.5 * M.dot(Lx1) - a
conv = scipy.special.iv(0, theta) * Lx0
conv -= 2 * scipy.special.iv(1, theta) * Lx1
# Use Bessel function to get Chebyshev polynomials
for i in range(2, step):
Lx2 = M.dot(Lx1)
Lx2 = (M.dot(Lx2) - 2 * Lx1) - Lx0
if i % 2 == 0:
conv += 2 * scipy.special.iv(i, theta) * Lx2
else:
conv -= 2 * scipy.special.iv(i, theta) * Lx2
Lx0 = Lx1
Lx1 = Lx2
del Lx2
mm = A.dot(a - conv)
emb = ProNE.svd_dense(mm, n_components)
return emb```