# 2018/12/03~
# Fernando Gama, fgama@seas.upenn.edu.
# Luana Ruiz, rubruiz@seas.upenn.edu.
"""
graphTools.py Tools for handling graphs

Functions:

plotGraph: plots a graph from an adjacency matrix
printGraph: prints (saves) a graph from an adjacency matrix
adjacencyToLaplacian: transform an adjacency matrix into a Laplacian matrix
normalizeAdjacency: compute the normalized adjacency
normalizeLaplacian: compute the normalized Laplacian
computeGFT: computes the eigenbasis of a GSO
matrixPowers: computes the matrix powers
computeNonzeroRows: compute nonzero elements across rows
computeNeighborhood: compute the neighborhood of a graph
computeSourceNodes: compute source nodes for the source localization problem
isConnected: determines if a graph is connected
sparsifyGraph: sparsifies a given graph matrix
createGraph: creates an adjacency matrix
permIdentity: identity permutation
permDegree: order nodes by degree
permSpectralProxies: order nodes by spectral proxies score
permEDS: order nodes by EDS score
edgeFailSampling: samples the edges of a given graph
splineBasis: returns the B-spline basis (taken from github.com/mdeff)
coarsen, metis, metis_one_level, compute_perm, perm_adjacency, permCoarsening:
    graph coarsening tools (taken or adapted from github.com/mdeff)

Classes:

Graph: class containing a graph
"""

import numpy as np
import scipy.sparse
import scipy.spatial as sp
from sklearn.cluster import SpectralClustering
import os
import matplotlib
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['font.family'] = 'serif'
import matplotlib.pyplot as plt

zeroTolerance = 1e-9 # Values below this number are considered zero.

# If adjacency matrices are not symmetric these functions might not work as
# desired: the degree will be the in-degree of each node, and the Laplacian
# is not defined for directed graphs. The same caution is advised when using
# graphs with self-loops.

def plotGraph(adjacencyMatrix, **kwargs):
    """
    plotGraph(A): plots a graph from adjacency matrix A of size N x N

    Optional keyword arguments:
        'positions' (np.array, default: points on a circle of radius 1):
            size 2 x N with the position of each node
        'figSize' (int, default: 5): size of the figure
        'lineWidth' (int, default: 1): edge width
        'markerSize' (int, default: 15): node size
        'markerShape' (string, default: 'o'): node shape
        'color' (hex code string, default: '#01256E'): color of the nodes
        'nodeLabel' (list, default: None): list of length N where each element
            corresponds to the label of each node
    """
    # Data
    # Adjacency matrix
    W = adjacencyMatrix
    assert W.shape[0] == W.shape[1]
    N = W.shape[0]
    # Positions (optional)
    if 'positions' in kwargs.keys():
        pos = kwargs['positions']
    else:
        angle = np.linspace(0, 2*np.pi*(1-1/N), num = N)
        radius = 1
        pos = np.array([radius * np.sin(angle), radius * np.cos(angle)])
    # Create figure
    # Figure size
    if 'figSize' in kwargs.keys():
        figSize = kwargs['figSize']
    else:
        figSize = 5
    # Line width
    if 'lineWidth' in kwargs.keys():
        lineWidth = kwargs['lineWidth']
    else:
        lineWidth = 1
    # Marker size
    if 'markerSize' in kwargs.keys():
        markerSize = kwargs['markerSize']
    else:
        markerSize = 15
    # Marker shape
    if 'markerShape' in kwargs.keys():
        markerShape = kwargs['markerShape']
    else:
        markerShape = 'o'
    # Marker color
    if 'color' in kwargs.keys():
        markerColor = kwargs['color']
    else:
        markerColor = '#01256E'
    # Node labeling
    if 'nodeLabel' in kwargs.keys():
        doText = True
        nodeLabel = kwargs['nodeLabel']
        assert len(nodeLabel) == N
    else:
        doText = False
    # Plot
    figGraph = plt.figure(figsize = (1*figSize, 1*figSize))
    # Edges
    for i in range(N):
        for j in range(N):
            if W[i,j] > 0:
                plt.plot([pos[0,i], pos[0,j]], [pos[1,i], pos[1,j]],
                         linewidth = W[i,j] * lineWidth,
                         color = '#A8AAAF')
    # Nodes
    for i in range(N):
        plt.plot(pos[0,i], pos[1,i], color = markerColor,
                 marker = markerShape, markersize = markerSize)
        if doText:
            plt.text(pos[0,i], pos[1,i], nodeLabel[i],
                     verticalalignment = 'center',
                     horizontalalignment = 'center',
                     color = '#F2F2F3')

    return figGraph

def printGraph(adjacencyMatrix, **kwargs):
    """
    printGraph(A): wrapper around plotGraph that directly saves the figure
        (no axes nor any other decoration; more aesthetic, fewer changes)

    Optional keyword arguments:
        'saveDir' (os.path, default: '.'): directory where to save the graph
        'legend' (default: None): text for a legend
        'xLabel' (str, default: None): text for the x axis
        'yLabel' (str, default: None): text for the y axis
        'graphName' (str, default: 'graph'): name to save the file
        'positions' (np.array, default: points on a circle of radius 1):
            size 2 x N with the position of each node
        'figSize' (int, default: 5): size of the figure
        'lineWidth' (int, default: 1): edge width
        'markerSize' (int, default: 15): node size
        'markerShape' (string, default: 'o'): node shape
        'color' (hex code string, default: '#01256E'): color of the nodes
        'nodeLabel' (list, default: None): list of length N where each element
            corresponds to the label of each node
    """
    # Wrapper around plotGraph that directly saves the figure.
    W = adjacencyMatrix
    assert W.shape[0] == W.shape[1]
    # Printing options
    if 'saveDir' in kwargs.keys():
        saveDir = kwargs['saveDir']
    else:
        saveDir = '.'
    if 'legend' in kwargs.keys():
        doLegend = True
        legendText = kwargs['legend']
    else:
        doLegend = False
    if 'xLabel' in kwargs.keys():
        doXlabel = True
        xLabelText = kwargs['xLabel']
    else:
        doXlabel = False
    if 'yLabel' in kwargs.keys():
        doYlabel = True
        yLabelText = kwargs['yLabel']
    else:
        doYlabel = False
    if 'graphName' in kwargs.keys():
        graphName = kwargs['graphName']
    else:
        graphName = 'graph'

    figGraph = plotGraph(adjacencyMatrix, **kwargs)
    plt.axis('off')
    if doXlabel:
        plt.xlabel(xLabelText)
    if doYlabel:
        plt.ylabel(yLabelText)
    if doLegend:
        plt.legend(legendText)
    figGraph.savefig(os.path.join(saveDir, '%s.pdf' % graphName),
                     bbox_inches = 'tight', transparent = True)

def adjacencyToLaplacian(W):
    """
    adjacencyToLaplacian: Computes the Laplacian from an Adjacency matrix

    Input:
        W (np.array): adjacency matrix

    Output:
        L (np.array): Laplacian matrix
    """
    # Check that the matrix is square
    assert W.shape[0] == W.shape[1]
    # Compute the degree vector
    d = np.sum(W, axis = 1)
    # And build the degree matrix
    D = np.diag(d)
    # Return the Laplacian
    return D - W

def normalizeAdjacency(W):
    """
    NormalizeAdjacency: Computes the degree-normalized adjacency matrix

    Input:
        W (np.array): adjacency matrix

    Output:
        A (np.array): degree-normalized adjacency matrix
    """
    # Check that the matrix is square
    assert W.shape[0] == W.shape[1]
    # Compute the degree vector
    d = np.sum(W, axis = 1)
    # Invert the square root of the degree
    d = 1/np.sqrt(d)
    # And build the square root inverse degree matrix
    D = np.diag(d)
    # Return the Normalized Adjacency
    return D @ W @ D

def normalizeLaplacian(L):
    """
    NormalizeLaplacian: Computes the degree-normalized Laplacian matrix

    Input:
        L (np.array): Laplacian matrix

    Output:
        normL (np.array): degree-normalized Laplacian matrix
    """
    # Check that the matrix is square
    assert L.shape[0] == L.shape[1]
    # Compute the degree vector (diagonal elements of L)
    d = np.diag(L)
    # Invert the square root of the degree
    d = 1/np.sqrt(d)
    # And build the square root inverse degree matrix
    D = np.diag(d)
    # Return the Normalized Laplacian
    return D @
L @ D def computeGFT(S, order = 'no'): """ computeGFT: Computes the frequency basis (eigenvectors) and frequency coefficients (eigenvalues) of a given GSO Input: S (np.array): graph shift operator matrix order (string): 'no', 'increasing', 'totalVariation' chosen order of frequency coefficients (default: 'no') Output: E (np.array): diagonal matrix with the frequency coefficients (eigenvalues) in the diagonal V (np.array): matrix with frequency basis (eigenvectors) """ # Check the correct order input assert order == 'totalVariation' or order == 'no' or order == 'increasing' # Check the matrix is square assert S.shape[0] == S.shape[1] # Check if it is symmetric symmetric = np.allclose(S, S.T, atol = zeroTolerance) # Then, compute eigenvalues and eigenvectors if symmetric: e, V = np.linalg.eigh(S) else: e, V = np.linalg.eig(S) # Sort the eigenvalues by the desired error: if order == 'totalVariation': eMax = np.max(e) sortIndex = np.argsort(np.abs(e - eMax)) elif order == 'increasing': sortIndex = np.argsort(np.abs(e)) else: sortIndex = np.arange(0, S.shape[0]) e = e[sortIndex] V = V[:, sortIndex] E = np.diag(e) return E, V def matrixPowers(S,K): """ matrixPowers(A, K) Computes the matrix powers A^k for k = 0, ..., K-1 Inputs: A: either a single N x N matrix or a collection E x N x N of E matrices. K: integer, maximum power to be computed (up to K-1) Outputs: AK: either a collection of K matrices K x N x N (if the input was a single matrix) or a collection E x K x N x N (if the input was a collection of E matrices). """ # S can be either a single GSO (N x N) or a collection of GSOs (E x N x N) if len(S.shape) == 2: N = S.shape[0] assert S.shape[1] == N E = 1 S = S.reshape(1, N, N) scalarWeights = True elif len(S.shape) == 3: E = S.shape[0] N = S.shape[1] assert S.shape[2] == N scalarWeights = False # Now, let's build the powers of S: thisSK = np.tile(np.eye(N, N).reshape(1,N,N), [E, 1, 1]) SK = thisSK.reshape(E, 1, N, N) for k in range(1,K): thisSK = thisSK @ S SK = np.concatenate((SK, thisSK.reshape(E, 1, N, N)), axis = 1) # Take out the first dimension if it was a single GSO if scalarWeights: SK = SK.reshape(K, N, N) return SK def computeNonzeroRows(S, Nl = 'all'): """ computeNonzeroRows: Find the position of the nonzero elements of each row of a matrix Input: S (np.array): matrix Nl (int or 'all'): number of rows to compute the nonzero elements; if 'all', then Nl = S.shape[0]. Rows are counted from the top. Output: nonzeroElements (list): list of size Nl where each element is an array of the indices of the nonzero elements of the corresponding row. """ # Find the position of the nonzero elements of each row of the matrix S. # Nl = 'all' means for all rows, otherwise, it will be an int. if Nl == 'all': Nl = S.shape[0] assert Nl <= S.shape[0] # Save neighborhood variable neighborhood = [] # For each of the selected nodes for n in range(Nl): neighborhood += [np.flatnonzero(S[n,:])] return neighborhood def computeNeighborhood(S, K, N = 'all', nb = 'all', outputType = 'list'): """ computeNeighborhood: compute the set of nodes within the K-hop neighborhood of a graph (i.e. all nodes that can be reached within K-hops of each node) computeNeighborhood(W, K, N = 'all', nb = 'all', outputType = 'list') Input: W (np.array): adjacency matrix K (int): K-hop neighborhood to compute the neighbors N (int or 'all'): how many nodes (from top) to compute the neighbors from (default: 'all'). nb (int or 'all'): how many nodes to consider valid when computing the neighborhood (i.e. 
nodes with index greater than or equal to nb are removed from the
            returned neighborhoods, but they are still used as intermediate
            hops; hence, nodes smaller than nb that can only be reached
            through nodes greater than nb are included. default: 'all')
        outputType ('list' or 'matrix'): choose if the output is given in the
            form of a list of arrays, or a matrix in which neighborhoods
            smaller than the maximum one are padded with the node's own index
            (default: 'list')

    Output:
        neighborhood (np.array or list): contains the indices of the
            neighboring nodes following the order established by the
            adjacency matrix.
    """
    # outputType is either a list (a list of np.arrays) or a matrix.
    assert outputType == 'list' or outputType == 'matrix'
    # Here, we can assume S is already sparse, in which case it is a list of
    # sparse matrices, or that S is full, in which case it is a 3-D array.
    if isinstance(S, list):
        # If it is a list, it has to be a list of matrices, where the length
        # of the list has to be the number of edge weights. But we actually
        # need to sum over all edges to be sure we consider all reachable
        # nodes on at least one of the edge dimensions
        newS = 0.
        for e in range(len(S)):
            # First check it's a matrix, and a square one
            assert len(S[e].shape) == 2
            assert S[e].shape[0] == S[e].shape[1]
            # For each edge, convert to sparse (in COO because we care about
            # coordinates to find the neighborhoods)
            newS += scipy.sparse.coo_matrix(
                             (np.abs(S[e]) > zeroTolerance).astype(S[e].dtype))
        S = (newS > zeroTolerance).astype(newS.dtype)
    else:
        # If S is not a list, check that it is either an E x N x N or an
        # N x N array.
        assert len(S.shape) == 2 or len(S.shape) == 3
        if len(S.shape) == 3:
            assert S.shape[1] == S.shape[2]
            # If it has an edge feature dimension, just add over that
            # dimension. We only need one nonzero value along the vector to
            # have an edge there. (Obs.: While we normally assume that all
            # weights are positive, let's just add the abs() values to avoid
            # any cancellations.)
            S = np.sum(np.abs(S), axis = 0)
            S = scipy.sparse.coo_matrix((S > zeroTolerance).astype(S.dtype))
        else:
            # In this case, if it is a 2-D array, we do not need to add over
            # the edge dimension, so we just sparsify it
            assert S.shape[0] == S.shape[1]
            S = scipy.sparse.coo_matrix((S > zeroTolerance).astype(S.dtype))
    # Now, we finally have a sparse, binary matrix, with the connections.
    # Now check that K and N are correct inputs.
    # K is an int (target K-hop neighborhood)
    # N is either 'all' or an int determining how many rows
    assert K >= 0 # K = 0 is just the identity
    # Check how many nodes we want to obtain
    if N == 'all':
        N = S.shape[0]
    if nb == 'all':
        nb = S.shape[0]
    assert N >= 0 and N <= S.shape[0] # Cannot return more nodes than there are
    assert nb >= 0 and nb <= S.shape[0]
    # All nodes are in their own neighborhood, so
    allNeighbors = [ [n] for n in range(S.shape[0])]
    # Now, if K = 0, then these are all the neighborhoods we need.
    # And also keep track only of the nodes we care about
    neighbors = [ [n] for n in range(N)]
    # But if K > 0
    if K > 0:
        # Let's start with the one-hop neighborhood of all nodes (we need this)
        nonzeroS = list(S.nonzero())
        # This is a tuple with two arrays, the first one containing the row
        # index of the nonzero elements, and the second one containing the
        # column index of the nonzero elements.
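        # As a purely illustrative (hypothetical) example: for the 3-node
        # path graph with
        #     S = [[0, 1, 0],
        #          [1, 0, 1],
        #          [0, 1, 0]]
        # S.nonzero() returns (array([0, 1, 1, 2]), array([1, 0, 2, 1])),
        # i.e. node 0 is connected to node 1, node 1 to nodes 0 and 2, and
        # node 2 to node 1.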
        # Now, we want the one-hop neighborhood of all nodes (and all nodes
        # have a one-hop neighborhood, since the graphs are connected)
        for n in range(len(nonzeroS[0])):
            # The list in index 0 is the nodes, the list in index 1 is the
            # corresponding neighbor
            allNeighbors[nonzeroS[0][n]].append(nonzeroS[1][n])
        # Now that we have the one-hop neighbors, we just need to do a depth
        # first search looking for the one-hop neighborhood of each neighbor
        # and so on.
        oneHopNeighbors = allNeighbors.copy()
        # We have already visited the nodes themselves, since we already
        # gathered the one-hop neighbors.
        visitedNodes = [ [n] for n in range(N)]
        # Keep only the one-hop neighborhood of the ones we're interested in
        neighbors = [list(set(allNeighbors[n])) for n in range(N)]
        # For each hop
        for k in range(1,K):
            # For each of the nodes we care about
            for i in range(N):
                # Store the new neighbors to be included for node i
                newNeighbors = []
                # Take each of the neighbors we already have
                for j in neighbors[i]:
                    # and if we haven't visited those neighbors yet
                    if j not in visitedNodes[i]:
                        # Just look for our neighbor's one-hop neighbors and
                        # add them to the neighborhood list
                        newNeighbors.extend(oneHopNeighbors[j])
                        # And don't forget to add the node to the visited ones
                        # (we already have its one-hop neighborhood)
                        visitedNodes[i].append(j)
                # And now that we have added all the new neighbors, we add
                # them to the old neighbors
                neighbors[i].extend(newNeighbors)
                # And get rid of those that appear more than once
                neighbors[i] = list(set(neighbors[i]))
    # Now that all nodes have been collected, get rid of those beyond nb
    for i in range(N):
        # Get the neighborhood
        thisNeighborhood = neighbors[i].copy()
        # And get rid of the excess nodes
        neighbors[i] = [j for j in thisNeighborhood if j < nb]
    if outputType == 'matrix':
        # List containing all the neighborhood sizes
        neighborhoodSizes = [len(x) for x in neighbors]
        # Obtain max number of neighbors
        maxNeighborhoodSize = max(neighborhoodSizes)
        # Then we have to check each neighborhood and find if we need to add
        # more nodes (the node itself) to pad it so we can build a matrix
        paddedNeighbors = []
        for n in range(N):
            paddedNeighbors += [np.concatenate(
                      (neighbors[n],
                       n * np.ones(maxNeighborhoodSize - neighborhoodSizes[n]))
                                               )]
        # And now that every element in the list paddedNeighbors has the same
        # length, we can make it a matrix
        neighbors = np.array(paddedNeighbors, dtype = int)

    return neighbors

def computeSourceNodes(A, C):
    """
    computeSourceNodes: compute source nodes for the source localization
        problem

    Input:
        A (np.array): adjacency matrix of shape N x N
        C (int): number of classes

    Output:
        sourceNodes (list): contains the indices of the C source nodes

    Uses the adjacency matrix to compute C communities by means of spectral
    clustering, and then selects the node with largest degree within each
    community.
    """
    sourceNodes = []
    degree = np.sum(A, axis = 0) # degree of each node
    # Compute communities
    communityClusters = SpectralClustering(n_clusters = C,
                                           affinity = 'precomputed',
                                           assign_labels = 'discretize')
    communityClusters = communityClusters.fit(A)
    communityLabels = communityClusters.labels_
    # For each community
    for c in range(C):
        communityNodes = np.nonzero(communityLabels == c)[0]
        degreeSorted = np.argsort(degree[communityNodes])
        sourceNodes = sourceNodes + [communityNodes[degreeSorted[-1]]]

    return sourceNodes

def isConnected(W):
    """
    isConnected: determine if a graph is connected

    Input:
        W (np.array): adjacency matrix

    Output:
        connected (bool): True if the graph is connected, False otherwise
    Obs.: If the graph is directed, we consider it connected whenever the
    underlying undirected graph (i.e. the graph obtained by dropping the
    direction of all edges) is connected.
    """
    undirected = np.allclose(W, W.T, atol = zeroTolerance)
    if not undirected:
        W = 0.5 * (W + W.T)
    L = adjacencyToLaplacian(W)
    E, V = computeGFT(L)
    e = np.diag(E) # eigenvalues only
    # Count how many eigenvalues are (numerically) zero:
    nComponents = np.sum(e < zeroTolerance) # Number of connected components
    if nComponents == 1:
        connected = True
    else:
        connected = False
    return connected

def sparsifyGraph(W, sparsificationType, p):
    """
    sparsifyGraph: sparsifies a given graph matrix

    Input:
        W (np.array): adjacency matrix
        sparsificationType ('threshold' or 'NN'): threshold or
            nearest-neighbor sparsification
        p (float or int): sparsification parameter (value of the threshold
            under which edges are deleted, or the number of NN to keep)

    Output:
        W (np.array): adjacency matrix of sparsified graph

    Observation:
        - If it is an undirected graph, when computing the kNN edges, the
          resulting graph might be directed. Then, the graph is converted
          into an undirected one by taking the average of incoming and
          outgoing edges (this might result in a graph where some nodes have
          more than kNN neighbors).
        - If it is a directed graph, remember that element (i,j) of the
          adjacency matrix corresponds to edge (j,i). This means that each
          row of the matrix has nonzero elements on all the incoming edges.
          In the directed case, the number of nearest neighbors is with
          respect to the incoming edges (i.e. kNN incoming edges are kept).
        - If the original graph is connected, then thresholding might lead to
          a disconnected graph. If this is the case, the threshold is
          decreased (halved) until the resulting graph is connected. To
          recover the actual threshold used (lower than the one specified)
          do np.min(W[np.nonzero(W)]). In the case of kNN, if the resulting
          graph is disconnected, the parameter k is increased by 1 until the
          resulting graph is connected.
    """
    # Check input arguments
    N = W.shape[0]
    assert W.shape[1] == N
    assert sparsificationType == 'threshold' or sparsificationType == 'NN'

    connected = isConnected(W)
    undirected = np.allclose(W, W.T, atol = zeroTolerance)
    # np.allclose() gives True if matrices W and W.T are the same up to atol.

    # Start with thresholding
    if sparsificationType == 'threshold':
        Wnew = W.copy()
        Wnew[np.abs(Wnew) < p] = 0.
        # If the original graph was connected, we need to be sure this one is
        # connected as well
        if connected:
            # Check if the new graph is connected
            newGraphIsConnected = isConnected(Wnew)
            # While it is not connected
            while not newGraphIsConnected:
                # We need to reduce the size of p until we get it connected
                p = p/2.
                Wnew = W.copy()
                Wnew[np.abs(Wnew) < p] = 0.
# Check if it is connected now newGraphIsConnected = isConnected(Wnew) # Now, let's move to k nearest neighbors elif sparsificationType == 'NN': # We sort the values of each row (in increasing order) Wsorted = np.sort(W, axis = 1) # Pick the kth largest kthLargest = Wsorted[:, -p] # array of size N # Find the elements that are greater or equal that these values maskOfEdgesToKeep = (W >= kthLargest.reshape([N,1])).astype(W.dtype) # And keep those edges Wnew = W * maskOfEdgesToKeep # If the original graph was connected if connected: # Check if the new graph is connected newGraphIsConnected = isConnected(Wnew) # While it is not connected while not newGraphIsConnected: # Increase the number of k-NN by 1 p = p + 1 # Compute the new kth Largest kthLargest = Wsorted[:, -p] # array of size N # Find the elements that are greater or equal that these values maskOfEdgesToKeep = (W >= kthLargest.reshape([N,1]))\ .astype(W.dtype) # And keep those edges Wnew = W * maskOfEdgesToKeep # Check if it is connected now newGraphIsConnected = isConnected(Wnew) # if it's undirected, this is the moment to reconvert it as undirected if undirected: Wnew = 0.5 * (Wnew + Wnew.T) return Wnew def createGraph(graphType, N, graphOptions): """ createGraph: creates a graph of a specified type Input: graphType (string): 'SBM', 'SmallWorld', 'fuseEdges', and 'adjacency' N (int): Number of nodes graphOptions (dict): Depends on the type selected. Obs.: More types to come. Output: W (np.array): adjacency matrix of shape N x N Optional inputs (by keyword): graphType: 'SBM' 'nCommunities': (int) number of communities 'probIntra': (float) probability of drawing an edge between nodes inside the same community 'probInter': (float) probability of drawing an edge between nodes of different communities Obs.: This always results in a connected graph. graphType: 'SmallWorld' 'probEdge': probability of drawing an edge between nodes 'probRewiring': probability of rewiring an edge Obs.: This always results in a connected graph. graphType: 'fuseEdges' (Given a collection of adjacency matrices of graphs with the same number of nodes, this graph type is a fusion of the edges of the collection of graphs, following different desirable properties) 'adjacencyMatrices' (np.array): collection of matrices in a tensor np.array of dimension nGraphs x N x N 'aggregationType' ('sum' or 'avg'): if 'sum', edges are summed across the collection of matrices, if 'avg' they are averaged 'normalizationType' ('rows', 'cols' or 'no'): if 'rows', the values of the rows (after aggregated) are normalized to sum to one, if 'cols', it is for the columns, if it is 'no' there is no normalization. 
'isolatedNodes' (bool): if True, keep isolated nodes should there be any 'forceUndirected' (bool): if True, make the resulting graph undirected by replacing directed edges by the average of the outgoing and incoming edges between each pair of nodes 'forceConnected' (bool): if True, make the graph connected by taking the largest connected component 'nodeList' (list): this is an empty list that, after calling the function, will contain a list of the nodes that were kept when creating the adjacency matrix out of fusing the given ones with the desired options 'extraComponents' (list, optional): if the resulting fused adjacency matrix is not connected, and then forceConnected = True, then this list will contain two lists, the first one with the adjacency matrices of the smaller connected components, and the second one a corresponding list with the index of the nodes that were kept for each of the smaller connected components (Obs.: If a given single graph is required to be adapted with any of the options in this function, then it can just be expanded to have one dimension along axis = 0 and fed to this function to obtain the corresponding graph with the desired properties) graphType: 'adjacency' 'adjacencyMatrix' (np.array): just return the given adjacency matrix (after checking it has N nodes) """ # Check assert N >= 0 if graphType == 'SBM': assert(len(graphOptions.keys())) == 3 C = graphOptions['nCommunities'] # Number of communities assert int(C) == C # Check that the number of communities is an integer pii = graphOptions['probIntra'] # Intracommunity probability pij = graphOptions['probInter'] # Intercommunity probability assert 0 <= pii <= 1 # Check that they are valid probabilities assert 0 <= pij <= 1 # We create the SBM as follows: we generate random numbers between # 0 and 1 and then we compare them elementwise to a matrix of the # same size of pii and pij to set some of them to one and other to # zero. # Let's start by creating the matrix of pii and pij. # First, we need to know how many numbers on each community. nNodesC = [N//C] * C # Number of nodes per community: floor division c = 0 # counter for community while sum(nNodesC) < N: # If there are still nodes to put in communities # do it one for each (balanced communities) nNodesC[c] = nNodesC[c] + 1 c += 1 # So now, the list nNodesC has how many nodes are on each community. # We proceed to build the probability matrix. # We create a zero matrix probMatrix = np.zeros([N,N]) # And fill ones on the block diagonals following the number of nodes. # For this, we need the cumulative sum of the number of nodes nNodesCIndex = [0] + np.cumsum(nNodesC).tolist() # The zero is added because it is the first index for c in range(C): probMatrix[ nNodesCIndex[c] : nNodesCIndex[c+1] , \ nNodesCIndex[c] : nNodesCIndex[c+1] ] = \ np.ones([nNodesC[c], nNodesC[c]]) # The matrix probMatrix has one in the block diagonal, which should # have probabilities p_ii and 0 in the offdiagonal that should have # probabilities p_ij. So that probMatrix = pii * probMatrix + pij * (1 - probMatrix) # has pii in the intracommunity blocks and pij in the intercommunity # blocks. # Now we're finally ready to generate a connected graph connectedGraph = False while not connectedGraph: # Generate random matrix W = np.random.rand(N,N) W = (W < probMatrix).astype(np.float64) # This matrix will have a 1 if the element ij is less or equal than # p_ij, so that if p_ij = 0.8, then it will be 1 80% of the times # (on average). 
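        # As a hypothetical illustration: with C = 2 balanced communities
        # over N = 4 nodes, probMatrix is
        #     [[pii, pii, pij, pij],
        #      [pii, pii, pij, pij],
        #      [pij, pij, pii, pii],
        #      [pij, pij, pii, pii]]
        # and each candidate edge (i,j) is kept exactly when the uniform
        # sample W[i,j] falls below the corresponding entry.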
# We need to make it undirected and without self-loops, so keep the # upper triangular part after the main diagonal W = np.triu(W, 1) # And add it to the lower triangular part W = W + W.T # Now let's check that it is connected connectedGraph = isConnected(W) elif graphType == 'SmallWorld': # Function provided by Tuomo Mäki-Marttunen # Connectedness introduced by Dr. S. Segarra. # Adapted to numpy by Fernando Gama. p = graphOptions['probEdge'] # Edge probability q = graphOptions['probRewiring'] # Rewiring probability # Positions on a circle posX = np.cos(2*np.pi*np.arange(0,N)/N).reshape([N,1]) # x axis posY = np.sin(2*np.pi*np.arange(0,N)/N).reshape([N,1]) # y axis pos = np.concatenate((posX, posY), axis = 1) # N x 2 position matrix connectedGraph = False W = np.zeros([N,N], dtype = pos.dtype) # Empty adjacency matrix D = sp.distance.squareform(sp.distance.pdist(pos)) ** 2 # Squared # distance matrix while not connectedGraph: # 1. The generation of locally connected network with given # in-degree: for n in range(N): # Go through all nodes in order nn = np.random.binomial(N, p) # Possible inputs are all but the node itself: pind = np.concatenate((np.arange(0,n), np.arange(n+1, N))) sortedIndices = np.argsort(D[n,pind]) dists = D[n,pind[sortedIndices]] inds_equallyfar = np.nonzero(dists == dists[nn])[0] if len(inds_equallyfar) == 1: # if a unique farthest node to # be chosen as input W[pind[sortedIndices[0:nn]],n] = 1 # choose as inputs all # from closest to the farthest-to-be-chosen else: W[pind[sortedIndices[0:np.min(inds_equallyfar)]],n] = 1 # choose each nearer than farthest-to-be-chosen r=np.random.permutation(len(inds_equallyfar)).astype(np.int) # choose randomly between the ones that are as far as # be-chosen W[pind[sortedIndices[np.min(inds_equallyfar)\ +r[0:nn-np.min(inds_equallyfar)+1]]],n] = 1; # 2. Watts-Strogatz perturbation: for n in range(N): A = np.nonzero(W[:,n])[0] # find the in-neighbours of n for j in range(len(A)): if np.random.rand() < q: freeind = 1 - W[:,n] # possible new candidates are # all the ones not yet outputting to n # (excluding n itself) freeind[n] = 0 freeind[A[j]] = 1 B = np.nonzero(freeind)[0] r = np.floor(np.random.rand()*len(B)).astype(np.int) W[A[j],n] = 0 W[B[r],n] = 1; # symmetrize M W = np.triu(W) W = W + W.T # Check that graph is connected connectedGraph = isConnected(W) elif graphType == 'fuseEdges': # This alternative assumes that there are multiple graphs that have to # be fused into one. # This will be done in two ways: average or sum. # On top, options will include: to symmetrize it or not, to make it # connected or not. # The input data is a tensor E x N x N where E are the multiple edge # features that we want to fuse. 
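    # A minimal sketch of the graphOptions dictionary expected by this branch
    # (keys as documented in the docstring above; the values shown are only
    # illustrative):
    #     graphOptions = {'adjacencyMatrices': Ws, # np.array, shape E x N x N
    #                     'aggregationType': 'sum',
    #                     'normalizationType': 'no',
    #                     'isolatedNodes': False,
    #                     'forceUndirected': True,
    #                     'forceConnected': True,
    #                     'nodeList': []}          # filled in place on return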
# Argument N is ignored # Data assert 7 <= len(graphOptions.keys()) <= 8 W = graphOptions['adjacencyMatrices'] # Data in format E x N x N assert len(W.shape) == 3 N = W.shape[1] # Number of nodes assert W.shape[1] == W.shape[2] # Name the list with all nodes to keep nodeList = graphOptions['nodeList'] # This should be an empty list # If there is an 8th argument, this is where we are going to save the # extra components which are not the largest if len(graphOptions.keys()) == 8: logExtraComponents = True extraComponents = graphOptions['extraComponents'] # This will be a list with two elements, the first elements will be # the adjacency matrix of the other (smaller) components, whereas # the second elements will be a list of the same size, where each # elements is yet another list of nodes to keep from the original # graph to build such an adjacency matrix (akin to nodeList) else: logExtraComponents = False # Flag to know if we need to log the # extra components or not allNodes = np.arange(N) # What type of node aggregation aggregationType = graphOptions['aggregationType'] assert aggregationType == 'sum' or aggregationType == 'avg' if aggregationType == 'sum': W = np.sum(W, axis = 0) elif aggregationType == 'avg': W = np.mean(W, axis = 0) # Normalization (sum of rows or columns is equal to 1) normalizationType = graphOptions['normalizationType'] if normalizationType == 'rows': rowSum = np.sum(W, axis = 1).reshape([N, 1]) rowSum[np.abs(rowSum) < zeroTolerance] = 1. W = W/np.tile(rowSum, [1, N]) elif normalizationType == 'cols': colSum = np.sum(W, axis = 0).reshape([1, N]) colSum[np.abs(colSum) < zeroTolerance] = 1. W = W/np.tile(colSum, [N, 1]) # Discarding isolated nodes isolatedNodes = graphOptions['isolatedNodes'] # if True, isolated nodes # are allowed, if not, discard them if isolatedNodes == False: # A Node is isolated when it's degree is zero degVector = np.sum(np.abs(W), axis = 0) # Keep nodes whose degree is not zero keepNodes = np.nonzero(degVector > zeroTolerance) # Get the first element of the output tuple, for some reason if # we take keepNodes, _ as the output it says it cannot unpack it. keepNodes = keepNodes[0] if len(keepNodes) < N: W = W[keepNodes][:, keepNodes] # Update the nodes kept allNodes = allNodes[keepNodes] # Check if we need to make it undirected or not forceUndirected = graphOptions['forceUndirected'] # if True, make it # undirected by using the average between nodes (careful, some # edges might cancel) if forceUndirected == True: W = 0.5 * (W + W.T) # Finally, making it a connected graph forceConnected = graphOptions['forceConnected'] # if True, make the # graph connected if forceConnected == True: # Check if the given graph is already connected connectedFlag = isConnected(W) # If it is not connected if not connectedFlag: # Find all connected components nComponents, nodeLabels = \ scipy.sparse.csgraph.connected_components(W) # Now, we have to pick the connected component with the largest # number of nodes, because that's the one to output. # Momentarily store the rest. 
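            # (For reference: scipy.sparse.csgraph.connected_components
            # returns the number of components and a label per node; e.g. a
            # hypothetical output (2, array([0, 0, 1])) means nodes 0 and 1
            # form one component and node 2 forms another.)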
# Let's get the list of nodes we have so far partialNodes = np.arange(W.shape[0]) # Create the lists to store the adjacency matrices and # the official lists of nodes to keep eachAdjacency = [None] * nComponents eachNodeList = [None] * nComponents # And we want to keep the one with largest number of nodes, but # we will do only one for, so we need to be checking which one # is, so we will compare against the maximum number of nodes # registered so far nNodesMax = 0 # To start for l in range(nComponents): # Find the nodes belonging to the lth connected component thisNodesToKeep = partialNodes[nodeLabels == l] # This adjacency matrix eachAdjacency[l] = W[thisNodesToKeep][:, thisNodesToKeep] # The actual list eachNodeList[l] = allNodes[thisNodesToKeep] # Check the number of nodes thisNumberOfNodes = len(thisNodesToKeep) # And see if this is the largest if thisNumberOfNodes > nNodesMax: # Store the new number of maximum nodes nNodesMax = thisNumberOfNodes # Store the element of the list that satisfies it indexLargestComponent = l # Once we have been over all the connected components, just # output the one with largest number of nodes W = eachAdjacency.pop(indexLargestComponent) allNodes = eachNodeList.pop(indexLargestComponent) # Check that it is effectively connected assert isConnected(W) # And, if we have the extra argument, return all the other # connected components if logExtraComponents == True: extraComponents.append(eachAdjacency) extraComponents.append(eachNodeList) # To end, update the node list, so that it is returned through argument nodeList.extend(allNodes.tolist()) elif graphType == 'adjacency': assert 'adjacencyMatrix' in graphOptions.keys() W = graphOptions['adjacencyMatrix'] assert W.shape[0] == W.shape[1] == N return W # Permutation functions def permIdentity(S): """ permIdentity: determines the identity permnutation Input: S (np.array): matrix Output: permS (np.array): matrix permuted (since, there's no permutation, it's the same input matrix) order (list): list of indices to make S become permS. """ assert len(S.shape) == 2 or len(S.shape) == 3 if len(S.shape) == 2: assert S.shape[0] == S.shape[1] S = S.reshape([1, S.shape[0], S.shape[1]]) scalarWeights = True else: assert S.shape[1] == S.shape[2] scalarWeights = False # Number of nodes N = S.shape[1] # Identity order order = np.arange(N) # If the original GSO assumed scalar weights, get rid of the extra dimension if scalarWeights: S = S.reshape([N, N]) return S, order.tolist() def permDegree(S): """ permDegree: determines the permutation by degree (nodes ordered from highest degree to lowest) Input: S (np.array): matrix Output: permS (np.array): matrix permuted order (list): list of indices to permute S to turn into permS. 
""" assert len(S.shape) == 2 or len(S.shape) == 3 if len(S.shape) == 2: assert S.shape[0] == S.shape[1] S = S.reshape([1, S.shape[0], S.shape[1]]) scalarWeights = True else: assert S.shape[1] == S.shape[2] scalarWeights = False # Compute the degree d = np.sum(np.sum(S, axis = 1), axis = 0) # Sort ascending order (from min degree to max degree) order = np.argsort(d) # Reverse sorting order = np.flip(order,0) # And update S S = S[:,order,:][:,:,order] # If the original GSO assumed scalar weights, get rid of the extra dimension if scalarWeights: S = S.reshape([S.shape[1], S.shape[2]]) return S, order.tolist() def permSpectralProxies(S): """ permSpectralProxies: determines the permutation by the spectral proxies score (from highest to lowest) Input: S (np.array): matrix Output: permS (np.array): matrix permuted order (list): list of indices to permute S to turn into permS. """ # Design decisions: k = 8 # Parameter of the spectral proxies method. This is fixed for # consistency with the calls of the other permutation functions. # Design decisions: If we are given a multi-edge GSO, we're just going to # average all the edge dimensions and use that to compute the spectral # proxies. # Check S is of correct shape assert len(S.shape) == 2 or len(S.shape) == 3 # If it is a matrix, just use it if len(S.shape) == 2: assert S.shape[0] == S.shape[1] scalarWeights = True simpleS = S.copy() # If it is a tensor of shape E x N x N, average over dimension E. else: assert S.shape[1] == S.shape[2] scalarWeights = False # Average over dimension E simpleS = np.mean(S, axis = 0) N = simpleS.shape[0] # Number of nodes ST = simpleS.conj().T # Transpose of S, needed for the method Sk = np.linalg.matrix_power(simpleS,k) # S^k STk = np.linalg.matrix_power(ST,k) # (S^T)^k STkSk = STk @ Sk # (S^T)^k * S^k, needed for the method nodes = [] # Where to save the nodes, order according the criteria it = 1 M = N # This opens up the door if we want to use this code for the actual # selection of nodes, instead of just ordering while len(nodes) < M: remainingNodes = [n for n in range(N) if n not in nodes] # Computes the eigenvalue decomposition phi_eig, phi_ast_k = np.linalg.eig( STkSk[remainingNodes][:,remainingNodes]) phi_ast_k = phi_ast_k[:][:,np.argmin(phi_eig.real)] abs_phi_ast_k_2 = np.square(np.absolute(phi_ast_k)) newNodePos = np.argmax(abs_phi_ast_k_2) nodes.append(remainingNodes[newNodePos]) it += 1 if scalarWeights: S = S[nodes,:][:,nodes] else: S = S[:,nodes,:][:,:,nodes] return S, nodes def permEDS(S): """ permEDS: determines the permutation by the experimentally designed sampling score (from highest to lowest) Input: S (np.array): matrix Output: permS (np.array): matrix permuted order (list): list of indices to permute S to turn into permS. """ # Design decisions: If we are given a multi-edge GSO, we're just going to # average all the edge dimensions and use that to compute the spectral # proxies. # Check S is of correct shape assert len(S.shape) == 2 or len(S.shape) == 3 # If it is a matrix, just use it if len(S.shape) == 2: assert S.shape[0] == S.shape[1] scalarWeights = True simpleS = S.copy() # If it is a tensor of shape E x N x N, average over dimension E. 
else: assert S.shape[1] == S.shape[2] scalarWeights = False # Average over dimension E simpleS = np.mean(S, axis = 0) E, V = np.linalg.eig(simpleS) # Eigendecomposition of S kappa = np.max(np.absolute(V), axis=1) kappa2 = np.square(kappa) # The probabilities assigned to each node are # proportional to kappa2, so in the mean, the ones with largest kappa^2 # would be "sampled" more often, and as suche are more important (i.e. # they have a higher score) # Sort ascending order (from min degree to max degree) order = np.argsort(kappa2) # Reverse sorting order = np.flip(order,0) if scalarWeights: S = S[order,:][:,order] else: S = S[:,order,:][:,:,order] return S, order.tolist() def edgeFailSampling(W, p): """ edgeFailSampling: randomly delete the edges of a given graph Input: W (np.array): adjacency matrix p (float): probability of deleting an edge Output: W (np.array): adjacency matrix with some edges randomly deleted Obs.: The resulting graph need not be connected (even if the input graph is) """ assert 0 <= p <= 1 N = W.shape[0] assert W.shape[1] == N undirected = np.allclose(W, W.T, atol = zeroTolerance) maskEdges = np.random.rand(N, N) maskEdges = (maskEdges > p).astype(W.dtype) # Put a 1 with probability 1-p W = maskEdges * W if undirected: W = np.triu(W) W = W + W.T return W class Graph(): """ Graph: class to handle a graph with several of its properties Initialization: graphType (string): 'SBM', 'SmallWorld', 'fuseEdges', and 'adjacency' N (int): number of nodes [optionalArguments]: related to the specific type of graph; see createGraph() for details. Attributes: .N (int): number of nodes .M (int): number of edges .W (np.array): weighted adjacency matrix .D (np.array): degree matrix .A (np.array): unweighted adjacency matrix .L (np.array): Laplacian matrix (if graph is undirected and has no self-loops) .S (np.array): graph shift operator (weighted adjacency matrix by default) .E (np.array): eigenvalue (diag) matrix (graph frequency coefficients) .V (np.array): eigenvector matrix (graph frequency basis) .undirected (bool): True if the graph is undirected .selfLoops (bool): True if the graph has self-loops Methods: .computeGFT(): computes the GFT of the existing stored GSO and stores it internally in self.V and self.E (if this is never called, the corresponding attributes are set to None) .setGSO(S, GFT = 'no'): sets a new GSO Inputs: S (np.array): new GSO matrix (has to have the same number of nodes), updates attribute .S GFT ('no', 'increasing' or 'totalVariation'): order of eigendecomposition; if 'no', no eigendecomposition is made, and the attributes .V and .E are set to None """ # in this class we provide, easily as attributes, the basic notions of # a graph. This serve as a building block for more complex notions as well. def __init__(self, graphType, N, graphOptions): assert N > 0 #\\\ Create the graph (Outputs adjacency matrix): self.W = createGraph(graphType, N, graphOptions) # TODO: Let's start easy: make it just an N x N matrix. We'll see later # the rest of the things just as handling multiple features and stuff. #\\\ Number of nodes: self.N = (self.W).shape[0] #\\\ Bool for graph being undirected: self.undirected = np.allclose(self.W, (self.W).T, atol = zeroTolerance) # np.allclose() gives true if matrices W and W.T are the same up to # atol. 
#\\\ Bool for graph having self-loops: self.selfLoops = True \ if np.sum(np.abs(np.diag(self.W)) > zeroTolerance) > 0 \ else False #\\\ Degree matrix: self.D = np.diag(np.sum(self.W, axis = 1)) #\\\ Number of edges: self.M = int(np.sum(np.triu(self.W)) if self.undirected \ else np.sum(self.W)) #\\\ Unweighted adjacency: self.A = (np.abs(self.W) > 0).astype(self.W.dtype) #\\\ Laplacian matrix: # Only if the graph is undirected and has no self-loops if self.undirected and not self.selfLoops: self.L = adjacencyToLaplacian(self.W) else: self.L = None #\\\ GSO (Graph Shift Operator): # The weighted adjacency matrix by default self.S = self.W #\\\ GFT: Declare variables but do not compute it unless specifically # requested self.E = None # Eigenvalues self.V = None # Eigenvectors def computeGFT(self): # Compute the GFT of the stored GSO if self.S is not None: #\\ GFT: # Compute the eigenvalues (E) and eigenvectors (V) self.E, self.V = computeGFT(self.S, order = 'totalVariation') def setGSO(self, S, GFT = 'no'): # This simply sets a matrix as a new GSO. It has to have the same number # of nodes (otherwise, it's a different graph!) and it can or cannot # compute the GFT, depending on the options for GFT assert S.shape[0] == S.shape[1] == self.N assert GFT == 'no' or GFT == 'increasing' or GFT == 'totalVariation' # Set the new GSO self.S = S if GFT == 'no': self.E = None self.V = None else: self.E, self.V = computeGFT(self.S, order = GFT) def splineBasis(K, x, degree=3): # Function written by M. Defferrard, taken verbatim (except for function # name), from # https://github.com/mdeff/cnn_graph/blob/master/lib/models.py#L662 """ Return the B-spline basis. K: number of control points. x: evaluation points or number of evenly distributed evaluation points. degree: degree of the spline. Cubic spline by default. """ if np.isscalar(x): x = np.linspace(0, 1, x) # Evenly distributed knot vectors. kv1 = x.min() * np.ones(degree) kv2 = np.linspace(x.min(), x.max(), K-degree+1) kv3 = x.max() * np.ones(degree) kv = np.concatenate((kv1, kv2, kv3)) # Cox - DeBoor recursive function to compute one spline over x. def cox_deboor(k, d): # Test for end conditions, the rectangular degree zero spline. if (d == 0): return ((x - kv[k] >= 0) & (x - kv[k + 1] < 0)).astype(int) denom1 = kv[k + d] - kv[k] term1 = 0 if denom1 > 0: term1 = ((x - kv[k]) / denom1) * cox_deboor(k, d - 1) denom2 = kv[k + d + 1] - kv[k + 1] term2 = 0 if denom2 > 0: term2 = ((-(x - kv[k + d + 1]) / denom2) * cox_deboor(k + 1, d - 1)) return term1 + term2 # Compute basis for each point basis = np.column_stack([cox_deboor(k, degree) for k in range(K)]) basis[-1,-1] = 1 return basis def coarsen(A, levels, self_connections=False): # Function written by M. Defferrard, taken (almost) verbatim, from # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L5 """ Coarsen a graph, represented by its adjacency matrix A, at multiple levels. """ graphs, parents = metis(A, levels) perms = compute_perm(parents) for i, A in enumerate(graphs): M, M = A.shape if not self_connections: A = A.tocoo() A.setdiag(0) if i < levels: A = perm_adjacency(A, perms[i]) A = A.tocsr() A.eliminate_zeros() graphs[i] = A # Mnew, Mnew = A.shape # print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added),' # '|E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2)) return graphs, perms[0] if levels > 0 else None def metis(W, levels, rid=None): # Function written by M. 
Defferrard, taken verbatim, from # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L34 """ Coarsen a graph multiple times using the METIS algorithm. INPUT W: symmetric sparse weight (adjacency) matrix levels: the number of coarsened graphs OUTPUT graph[0]: original graph of size N_1 graph[2]: coarser graph of size N_2 < N_1 graph[levels]: coarsest graph of Size N_levels < ... < N_2 < N_1 parents[i] is a vector of size N_i with entries ranging from 1 to N_{i+1} which indicate the parents in the coarser graph[i+1] nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i} NOTE if "graph" is a list of length k, then "parents" will be a list of length k-1 """ N, N = W.shape if rid is None: rid = np.random.permutation(range(N)) parents = [] degree = W.sum(axis=0) - W.diagonal() graphs = [] graphs.append(W) #supernode_size = np.ones(N) #nd_sz = [supernode_size] #count = 0 #while N > maxsize: for _ in range(levels): #count += 1 # CHOOSE THE WEIGHTS FOR THE PAIRING # weights = ones(N,1) # metis weights weights = degree # graclus weights # weights = supernode_size # other possibility weights = np.array(weights).squeeze() # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR idx_row, idx_col, val = scipy.sparse.find(W) perm = np.argsort(idx_row) rr = idx_row[perm] cc = idx_col[perm] vv = val[perm] cluster_id = metis_one_level(rr,cc,vv,rid,weights) # rr is ordered parents.append(cluster_id) # TO DO # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE #supernode_size = full( sparse(cluster_id, ones(N,1) , # supernode_size ) ) #print(cluster_id) #print(supernode_size) #nd_sz{count+1}=supernode_size; # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH nrr = cluster_id[rr] ncc = cluster_id[cc] nvv = vv Nnew = cluster_id.max() + 1 # CSR is more appropriate: row,val pairs appear multiple times W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)), shape=(Nnew,Nnew)) W.eliminate_zeros() # Add new graph to the list of all coarsened graphs graphs.append(W) N, N = W.shape # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS) degree = W.sum(axis=0) #degree = W.sum(axis=0) - W.diagonal() # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS #[~, rid]=sort(ss); # arthur strategy #[~, rid]=sort(supernode_size); # thomas strategy #rid=randperm(N); # metis/graclus strategy ss = np.array(W.sum(axis=0)).squeeze() rid = np.argsort(ss) return graphs, parents # Coarsen a graph given by rr,cc,vv. rr is assumed to be ordered def metis_one_level(rr,cc,vv,rid,weights): # Function written by M. Defferrard, taken verbatim, from # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L119 nnz = rr.shape[0] N = rr[nnz-1] + 1 marked = np.zeros(N, np.bool) rowstart = np.zeros(N, np.int32) rowlength = np.zeros(N, np.int32) cluster_id = np.zeros(N, np.int32) oldval = rr[0] count = 0 clustercount = 0 for ii in range(nnz): rowlength[count] = rowlength[count] + 1 if rr[ii] > oldval: oldval = rr[ii] rowstart[count+1] = ii count = count + 1 for ii in range(N): tid = rid[ii] if not marked[tid]: wmax = 0.0 rs = rowstart[tid] marked[tid] = True bestneighbor = -1 for jj in range(rowlength[tid]): nid = cc[rs+jj] if marked[nid]: tval = 0.0 else: tval = vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid]) if tval > wmax: wmax = tval bestneighbor = nid cluster_id[tid] = clustercount if bestneighbor > -1: cluster_id[bestneighbor] = clustercount marked[bestneighbor] = True clustercount += 1 return cluster_id def compute_perm(parents): # Function written by M. 
Defferrard, taken verbatim, from # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L167 """ Return a list of indices to reorder the adjacency and data matrices so that the union of two neighbors from layer to layer forms a binary tree. """ # Order of last layer is random (chosen by the clustering algorithm). indices = [] if len(parents) > 0: M_last = max(parents[-1]) + 1 indices.append(list(range(M_last))) for parent in parents[::-1]: #print('parent: {}'.format(parent)) # Fake nodes go after real ones. pool_singeltons = len(parent) indices_layer = [] for i in indices[-1]: indices_node = list(np.where(parent == i)[0]) assert 0 <= len(indices_node) <= 2 #print('indices_node: {}'.format(indices_node)) # Add a node to go with a singelton. if len(indices_node) == 1: indices_node.append(pool_singeltons) pool_singeltons += 1 #print('new singelton: {}'.format(indices_node)) # Add two nodes as children of a singelton in the parent. elif len(indices_node) == 0: indices_node.append(pool_singeltons+0) indices_node.append(pool_singeltons+1) pool_singeltons += 2 #print('singelton childrens: {}'.format(indices_node)) indices_layer.extend(indices_node) indices.append(indices_layer) # Sanity checks. for i,indices_layer in enumerate(indices): M = M_last*2**i # Reduction by 2 at each layer (binary tree). assert len(indices[0] == M) # The new ordering does not omit an indice. assert sorted(indices_layer) == list(range(M)) return indices[::-1] def perm_adjacency(A, indices): # Function written by M. Defferrard, taken verbatim, from # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L242 """ Permute adjacency matrix, i.e. exchange node ids, so that binary unions form the clustering tree. """ if indices is None: return A M, M = A.shape Mnew = len(indices) assert Mnew >= M A = A.tocoo() # Add Mnew - M isolated vertices. if Mnew > M: rows = scipy.sparse.coo_matrix((Mnew-M, M), dtype=np.float32) cols = scipy.sparse.coo_matrix((Mnew, Mnew-M), dtype=np.float32) A = scipy.sparse.vstack([A, rows]) A = scipy.sparse.hstack([A, cols]) # Permute the rows and the columns. perm = np.argsort(indices) A.row = np.array(perm)[A.row] A.col = np.array(perm)[A.col] # assert np.abs(A - A.T).mean() < 1e-9 assert type(A) is scipy.sparse.coo.coo_matrix return A def permCoarsening(x, indices): # Original function written by M. Defferrard, found in # https://github.com/mdeff/cnn_graph/blob/master/lib/coarsening.py#L219 # Function name has been changed, and it has been further adapted to handle # multiple features as # number_data_points x number_features x number_nodes # instead of the original # number_data_points x number_nodes """ Permute data matrix, i.e. exchange node ids, so that binary unions form the clustering tree. """ if indices is None: return x B, F, N = x.shape Nnew = len(indices) assert Nnew >= N xnew = np.empty((B, F, Nnew)) for i,j in enumerate(indices): # Existing vertex, i.e. real data. if j < N: xnew[:,:,i] = x[:,:,j] # Fake vertex because of singeltons. # They will stay 0 so that max pooling chooses the singelton. # Or -infty ? else: xnew[:,:,i] = np.zeros([B, F]) return xnew
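# A minimal usage sketch (not part of the original module): it builds a small
# stochastic block model graph through the Graph class, computes its GFT,
# reorders the nodes by degree, and gathers 2-hop neighborhoods. All the
# parameter values below are illustrative.
if __name__ == '__main__':
    sbmOptions = {'nCommunities': 2, 'probIntra': 0.8, 'probInter': 0.1}
    G = Graph('SBM', 20, sbmOptions)             # 20-node SBM graph
    G.computeGFT()                               # eigendecomposition of the GSO
    Sordered, order = permDegree(G.S)            # nodes ordered by degree
    twoHop = computeNeighborhood(G.S, 2)         # 2-hop neighborhood per node
    print('Nodes:', G.N, '| Edges:', G.M,
          '| Connected:', isConnected(G.W))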