# Python source code of competing graph embedding methods.

__author__ = 'Georgios Rizos (georgerizos@iti.gr)'

import copy
import networkx as nx
import community
import numpy as np
import scipy.sparse as sparse
import scipy.sparse.linalg as spla

from reveal_graph_embedding.embedding.laplacian import get_normalized_laplacian


def _build_reverse_index(community_matrix, number_of_nodes):
    """
    Builds the node->communities and community->nodes lookup tables for one indicator matrix. Called by mroc.

    Inputs:  - community_matrix: A SciPy sparse matrix with one row per node and one column per community.
             - number_of_nodes: The number of rows of community_matrix.

    Outputs: - node_to_communities: Object array; entry n holds the column indices stored in row n.
             - community_to_nodes: Object array; entry c holds the row indices stored in column c.
    """
    community_csc = community_matrix.tocsc()
    community_csr = community_csc.tocsr()

    node_to_communities = np.empty(number_of_nodes, dtype=object)
    for n in range(number_of_nodes):
        node_to_communities[n] = community_csr.getrow(n).indices

    number_of_communities = community_csc.shape[1]
    community_to_nodes = np.empty(number_of_communities, dtype=object)
    for c in range(number_of_communities):
        community_to_nodes[c] = community_csc.getcol(c).indices

    return node_to_communities, community_to_nodes


def mroc(adjacency_matrix, alpha):
    """
    Extracts hierarchical community features using the MROC method.

    Introduced in: Wang, X., Tang, L., Liu, H., & Wang, L. (2013).
                   Learning with multi-resolution overlapping communities.
                   Knowledge and information systems, 36(2), 517-535.

    Base communities are the distinct closed neighbourhoods (a node plus the row indices stored in its adjacency
    column). At every level each still-available community is greedily merged with the overlapping community of
    highest Jaccard similarity; merged communities with fewer than alpha nodes are the candidates of the next level.
    The communities of all levels are stacked column-wise.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.
             - alpha: A maximum community size stopping threshold. Merged communities with at least alpha nodes are
                      kept as features but are not merged any further.

    Outputs: - X in R^(nxC_n): The latent space embedding as a SciPy sparse matrix. It is the CSR base-community
                               matrix when no merge happened, otherwise the result of scipy.sparse.hstack.
    """
    # Find number of nodes
    number_of_nodes = adjacency_matrix.shape[0]

    ####################################################################################################################
    # Base community calculation
    ####################################################################################################################
    base_row = list()
    base_col = list()

    # Find base communities; a hash set of frozensets replaces a linear scan over all previously found communities.
    adjacency_matrix = adjacency_matrix.tocsc()
    seen_base_communities = set()
    number_of_base_communities = 0
    for i in range(number_of_nodes):
        # Calculate base community
        base_community = set(adjacency_matrix.getcol(i).indices)
        base_community.add(i)

        community_key = frozenset(base_community)
        if community_key in seen_base_communities:
            continue
        seen_base_communities.add(community_key)

        base_row.extend(base_community)
        base_col.extend([number_of_base_communities]*len(base_community))
        number_of_base_communities += 1

    # Form sparse matrices (explicit integer dtype keeps the index arrays valid even when they are empty)
    base_row = np.array(base_row, dtype=np.int64)
    base_col = np.array(base_col, dtype=np.int64)
    base_data = np.ones(base_row.size, dtype=np.float64)
    features = sparse.coo_matrix((base_data, (base_row, base_col)),
                                 shape=(number_of_nodes, number_of_base_communities))
    features = features.tocsr()

    print('Base communities calculated.')

    node_to_communities, community_to_nodes = _build_reverse_index(features, number_of_nodes)

    ####################################################################################################################
    # Merge iterations
    ####################################################################################################################
    print('Start merge iterations.')

    iteration = 0

    while True:
        number_of_candidates = community_to_nodes.size

        level_row = list()
        level_col = list()

        # community_neighbors expects the array-plus-counter form; the boolean mask gives O(1) membership tests.
        unavailable_communities = -1*np.ones(number_of_candidates)
        unavailable_communities_counter = 0
        is_unavailable = np.zeros(number_of_candidates, dtype=bool)

        next_level_communities = list()
        number_of_communities = 0
        for j in range(number_of_candidates):
            if is_unavailable[j]:
                continue
            is_unavailable[j] = True
            unavailable_communities[unavailable_communities_counter] = j
            unavailable_communities_counter += 1
            c_j = community_to_nodes[j]

            indices = community_neighbors(c_j,
                                          node_to_communities,
                                          unavailable_communities,
                                          unavailable_communities_counter)

            # Pick the most similar available neighbour; ties keep the first one encountered.
            max_similarity = -1
            community_index = 0
            for jj in indices:
                similarity = jaccard(c_j, community_to_nodes[jj])
                if similarity > max_similarity:
                    max_similarity = similarity
                    community_index = jj

            if max_similarity > 0:
                # Merge two communities
                jj = community_index
                c_jj = community_to_nodes[jj]
                c_new = np.union1d(c_j, c_jj)

                # Only keep the merge when neither community is a subset of the other.
                adds_to_c_j = np.setdiff1d(c_new, c_j).size != 0
                adds_to_c_jj = np.setdiff1d(c_new, c_jj).size != 0
                if adds_to_c_j and adds_to_c_jj:
                    level_row.extend(c_new)
                    level_col.extend([number_of_communities]*c_new.size)

                    if c_new.size < alpha:
                        next_level_communities.append(number_of_communities)

                    number_of_communities += 1

                # The partner is consumed whether or not the merge was kept.
                is_unavailable[jj] = True
                unavailable_communities[unavailable_communities_counter] = jj
                unavailable_communities_counter += 1

        # No new community at this level: the hierarchy is complete.
        if not level_row:
            break

        level_row = np.array(level_row, dtype=np.int64)
        level_col = np.array(level_col, dtype=np.int64)
        level_data = np.ones(level_row.size, dtype=np.float64)
        communities = sparse.coo_matrix((level_data, (level_row, level_col)),
                                        shape=(number_of_nodes, number_of_communities))

        features = sparse.hstack([features, communities])

        iteration += 1
        print('Iteration: ', iteration)
        print('List length', len(next_level_communities))

        # With fewer than two candidates nothing can be merged at the next level, so stop here instead of
        # indexing with an empty selection and running one more no-op pass.
        if len(next_level_communities) <= 1:
            break

        next_level_matrix = communities.tocsc()[:, np.array(next_level_communities, dtype=np.int64)]
        node_to_communities, community_to_nodes = _build_reverse_index(next_level_matrix, number_of_nodes)

    return features


def community_neighbors(c_j, reverse_index_rows, unavailable_communities, unavailable_communities_counter):
    """
    Finds communities with shared nodes to a seed community. Called by mroc.

    Inputs:  - c_j: The seed community for which we want to find which communities overlap.
             - reverse_index_rows: A node to community lookup; entry n lists the communities that contain node n.
             - unavailable_communities: An array whose first unavailable_communities_counter entries are the
                                        communities that have already either been merged or failed to merge.
             - unavailable_communities_counter: The number of such communities.

    Outputs: - indices: A sorted array of unique communities that exhibit overlap with the seed community and are
                        not among the unavailable ones.
    """
    memberships = [reverse_index_rows[node] for node in c_j]
    if memberships:
        indices = np.concatenate(memberships)
    else:
        # Keep an integer dtype so that the result can always be used for indexing.
        indices = np.array([], dtype=np.int64)

    # Only the first `unavailable_communities_counter` slots hold valid entries; slots beyond that are not read.
    excluded = unavailable_communities[:unavailable_communities_counter]

    return np.setdiff1d(indices, excluded)


def jaccard(c_1, c_2):
    """
    Calculates the Jaccard similarity between two sets of nodes. Called by mroc.

    Inputs:  - c_1: Community (set of nodes) 1.
             - c_2: Community (set of nodes) 2.

    Outputs: - jaccard_similarity: The Jaccard similarity of these two communities as a float in [0, 1].
                                   Two empty communities are given similarity 0.0 instead of raising.
    """
    union_size = np.union1d(c_1, c_2).size
    if union_size == 0:
        # Both communities are empty; report "no similarity" rather than dividing by zero.
        return 0.0

    intersection_size = np.intersect1d(c_1, c_2).size
    return intersection_size/float(union_size)


def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Every level of the Louvain dendrogram contributes one block of indicator columns; a node has a 1 in the column
    of the community it belongs to at that level.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    number_of_nodes = adjacency_matrix.shape[0]

    # Convert to networkx undirected graph.
    # networkx 3.0 removed from_scipy_sparse_matrix in favour of from_scipy_sparse_array; support both.
    graph_from_sparse = getattr(nx, 'from_scipy_sparse_array', None)
    if graph_from_sparse is None:
        graph_from_sparse = nx.from_scipy_sparse_matrix
    graph = graph_from_sparse(adjacency_matrix, create_using=nx.Graph())

    # Call LOUVAIN algorithm to calculate a hierarchy of communities.
    # Current python-louvain spells this generate_dendrogram; old releases only had generate_dendogram.
    build_dendrogram = getattr(community, 'generate_dendrogram', None)
    if build_dendrogram is None:
        build_dendrogram = community.generate_dendogram
    tree = build_dendrogram(graph, part_init=None)

    # Embed communities
    row = list()
    col = list()

    community_counter = 0
    for level in range(len(tree)):
        partition = community.partition_at_level(tree, level)
        for n, c in partition.items():
            row.append(n)
            # Offset by the number of columns used by the previous levels.
            col.append(community_counter + c)

        community_counter += max(partition.values()) + 1

    row = np.array(row, dtype=np.int64)
    col = np.array(col, dtype=np.int64)
    data = np.ones(row.size, dtype=np.float64)

    # The graph has one node per adjacency row, so the row count comes from the adjacency matrix shape.
    louvain_features = sparse.coo_matrix((data, (row, col)), shape=(number_of_nodes, community_counter),
                                         dtype=np.float64)

    return louvain_features


def laplacian_eigenmaps(adjacency_matrix, k):
    """
    Performs spectral graph embedding using the graph symmetric normalized Laplacian matrix.

    Introduced in: Belkin, M., & Niyogi, P. (2003).
                   Laplacian eigenmaps for dimensionality reduction and data representation.
                   Neural computation, 15(6), 1373-1396.

    Inputs:  -   A in R^(nxn): Adjacency matrix of an network represented as a SciPy Sparse COOrdinate matrix.
             -              k: The number of eigenvectors to extract.

    Outputs: - X in R^(nxk): The latent space embedding represented as a NumPy array. We discard the first eigenvector.
                             If ARPACK does not converge, only the eigenvectors that did converge are used, so
                             fewer than k columns may be returned.
    """
    # Calculate sparse graph Laplacian.
    laplacian = get_normalized_laplacian(adjacency_matrix)

    # Calculate bottom k+1 eigenvalues and eigenvectors of normalized Laplacian.
    # One more than k is requested because the first eigenvector is discarded below.
    try:
        eigenvalues, eigenvectors = spla.eigsh(laplacian,
                                               k=k+1,
                                               which='SM',
                                               return_eigenvectors=True)
    except spla.ArpackNoConvergence as e:
        # Best effort: continue with whatever eigenpairs ARPACK managed to converge.
        print("ARPACK has not converged.")
        eigenvalues = e.eigenvalues
        eigenvectors = e.eigenvectors

    # Order the eigenpairs by ascending eigenvalue so that the column dropped next is the smallest one.
    ascending_order = np.argsort(eigenvalues)
    eigenvectors = eigenvectors[:, ascending_order]

    # Discard the eigenvector corresponding to the zero-valued eigenvalue.
    eigenvectors = eigenvectors[:, 1:]

    return eigenvectors


def replicator_eigenmaps(adjacency_matrix, k):
    """
    Performs spectral graph embedding on the replicator matrix lambda_max*I - A of the adjacency matrix.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a scipy.sparse.coo_matrix
             -            k: The number of social dimensions/eigenvectors to extract

    Outputs: - S in R^(nxk): The social dimensions represented as a numpy.array matrix. The eigenvector of the
                             smallest replicator eigenvalue is discarded. If ARPACK does not converge, only the
                             eigenvectors that did converge are used, so fewer than k columns may be returned.
    """
    number_of_nodes = adjacency_matrix.shape[0]

    # Largest *algebraic* eigenvalue of A. 'LM' (largest magnitude) could return -lambda_max instead whenever the
    # spectrum is symmetric around zero (e.g. bipartite graphs), which would flip the sign of the shift.
    max_eigenvalue = spla.eigsh(adjacency_matrix,
                                k=1,
                                which='LA',
                                return_eigenvectors=False)
    max_eigenvalue = float(max_eigenvalue[0])

    # Calculate Replicator matrix
    eye_matrix = sparse.eye(number_of_nodes, number_of_nodes, dtype=np.float64)
    eye_matrix = eye_matrix.tocsr()
    eye_matrix.data = eye_matrix.data*max_eigenvalue
    replicator = eye_matrix - adjacency_matrix

    # Calculate bottom k+1 eigenvalues and eigenvectors of the replicator matrix.
    # One more than k is requested because the first eigenvector is discarded below.
    try:
        eigenvalues, eigenvectors = spla.eigsh(replicator,
                                               k=k+1,
                                               which='SM',
                                               return_eigenvectors=True)
    except spla.ArpackNoConvergence as e:
        # Best effort: continue with whatever eigenpairs ARPACK managed to converge.
        print("ARPACK has not converged.")
        eigenvalues = e.eigenvalues
        eigenvectors = e.eigenvectors

    # Order the eigenpairs by ascending eigenvalue so that the column dropped next is the smallest one.
    ascending_order = np.argsort(eigenvalues)
    eigenvectors = eigenvectors[:, ascending_order]

    eigenvectors = eigenvectors[:, 1:]

    return eigenvectors


def base_communities(adjacency_matrix):
    """
    Forms the binary community indicator feature matrix I + A^T for any graph.

    Every stored entry of the sum is overwritten with 1, so the result only records which positions are occupied.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxn): The indicator feature matrix represented as a SciPy Sparse CSR matrix.
    """
    node_count = adjacency_matrix.shape[0]

    # A^T in CSR form
    transposed_adjacency = adjacency_matrix.tocsr().transpose().tocsr()

    # I in CSR form
    identity = sparse.eye(node_count, node_count).tocsr()

    # X = I + A^T, with all stored values replaced by ones
    indicator = (identity + transposed_adjacency).tocsr()
    indicator.data = np.ones_like(indicator.data)

    return indicator