#!/usr/bin/env python
"""Provides utilities for aligning graphs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import numpy as np
import networkx as nx
from eden.graph import vertex_vectorize
from sklearn.neighbors import NearestNeighbors
from itertools import product
from collections import defaultdict
from eden.display import draw_graph

import logging

def stable(rankings, list_a, list_b):
    """Compute a stable matching between ``list_a`` and ``list_b``.

    ``rankings[(x, n)]`` is the partner that ``x`` ranked in position ``n``
    (positions start at 1).

    >>> from itertools import product
    >>> A = ['1','2','3','4','5','6']
    >>> B = ['a','b','c','d','e','f']
    >>> rank = dict()
    >>> rank['1'] = (1,4,2,6,5,3)
    >>> rank['2'] = (3,1,2,4,5,6)
    >>> rank['3'] = (1,2,4,3,5,6)
    >>> rank['4'] = (4,1,2,5,3,6)
    >>> rank['5'] = (1,2,3,6,4,5)
    >>> rank['6'] = (2,1,4,3,5,6)
    >>> rank['a'] = (1,2,3,4,5,6)
    >>> rank['b'] = (2,1,4,3,5,6)
    >>> rank['c'] = (5,1,6,3,2,4)
    >>> rank['d'] = (1,3,2,5,4,6)
    >>> rank['e'] = (4,1,3,6,2,5)
    >>> rank['f'] = (2,1,4,3,6,5)
    >>> Arankings = dict(((a, rank[a][b_]), B[b_]) for (a, b_) in product(A, range(0, 6)))
    >>> Brankings = dict(((b, rank[b][a_]), A[a_]) for (b, a_) in product(B, range(0, 6)))
    >>> rankings = Arankings
    >>> rankings.update(Brankings)
    >>> stable(rankings, A, B)
    [('1', 'a'), ('2', 'b'), ('3', 'd'), ('4', 'f'), ('5', 'c'), ('6', 'e')]
    """
    # Every a-side element starts out engaged to its first choice;
    # the value is (current partner, rank of that partner in a's list).
    partners = {a: (rankings[(a, 1)], 1) for a in list_a}
    n_ranks = len(list_b)
    while True:
        round_was_stable = True
        for b in list_b:
            kept_one = False  # has b already kept a better-ranked suitor this pass?
            for rank_pos in range(1, n_ranks + 1):
                a = rankings[(b, rank_pos)]
                current_b, current_rank = partners[a]
                if current_b != b:
                    continue
                if kept_one:
                    # b already holds a suitor it prefers: bump this a to its
                    # next choice and keep iterating until no bump happens.
                    round_was_stable = False
                    partners[a] = (rankings[(a, current_rank + 1)],
                                   current_rank + 1)
                else:
                    kept_one = True
        if round_was_stable:
            break
    return sorted((a, b) for a, (b, _) in partners.items())

def init_vec(orig_graph):
    """Return a copy of the graph where every node has an empty 'vec' list."""
    graph_copy = orig_graph.copy()
    for node_id in graph_copy.nodes():
        graph_copy.node[node_id]['vec'] = []
    return graph_copy

def annotate_with_bfs(orig_graph, start, max_depth=20):
    """Append a BFS-proximity value to every node's 'vec' attribute.

    Every node first gets a baseline value of 1 appended to its 'vec' list;
    nodes within ``max_depth`` hops of ``start`` then have that value
    replaced by ``max_depth - distance``, so nodes closer to the anchor
    score higher.  Returns an annotated copy of the graph.
    """
    graph = orig_graph.copy()
    for u in graph.nodes():
        graph.node[u]['vec'].append(1)

    visited, queue, dist = set(), [start], dict()
    dist[start] = 0
    while queue:
        vertex = queue.pop(0)
        if dist[vertex] > max_depth or vertex in visited:
            continue
        # BUGFIX: the original never added anything to `visited`, so every
        # node was re-expanded, distances/annotations were overwritten with
        # larger values, and the queue grew combinatorially.
        visited.add(vertex)
        graph.node[vertex]['vec'][-1] = max_depth - dist[vertex]
        for u in graph.neighbors(vertex):
            # Enqueue each node only once, at its shortest-path distance.
            if u not in visited and u not in dist:
                dist[u] = dist[vertex] + 1
                queue.append(u)
    return graph

def make_same_size(GA_orig, GB_orig):
    """Return copies of both graphs padded with dummy nodes to equal order.

    The smaller graph receives consecutively numbered dummy nodes until both
    copies have the same number of nodes.  Dummy node ids are >= the original
    graph's size, which is what trim_pairings relies on to discard them.
    """
    GA = GA_orig.copy()
    GB = GB_orig.copy()

    na = len(GA)
    nb = len(GB)
    # BUGFIX: both loop bodies were missing in the transcribed source
    # (empty `for` suites); restore the intended dummy-node padding.
    # NOTE(review): the dummy label value is reconstructed -- any constant
    # label works for downstream vectorization; confirm against upstream.
    if na > nb:
        for i in range(nb, na):
            GB.add_node(i, label='_dummy_')
    if nb > na:
        for i in range(na, nb):
            GA.add_node(i, label='_dummy_')
    return GA, GB

def trim_pairings(pairings, GA_orig, GB_orig):
    """Convert name pairings back to 0-based node-id pairs, dropping dummies.

    ``pairings`` holds ('A<i>', 'B<j>') name pairs with 1-based indices; any
    pair whose index falls past the end of either original graph refers to a
    dummy padding node and is discarded.
    """
    return [
        (int(name_a[1:]) - 1, int(name_b[1:]) - 1)
        for name_a, name_b in pairings
        if int(name_a[1:]) - 1 < len(GA_orig) and int(name_b[1:]) - 1 < len(GB_orig)
    ]

def match(GA_orig, GB_orig, order=3, max_depth=10, complexity=4):
    """Pair the nodes of two graphs via vectorization + stable matching.

    The smaller graph is taken as reference graph A.  Nodes of both graphs
    are vectorized with EDEN, mutual preference rankings are derived from
    nearest-neighbor distances, a stable matching is computed, and dummy
    padding pairs are trimmed.  Returns the tuple of B-node ids paired with
    the reference nodes, ordered by reference node id.

    NOTE(review): assumes `vertex_vectorize` keeps matrix row i aligned with
    node id i -- confirm against the eden library.
    """
    # Make GA the smaller (reference) graph.
    if len(GA_orig) > len(GB_orig):
        GA, GB = GB_orig.copy(), GA_orig.copy()
        logging.warning('Warning: reference graph is B not A')
    else:
        GA, GB = GA_orig.copy(), GB_orig.copy()
    # logging.warning('Matching graph A (%d nodes) to graph B (%d nodes)' % (len(GA_orig), len(GB_orig)))

    # Pad the smaller graph so both have the same node count.
    GA, GB = make_same_size(GA, GB)

    # First vectorization pass: discrete labels, normalized features.
    M = vertex_vectorize([GA, GB], complexity=complexity, normalization=True, inner_normalization=True)
    MA, MB = M[0], M[1]

    nnA = NearestNeighbors(n_neighbors=len(GA)).fit(MA)
    d, BprefA = nnA.kneighbors(MB)

    # `d` is overwritten here: the anchor selection below uses the
    # A-to-nearest-B distances.
    nnB = NearestNeighbors(n_neighbors=len(GB)).fit(MB)
    d, AprefB = nnB.kneighbors(MA)

    # mark bfv in vec attribute
    GA, GB = init_vec(GA), init_vec(GB)
    for k in range(order):
        # Pick the k-th best-matching A node and its closest B counterpart,
        # then annotate both graphs with BFS proximity from those anchors.
        ds = d[:, 0]
        id_max_A = np.argsort(ds)[k]
        id_max_B = AprefB[id_max_A][0]

        GA = annotate_with_bfs(GA, id_max_A, max_depth=max_depth)
        GB = annotate_with_bfs(GB, id_max_B, max_depth=max_depth)
    # draw_graph_set([GA,GB],n_graphs_per_line=2, size=9, secondary_vertex_label='vec')

    # vectorize 2nd time with real values this time
    M = vertex_vectorize([GA, GB], complexity=complexity, discrete=False, normalization=False, inner_normalization=False)
    MA, MB = M[0], M[1]

    nnA = NearestNeighbors(n_neighbors=len(GA)).fit(MA)
    d, BprefA = nnA.kneighbors(MB)

    nnB = NearestNeighbors(n_neighbors=len(GB)).fit(MB)
    d, AprefB = nnB.kneighbors(MA)

    # Node names for the stable-matching step ('A1'.., 'B1'..).
    A = ['A%d' % (i + 1) for i in range(len(GA))]
    B = ['B%d' % (i + 1) for i in range(len(GB))]

    # rankings[(node, n)] = the n-th preferred partner of `node`
    # (graphs have equal size here, so len(GA) ranks cover both sides).
    Arankings = dict(((A[i], j + 1), B[AprefB[i, j]]) for i, j in product(range(len(GA)), range(len(GA))))
    Brankings = dict(((B[i], j + 1), A[BprefA[i, j]]) for i, j in product(range(len(GB)), range(len(GB))))

    rankings = Arankings
    rankings.update(Brankings)
    pairings = stable(rankings, A, B)

    # remove dummy node pairings
    # NOTE(review): when the inputs were swapped above, trimming against the
    # original (unswapped) sizes may be inverted -- verify with callers.
    npairings = trim_pairings(pairings, GA_orig, GB_orig)
    orderA, orderB = list(zip(*sorted(npairings)))
    return orderB

def _max_common_subgraph(GA, GB, pairings):
    """Project GA's edges through the pairing onto GB.

    Collects the GB endpoints of every GA edge that is preserved by the
    mapping (edge present in GB in either orientation, and both endpoint
    labels agree).  Returns (connected components, induced GB subgraph).
    """
    mapped = {a_node: b_node for a_node, b_node in enumerate(pairings)}
    kept_nodes = []
    for u, v in GA.edges():
        mu, mv = mapped[u], mapped[v]
        edge_present = (mu, mv) in GB.edges() or (mv, mu) in GB.edges()
        labels_agree = (GA.node[u]['label'] == GB.node[mu]['label'] and
                        GA.node[v]['label'] == GB.node[mv]['label'])
        if edge_present and labels_agree:
            kept_nodes.append(mu)
            kept_nodes.append(mv)
    G = nx.subgraph(GB, kept_nodes)
    return nx.connected_components(G), G

def max_common_subgraph(GA, GB, pairings):
    """Return the largest connected common subgraph of GB, or None if empty."""
    components, G = _max_common_subgraph(GA, GB, pairings)
    sized = [(len(component), component) for component in components]
    if not sized:
        return None
    _, biggest_ids = max(sized)
    return nx.subgraph(G, biggest_ids)

def max_common_subgraphs(GA, GB, pairings):
    """Return one GB subgraph per connected common component."""
    components, G = _max_common_subgraph(GA, GB, pairings)
    return [nx.subgraph(G, component) for component in components]

def compute_matching_edges_fraction(GA, GB, pairings):
    """Return the fraction of GA edges preserved by the pairing in GB.

    ``pairings[i]`` is the GB node paired with GA node i.  Edges are treated
    as undirected: both orientations of the mapped edge are accepted in GB
    (BUGFIX: the original checked only one orientation, inconsistently with
    _max_common_subgraph which checks both).  Returns 0.0 when GA has no
    edges instead of raising ZeroDivisionError.
    """
    matches = dict(enumerate(pairings))
    edges_a = list(GA.edges())
    if not edges_a:
        # avoid ZeroDivisionError on edgeless graphs
        return 0.0
    # Build a symmetric edge set for O(1) membership tests.
    edges_b = set()
    for u, v in GB.edges():
        edges_b.add((u, v))
        edges_b.add((v, u))
    count = 0
    for i, j in edges_a:
        if (matches[i], matches[j]) in edges_b:
            count += 1
    return float(count) / len(edges_a)

def compute_matching_neighborhoods_fraction(GA, GB, pairings):
    """Return the fraction of GA nodes whose entire neighborhood is preserved.

    A node counts when it has at least one edge preserved by the pairing and
    every one of its neighbors is reached through such a preserved edge.
    Edges are undirected: both orientations are accepted in GB (BUGFIX: the
    original checked only one orientation, inconsistently with
    _max_common_subgraph).  Returns 0.0 for a graph with no nodes.
    """
    matches = dict(enumerate(pairings))
    # Symmetric GB edge set for O(1) orientation-insensitive lookups.
    edges_b = set()
    for u, v in GB.edges():
        edges_b.add((u, v))
        edges_b.add((v, u))
    preserved = defaultdict(list)
    for i, j in GA.edges():
        if (matches[i], matches[j]) in edges_b:
            preserved[i].append(j)
            preserved[j].append(i)
    count = 0
    for u in GA.nodes():
        # Skip nodes with no preserved edge at all (original behavior).
        if preserved.get(u):
            if all(v in preserved[u] for v in GA.neighbors(u)):
                count += 1
    nodes = list(GA.nodes())
    return float(count) / len(nodes) if nodes else 0.0

def compute_max_common_subgraph_size(GA, GB, pairings):
    """Size of the largest common subgraph, relative to GB's node count."""
    common = max_common_subgraph(GA, GB, pairings)
    return 0 if common is None else len(common) / len(GB)

def compute_max_common_subgraphs_size(GA, GB, pairings):
    """Total size of all common components, relative to GB's node count."""
    graphs = max_common_subgraphs(GA, GB, pairings)
    if not graphs:
        return 0
    total = sum(len(g) for g in graphs if g)
    return total / float(len(GB))

def compute_quality(GA, GB, pairings):
    """Score a pairing by the relative total size of its common subgraphs."""
    return compute_max_common_subgraphs_size(GA, GB, pairings)

def random_optimize(GA, GB, n_iter=20):
    """Randomly sample (complexity, order, max_depth) and keep the best.

    Returns (best_quality, best_complexity, best_order, best_depth).
    """
    best = (-1, None, None, None)
    for _ in range(n_iter):
        complexity = random.randint(1, 7)
        order = random.randint(1, 10)
        depth = random.randint(1, 20)
        pairings = match(GA, GB, complexity=complexity, order=order,
                         max_depth=depth)
        quality = compute_quality(GA, GB, pairings)
        if quality > best[0]:
            best = (quality, complexity, order, depth)
    best_quality, best_c, best_order, best_depth = best
    logging.debug('[random search] quality:%.2f c:%d o:%d d:%d' % (best_quality, best_c, best_order, best_depth))
    return best_quality, best_c, best_order, best_depth

def line_optimize(
        GA,
        GB,
        n_iter,
        best_c,
        best_order,
        best_depth):
    """Coordinate-descent refinement of the matching hyperparameters.

    Starting from (best_c, best_order, best_depth), repeatedly sweeps each
    parameter in turn over its full descending range while holding the other
    two at their current best, keeping the best combination found.  As in
    the original, ties (`>=`) prefer the value tried last, i.e. the smallest
    one in the descending range.

    Returns (best_quality, best_c, best_order, best_depth).

    REFACTOR: the original triplicated the sweep loop verbatim for each
    parameter; the sweeps are now driven by one parameterized loop.
    """
    best_quality = -1
    params = {'c': best_c, 'order': best_order, 'depth': best_depth}
    # One descending range per parameter; swept in this fixed order.
    sweeps = [('c', range(8, 0, -1)),
              ('order', range(10, 0, -1)),
              ('depth', range(20, 0, -1))]
    for _ in range(n_iter):
        for name, value_range in sweeps:
            for value in value_range:
                trial = dict(params)
                trial[name] = value
                pairings = match(GA, GB, complexity=trial['c'],
                                 order=trial['order'],
                                 max_depth=trial['depth'])
                quality = compute_quality(GA, GB, pairings)
                if quality >= best_quality:
                    best_quality = quality
                    params = trial
    logging.debug('[line search]   quality:%.2f c:%d o:%d d:%d' % (best_quality, params['c'], params['order'], params['depth']))
    return best_quality, params['c'], params['order'], params['depth']

def optimize(GA, GB, n_iter_random=20, n_iter_line=4):
    """Random search followed by line search over matching hyperparameters.

    Returns (best_quality, best_complexity, best_order, best_depth).
    """
    quality, c, order, depth = random_optimize(GA, GB, n_iter=n_iter_random)
    return line_optimize(GA, GB, n_iter_line, c, order, depth)

def draw_match(GA, GB, pairings, size=10):
    """Draw GA and GB side by side with alignment edges between paired nodes.

    NOTE(review): as transcribed, the original called draw_graph once per
    label-matching pair and never connected the paired nodes, even though it
    passes ignore_for_layout='nesting'.  The behavior implied by that
    argument -- and restored here -- is: add a 'nesting' edge for every
    label-preserving pair, then draw the combined graph once.  Confirm
    against the upstream eden graph-aligner source.
    """
    G = nx.disjoint_union(GA, GB)
    for i, jj in enumerate(pairings):
        j = len(GA) + jj
        if G.node[i]['label'] == G.node[j]['label']:
            # Connect matched nodes; hidden from layout via ignore_for_layout.
            G.add_edge(i, j, nesting=True)
    draw_graph(
        G,
        size=size,
        vertex_border=False,
        vertex_size=400,
        edge_label=None,
        edge_alpha=.2,
        dark_edge_color='label',
        dark_edge_dotted=False,
        dark_edge_alpha=1,
        colormap='Set2',
        ignore_for_layout='nesting')

# -----------------------------------------------------------------------------

class GraphAligner(object):
    """Align two graphs by computing an optimized node pairing."""

    def __init__(self, complexity=3, order=1, max_depth=5):
        """Initialize with the matching hyperparameters."""
        self.set_params(complexity, order, max_depth)
        self.pairings = None          # last computed node pairing
        self.quality = None           # quality score of the last pairing
        self.max_common_graph = None  # cached largest common subgraph

    def set_params(self, complexity=3, order=1, max_depth=5):
        """Store the matching hyperparameters."""
        self.complexity = complexity
        self.order = order
        self.max_depth = max_depth

    def _store(self, graph_a, graph_b):
        """Keep copies of both graphs; the smaller becomes the reference."""
        first, second = graph_a.copy(), graph_b.copy()
        if len(first) > len(second):
            first, second = second, first
        self.ref_graph = first
        self.second_ref_graph = second

    def _optimize(self, n_iter_random=30, n_iter_line=3):
        """Search for hyperparameters that maximize matching quality."""
        quality, complexity, order, depth = optimize(
            self.ref_graph,
            self.second_ref_graph,
            n_iter_random=n_iter_random,
            n_iter_line=n_iter_line)
        self.quality = quality
        self.complexity = complexity
        self.order = order
        self.max_depth = depth

    def match(self, graph_a, graph_b, n_iter_random=30, n_iter_line=3):
        """Optimize hyperparameters, then match the two graphs; fluent."""
        self._store(graph_a, graph_b)
        self._optimize(n_iter_random, n_iter_line)
        self._match(graph_a, graph_b)
        return self

    def _match(self, graph_a, graph_b):
        """Compute pairings with the current hyperparameters; fluent."""
        self._store(graph_a, graph_b)
        self.pairings = match(
            self.ref_graph,
            self.second_ref_graph,
            complexity=self.complexity,
            order=self.order,
            max_depth=self.max_depth)
        self.quality = compute_quality(
            self.ref_graph, self.second_ref_graph, self.pairings)
        return self

    def get_matching_quality(self):
        """Return the quality of the last match (None before matching)."""
        return self.quality

    def draw_match(self):
        """Draw the last computed match; fluent."""
        draw_match(self.ref_graph, self.second_ref_graph, self.pairings)
        return self

    def max_common_subgraph(self):
        """Return the largest common subgraph of the last match."""
        self.max_common_graph = max_common_subgraph(
            self.ref_graph, self.second_ref_graph, self.pairings)
        return self.max_common_graph

    def max_common_subgraphs(self):
        """Return all common subgraph components of the last match."""
        self.common_graphs = max_common_subgraphs(
            self.ref_graph, self.second_ref_graph, self.pairings)
        return self.common_graphs
```