""" ASNE model implementation.""" import json import math import random import numpy as np import pandas as pd import networkx as nx from tqdm import tqdm import tensorflow as tf from scipy import sparse from utils import map_edges from texttable import Texttable class ASNE: """ A method to parse up command line parameters. By default it gives an embedding of the Wiki Chameleons. The default hyperparameters give a good quality representation without grid search. Representations are sorted by node ID. """ def __init__(self, args, graph, features): """ Constructor of ASNE object. :param args: Arguments object. :param graph: Networkx graph. :param features: Dictionary of features. """ self.args = args self.graph = graph self.features = features self.edges = map_edges(self.graph) self.nodes = self.graph.nodes() self.node_count = len(self.nodes) self.feature_count = max(map(lambda x: max(x+[0]), self.features.values())) + 1 self._build_model() def _setup_variables(self): """ Creating TensorFlow variables and placeholders. """ self.node_embedding = tf.random_uniform([self.node_count, self.args.node_embedding_dimensions], -1.0, 1.0) self.node_embedding = tf.Variable(self.node_embedding, dtype=tf.float32) self.feature_embedding = tf.random_uniform([self.feature_count, self.args.feature_embedding_dimensions], -1.0, 1.0) self.feature_embedding = tf.Variable(self.feature_embedding, dtype=tf.float32) self.combined_dimensions = self.args.node_embedding_dimensions + self.args.feature_embedding_dimensions self.noise_embedding = tf.Variable(tf.truncated_normal([self.node_count, self.combined_dimensions], stddev=1.0/math.sqrt(self.combined_dimensions)), dtype=tf.float32) self.noise_bias = tf.Variable(tf.zeros([self.node_count]), dtype=tf.float32) self.noise_bias = tf.Variable(tf.zeros([self.node_count]), dtype=tf.float32) self.left_nodes = tf.placeholder(tf.int32, shape=[None]) self.node_features = tf.sparse_placeholder(tf.float32, shape=[None, self.feature_count]) self.right_nodes = tf.placeholder(tf.int32, shape=[None, 1]) def _build_model(self): """ Creating computation graph of ASNE. """ self.graph = tf.Graph() with self.graph.as_default(): self._setup_variables() self.node_embed = tf.nn.embedding_lookup(self.node_embedding, self.left_nodes, max_norm=1) self.feature_embed = tf.sparse_tensor_dense_matmul(self.node_features, self.feature_embedding) self.combined_embed = tf.cast(tf.concat([self.node_embed, self.args.alpha*self.feature_embed], 1), tf.float32) self.loss = tf.reduce_mean(tf.nn.sampled_softmax_loss(weights=self.noise_embedding, biases=self.noise_bias, labels=self.right_nodes, inputs=self.combined_embed, num_sampled=self.args.negative_samples, num_classes=self.node_count)) self.optimizer = tf.train.AdamOptimizer().minimize(self.loss) init = tf.global_variables_initializer() self.sess = tf.Session() self.sess.run(init) def _generate_batch(self, i): """ Creating a batch of node indices and features. :param i: Batch index :return feed_dict: Dictionary with numpy arras for indices and features. """ left_nodes = np.array([e[0] for e in self.edges[self.args.batch_size*i:self.args.batch_size*(i+1)]]) right_nodes = np.array([[e[1]] for e in self.edges[self.args.batch_size*i:self.args.batch_size*(i+1)]]) node_indices = [index for index, edge in enumerate(self.edges[self.args.batch_size*i:self.args.batch_size*(i+1)]) for feature in self.features[edge[0]]] feature_indices = [feature for edge in self.edges[self.args.batch_size*i:self.args.batch_size*(i+1)] for feature in self.features[edge[0]]] values = np.ones(len(node_indices)) features = sparse.coo_matrix((values, (node_indices, feature_indices)), shape=(self.args.batch_size, self.feature_count), dtype=np.float32) features = tf.SparseTensorValue(indices=np.array([features.row, features.col]).T, values=features.data, dense_shape=features.shape) feed_dict = {self.left_nodes: left_nodes, self.node_features: features, self.right_nodes: right_nodes} return feed_dict def _optimize(self, feed_dict): """ Running weight optimization on a batch. :param feed_dict: Dictionary with inputs. """ loss, opt = self.sess.run((self.loss, self.optimizer), feed_dict=feed_dict) self.costs = self.costs + [loss] def _epoch_start(self, epoch): """ Printing the epoch number and setting up the cost list. :param epoch: Epoch number. """ random.shuffle(self.edges) self.costs = [] t = Texttable() t.add_rows([["Epoch: ", str(epoch+1)+"/"+str(self.args.epochs)+"."]]) print(t.draw()) def _epoch_end(self, epoch): """ Printing the epoch average loss. :param epoch: Epoch number. """ t = Texttable() t.add_rows([["Average Loss: ", round(np.mean(self.costs), 4)]]) print(t.draw()) def train(self): """ Training the ASNE model. """ self.total_batch = int(len(self.edges) / self.args.batch_size) for epoch in range(self.args.epochs): self._epoch_start(epoch) for i in tqdm(range(self.total_batch)): feed_dict = self._generate_batch(i) self._optimize(feed_dict) self._epoch_end(epoch) def save_embedding(self): """ Saving the embedding at the default path. """ print("\nSaving the embedding.\n") embedding = self.sess.run(self.noise_embedding) ids = np.array(self.nodes).reshape(-1, 1) embedding = np.concatenate([ids, embedding], axis=1) columns = ["id"] + list(map(lambda x: "X_"+str(x), range(embedding.shape[1]-1))) embedding = pd.DataFrame(embedding, columns=columns) embedding.to_csv(self.args.output_path, index=None)