```import numpy as np
import scipy.sparse as sp

from collections import Counter

from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer, normalize

def to_binary_bag_of_words(features):
"""Converts TF/IDF features to binary bag-of-words features."""
features_copy = features.tocsr()
features_copy.data[:] = 1.0
return features_copy

"""Compute D^-1/2 * A * D^-1/2."""
# Make sure that there are no self-loops
A = eliminate_self_loops(A)
D = np.ravel(A.sum(1))
D[D == 0] = 1  # avoid division by 0 error
D_sqrt = np.sqrt(D)
return A / D_sqrt[:, None] / D_sqrt[None, :]

"""Renormalize the adjacency matrix (as in the GCN paper)."""
A_tilde = A.tolil()
A_tilde.setdiag(1)
A_tilde = A_tilde.tocsr()
A_tilde.eliminate_zeros()
D = np.ravel(A.sum(1))
D_sqrt = np.sqrt(D)
return A / D_sqrt[:, None] / D_sqrt[None, :]

def row_normalize(matrix):
"""Normalize the matrix so that the rows sum up to 1."""
return normalize(matrix, norm='l1', axis=1)

"""Set the diagonal."""
A = A.tolil()  # make sure we work on a copy of the original matrix
A.setdiag(value)
A = A.tocsr()
if value == 0:
A.eliminate_zeros()
return A

def eliminate_self_loops(A):
"""Remove self-loops from the adjacency matrix."""
A = A.tolil()
A.setdiag(0)
A = A.tocsr()
A.eliminate_zeros()
return A

def largest_connected_components(sparse_graph, n_components=1):
"""Select the largest connected components in the graph.

Parameters
----------
sparse_graph : SparseGraph
Input graph.
n_components : int, default 1
Number of largest connected components to keep.

Returns
-------
sparse_graph : SparseGraph
Subgraph of the input graph where only the nodes in largest n_components are kept.

"""
component_sizes = np.bincount(component_indices)
components_to_keep = np.argsort(component_sizes)[::-1][:n_components]  # reverse order to sort descending
nodes_to_keep = [
idx for (idx, component) in enumerate(component_indices) if component in components_to_keep
]
return create_subgraph(sparse_graph, nodes_to_keep=nodes_to_keep)

def create_subgraph(sparse_graph, _sentinel=None, nodes_to_remove=None, nodes_to_keep=None):
"""Create a graph with the specified subset of nodes.

Exactly one of (nodes_to_remove, nodes_to_keep) should be provided, while the other stays None.
Note that to avoid confusion, it is required to pass node indices as named arguments to this function.

Parameters
----------
sparse_graph : SparseGraph
Input graph.
_sentinel : None
Internal, to prevent passing positional arguments. Do not use.
nodes_to_remove : array-like of int
Indices of nodes that have to removed.
nodes_to_keep : array-like of int
Indices of nodes that have to be kept.

Returns
-------
sparse_graph : SparseGraph
Graph with specified nodes removed.

"""
# Check that arguments are passed correctly
if _sentinel is not None:
raise ValueError("Only call `create_subgraph` with named arguments',"
" (nodes_to_remove=...) or (nodes_to_keep=...)")
if nodes_to_remove is None and nodes_to_keep is None:
raise ValueError("Either nodes_to_remove or nodes_to_keep must be provided.")
elif nodes_to_remove is not None and nodes_to_keep is not None:
raise ValueError("Only one of nodes_to_remove or nodes_to_keep must be provided.")
elif nodes_to_remove is not None:
nodes_to_keep = [i for i in range(sparse_graph.num_nodes()) if i not in nodes_to_remove]
elif nodes_to_keep is not None:
nodes_to_keep = sorted(nodes_to_keep)
else:
raise RuntimeError("This should never happen.")

if sparse_graph.attr_matrix is not None:
sparse_graph.attr_matrix = sparse_graph.attr_matrix[nodes_to_keep]
if sparse_graph.labels is not None:
sparse_graph.labels = sparse_graph.labels[nodes_to_keep]
if sparse_graph.node_names is not None:
sparse_graph.node_names = sparse_graph.node_names[nodes_to_keep]
return sparse_graph

def binarize_labels(labels, sparse_output=False, return_classes=False):
"""Convert labels vector to a binary label matrix.

In the default single-label case, labels look like
labels = [y1, y2, y3, ...].
Also supports the multi-label format.
In this case, labels should look something like
labels = [[y11, y12], [y21, y22, y23], [y31], ...].

Parameters
----------
labels : array-like, shape [num_samples]
Array of node labels in categorical single- or multi-label format.
sparse_output : bool, default False
Whether return the label_matrix in CSR format.
return_classes : bool, default False
Whether return the classes corresponding to the columns of the label matrix.

Returns
-------
label_matrix : np.ndarray or sp.csr_matrix, shape [num_samples, num_classes]
Binary matrix of class labels.
num_classes = number of unique values in "labels" array.
label_matrix[i, k] = 1 <=> node i belongs to class k.
classes : np.array, shape [num_classes], optional
Classes that correspond to each column of the label_matrix.

"""
if hasattr(labels[0], '__iter__'):  # labels[0] is iterable <=> multilabel format
binarizer = MultiLabelBinarizer(sparse_output=sparse_output)
else:
binarizer = LabelBinarizer(sparse_output=sparse_output)
label_matrix = binarizer.fit_transform(labels).astype(np.float32)
return (label_matrix, binarizer.classes_) if return_classes else label_matrix

def remove_underrepresented_classes(g, train_examples_per_class, val_examples_per_class):
"""Remove nodes from graph that correspond to a class of which there are less than
num_classes * train_examples_per_class + num_classes * val_examples_per_class nodes.

Those classes would otherwise break the training procedure.
"""
min_examples_per_class = train_examples_per_class + val_examples_per_class
examples_counter = Counter(g.labels)
keep_classes = set(class_ for class_, count in examples_counter.items() if count > min_examples_per_class)
keep_indices = [i for i in range(len(g.labels)) if g.labels[i] in keep_classes]

return create_subgraph(g, nodes_to_keep=keep_indices)
```