
import org.apache.commons.lang3.ArrayUtils;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;
import org.jgrapht.EdgeFactory;
import org.jgrapht.alg.CycleDetector;
import org.jgrapht.graph.DefaultDirectedGraph;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.*;
import java.util.function.Supplier;
import java.util.stream.Collectors;

/**
 * Common code for graphs used for local assembly.
 */
public abstract class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends DefaultDirectedGraph<V, E> {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /** Kmer size this graph was created with; the constructor rejects non-positive values. */
    protected final int kmerSize;

     * Construct a TestGraph with kmerSize
     * @param kmerSize
    protected BaseGraph(final int kmerSize, final EdgeFactory<V,E> edgeFactory) {
        Utils.validateArg(kmerSize > 0, () -> "kmerSize must be > 0 but got " + kmerSize);
        this.kmerSize = kmerSize;

     * How big of a kmer did we use to create this graph?
     * @return
    public final int getKmerSize() {
        return kmerSize;

     * @param v the vertex to test
     * @return  true if this vertex is a reference node (meaning that it appears on the reference path in the graph)
    public final boolean isReferenceNode( final V v ) {
        Utils.nonNull(v, "Attempting to test a null vertex.");

        if (edgesOf(v).stream().anyMatch(e -> e.isRef())){
            return true;

        // edge case: if the graph only has one node then it's a ref node, otherwise it's not
        return vertexSet().size() == 1;

     * @param v the vertex to test
     * @return  true if this vertex is a source node (in degree == 0)
    public final boolean isSource( final V v ) {
        Utils.nonNull(v, "Attempting to test a null vertex.");
        return inDegreeOf(v) == 0;

     * @param v the vertex to test
     * @return  true if this vertex is a sink node (out degree == 0)
    public final boolean isSink( final V v ) {
        Utils.nonNull(v, "Attempting to test a null vertex.");
        return outDegreeOf(v) == 0;

     * Get the set of source vertices of this graph
     * @return a non-null set
    public final Set<V> getSources() {
        return vertexSet().stream().filter(v -> isSource(v)).collect(Collectors.toSet());

     * Get the set of sink vertices of this graph
     * @return a non-null set
    public final Set<V> getSinks() {
        return vertexSet().stream().filter(v -> isSink(v)).collect(Collectors.toSet());

     * Convert this kmer graph to a simple sequence graph.
     * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
     * graph.  Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
     * @return a newly allocated SequenceGraph
    public SeqGraph toSequenceGraph() {
        final SeqGraph seqGraph = new SeqGraph(kmerSize);
        final Map<V, SeqVertex> vertexMap = new HashMap<>();

        // create all of the equivalent seq graph vertices
        for ( final V dv : vertexSet() ) {
            final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
            vertexMap.put(dv, sv);

        // walk through the nodes and connect them to their equivalent seq vertices
        for( final E e : edgeSet() ) {
            final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
            final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
            seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));

        return seqGraph;

     * Pull out the additional sequence implied by traversing this node in the graph
     * @param v the vertex from which to pull out the additional base sequence
     * @return  non-null byte array
    public final byte[] getAdditionalSequence( final V v ) {
        Utils.nonNull(v, "Attempting to pull sequence from a null vertex.");
        return v.getAdditionalSequence(isSource(v));

     * @param v the vertex to test
     * @return  true if this vertex is a reference source
    public final boolean isRefSource( final V v ) {
        Utils.nonNull(v, "Attempting to pull sequence from a null vertex.");

        // confirm that no incoming edges are reference edges
        if (incomingEdgesOf(v).stream().anyMatch(e -> e.isRef())) {
            return false;

        // confirm that there is an outgoing reference edge
        if (outgoingEdgesOf(v).stream().anyMatch(e -> e.isRef())) {
            return true;

        // edge case: if the graph only has one node then it's a ref source, otherwise it's not
        return vertexSet().size() == 1;

     * @param v the vertex to test
     * @return  true if this vertex is a reference sink
    public final boolean isRefSink( final V v ) {
        Utils.nonNull(v, "Attempting to pull sequence from a null vertex.");

        // confirm that no outgoing edges are reference edges
        if (outgoingEdgesOf(v).stream().anyMatch(e -> e.isRef())) {
            return false;

        // confirm that there is an incoming reference edge
        if (incomingEdgesOf(v).stream().anyMatch(e -> e.isRef())) {
            return true;

        // edge case: if the graph only has one node then it's a ref sink, otherwise it's not
        return vertexSet().size() == 1;

     * @return the reference source vertex pulled from the graph, can be null if it doesn't exist in the graph
    public final V getReferenceSourceVertex( ) {
        return vertexSet().stream().filter(v -> isRefSource(v)).findFirst().orElse(null);

     * @return the reference sink vertex pulled from the graph, can be null if it doesn't exist in the graph
    public final V getReferenceSinkVertex( ) {
        return vertexSet().stream().filter(v -> isRefSink(v)).findFirst().orElse(null);

     * Traverse the graph and get the next reference vertex if it exists
     * @param v the current vertex, can be null
     * @return  the next reference vertex if it exists, otherwise null
    public final V getNextReferenceVertex( final V v ) {
        return getNextReferenceVertex(v, false, Optional.<E>empty());

     * Traverse the graph and get the next reference vertex if it exists
     * @param v the current vertex, can be null
     * @param allowNonRefPaths if true, allow sub-paths that are non-reference if there is only a single outgoing edge
     * @param blacklistedEdge optional edge to ignore in the traversal down; useful to exclude the non-reference dangling paths
     * @return the next vertex (but not necessarily on the reference path if allowNonRefPaths is true) if it exists, otherwise null
    public final V getNextReferenceVertex( final V v, final boolean allowNonRefPaths, final Optional<E> blacklistedEdge ) {
        if( v == null ) { return null; }

        final Set<E> outgoingEdges = outgoingEdgesOf(v);

        if (outgoingEdges.isEmpty()){
            return null;

        for( final E edgeToTest : outgoingEdges ) {
            if( edgeToTest.isRef() ) {
                return getEdgeTarget(edgeToTest);

        if (!allowNonRefPaths){
            return null;

        //singleton or empty set
        final Set<E> blacklistedEdgeSet = blacklistedEdge.isPresent() ? Collections.singleton(blacklistedEdge.get()) : Collections.emptySet();

        // if we got here, then we aren't on a reference path
        final Optional<E> edge = -> !blacklistedEdgeSet.contains(e)).findAny();
        return edge.isPresent() ? getEdgeTarget(edge.get()) : null;

     * Traverse the graph and get the previous reference vertex if it exists
     * @param v the current vertex, can be null
     * @return  the previous reference vertex if it exists or null otherwise.
    public final V getPrevReferenceVertex( final V v ) {
        if( v == null ) { return null; }
        return incomingEdgesOf(v).stream().map(e -> getEdgeSource(e)).filter(vrtx -> isReferenceNode(vrtx)).findFirst().orElse(null);

     * Walk along the reference path in the graph and pull out the corresponding bases
     * @param fromVertex    starting vertex
     * @param toVertex      ending vertex
     * @param includeStart  should the starting vertex be included in the path
     * @param includeStop   should the ending vertex be included in the path
     * @return              byte[] array holding the reference bases, this can be null if there are no nodes between the starting and ending vertex (insertions for example)
    public final byte[] getReferenceBytes( final V fromVertex, final V toVertex, final boolean includeStart, final boolean includeStop ) {
        Utils.nonNull(fromVertex, "Starting vertex in requested path cannot be null.");
        Utils.nonNull(toVertex, "From vertex in requested path cannot be null.");

        byte[] bytes = null;
        V v = fromVertex;
        if( includeStart ) {
            bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v));
        v = getNextReferenceVertex(v); // advance along the reference path
        while( v != null && !v.equals(toVertex) ) {
            bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v));
            v = getNextReferenceVertex(v); // advance along the reference path
        if( includeStop && v != null && v.equals(toVertex)) {
            bytes = ArrayUtils.addAll(bytes, getAdditionalSequence(v));
        return bytes;

     * Convenience function to add multiple vertices to the graph at once
     * @param vertices one or more vertices to add
    public final void addVertices(final V... vertices) {

     * Convenience function to add multiple vertices to the graph at once
     * @param vertices one or more vertices to add
    public final void addVertices(final Collection<V> vertices) {
        vertices.forEach(v -> addVertex(v));

     * Convenience function to add multiple edges to the graph
     * @param start the first vertex to connect
     * @param remaining all additional vertices to connect
    public final void addEdges(final V start, final V... remaining) {
        Utils.nonNull(start, "start vertex");
        if (remaining == null || remaining.length == 0){
        V prev = start;
        for ( final V next : remaining ) {
            Utils.nonNull(next, "null vertex");
            addEdge(prev, next);
            prev = next;

     * Convenience function to add multiple edges to the graph
     * @param start the first vertex to connect
     * @param remaining all additional vertices to connect
    public final void addEdges(final Supplier<E> template, final V start, final V... remaining) {
        Utils.nonNull(template, "template edge");
        Utils.nonNull(start, "start vertex");

        V prev = start;
        for ( final V next : remaining ) {
            Utils.nonNull(next, "null vertex");
            addEdge(prev, next, template.get());
            prev = next;

     * Get the set of vertices connected by outgoing edges of V
     * @param v a non-null vertex
     * @return a set of vertices connected by outgoing edges from v
    public final Set<V> outgoingVerticesOf(final V v) {
        return outgoingEdgesOf(v).stream().map(e -> getEdgeTarget(e)).collect(Collectors.toSet());

     * Get the set of vertices connected to v by incoming edges
     * @param v a non-null vertex
     * @return a set of vertices {X} connected X -> v
    public final Set<V> incomingVerticesOf(final V v) {
        return incomingEdgesOf(v).stream().map(e -> getEdgeSource(e)).collect(Collectors.toSet());

     * Get the set of vertices connected to v by incoming or outgoing edges
     * @param v a non-null vertex
     * @return a set of vertices {X} connected X -> v or v -> Y
    public final Set<V> neighboringVerticesOf(final V v) {
        return Sets.union(incomingVerticesOf(v), outgoingVerticesOf(v));

     * Print out the graph in the dot language for visualization
     * @param destination File to write to
    public final void printGraph(final File destination, final int pruneFactor) {
        try (PrintStream stream = new PrintStream(new FileOutputStream(destination))) {
            printGraph(stream, true, pruneFactor);
        } catch ( final FileNotFoundException e ) {
            throw new UserException.CouldNotReadInputFile(destination, e);

    public final void printGraph(final PrintStream graphWriter, final boolean writeHeader, final int pruneFactor) {
        if ( writeHeader ) {
            graphWriter.println("digraph assemblyGraphs {");

        for( final E edge : edgeSet() ) {
            final String edgeString =  String.format("\t%s -> %s ", getEdgeSource(edge).toString(), getEdgeTarget(edge).toString());
            final String edgeLabelString;
            if (edge.getMultiplicity() > 0 && edge.getMultiplicity() < pruneFactor){
                edgeLabelString = String.format("[style=dotted,color=grey,label=\"%s\"];", edge.getDotLabel());
            } else {
                edgeLabelString = String.format("[label=\"%s\"];", edge.getDotLabel());
            if( edge.isRef() ) {
                graphWriter.println(edgeString + " [color=red];");

        for( final V v : vertexSet() ) {
            graphWriter.println(String.format("\t%s [label=\"%s\",shape=box]", v.toString(), new String(getAdditionalSequence(v)) + v.getAdditionalInfo()));

        if ( writeHeader ) {

     * Remove edges that are connected before the reference source and after the reference sink
     * Also removes all vertices that are orphaned by this process
    public final void cleanNonRefPaths() {
        if( getReferenceSourceVertex() == null || getReferenceSinkVertex() == null ) {

        // Remove non-ref edges connected before and after the reference path
        final Collection<E> edgesToCheck = new HashSet<>();
        while( !edgesToCheck.isEmpty() ) {
            final E e = edgesToCheck.iterator().next();
            if( !e.isRef() ) {
                edgesToCheck.addAll( incomingEdgesOf(getEdgeSource(e)) );

        while( !edgesToCheck.isEmpty() ) {
            final E e = edgesToCheck.iterator().next();
            if( !e.isRef() ) {
                edgesToCheck.addAll( outgoingEdgesOf(getEdgeTarget(e)) );


     * Prune all chains from this graph where all edges in the path have multiplicity < pruneFactor
     * @see LowWeightChainPruner for more information
     * @param pruneFactor all edges with multiplicity < this factor that aren't ref edges will be removed
    public final void pruneLowWeightChains( final int pruneFactor ) {
        new LowWeightChainPruner<V,E>(pruneFactor).pruneLowWeightChains(this);

     * Remove all vertices in the graph that have in and out degree of 0
    public void removeSingletonOrphanVertices() {
        // Run through the graph and clean up singular orphaned nodes
        //Note: need to collect nodes to remove first because we can't directly modify the list we're iterating over
        final List<V> toRemove = vertexSet().stream().filter(v -> isSingletonOrphan(v)).collect(Collectors.toList());

    private boolean isSingletonOrphan(final V v) {
        return inDegreeOf(v) == 0 && outDegreeOf(v) == 0 && !isRefSource(v);

     * Remove all vertices on the graph that cannot be accessed by following any edge,
     * regardless of its direction, from the reference source vertex
    public final void removeVerticesNotConnectedToRefRegardlessOfEdgeDirection() {
        final Collection<V> toRemove = new HashSet<>(vertexSet());

        final V refV = getReferenceSourceVertex();
        if ( refV != null ) {
            for ( final V v : new BaseGraphIterator<>(this, refV, true, true) ) {


     * Remove all vertices in the graph that aren't on a path from the reference source vertex to the reference sink vertex
     * More aggressive reference pruning algorithm than removeVerticesNotConnectedToRefRegardlessOfEdgeDirection,
     * as it requires vertices to not only be connected by a series of directed edges but also prunes away
     * paths that do not also meet eventually with the reference sink vertex
    public final void removePathsNotConnectedToRef() {
        if ( getReferenceSourceVertex() == null || getReferenceSinkVertex() == null ) {
            throw new IllegalStateException("Graph must have ref source and sink vertices");

        // get the set of vertices we can reach by going forward from the ref source
        final Collection<V> onPathFromRefSource = new HashSet<>(vertexSet().size());
        for ( final V v : new BaseGraphIterator<>(this, getReferenceSourceVertex(), false, true) ) {

        // get the set of vertices we can reach by going backward from the ref sink
        final Collection<V> onPathFromRefSink = new HashSet<>(vertexSet().size());
        for ( final V v : new BaseGraphIterator<>(this, getReferenceSinkVertex(), true, false) ) {

        // we want to remove anything that's not in both the sink and source sets
        final Collection<V> verticesToRemove = new HashSet<>(vertexSet());

        // simple sanity checks that this algorithm is working.
        if ( getSinks().size() > 1 ) {
            throw new IllegalStateException("Should have eliminated all but the reference sink, but found " + getSinks());

        if ( getSources().size() > 1 ) {
            throw new IllegalStateException("Should have eliminated all but the reference source, but found " + getSources());

     * Semi-lenient comparison of two graphs, truing true if g1 and g2 have similar structure
     * By similar this means that both graphs have the same number of vertices, where each vertex can find
     * a vertex in the other graph that's seqEqual to it.  A similar constraint applies to the edges,
     * where all edges in g1 must have a corresponding edge in g2 where both source and target vertices are
     * seqEqual
     * @param g1 the first graph to compare
     * @param g2 the second graph to compare
     * @param <T> the type of the nodes in those graphs
     * @return true if g1 and g2 are equals
    public static <T extends BaseVertex, E extends BaseEdge> boolean graphEquals(final BaseGraph<T,E> g1, final BaseGraph<T,E> g2) {
        Utils.nonNull(g1, "g1");
        Utils.nonNull(g2, "g2");
        final Set<T> vertices1 = g1.vertexSet();
        final Set<T> vertices2 = g2.vertexSet();
        final Set<E> edges1 = g1.edgeSet();
        final Set<E> edges2 = g2.edgeSet();

        if ( vertices1.size() != vertices2.size() || edges1.size() != edges2.size() ) {
            return false;

        //for every vertex in g1 there is a vertex in g2 with an equal getSequenceString
        final boolean ok= -> v1.getSequenceString()).allMatch(v1seqString -> -> v1seqString.equals(v2.getSequenceString())));
        if (! ok){
            return false;

        //for every edge in g1 there is an equal edge in g2
        final boolean okG1 = -> -> g1.seqEquals(e1, e2, g2)));
        if (! okG1){
            return false;
        //for every edge in g2 there is an equal edge in g1
        return -> -> g2.seqEquals(e2, e1, g1)));

    // For use when comparing edges across graphs!
    private boolean seqEquals( final E edge1, final E edge2, final BaseGraph<V,E> graph2 ) {
        return (getEdgeSource(edge1).seqEquals(graph2.getEdgeSource(edge2))) && (getEdgeTarget(edge1).seqEquals(graph2.getEdgeTarget(edge2)));

     * Get the incoming edge of v.  Requires that there be only one such edge or throws an error
     * @param v our vertex
     * @return the single incoming edge to v, or null if none exists
    public final E incomingEdgeOf(final V v) {
        return getSingletonEdge(incomingEdgesOf(v));

     * Get the outgoing edge of v.  Requires that there be only one such edge or throws an error
     * @param v our vertex
     * @return the single outgoing edge from v, or null if none exists
    public final E outgoingEdgeOf(final V v) {
        return getSingletonEdge(outgoingEdgesOf(v));

     * Helper function that gets the a single edge from edges, null if edges is empty, or
     * throws an error is edges has more than 1 element
     * @param edges a set of edges
     * @return a edge
    private E getSingletonEdge(final Collection<E> edges) {
        Utils.validateArg(edges.size() <= 1, "Cannot get a single incoming edge for a vertex with multiple incoming edges " + edges);
        return edges.isEmpty() ? null : edges.iterator().next();

     * Add edge between source -> target if none exists, or add e to an already existing one if present
     * @param source source vertex
     * @param target vertex
     * @param e edge to add
    public final void addOrUpdateEdge(final V source, final V target, final E e) {
        Utils.nonNull(source, "source");
        Utils.nonNull(target, "target");
        Utils.nonNull(e, "edge");

        final E prev = getEdge(source, target);
        if ( prev != null ) {
        } else {
            addEdge(source, target, e);

    public String toString() {
        return "BaseGraph{" +
                "kmerSize=" + kmerSize +

     * Get the set of vertices within distance edges of source, regardless of edge direction
     * @param source the source vertex to consider
     * @param distance the distance
     * @return a set of vertices within distance of source
    private Set<V> verticesWithinDistance(final V source, final int distance) {
        if ( distance == 0 ) {
            return Collections.singleton(source);

        final Set<V> found = new HashSet<>();
        for ( final V v : neighboringVerticesOf(source) ) {
            found.addAll(verticesWithinDistance(v, distance - 1));

        return found;

     * Get a graph containing only the vertices within distance edges of target
     * @param target a vertex in graph
     * @param distance the max distance
     * @return a non-null graph
    public final BaseGraph<V,E> subsetToNeighbors(final V target, final int distance) {
        Utils.nonNull(target, "Target cannot be null");
        Utils.validateArg(containsVertex(target), () -> "Graph doesn't contain vertex " + target);
        Utils.validateArg(distance >= 0, () -> "Distance must be >= 0 but got " + distance);

        final Set<V> toKeep = verticesWithinDistance(target, distance);
        final Collection<V> toRemove = new HashSet<>(vertexSet());

        final BaseGraph<V,E> result = clone();

        return result;

     * Get a subgraph of graph that contains only vertices within a given number of edges of the ref source vertex
     * @return a non-null subgraph of this graph
    public final BaseGraph<V,E> subsetToRefSource(final int refSourceNeighborhood) {
        Utils.validateArg(refSourceNeighborhood > 0, () -> "refSourceNeighborhood needs to be positive but was " + refSourceNeighborhood);
        return subsetToNeighbors(getReferenceSourceVertex(), refSourceNeighborhood);

     * Checks whether the graph contains all the vertices in a collection.
     * @param vertices the vertices to check. Must not be null and must not contain a null.
     * @throws IllegalArgumentException if {@code vertices} is {@code null}.
     * @return {@code true} if all the vertices in the input collection are present in this graph.
     * Also if the input collection is empty. Otherwise it returns {@code false}.
    public final boolean containsAllVertices(final Collection<? extends V> vertices) {
        Utils.nonNull(vertices, "the input vertices collection cannot be null");
        Utils.containsNoNull(vertices, "null vertex");
        return -> containsVertex(v));

     * Checks for the presence of directed cycles in the graph.
     * @return {@code true} if the graph has cycles, {@code false} otherwise.
    public final boolean hasCycles() {
        return new CycleDetector<>(this).detectCycles();

    public BaseGraph<V,E> clone()  {
        return (BaseGraph<V,E>) super.clone();

     * General iterator that can iterate over all vertices in a BaseGraph, following either
     * incoming, outgoing edge (as well as both or none) edges.  Supports traversal of graphs
     * with cycles and other crazy structures.  Will only ever visit each vertex once.  The
     * order in which the vertices are visited is undefined.
    private static final class BaseGraphIterator<T extends BaseVertex, E extends BaseEdge> implements Iterator<T>, Iterable<T> {
        final Collection<T> visited = new HashSet<>();
        final Deque<T> toVisit = new LinkedList<>();
        final BaseGraph<T,E> graph;
        final boolean followIncomingEdges;
        final boolean followOutgoingEdges;

         * Create a new BaseGraphIterator starting its traversal at start
         * Note that if both followIncomingEdges and followOutgoingEdges are false, we simply return the
         * start vertex
         * @param graph the graph to iterator over.  Cannot be null
         * @param start the vertex to start at.  Cannot be null
         * @param followIncomingEdges should we follow incoming edges during our
         *                            traversal? (goes backward through the graph)
         * @param followOutgoingEdges should we follow outgoing edges during out traversal?
        private BaseGraphIterator(final BaseGraph<T,E> graph, final T start,
                                 final boolean followIncomingEdges, final boolean followOutgoingEdges) {
            Utils.nonNull(graph, "graph cannot be null");
            Utils.nonNull(start, "start cannot be null");
            Utils.validateArg(graph.containsVertex(start), () -> "start " + start + " must be in graph but it isn't");
            this.graph = graph;
            this.followIncomingEdges = followIncomingEdges;
            this.followOutgoingEdges = followOutgoingEdges;


        public Iterator<T> iterator() {
            return this;

        public boolean hasNext() {
            return ! toVisit.isEmpty();

        public T next() {
            final T v = toVisit.pop();

            if ( ! visited.contains(v) ) {
                if ( followIncomingEdges ) {
                if ( followOutgoingEdges ) {

            return v;

        public void remove() {
            throw new UnsupportedOperationException("Doesn't implement remove");