org.apache.spark.graphx.VertexId Scala Examples

The following examples show how to use org.apache.spark.graphx.VertexId. Each example lists its original project and source file directly above the code.
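Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the object name and sample data are invented for illustration) showing that VertexId is simply GraphX's type alias for Long and how it appears when building and querying a small graph:

import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object VertexIdMinimalExample extends App {
  val sc = new SparkContext(new SparkConf().setAppName("VertexId minimal example").setMaster("local[2]"))

  // VertexId is a type alias for Long, so plain Long literals can be used as vertex ids.
  val vertices: RDD[(VertexId, String)] = sc.parallelize(Seq((1L, "a"), (2L, "b"), (3L, "c")))
  val edges: RDD[Edge[Int]] = sc.parallelize(Seq(Edge(1L, 2L, 1), Edge(2L, 3L, 1)))

  // Build the graph and look up a single vertex by its VertexId.
  val graph: Graph[String, Int] = Graph(vertices, edges)
  val target: VertexId = 2L
  graph.vertices.filter { case (id, _) => id == target }.collect().foreach(println)

  sc.stop()
}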
Example 1
Source File: GraphGeneration.scala    From Mastering-Machine-Learning-with-Spark-2.x   with MIT License
package com.github.maxpumperla.ml_spark.graphs

import org.apache.spark.graphx.lib.TriangleCount
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.graphx.{Graph, GraphLoader, PartitionStrategy, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}


object GraphGeneration extends App {

  val conf = new SparkConf()
    .setAppName("Graph generation")
    .setMaster("local[4]")
  val sc = new SparkContext(conf)

  val edgeListGraph = GraphLoader.edgeListFile(sc, "./edge_list.txt")

  val rawEdges: RDD[(VertexId, VertexId)] = sc.textFile("./edge_list.txt").map {
    line =>
      val field = line.split(" ")
      (field(0).toLong, field(1).toLong)
  }
  val edgeTupleGraph = Graph.fromEdgeTuples(
    rawEdges=rawEdges, defaultValue="")

  val gridGraph = GraphGenerators.gridGraph(sc, 5, 5)
  val starGraph = GraphGenerators.starGraph(sc, 11)
  val logNormalGraph  = GraphGenerators.logNormalGraph(
    sc, numVertices = 20, mu=1, sigma = 3
  )
  logNormalGraph.outDegrees.map(_._2).collect().sorted

  val actorGraph = GraphLoader.edgeListFile(
    sc, "./ca-hollywood-2009.txt", true
  ).partitionBy(PartitionStrategy.RandomVertexCut)
  actorGraph.edges.count()

  val actorComponents = actorGraph.connectedComponents().cache
  actorComponents.vertices.map(_._2).distinct().count

  val clusterSizes = actorComponents.vertices.map(
    v => (v._2, 1)).reduceByKey(_ + _)
  clusterSizes.map(_._2).max
  clusterSizes.map(_._2).min

  val smallActorGraph = GraphLoader.edgeListFile(sc, "./ca-hollywood-2009.txt")
  val strongComponents = smallActorGraph.stronglyConnectedComponents(numIter = 5)
  strongComponents.vertices.map(_._2).distinct().count

  val canonicalGraph = actorGraph.mapEdges(e => 1).removeSelfEdges().convertToCanonicalEdges()
  val partitionedGraph = canonicalGraph.partitionBy(PartitionStrategy.RandomVertexCut)

  actorGraph.triangleCount()
  val triangles = TriangleCount.runPreCanonicalized(partitionedGraph)

  actorGraph.staticPageRank(10)
  val actorPrGraph: Graph[Double, Double] = actorGraph.pageRank(0.0001)
  actorPrGraph.vertices.reduce((v1, v2) => {
    if (v1._2 > v2._2) v1 else v2
  })

  actorPrGraph.inDegrees.filter(v => v._1 == 33024L).collect.foreach(println)

  actorPrGraph.inDegrees.map(_._2).collect().sorted.takeRight(10)

  actorPrGraph.inDegrees.map(_._2).filter(_ >= 62).count

} 
Example 2
Source File: SSSPExample.scala    From Spark-2.3.1   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

// $example on$
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession


object SSSPExample {
  def main(args: Array[String]): Unit = {
    // Creates a SparkSession.
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .getOrCreate()
    val sc = spark.sparkContext

    // $example on$
    // A graph with edge attributes containing distances
    val graph: Graph[Long, Double] =
      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
    val sourceId: VertexId = 42 // The ultimate source
    // Initialize the graph such that all vertices except the root have distance infinity.
    val initialGraph = graph.mapVertices((id, _) =>
        if (id == sourceId) 0.0 else Double.PositiveInfinity)
    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
      triplet => {  // Send Message
        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
        } else {
          Iterator.empty
        }
      },
      (a, b) => math.min(a, b) // Merge Message
    )
    println(sssp.vertices.collect.mkString("\n"))
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println 
Example 3
Source File: NOInitBFSProcessor.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.hua.processor

import ml.sparkling.graph.operators.algorithms.bfs.processor.BFSProcessor
import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct.NOVertex
import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct.messages.{BFSConfirmMessage, BFSExpandMessage, NOMessage}
import org.apache.spark.graphx.{EdgeTriplet, VertexId}


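// BFS message processor for the betweenness computation: merging keeps every confirm message but at most one
// expand message, and sendMessage emits expand messages to not-yet-visited neighbours and confirm messages
// back to the vertex chosen as predecessor.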
class NOInitBFSProcessor[ED] extends BFSProcessor[NOVertex, ED, List[NOMessage[VertexId]]] {
  override def initialMessage: List[NOMessage[VertexId]] = List.empty

  override def mergeMessages(msg1: List[NOMessage[VertexId]], msg2: List[NOMessage[VertexId]]): List[NOMessage[VertexId]] = {
    val allMessages = msg1 ++ msg2
    val expandMessageList = allMessages.filter(_.isExpand)
    val expandMessage = expandMessageList.headOption
    val succMessages = allMessages.filter(_.isConfirm)

    expandMessage match {
      case Some(m) => succMessages :+ m
      case None => succMessages
    }
  }

  override def sendMessage(triplet: EdgeTriplet[NOVertex, ED]): Iterator[(VertexId, List[NOMessage[VertexId]])] = {

    def createExpandMsg(dstId: VertexId) = {
      val dstAttr = triplet.vertexAttr(dstId)
      val srcAttr = triplet.otherVertexAttr(dstId)
      if (dstAttr.pred.isEmpty && srcAttr.pred.nonEmpty) Iterator((dstId, List(BFSExpandMessage(triplet.otherVertexId(dstId))))) else Iterator.empty
    }

    def createConfirmMsg(dstId: VertexId) = {
      val dstAttr = triplet.vertexAttr(dstId)
      val srcAttr = triplet.otherVertexAttr(dstId)
      if (!dstAttr.isCompleted && srcAttr.pred.exists(_ == dstId)) Iterator((dstId, List(BFSConfirmMessage(triplet.otherVertexId(dstId))))) else Iterator.empty
    }

    val confirmMsg = createConfirmMsg(triplet.srcId) ++ createConfirmMsg(triplet.dstId)
    val expandMsg = createExpandMsg(triplet.srcId) ++ createExpandMsg(triplet.dstId)
    confirmMsg ++ expandMsg
  }
} 
Example 4
Source File: NOVertex.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct

import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct.messages.DFSPointer
import org.apache.spark.graphx.VertexId


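// Immutable vertex state for the betweenness computation: a BFS state map, optional predecessor and successors,
// an optional DFS pointer and the accumulated betweenness value bc.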
class NOVertex(val vertexId: VertexId,
               val bfsMap: Map[VertexId, NOBFSVertex],
               val pred: Option[VertexId],
               val succ: Option[Array[VertexId]],
               val dfsPointer: Option[DFSPointer],
               val bc: Double) extends Serializable {
  def setParent(idParent: VertexId) = NOVertex(vertexId, bfsMap, Some(idParent), succ, dfsPointer, bc)

  def setPredecessorAndSuccessors(newPred: Option[VertexId], newSucc: Option[Array[VertexId]]) =
    NOVertex(vertexId, bfsMap, newPred, newSucc, dfsPointer, bc)

  val isCompleted = pred.nonEmpty && succ.nonEmpty

  val leaf = succ.isEmpty

  lazy val bfsRoot = bfsMap.contains(vertexId)

  lazy val lowestSucc = succ.getOrElse(Array.empty).sorted.headOption

  lazy val eccentricity = if (bfsMap.isEmpty) 0 else bfsMap.map({ case (id, v) => v.distance}).max

  def withDfsPointer(pointer: Option[DFSPointer]) =
    NOVertex(vertexId, bfsMap, pred, succ, pointer, bc)

  def update(bfsMap: Map[VertexId, NOBFSVertex] = bfsMap, succ: Option[Array[VertexId]] = succ, dfsPointer: Option[DFSPointer] = dfsPointer, bcInc: Double = 0) =
    NOVertex(vertexId, bfsMap, pred, succ, dfsPointer, bc + bcInc)
}

object NOVertex extends Serializable {
  def apply(vertexId: VertexId,
            bfsMap: Map[VertexId, NOBFSVertex] = Map.empty,
            pred: Option[VertexId] = None,
            succ: Option[Array[VertexId]] = None,
            dfsPointer: Option[DFSPointer] = None,
            bc: Double = .0): NOVertex = new NOVertex(vertexId, bfsMap, pred, succ, dfsPointer, bc)
} 
Example 5
Source File: WithPathProcessor.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors

import org.apache.spark.graphx.VertexId



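// Path processor that keeps, for every reached vertex, the minimal distance together with the set of all
// shortest paths (stored as lists of vertex ids) leading to it.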
class WithPathProcessor[VD,ED]() extends PathProcessor[VD,ED,Map[VertexId,(ED,Set[List[VertexId]])]]{
  private type PathsSet=(ED,Set[List[VertexId]])
  private type PathsMap=Map[VertexId,PathsSet]
  def EMPTY_CONTAINER=Map.empty[VertexId,PathsSet]

  def getNewContainerForPaths() ={
    EMPTY_CONTAINER
  }

  def putNewPath(map:PathsMap,to:VertexId,weight:ED)(implicit num:Numeric[ED]): PathsMap={
    (map + (to -> (weight,Set(to::Nil)))).map(identity)
  }

  def processNewMessages(map1:PathsMap,map2:PathsMap)(implicit num:Numeric[ED]):PathsMap={
    (map1.keySet ++ map2.keySet).map(vId=>(vId,mergePathSets(map1.get(vId),map2.get(vId)))).toMap.map(identity)
  }


  def extendPathsMerging(targetVertexId:VertexId,map:PathsMap,vertexId:VertexId,distance:ED,map2:PathsMap)(implicit num:Numeric[ED]): PathsMap ={
    val extended=map.filterKeys(_!=targetVertexId).mapValues(extendPathsSet(_,vertexId,distance)).map(identity)
    processNewMessages(extended,map2)
  }

  private def extendPathsSet(pathSet:PathsSet,vertexId:VertexId,distance:ED)(implicit num:Numeric[ED]):PathsSet={
    pathSet match{
      case (edge,set) =>  (num.plus(distance,edge),set.map(vertexId :: _))
    }

  }

  private def mergePathSets(pathSet1:Option[PathsSet],pathSet2:Option[PathsSet])(implicit num:Numeric[ED]): PathsSet ={
    (pathSet1 :: pathSet2 :: Nil).flatten[PathsSet].reduce[PathsSet]{
      case ((edge1,set1),(edge2,set2))=>
        num.compare(edge1,edge2).signum match{
          case 0=> (edge1,set1++set2)
          case 1=>(edge2,set2)
          case -1=>(edge1,set1)
        }
    }
  }
} 
Example 6
Source File: FastUtilWithDistance.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors.fastutils

import it.unimi.dsi.fastutil.longs._
import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes
import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes._
import ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors.PathProcessor
import ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors.fastutils.FastUtilWithDistance.DataMap
import ml.sparkling.graph.operators.utils.LoggerHolder
import org.apache.spark.graphx.VertexId

import scala.collection.JavaConversions._



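// Path processor backed by fastutil's Long2DoubleOpenHashMap: for each reached vertex only the smallest
// known distance is kept.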
class FastUtilWithDistance[VD, ED]() extends PathProcessor[VD, ED, DataMap] {
  def EMPTY_CONTAINER = new DataMap(0)
  def getNewContainerForPaths() = {
   new DataMap(64,0.25f)
  }

  def putNewPath(map: DataMap, to: VertexId, weight: ED)(implicit num: Numeric[ED]): DataMap = {
    val out=map.asInstanceOf[DataMap].clone()
    out.put(to, num.toDouble(weight))
    out
  }

  def processNewMessages(map1: DataMap, map2: DataMap)(implicit num: Numeric[ED]):DataMap = {
    mergeMessages(map1,map2.clone())
  }

  override def mergeMessages(map1: DataMap, map2: DataMap)(implicit num: Numeric[ED]):DataMap = {
    val out=map2
    map1.foreach{case (key: JLong,inValue: JDouble)=>{
      val longKey=key.toLong
      val value: Double =if(map2.containsKey(longKey)) {
        min(inValue,map2.get(key.toLong))
      }else{
        inValue
      }
      out.put(longKey, value)
    }}
    out
  }

  def min(d1:JDouble,d2:JDouble):JDouble={
    if(d1<d2){
      d1
    }else{
      d2
    }
  }

  def extendPathsMerging(targetVertexId:VertexId,map: DataMap, vertexId: VertexId, distance: ED,map2: DataMap)(implicit num: Numeric[ED]):DataMap = {
    val out=map2.clone()
    val toAdd=num.toDouble(distance)
    map.foreach{case (key: JLong,inValue: JDouble)=>{
      if(!targetVertexId.equals(key)){
        val longKey=key.toLong
        val value: Double =if(map2.containsKey(longKey)) {
          min(inValue+toAdd,map2.get(longKey))
        }else{
          inValue+toAdd
        }
        out.put(longKey,value)
      }
    }}
    out
  }

}

object FastUtilWithDistance{
  type DataMap=Long2DoubleOpenHashMap
} 
Example 7
Source File: PathProcessor.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors

import org.apache.spark.graphx.VertexId


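// Contract for the containers that accumulate shortest-path state: creating, extending and merging path collections.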
trait PathProcessor[VD,ED,PS] extends Serializable{
  def EMPTY_CONTAINER:PS
  def getNewContainerForPaths():PS
  def putNewPath(map:PS,to:VertexId,weight:ED)(implicit num:Numeric[ED]): PS
  def processNewMessages(map1:PS, map2:PS)(implicit num:Numeric[ED]):PS
  def mergeMessages(map1:PS, map2:PS)(implicit num:Numeric[ED]):PS={
    processNewMessages(map1,map2)
  }
  def extendPathsMerging(targetVertexId:VertexId,map:PS,vertexId:VertexId,distance:ED,map2:PS)(implicit num:Numeric[ED]): PS
} 
Example 8
Source File: SingleVertexProcessor.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors

import org.apache.spark.graphx.VertexId


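// Path processor that only tracks the distance to a single vertex (computedVertexId); 0 doubles as the
// 'no path found yet' marker.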
class SingleVertexProcessor[VD, ED](computedVertexId:VertexId) extends PathProcessor[VD, ED, Double] {
  def EMPTY_CONTAINER = 0d

  override def getNewContainerForPaths(): Double = 0d

  override def extendPathsMerging(targetVertexId: VertexId, currentValue: Double, vertexId: VertexId, distance: ED, currentValue2: Double)(implicit num: Numeric[ED]): Double = {
    val currentExtended= {
      if (vertexId == computedVertexId || currentValue != 0)
        currentValue + num.toDouble(distance)
      else
        0.0
    }
    processNewMessages(currentExtended,currentValue2)
  }

  override def processNewMessages(map1: Double, map2: Double)(implicit num: Numeric[ED]): Double = {
    (map1,map2) match{
      case (0d,_)=> map2
      case (_,0d)=> map1
      case _ =>Math.min(map1,map2)
    }
  }

  override def putNewPath(map: Double, to: VertexId, weight: ED)(implicit num: Numeric[ED]): Double = {
   num.toDouble(weight)
  }

} 
Example 9
Source File: PSCANConnectedComponents.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.algorithms.community.pscan

import org.apache.spark.graphx.{EdgeTriplet, Graph, Pregel, VertexId}


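// Pregel-based connected components restricted to edges whose weight exceeds minWeight; every vertex converges
// to the smallest vertex id reachable through such edges.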
class PSCANConnectedComponents(minWeight:Double) extends Serializable{


  def run[VD,ED](graph:Graph[VertexId,Double], maxIterations:Int=Int.MaxValue):Graph[VertexId,Double]={
    val initialMessage = Long.MaxValue
    Pregel(graph, initialMessage,maxIterations = maxIterations)(
    vprog = (_, attr, msg) => math.min(attr, msg),
    sendMsg = sendMessage,
    mergeMsg = (a, b) => math.min(a, b))
  }

  def sendMessage(edge: EdgeTriplet[VertexId, Double]): Iterator[(VertexId, VertexId)] = {
    if(edge.attr > minWeight){
      if(edge.srcAttr<edge.dstAttr){
        Iterator((edge.dstId,edge.srcAttr))
      }else if(edge.dstAttr<edge.srcAttr){
        Iterator((edge.srcId,edge.dstAttr))
      }else{
        Iterator.empty
      }
    }else{
      Iterator.empty
    }
  }
} 
Example 10
Source File: GraphMLLoader.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.loaders.graphml

import com.databricks.spark.xml._
import ml.sparkling.graph.loaders.graphml.GraphMLFormat._
import ml.sparkling.graph.loaders.graphml.GraphMLTypes.TypeHandler
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext, SparkSession}

import scala.collection.mutable
import scala.util.Try


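  // Loads a GraphML file via spark-xml: <key> declarations become typed attribute handlers, node elements become
  // vertices (ids assigned with zipWithUniqueId) and edge elements become edges carrying their attribute maps.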
  def loadGraphFromML(path: String)(implicit sc: SparkContext): Graph[ValuesMap, ValuesMap] = {
    val sparkSession=SparkSession.builder().getOrCreate();

    val graphDataFrame = sparkSession.sqlContext.read
      .format("com.databricks.spark.xml")
      .option("attributePrefix","@")
      .option("valueTag","#VALUE")
      .option("rowTag",graphTag).load(path).rdd

    val keys =sparkSession.sqlContext.read
      .format("com.databricks.spark.xml")
      .option("attributePrefix","@")
      .option("valueTag","#VALUE")
      .option("rowTag",graphMLTag).load(path).rdd
      .flatMap(r => Try(r.getAs[mutable.WrappedArray[Row]](keyTag).toArray).getOrElse(Array.empty))

    val nodesKeys = keys
      .filter(r => r.getAs[String](forAttribute) == nodeTag)
    val edgeKeys = keys
      .filter(r => r.getAs[String](forAttribute) == edgeTag)

    val nodeAttrHandlers = createAttrHandlersFor(nodesKeys)
    val edgeAttrHandlers = createAttrHandlersFor(edgeKeys)

    val verticesWithData = graphDataFrame.flatMap(r => r.getAs[Any](nodeTag) match {
      case data: mutable.WrappedArray[Row@unchecked] => data.array
      case data: Row => Array(data)
    })

    val verticesIndex = verticesWithData.map(r => r.getAs[String](idAttribute)).zipWithUniqueId().collect().toMap

    val vertices: RDD[(VertexId, Map[String, Any])] = verticesWithData
      .map(
        r => (verticesIndex(r.getAs[String](idAttribute)), extractAttributesMap(nodeAttrHandlers, r))
      )

    val edgesRows = graphDataFrame.flatMap(r => r.getAs[Any](edgeTag) match {
      case data: mutable.WrappedArray[Row@unchecked] => data.array
      case data: Row => Array(data)
    })
      .map(r => Edge(
        verticesIndex(r.getAs[String](sourceAttribute)),
        verticesIndex(r.getAs[String](targetAttribute)),
        extractAttributesMap(edgeAttrHandlers, r)
      ))
    Graph(vertices, edgesRows)
  }

  def extractAttributesMap(attrHandlers: Map[String, GraphMLAttribute], r: Row): Map[String, Any] = {
    Try(r.getAs[mutable.WrappedArray[Row]](dataTag)).toOption.map(
      _.map(r => {
        val attribute = attrHandlers(r.getAs[String](keyAttribute))
        (attribute.name, attribute.handler(r.getAs[String](tagValue)))
      }).toMap
    ).getOrElse(Map.empty) + ("id" -> r.getAs[String](idAttribute))
  }

  def createAttrHandlersFor(keys: RDD[Row]): Map[String, GraphMLAttribute] = {
    keys
      .map(r => (r.getAs[String](idAttribute), GraphMLAttribute(r.getAs[String](nameAttribute), GraphMLTypes(r.getAs[String](typeAttribute)))))
      .collect().toMap
  }
} 
Example 11
Source File: GraphProviders.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.loaders.csv.providers

import ml.sparkling.graph.loaders.csv.types.Types
import ml.sparkling.graph.loaders.csv.types.Types.ToVertexId
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.SparkSession;
import scala.reflect.ClassTag


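// Builders that turn DataFrame rows into GraphX graphs: simpleGraphBuilder uses vertex ids supplied by the row
// mappers, while indexedGraphBuilder first derives ids by indexing the values of selected columns.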
object GraphProviders {
  val defaultStorageLevel=StorageLevel.MEMORY_ONLY
  def simpleGraphBuilder[VD: ClassTag, ED: ClassTag](defaultVertex: Option[VD]=None,
                                                     vertexProvider: Row => Seq[(VertexId, VD)],
                                                     edgeProvider: Row => Seq[Edge[ED]],
                                                     edgeStorageLevel: StorageLevel = defaultStorageLevel,
                                                     vertexStorageLevel: StorageLevel =defaultStorageLevel)
                                                    (dataFrame: DataFrame): Graph[VD, ED] = {

    def mapRows[MT: ClassTag](mappingFunction: (Row) => Seq[MT]): RDD[MT] = {
      dataFrame.rdd.mapPartitionsWithIndex((id, rowIterator) => {
        rowIterator.flatMap { case row => mappingFunction(row) }
      })
    }

    val vertices: RDD[(VertexId, VD)] = mapRows(vertexProvider)
    val edges: RDD[Edge[ED]] = mapRows(edgeProvider)
    defaultVertex match{
      case None => Graph(vertices,edges,edgeStorageLevel=edgeStorageLevel,vertexStorageLevel=vertexStorageLevel)
      case Some(defaultVertexValue)=> Graph(vertices,edges,defaultVertexValue,edgeStorageLevel,vertexStorageLevel)
    }

  }

  def indexedGraphBuilder[VD:ClassTag, ED: ClassTag](defaultVertex: Option[VD]=None,
                                                      vertexProvider: (Row, ToVertexId[VD]) => Seq[(VertexId, VD)],
                                                      edgeProvider: (Row, ToVertexId[VD]) => Seq[Edge[ED]],
                                                      columnsToIndex: Seq[Int],
                                                      edgeStorageLevel: StorageLevel = defaultStorageLevel,
                                                      vertexStorageLevel: StorageLevel = defaultStorageLevel)
                                                     (dataFrame: DataFrame): Graph[VD, ED] = {
    val index = dataFrame.rdd.flatMap(row => columnsToIndex.map(row(_))).distinct().zipWithUniqueId().collect().toMap
    def extractIdFromIndex(vertex: VD) = index(vertex)
    simpleGraphBuilder(defaultVertex,
      vertexProvider(_: Row, extractIdFromIndex _),
      edgeProvider(_: Row, extractIdFromIndex _),
      edgeStorageLevel,
      vertexStorageLevel)(dataFrame)

  }
} 
Example 12
Source File: NOInitBFSPredicate.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.hua.predicate

import ml.sparkling.graph.operators.algorithms.bfs.predicate.BFSPredicate
import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct.NOVertex
import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.struct.messages.NOMessage
import org.apache.spark.graphx.VertexId


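// BFS vertex logic: the start vertex becomes its own parent, other vertices take their predecessor from the first
// expand message and their successors from confirm messages, and stop updating once completed.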
class NOInitBFSPredicate extends BFSPredicate[NOVertex, List[NOMessage[VertexId]]] {

  override def getInitialData(vertexId: VertexId, attr: NOVertex): (VertexId) => NOVertex =
    (id: VertexId) => if (id == vertexId) attr.setParent(id) else attr

  override def applyMessages(vertexId: VertexId, vertex: NOVertex, message: List[NOMessage[VertexId]]): NOVertex =
    if (vertex.isCompleted) vertex else updateVertex(vertex, message)


  def updateVertex(vertex: NOVertex, messages: List[NOMessage[VertexId]]) = {
    val parent = extractParrent(vertex, messages)
    val succ = extractSuccessors(vertex, messages)
    vertex.setPredecessorAndSuccessors(parent, succ)
  }

  def extractParrent(vertex: NOVertex, messages: List[NOMessage[VertexId]]) = {
    vertex.pred match {
      case Some(pred) => vertex.pred
      case None =>
        val expandMsg = messages.filter(_.isExpand).map(_.content)
        expandMsg.headOption
    }
  }

  def extractSuccessors(vertex: NOVertex, messages: List[NOMessage[VertexId]]) =
    vertex.succ match {
      case Some(arr) => vertex.succ
      case None =>
        val confirmMsg = messages.filter(_.isConfirm).map(_.content)
        if (confirmMsg.nonEmpty) Some(confirmMsg.toArray) else None
    }
} 
Example 13
Source File: FastUnfolding.scala    From fastunfolding   with Apache License 2.0
package com.soteradefense.dga.graphx.louvain

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{VertexId, PartitionStrategy, TripletFields, Graph}

import scala.reflect.ClassTag


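// Skeleton of a Louvain-style fast unfolding community detection driver: createGraph initialises per-vertex
// community state from aggregated edge weights, while the per-level optimisation loop is left incomplete in this snippet.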
class FastUnfolding(outputdir: String,
                    minProgress: Int = 1,
                    progressCounter: Int = 1) {

    var qValues = Array[(Int, Double)]()

    def saveLevel(sc: SparkContext,
                  level: Int,
                  q: Double,
                  graph: Graph[MyVertexState, Long]) = {
        graph.vertices.saveAsTextFile(s"${outputdir}/level_${level}_vertices")
        graph.edges.saveAsTextFile(s"${outputdir}/level_${level}_edges")
        //graph.vertices.map( {case (id,v) => ""+id+","+v.internalWeight+","+v.community }).saveAsTextFile(outputdir+"/level_"+level+"_vertices")
        //graph.edges.mapValues({case e=>""+e.srcId+","+e.dstId+","+e.attr}).saveAsTextFile(outputdir+"/level_"+level+"_edges")
        qValues = qValues :+ ((level, q))
        println(s"qValue: $q")

        // overwrite the q values at each level
        sc.parallelize(qValues, 1).saveAsTextFile(s"${outputdir}/qvalues")
    }

    def run[VD: ClassTag](sc: SparkContext, graph: Graph[VD, Long]) = {
        val initialGraph = createGraph(graph)

        val graphWeight = initialGraph.vertices.map(
            vertex => {
                vertex._2.nodeWeight
            }
        ).reduce(_ + _)

        val broadcastGraphWeight = sc.broadcast(graphWeight)

        val initialModularity = initialGraph.vertices.map(
            vertex => {
                vertex._2.in / (2 * graphWeight) - vertex._2.tot * vertex._2.tot / (graphWeight * graphWeight)
            }
        ).reduce(_ + _)

        var level = -1
        var halt = false

        while(!halt) {
            level += 1
            println(s"Starting level ${level}")

            val (currentQ, currentGraph, passes) = runFastUnfolding(sc, initialGraph, minProgress, progressCounter)


        }
    }

    def runFastUnfolding(sc: SparkContext,
                        graph: Graph[MyVertexState, Long],
                        minProgress: Int,
                        progressCounter: Int) = {
        val cachedGraph = graph.cache()
        

    }

    def createGraph[VD: ClassTag](graph: Graph[VD, Long]): Graph[MyVertexState, Long] = {
        val nodeWeights = graph.aggregateMessages[Long](
            cxt => {
                cxt.sendToSrc(cxt.attr)
                cxt.sendToDst(cxt.attr)
            },
            (a, b) => a + b,
            TripletFields.EdgeOnly
        )

        nodeWeights.foreach(result => println(s"nodeweight: ${result._1}, ${result._2}"))


        val louvainGraph = graph.outerJoinVertices(nodeWeights)((vid, data, weightOption) => {
            val weight = weightOption.getOrElse(0L)
            val state = new MyVertexState()
            state.community = vid
            state.changed = false
            state.tot = weight
            state.in = 0
            state.nodeWeight = weight
            state
        }).partitionBy(PartitionStrategy.EdgePartition2D)

        louvainGraph
    }
} 
Example 14
Source File: Neo4jGraphScalaTSE.scala    From neo4j-spark-connector   with Apache License 2.0
package org.neo4j.spark

import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.junit.Assert._
import org.junit._

import scala.collection.JavaConverters._

object Neo4jGraphScalaTSE {

}


class Neo4jGraphScalaTSE extends SparkConnectorScalaBaseTSE {
  val FIXTURE: String = "CREATE (s:A {a:0})-[r:REL {foo:'bar'}]->(t:B {b:1}) RETURN id(s) AS source, id(t) AS target"

  private var source: Long = _
  private var target: Long = _

  @Before
  @throws[Exception]
  def setUp {
    val map = SparkConnectorScalaSuiteIT.session().run(FIXTURE).single()
      .asMap()
    source = map.get("source").asInstanceOf[Long]
    target = map.get("target").asInstanceOf[Long]
  }

  private def assertGraph(graph: Graph[_, _], expectedNodes: Long, expectedRels: Long) = {
    assertEquals(expectedNodes, graph.vertices.count)
    assertEquals(expectedRels, graph.edges.count)
  }

  @Test def runCypherQueryWithParams {
    val data = List(Map("id"->1,"name"->"Test").asJava).asJava
    Executor.execute(sc, "UNWIND $data as row CREATE (n:Test {id:row.id}) SET n.name = row.name", Map(("data",data)))
  }
  @Test def runMatrixQuery {
    val graph = Neo4jGraph.loadGraph(sc, "A", Seq.empty, "B")
    assertGraph(graph, 2, 1)
  }

  @Test def saveGraph {
    val edges : RDD[Edge[VertexId]] = sc.makeRDD(Seq(Edge(source,target,42L)))
    val graph = Graph.fromEdges(edges,-1)
    assertGraph(graph, 2, 1)
    Neo4jGraph.saveGraph(sc,graph,null,("REL","test"))
    assertEquals(42L, SparkConnectorScalaSuiteIT.session().run("MATCH (:A)-[rel:REL]->(:B) RETURN rel.test as prop").single().get("prop").asLong())
  }

  @Test def saveGraphMerge {
    val edges : RDD[Edge[Long]] = sc.makeRDD(Seq(Edge(source,target,42L)))
    val graph = Graph.fromEdges(edges,13L)
    assertGraph(graph, 2, 1)
    Neo4jGraph.saveGraph(sc,graph,"value",("FOOBAR","test"),Option(("Foo","id")),Option(("Bar","id")),merge = true)
    assertEquals(Map("fid"->source,"bid"->target,"rv"->42L,"fv"->13L,"bv"->13L).asJava,SparkConnectorScalaSuiteIT.session().run("MATCH (foo:Foo)-[rel:FOOBAR]->(bar:Bar) RETURN {fid: foo.id, fv:foo.value, rv:rel.test,bid:bar.id,bv:bar.value} as data").single().get("data").asMap())
  }
  @Test def saveGraphByNodeLabel {
    val edges : RDD[Edge[VertexId]] = sc.makeRDD(Seq(Edge(0,1,42L)))
    val graph = Graph.fromEdges(edges,-1)
    assertGraph(graph, 2, 1)
    Neo4jGraph.saveGraph(sc,graph,null,("REL","test"),Option(("A","a")),Option(("B","b")))
    assertEquals(42L,SparkConnectorScalaSuiteIT.session().run("MATCH (:A)-[rel:REL]->(:B) RETURN rel.test as prop").single().get("prop").asLong())
  }
  @Test def mergeGraphByNodeLabel {
    val edges : RDD[Edge[VertexId]] = sc.makeRDD(Seq(Edge(source,target,42L)))
    val graph = Graph.fromEdges(edges,-1)
    assertGraph(graph, 2, 1)
    Neo4jGraph.saveGraph(sc,graph,null,("REL2","test"),merge = true)
    assertEquals(42L,SparkConnectorScalaSuiteIT.session().run("MATCH (:A)-[rel:REL2]->(:B) RETURN rel.test as prop").single().get("prop").asLong())
  }

  @Test def saveGraphNodes {
    val nodes : RDD[(VertexId, Long)] = sc.makeRDD(Seq((source,10L),(target,20L)))
    val edges : RDD[Edge[Long]] = sc.makeRDD(Seq())
    val graph = Graph[Long,Long](nodes,edges,-1)
    assertGraph(graph, 2, 0)
    Neo4jGraph.saveGraph(sc,graph,"prop")
    assertEquals(10L,SparkConnectorScalaSuiteIT.session().run(s"MATCH (a:A) WHERE id(a) = $source RETURN a.prop as prop").single().get("prop").asLong())
    assertEquals(20L,SparkConnectorScalaSuiteIT.session().run(s"MATCH (b:B) WHERE id(b) = $target RETURN b.prop as prop").single().get("prop").asLong())
  }
} 
Example 15
Source File: PageRank.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.graphx

import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object PageRank {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("PageRank")
    val sc = new SparkContext(conf)

    // build vertices
    val users: RDD[(VertexId, Array[String])] = sc.parallelize(List(
      "1,BarackObama,Barack Obama",
      "2,ladygaga,Goddess of Love",
      "3,jeresig,John Resig",
      "4,justinbieber,Justin Bieber",
      "6,matei_zaharia,Matei Zaharia",
      "7,odersky,Martin Odersky",
      "8,anonsys"
    ).map(line => line.split(",")).map(parts => (parts.head.toLong, parts.tail)))

    // build edges
    val followers: RDD[Edge[Double]] = sc.parallelize(Array(
      Edge(2L, 1L, 1.0),
      Edge(4L, 1L, 1.0),
      Edge(1L, 2L, 1.0),
      Edge(6L, 3L, 1.0),
      Edge(7L, 3L, 1.0),
      Edge(7L, 6L, 1.0),
      Edge(6L, 7L, 1.0),
      Edge(3L, 7L, 1.0)
    ))

    // build graph
    val followerGraph: Graph[Array[String], Double] = Graph(users, followers)

    // restrict the graph to users with usernames and names
    val subgraph = followerGraph.subgraph(vpred = (vid, attr) => attr.size == 2)

    // compute PageRank
    val pageRankGraph = subgraph.pageRank(0.001)

    // get attributes of the top pagerank users
    val userInfoWithPageRank = subgraph.outerJoinVertices(pageRankGraph.vertices) {
      case (uid, attrList, Some(pr)) => (pr, attrList.toList)
      case (uid, attrList, None) => (0.0, attrList.toList)
    }

    println(userInfoWithPageRank.vertices.top(5)(Ordering.by(_._2._1)).mkString("\n"))
  }
} 
Example 16
Source File: PageRank.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.graphx

import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

object PageRank {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("PageRank")
      .getOrCreate()
    val sc = spark.sparkContext

    // build vertices
    val users: RDD[(VertexId, Array[String])] = sc.parallelize(List(
      "1,BarackObama,Barack Obama",
      "2,ladygaga,Goddess of Love",
      "3,jeresig,John Resig",
      "4,justinbieber,Justin Bieber",
      "6,matei_zaharia,Matei Zaharia",
      "7,odersky,Martin Odersky",
      "8,anonsys"
    ).map(line => line.split(",")).map(parts => (parts.head.toLong, parts.tail)))

    // build edges
    val followers: RDD[Edge[Double]] = sc.parallelize(Array(
      Edge(2L, 1L, 1.0),
      Edge(4L, 1L, 1.0),
      Edge(1L, 2L, 1.0),
      Edge(6L, 3L, 1.0),
      Edge(7L, 3L, 1.0),
      Edge(7L, 6L, 1.0),
      Edge(6L, 7L, 1.0),
      Edge(3L, 7L, 1.0)
    ))

    // build graph
    val followerGraph: Graph[Array[String], Double] = Graph(users, followers)

    // restrict the graph to users with usernames and names
    val subgraph = followerGraph.subgraph(vpred = (vid, attr) => attr.size == 2)

    // compute PageRank
    val pageRankGraph = subgraph.pageRank(0.001)

    // get attributes of the top pagerank users
    val userInfoWithPageRank = subgraph.outerJoinVertices(pageRankGraph.vertices) {
      case (uid, attrList, Some(pr)) => (pr, attrList.toList)
      case (uid, attrList, None) => (0.0, attrList.toList)
    }

    println(userInfoWithPageRank.vertices.top(5)(Ordering.by(_._2._1)).mkString("\n"))
  }
} 
Example 17
Source File: SparkPersistence.scala    From csb   with GNU General Public License v3.0
package edu.msstate.dasi.csb.persistence

import java.io.File

import edu.msstate.dasi.csb.model.{EdgeData, VertexData}
import edu.msstate.dasi.csb.sc
import edu.msstate.dasi.csb.util.Util
import org.apache.hadoop.fs.FileUtil
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.storage.StorageLevel

object SparkPersistence extends GraphPersistence {
  private val vertices_suffix = "_vertices"
  private val edges_suffix = "_edges"

  
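  // Saves the graph as text: vertices and edges are written to temporary directories and then merged into the
  // final output paths via Util.merge.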
  def saveAsText(graph: Graph[VertexData, EdgeData], graphName: String, overwrite: Boolean = false): Unit = {
    val verticesPath = graphName + vertices_suffix
    val verticesTmpPath = "__" + verticesPath
    val edgesPath = graphName + edges_suffix
    val edgesTmpPath = "__" + edgesPath

    if (overwrite) {
      FileUtil.fullyDelete(new File(verticesPath))
      FileUtil.fullyDelete(new File(edgesPath))
    }

    graph.vertices.saveAsTextFile(verticesTmpPath)
    Util.merge(verticesTmpPath, verticesPath)
    FileUtil.fullyDelete(new File(verticesTmpPath))

    graph.edges.saveAsTextFile(edgesTmpPath)
    Util.merge(edgesTmpPath, edgesPath)
    FileUtil.fullyDelete(new File(edgesTmpPath))
  }
} 
Example 18
Source File: BFS.scala    From csb   with GNU General Public License v3.0
package edu.msstate.dasi.csb.workload.spark

import edu.msstate.dasi.csb.workload.Workload
import org.apache.spark.graphx.{Graph, VertexId}

import scala.reflect.ClassTag


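  // BFS between the workload's src and dst vertices (fields of the enclosing class, not shown in this snippet):
  // forward passes propagate (distance, predecessor) pairs with aggregateMessages until dst is reached, then the
  // path is reconstructed by walking predecessors backwards.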
  def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Unit = {
    // if (src == dst) return List(src)
    if (src == dst) return

    // The attribute of each vertex is (dist from src, id of vertex with dist-1)
    var g: Graph[(Int, VertexId), ED] = graph.mapVertices((id, _) => (if (id == src) 0 else Int.MaxValue, 0L)).cache()

    // Traverse forward from src
    var dstAttr = (Int.MaxValue, 0L)
    while (dstAttr._1 == Int.MaxValue) {
      val msgs = g.aggregateMessages[(Int, VertexId)](e => if (e.srcAttr._1 != Int.MaxValue && e.srcAttr._1 + 1 < e.dstAttr._1) {
        e.sendToDst((e.srcAttr._1 + 1, e.srcId))
      }, (a, b) => if (a._1 < b._1) a else b).cache()

      // if (msgs.count == 0) return List.empty
      if (msgs.count == 0) return

      g = g.ops.joinVertices(msgs) { (_, oldAttr, newAttr) =>
        if (newAttr._1 < oldAttr._1) newAttr else oldAttr
      }.cache()

      dstAttr = g.vertices.filter(_._1 == dst).first()._2
    }

    // Traverse backward from dst and collect the path
    var path: List[VertexId] = dstAttr._2 :: dst :: Nil
    while (path.head != src) {
      path = g.vertices.filter(_._1 == path.head).first()._2._2 :: path
    }

    // path
  }
} 
Example 19
Source File: SSSP.scala    From csb   with GNU General Public License v3.0
package edu.msstate.dasi.csb.workload.spark

import edu.msstate.dasi.csb.workload.Workload
import org.apache.spark.graphx.{Graph, VertexId}

import scala.reflect.ClassTag


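  // Runs a BFS from the workload's src vertex (a field of the enclosing class, not shown in this snippet) to every
  // vertex of the graph, one destination at a time.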
  def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Unit = {
    for (dst <- graph.vertices.keys.toLocalIterator) {
      bfs(graph, src, dst)
    }
  }

  private def bfs[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], src: VertexId, dst: VertexId): Unit = {
    // if (src == dst) return List(src)
    if (src == dst) return

    // The attribute of each vertex is (dist from src, id of vertex with dist-1)
    var g: Graph[(Int, VertexId), ED] = graph.mapVertices((id, _) => (if (id == src) 0 else Int.MaxValue, 0L)).cache()

    // Traverse forward from src
    var dstAttr = (Int.MaxValue, 0L)
    while (dstAttr._1 == Int.MaxValue) {
      val msgs = g.aggregateMessages[(Int, VertexId)](e => if (e.srcAttr._1 != Int.MaxValue && e.srcAttr._1 + 1 < e.dstAttr._1) {
        e.sendToDst((e.srcAttr._1 + 1, e.srcId))
      }, (a, b) => if (a._1 < b._1) a else b).cache()

      // if (msgs.count == 0) return List.empty
      if (msgs.count == 0) return

      g = g.ops.joinVertices(msgs) { (_, oldAttr, newAttr) =>
        if (newAttr._1 < oldAttr._1) newAttr else oldAttr
      }.cache()

      dstAttr = g.vertices.filter(_._1 == dst).first()._2
    }

    // Traverse backward from dst and collect the path
    var path: List[VertexId] = dstAttr._2 :: dst :: Nil
    while (path.head != src) {
      path = g.vertices.filter(_._1 == path.head).first()._2._2 :: path
    }

    // path
  }
} 
Example 20
Source File: ClosenessCentrality.scala    From csb   with GNU General Public License v3.0
package edu.msstate.dasi.csb.workload.spark

import edu.msstate.dasi.csb.workload.Workload
import org.apache.spark.graphx.{EdgeDirection, Graph, VertexId}

import scala.collection.mutable
import scala.reflect.ClassTag


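  // Computes closeness centrality for the workload's vertex (a field of the enclosing class, not shown in this
  // snippet): a BFS counts how many vertices lie at each distance, and closeness is the vertex count divided by
  // the sum of distance * count.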
  def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Unit = {
    getClosenessOfVert(vertex, graph)
  }

  private class DistanceNodePair(var distance: Long, var totalPairs: Long) extends Comparable[DistanceNodePair] {

    override def compareTo(dp: DistanceNodePair): Int = (this.distance - dp.distance).toInt
  }

  private class NodeVisitCounter extends java.io.Serializable {

    var totalPairs: Long = _

    var levelSize: mutable.HashMap[Long, Long] = _ //first is distance second is pair at that distance
  }

  private def BFSNode[VD: ClassTag, ED: ClassTag](nID: Long, graph: Graph[VD, ED]): NodeVisitCounter = {
    val q = new mutable.Queue[Long]()
    q.enqueue(nID)
    val visited = new mutable.HashSet[VertexId]()
    val levelSize = new mutable.HashMap[Long, Long]()
    visited.add(nID)
    var totalPairs: Long = 0
    val visitCounter = new NodeVisitCounter()
    var level = 0
    while (q.nonEmpty) {
      val size = q.size
      totalPairs += size
      if (level != 0) {
        levelSize.put(level, size)
      }

      val list: Array[Long] = new Array[Long](size)
      for (x <- 0 until size) {
        list(x) = q.dequeue()
      }
      var children: Array[VertexId] = null
      if (list.length > 0) {
        for (x <- list) {
          val node: VertexId = x
          if (graph.collectNeighborIds(EdgeDirection.Out).lookup(node).nonEmpty) {
            children = graph.collectNeighborIds(EdgeDirection.Out).lookup(node).head
            //        children = hashmap.value.get(x).head
            for (c: Long <- children) {
              // val childNode = graph.vertices.lookup(c) //hashmap.value.get(c).head
              if (!visited.contains(c)) {
                q.enqueue(c)
                visited.add(c)
              }
            }
          }
        }
      }
      level += 1
    }
    totalPairs -= 1

    visitCounter.levelSize = levelSize
    visitCounter.totalPairs = totalPairs

    visitCounter
  }

  private def getClosenessOfVert[VD: ClassTag, ED: ClassTag](vertex: VertexId, graph: Graph[VD, ED]): Double = {
    val visitCenter = BFSNode(vertex, graph)

    var denominator: Long = 0L
    for (x <- visitCenter.levelSize.keySet) {
      denominator += visitCenter.levelSize.get(x).head * x
    }
    if (denominator == 0) return -1
    val count = graph.vertices.count().toDouble
    count / denominator
  }
} 
Example 21
Source File: FindInfluencer.scala    From spark-graphx-twitter   with Apache License 2.0
package com.knoldus.spark.graphx.example

import org.apache.spark.graphx.{Edge, EdgeDirection, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object FindInfluencer {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Twitter Influencer").setMaster("local[*]")
    val sparkContext = new SparkContext(conf)
    sparkContext.setLogLevel("ERROR")

    val twitterData = sparkContext.textFile("src/main/resources/twitter-graph-data.txt")

    val followeeVertices: RDD[(VertexId, String)] = twitterData.map(_.split(",")).map { arr =>
      val user = arr(0).replace("((", "")
      val id = arr(1).replace(")", "")
      (id.toLong, user)
    }

    val followerVertices: RDD[(VertexId, String)] = twitterData.map(_.split(",")).map { arr =>
      val user = arr(2).replace("(", "")
      val id = arr(3).replace("))", "")
      (id.toLong, user)
    }

    val vertices = followeeVertices.union(followerVertices)
    val edges: RDD[Edge[String]] = twitterData.map(_.split(",")).map { arr =>
      val followeeId = arr(1).replace(")", "").toLong
      val followerId = arr(3).replace("))", "").toLong
      Edge(followeeId, followerId, "follow")
    }

    val defaultUser = ("")
    val graph = Graph(vertices, edges, defaultUser)

    val subGraph = graph.pregel("", 2, EdgeDirection.In)((_, attr, msg) =>
      attr + "," + msg,
      triplet => Iterator((triplet.srcId, triplet.dstAttr)),
      (a, b) => (a + "," + b))

    val lengthRDD = subGraph.vertices.map(vertex => (vertex._1, vertex._2.split(",").distinct.length - 2)).max()(new Ordering[Tuple2[VertexId, Int]]() {
      override def compare(x: (VertexId, Int), y: (VertexId, Int)): Int =
        Ordering[Int].compare(x._2, y._2)
    })

    val userId = graph.vertices.filter(_._1 == lengthRDD._1).map(_._2).collect().head
    println(userId + " has maximum influence on network with " + lengthRDD._2 + " influencers.")

    sparkContext.stop()
  }
} 
Example 22
Source File: AbstractPipeClusteringGraph.scala    From sddf   with GNU General Public License v3.0
package de.unihamburg.vsis.sddf.clustering

import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.VertexId
import org.apache.spark.rdd.RDD

import de.unihamburg.vsis.sddf.pipe.PipeElement
import de.unihamburg.vsis.sddf.pipe.context.AbstractPipeContext
import de.unihamburg.vsis.sddf.reading.SymPair
import de.unihamburg.vsis.sddf.reading.Tuple
import de.unihamburg.vsis.sddf.similarity.aggregator.Mean
import de.unihamburg.vsis.sddf.visualisation.model.BasicAnalysable


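// Pipeline element that turns duplicate-candidate pairs into a similarity graph (mean feature similarity becomes
// the edge weight) and delegates the actual grouping to the concrete cluster() implementation.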
abstract class AbstractPipeClusteringGraph
  extends PipeElement[RDD[(SymPair[Tuple], Array[Double])], RDD[Set[Tuple]]]
  with Serializable {
  
  def cluster(graph: Graph[Tuple, Double]): RDD[Set[Tuple]]

  def step(input: RDD[(SymPair[Tuple], Array[Double])])(implicit pipeContext: AbstractPipeContext): RDD[Set[Tuple]] = {
    
    val duplicatePairsWithSimilarity = input.map(
      pair => (pair._1, Mean.agrSimilarity(pair._2))
    )
    
    val edges: RDD[Edge[Double]] = duplicatePairsWithSimilarity.map(
      pair => { Edge(pair._1._1.id, pair._1._2.id, pair._2) }
    )

    // TODO optimize: it would be nice to build the graph only by using edge triplets
    // but as far as I know that's not possible
    val verticesNotUnique: RDD[(VertexId, Tuple)] = duplicatePairsWithSimilarity.map(_._1).flatMap(
      tuplePair => Seq(tuplePair._1, tuplePair._2)
    ).map(tuple => (tuple.id, tuple))

    // delete all duplicate vertices
    val vertices = verticesNotUnique.distinct()

    // The default vertex attribute is null as a workaround because it is never used here
    val graph: Graph[Tuple, Double] = Graph.apply(vertices, edges, null)
    
    cluster(graph)
  }

} 
Example 23
Source File: InputDataFlow.scala    From spark-graphx   with GNU General Public License v3.0
package com.github.graphx.pregel.social

import org.apache.spark.graphx.{Edge, VertexId}

import scala.collection.mutable.ListBuffer

object InputDataFlow {

  def parseNames(line: String): Option[(VertexId, String)] = {
    val fields = line.split('\t')
    if (fields.length > 1)
      Some((fields(0).trim().toLong, fields(1)))
    else None
  }

  def makeEdges(line: String): List[Edge[Int]] = {
    var edges = new ListBuffer[Edge[Int]]()
    val fields = line.split(" ")
    val origin = fields(0)
    (1 until fields.length)
      .foreach { p =>
        edges += Edge(origin.toLong, fields(p).toLong, 0)
      }
    edges.toList
  }

} 
Example 24
Source File: ShortestPathProblemJob.scala    From spark-graphx   with GNU General Public License v3.0
package com.github.graphx.pregel.jobs.ssp

import com.github.graphx.pregel.ssp.ShortestPathProblem
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.graphx.VertexId

object ShortestPathProblemJob extends App {
  Logger.getLogger("org").setLevel(Level.ERROR)
  val sc = new SparkContext("local[*]", "ShortestPathProblemDemo")
  val ssp = new ShortestPathProblem(sc)

  val sourceIdForTest: VertexId = 3
  val sourceIdForRandom: VertexId = 75

  val testGraph = ssp.testGraph
  val resultOnTestGraph = ssp.shortestPath(testGraph, sourceIdForTest)
  println(s"Test Graph:\n${ssp.graphToString(testGraph)}\n\n" +
    s"Distances on the test graph $resultOnTestGraph\n")

  val randomGraph = ssp.randomGraph
  val resultOnRandomGraph = ssp.shortestPath(randomGraph, sourceIdForRandom)
  println(s"Distances on the random graph $resultOnRandomGraph\n")
} 
Example 25
Source File: GraphFramesExample.scala    From Mastering-Machine-Learning-with-Spark-2.x   with MIT License
package com.github.maxpumperla.ml_spark.graphs

import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
//import org.graphframes._


object GraphFramesExample extends App {

    val conf = new SparkConf()
      .setAppName("RDD graph")
      .setMaster("local[4]")
    val sc = new SparkContext(conf)


    val vertices: RDD[(VertexId, String)] = sc.parallelize(
      Array((1L, "Anne"),
        (2L, "Bernie"),
        (3L, "Chris"),
        (4L, "Don"),
        (5L, "Edgar")))

    val edges: RDD[Edge[String]] = sc.parallelize(
      Array(Edge(1L, 2L, "likes"),
        Edge(2L, 3L, "trusts"),
        Edge(3L, 4L, "believes"),
        Edge(4L, 5L, "worships"),
        Edge(1L, 3L, "loves"),
        Edge(4L, 1L, "dislikes")))

    val friendGraph: Graph[String, String] = Graph(vertices, edges)

//    val friendGraphFrame = GraphFrame.fromGraphX(friendGraph)
//
//    friendGraphFrame.find("(v1)-[e1]->(v2); (v2)-[e2]->(v3)").filter(
//      "e1.attr = 'trusts' OR v3.attr = 'Chris'"
//    ).collect.foreach(println)

} 
Example 26
Source File: LocalRunner.scala    From spark-betweenness   with Apache License 2.0
package com.centrality.kBC

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.VertexId
import org.apache.spark.rdd.RDD

object MainRunner 
{
  def main(args: Array[String])
  {
    // Create spark context
    val appName="kBC"
    val sparkMode="local"
    val conf = new SparkConf().setAppName(appName).setMaster(sparkMode);
    val sc = new SparkContext(conf);
    
    // Create sample graph
    //
    // Create an RDD for vertices
    val users: RDD[(VertexId, (String, String))] =
    sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
                         (5L, ("franklin", "prof")), (2L, ("istoica", "prof"))))
    // Create an RDD for edges
    val relationships: RDD[Edge[String]] =
      sc.parallelize(Array(Edge(3L, 7L, "collab"),    Edge(5L, 3L, "advisor"),
                           Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi")))
    // Define a default user in case there are relationship with missing user
    val defaultUser = ("John Doe", "Missing")
    // Build the initial Graph
    val graph = Graph(users, relationships, defaultUser)
    
    val kBCGraph = 
      KBetweenness.run(graph, 3)
  }
} 
Example 27
Source File: VertexAPI.scala    From Hands-On-Big-Data-Analytics-with-PySpark   with MIT License
package com.tomekl007.chapter_7

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite

class VertexAPI extends FunSuite {
  val spark: SparkContext = SparkSession.builder().master("local[2]").getOrCreate().sparkContext

  test("Should use Vertex API") {
    //given
    val users: RDD[(VertexId, (String))] =
      spark.parallelize(Array(
        (1L, "a"),
        (2L, "b"),
        (3L, "c"),
        (4L, "d")
      ))


    val relationships =
      spark.parallelize(Array(
        Edge(1L, 2L, "friend"),
        Edge(1L, 3L, "friend"),
        Edge(2L, 4L, "wife")
      ))

    val graph = Graph(users, relationships)

    //when
    val res = graph.mapVertices((_, att) => att.toUpperCase())
    res.vertices.collect().toList
  }

} 
Example 28
Source File: EdgeAPI.scala    From Hands-On-Big-Data-Analytics-with-PySpark   with MIT License
package com.tomekl007.chapter_7

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite

class EdgeAPI extends FunSuite {
  val spark: SparkContext = SparkSession.builder().master("local[2]").getOrCreate().sparkContext

  test("Should use Edge API") {
    //given
    val users: RDD[(VertexId, (String))] =
      spark.parallelize(Array(
        (1L, "a"),
        (2L, "b"),
        (3L, "c"),
        (4L, "d")
      ))


    val relationships =
      spark.parallelize(Array(
        Edge(1L, 2L, "friend"),
        Edge(1L, 3L, "friend"),
        Edge(2L, 4L, "wife")
      ))

    val graph = Graph(users, relationships)

    //when
    val res = graph.mapEdges(e => e.attr.toUpperCase)

    println(res.edges.collect().toList)
  }

} 
Example 29
Source File: SSSPExample.scala    From sparkoscope   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

// $example on$
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession


object SSSPExample {
  def main(args: Array[String]): Unit = {
    // Creates a SparkSession.
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .getOrCreate()
    val sc = spark.sparkContext

    // $example on$
    // A graph with edge attributes containing distances
    val graph: Graph[Long, Double] =
      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
    val sourceId: VertexId = 42 // The ultimate source
    // Initialize the graph such that all vertices except the root have distance infinity.
    val initialGraph = graph.mapVertices((id, _) =>
        if (id == sourceId) 0.0 else Double.PositiveInfinity)
    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
      triplet => {  // Send Message
        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
        } else {
          Iterator.empty
        }
      },
      (a, b) => math.min(a, b) // Merge Message
    )
    println(sssp.vertices.collect.mkString("\n"))
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println 
Example 30
Source File: SSSPExample.scala    From drizzle-spark   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

// $example on$
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession


object SSSPExample {
  def main(args: Array[String]): Unit = {
    // Creates a SparkSession.
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .getOrCreate()
    val sc = spark.sparkContext

    // $example on$
    // A graph with edge attributes containing distances
    val graph: Graph[Long, Double] =
      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
    val sourceId: VertexId = 42 // The ultimate source
    // Initialize the graph such that all vertices except the root have distance infinity.
    val initialGraph = graph.mapVertices((id, _) =>
        if (id == sourceId) 0.0 else Double.PositiveInfinity)
    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
      triplet => {  // Send Message
        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
        } else {
          Iterator.empty
        }
      },
      (a, b) => math.min(a, b) // Merge Message
    )
    println(sssp.vertices.collect.mkString("\n"))
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println 
Example 31
Source File: SSSPExample.scala    From multi-tenancy-spark   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.graphx

// $example on$
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession


object SSSPExample {
  def main(args: Array[String]): Unit = {
    // Creates a SparkSession.
    val spark = SparkSession
      .builder
      .appName(s"${this.getClass.getSimpleName}")
      .getOrCreate()
    val sc = spark.sparkContext

    // $example on$
    // A graph with edge attributes containing distances
    val graph: Graph[Long, Double] =
      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
    val sourceId: VertexId = 42 // The ultimate source
    // Initialize the graph such that all vertices except the root have distance infinity.
    val initialGraph = graph.mapVertices((id, _) =>
        if (id == sourceId) 0.0 else Double.PositiveInfinity)
    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
      triplet => {  // Send Message
        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
        } else {
          Iterator.empty
        }
      },
      (a, b) => math.min(a, b) // Merge Message
    )
    println(sssp.vertices.collect.mkString("\n"))
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println 
Example 32
Source File: ShortestPathLengthsFromCSV.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License
package ml.sparkling.graph.examples

import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes
import ml.sparkling.graph.api.operators.algorithms.shortestpaths.ShortestPathsTypes._
import ml.sparkling.graph.operators.algorithms.shortestpaths.ShortestPathsAlgorithm
import ml.sparkling.graph.operators.algorithms.shortestpaths.pathprocessors.fastutils.FastUtilWithDistance.DataMap
import ml.sparkling.graph.operators.predicates.AllPathPredicate
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.graphx.{Graph, VertexId}

import scala.collection.JavaConversions._

object ShortestPathLengthsFromCSV extends ExampleApp {
  def body() = {
    val shortestPaths = if (bucketSize == -1L)
      ShortestPathsAlgorithm.computeShortestPathsLengths(partitionedGraph, AllPathPredicate, treatAsUndirected)
    else
      ShortestPathsAlgorithm.computeShortestPathsLengthsIterative(partitionedGraph, (g: Graph[_, _]) => bucketSize, treatAsUndirected)
    val size: Broadcast[VertexId] = ctx.broadcast(partitionedGraph.numVertices)
    partitionedGraph.outerJoinVertices(shortestPaths.vertices)(Util.dataTransformFunction(size) _)
      .vertices.values.saveAsTextFile(out)
    ctx.stop()
  }
}


private object Util {
  // Turns the per-vertex shortest-path map into a single CSV row: the original vertex
  // value followed by one column per vertex id, with 0 where no path was found.
  def dataTransformFunction(size: Broadcast[VertexId])(vId: VertexId, oldValue: String, pathsOption: Option[_ >: DataMap <: JMap[JLong, JDouble]]) = {
    pathsOption.flatMap((paths) => {
      var entries = paths.entrySet().toList.sortBy(_.getKey)
      val out = new StringBuilder()
      out ++= s"${oldValue},"
      var a = 0L
      while (a < size.value) {
        if (entries.nonEmpty && a == entries.head.getKey) {
          out ++= s"${entries.head.getValue},"
          entries = entries.drop(1)
        } else {
          out ++= "0,"
        }
        a += 1L
      }
      out.setLength(out.length - 1)
      Option(out.toString())
    }).getOrElse(oldValue)
  }
} 
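
The example above writes one CSV row per vertex, with a column for the distance to every other vertex. For a quick, dependency-free check of shortest path lengths, GraphX's built-in (unweighted) ShortestPaths can be used instead; this is a sketch assuming an existing SparkContext sc and a hypothetical edge-list file.

import org.apache.spark.graphx.{GraphLoader, VertexId}
import org.apache.spark.graphx.lib.ShortestPaths

val graph = GraphLoader.edgeListFile(sc, "./edge_list.txt")  // hypothetical input file
val landmarks: Seq[VertexId] = Seq(1L, 42L)                  // vertices we want distances to
val withDistances = ShortestPaths.run(graph, landmarks)
// Each vertex now carries a Map[VertexId, Int] of hop counts to the reachable landmarks.
withDistances.vertices.take(5).foreach { case (id, spMap) => println(s"$id -> $spMap") }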
Example 33
Source File: PSCANBasedPartitioning.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.partitioning

import java.util.UUID

import ml.sparkling.graph.api.operators.algorithms.community.CommunityDetection.ComponentID
import ml.sparkling.graph.operators.algorithms.community.pscan.PSCAN
import ml.sparkling.graph.operators.partitioning.PropagationBasedPartitioning.{DefaultPartitionOperator, logger}
import org.apache.log4j.Logger
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Graph, VertexId}

import scala.collection.mutable
import scala.reflect.ClassTag


object PSCANBasedPartitioning {

  @transient
  val logger=Logger.getLogger(PSCANBasedPartitioning.getClass())

  def partitionGraphBy[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], numberOfPartitions: Int, maxIterations: Int = Int.MaxValue)(implicit sc: SparkContext): Graph[VD, ED] = {
    val (numberOfCommunities: VertexId, coarsedVertexMap: Map[VertexId, Int], coarsedNumberOfPartitions: Int, strategy: ByComponentIdPartitionStrategy) =
      buildPartitioningStrategy(graph, numberOfPartitions, maxIterations = maxIterations)
    logger.info(s"Partitioning graph using coarsed map with ${coarsedVertexMap.size} entries and ${coarsedNumberOfPartitions} partitions (down from ${numberOfCommunities} communities)")
    val out = graph.partitionBy(strategy, numberOfPartitions).cache()
    // Touch edges, triplets and vertices so the repartitioning is materialized eagerly.
    out.edges.foreachPartition((_) => {})
    out.triplets.foreachPartition((_) => {})
    out.vertices.foreachPartition((_) => {})
    out
  }


  def buildPartitioningStrategy[ED: ClassTag, VD: ClassTag](graph: Graph[VD, ED], numberOfPartitions: Int, maxIterations:Int = Int.MaxValue)(implicit sc:SparkContext) = {
    val (numberOfCommunities: VertexId, coarsedVertexMap: Map[VertexId, Int], coarsedNumberOfPartitions: Int) = precomputePartitions(graph, numberOfPartitions, maxIterations = maxIterations)
    logger.info(s"Requested $numberOfPartitions partitions, computed $coarsedNumberOfPartitions")
    val strategy = ByComponentIdPartitionStrategy(coarsedVertexMap, numberOfPartitions, DefaultPartitionOperator)
    (numberOfCommunities, coarsedVertexMap, coarsedNumberOfPartitions, strategy)
  }

  def precomputePartitions[ED: ClassTag, VD: ClassTag](graph: Graph[VD, ED], numberOfPartitions: Int, maxIterations:Int = Int.MaxValue)(implicit sc:SparkContext) = {
    logger.info("Computing components using PSCAN")
    val (communities, numberOfCommunities): (Graph[ComponentID, ED], VertexId) = PSCAN.computeConnectedComponentsUsing(graph, numberOfPartitions, maxIterations = maxIterations)
    val computationData=communities.vertices.map(t=>t).localCheckpoint()
    logger.info("Components computed!")
    val (coarsedVertexMap, coarsedNumberOfPartitions) = ParallelPartitioningUtils.coarsePartitions(numberOfPartitions, numberOfCommunities, computationData)
    (numberOfCommunities, coarsedVertexMap, coarsedNumberOfPartitions)
  }
} 
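
A minimal usage sketch for the partitioner defined above, assuming the sparkling-graph operators module is on the classpath and sc is an existing SparkContext; the file path and partition count are placeholders.

import ml.sparkling.graph.operators.partitioning.PSCANBasedPartitioning
import org.apache.spark.SparkContext
import org.apache.spark.graphx.GraphLoader

implicit val ctx: SparkContext = sc                          // partitionGraphBy takes the context implicitly
val graph = GraphLoader.edgeListFile(sc, "./edge_list.txt")  // hypothetical input file
// Repartition edges so that vertices from the same PSCAN community tend to land on the same partition.
val partitioned = PSCANBasedPartitioning.partitionGraphBy(graph, numberOfPartitions = 8)
println(partitioned.edges.partitions.length)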
Example 34
Source File: CommunityBasedPartitioning.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.partitioning


import ml.sparkling.graph.api.operators.algorithms.community.CommunityDetection.{CommunityDetectionAlgorithm, CommunityDetectionMethod, ComponentID}
import ml.sparkling.graph.operators.partitioning.PropagationBasedPartitioning.DefaultPartitionOperator
import org.apache.log4j.Logger
import org.apache.spark.{Partitioner, SparkContext}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.graphx.{Graph, PartitionID, PartitionStrategy, VertexId}

import scala.reflect.ClassTag


object CommunityBasedPartitioning {
  @transient
  val logger=Logger.getLogger(CommunityBasedPartitioning.getClass())

  def partitionGraphBy[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], communityDetectionMethod: CommunityDetectionMethod[VD, ED], numParts: Int = -1)(implicit sc: SparkContext): Graph[VD, ED] = {
    val numberOfPartitions = if (numParts == -1) sc.defaultParallelism else numParts
    val communities: Graph[ComponentID, ED] = communityDetectionMethod(graph)
    val numberOfCommunities = communities.vertices.values.countApproxDistinct()
    val (coarsedVertexMap, coarsedNumberOfPartitions) = ParallelPartitioningUtils.coarsePartitions(numberOfPartitions, numberOfCommunities, communities.vertices)
    val strategy = ByComponentIdPartitionStrategy(coarsedVertexMap, coarsedNumberOfPartitions, DefaultPartitionOperator)
    logger.info(s"Partitioning graph using coarsed map with ${coarsedVertexMap.size} entries and ${coarsedNumberOfPartitions} partitions")
    val out = graph.partitionBy(strategy, numberOfCommunities.toInt).cache()
    out.edges.foreachPartition((_) => {})
    out.vertices.foreachPartition((_) => {})
    out
  }


  def partitionGraphUsing[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], communityDetectionMethod: CommunityDetectionAlgorithm, numParts: Int = -1)(implicit sc: SparkContext): Graph[VD, ED] = {
    partitionGraphBy(graph, communityDetectionMethod.detectCommunities[VD, ED](_), numParts)
  }



} 
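
Likewise, a minimal sketch of community-based partitioning, here using plain GraphX connected components as the plugged-in community detection method; it assumes sc is an existing SparkContext and that ComponentID is the usual Long-based component label.

import ml.sparkling.graph.operators.partitioning.CommunityBasedPartitioning
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Graph, GraphLoader}

implicit val ctx: SparkContext = sc
val graph = GraphLoader.edgeListFile(sc, "./edge_list.txt")  // hypothetical input file
// Any Graph[VD, ED] => Graph[ComponentID, ED] function can serve as the community detection
// method; connected components is used here purely for illustration.
val partitioned = CommunityBasedPartitioning.partitionGraphBy(
  graph, (g: Graph[Int, Int]) => g.connectedComponents(), numParts = 4)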
Example 35
Source File: CFBCFlow.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.flow.struct

import org.apache.spark.graphx.VertexId


class CFBCFlow(val src: VertexId, val dst: VertexId, val potential: Double, val completed: Boolean, val aliveThrough: Int) extends Serializable {
  def supplyValue(vertexId: VertexId) = vertexId match {
    case `src` => 1
    case `dst` => -1
    case _ => 0
  }

  val key = (src, dst)

  val removable = completed && aliveThrough <= 0

  def countdownVitality = if (aliveThrough > 0) CFBCFlow(src, dst, potential, completed, aliveThrough - 1) else this
}

object CFBCFlow extends Serializable {
  def apply(src: VertexId,
            dst: VertexId,
            potential: Double = 1.0,
            completed: Boolean = false,
            aliveThrough: Int = 3
           ): CFBCFlow = new CFBCFlow(src, dst, potential, completed, aliveThrough)

  def updatePotential(flow: CFBCFlow, newPotential: Double, eps: Double = 0.0) = {
    // The flow counts as completed once its potential has converged, i.e. the update changes it by at most eps.
    val completed = Math.abs(flow.potential - newPotential) <= eps
    CFBCFlow(flow.src, flow.dst, newPotential, completed, flow.aliveThrough)
  }

  def empty(key: (VertexId, VertexId)) = key match { case (src, dst) =>  CFBCFlow(src, dst, 0.0) }
} 
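
A short sketch of how the flow bookkeeping above behaves, with made-up vertex ids; it only exercises the methods defined in this listing.

val flow = CFBCFlow(src = 1L, dst = 5L)
flow.supplyValue(1L)   //  1: the source injects one unit of flow
flow.supplyValue(5L)   // -1: the destination absorbs it
flow.supplyValue(3L)   //  0: every other vertex is flow-neutral
val updated = CFBCFlow.updatePotential(flow, newPotential = 0.5)
val aged = updated.countdownVitality   // aliveThrough drops from 3 to 2; removable stays false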
Example 36
Source File: CFBCVertex.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.flow.struct

import org.apache.spark.graphx.VertexId


class CFBCVertex(
                  val id: VertexId,
                  val degree: Int,
                  val bc: Double,
                  val sampleVertices: Array[VertexId],
                  val flows: (Array[CFBCFlow], Iterable[CFBCNeighbourFlow]),
                  val processedFlows: Int) extends Serializable {
  lazy val relatedFlows = vertexFlows.filter(f => f.dst == id || f.src == id)
  lazy val availableSamples = sampleVertices

  lazy val vertexPhi = vertexFlows.count(_.src == id)

  lazy val flowsMap = vertexFlows.map(f => ((f.src, f.dst), f)).toMap

  val (vertexFlows, neighboursFlows) = flows

  def isFinalized(k: Int) = sampleVertices.isEmpty || processedFlows >= k

  def getFlow(key: (VertexId, VertexId)) = flowsMap.getOrElse(key, CFBCFlow.empty(key))

  def updateBC(currentFlowing: Double) = {
    val newBC = (processedFlows * bc + currentFlowing) / (processedFlows + 1)
    new CFBCVertex(id, degree, newBC, sampleVertices, flows, processedFlows + 1)
  }

  def updateBC(currentFlowing: Seq[Double]) = {
    val newBC = if (currentFlowing.isEmpty) bc else (processedFlows * bc + currentFlowing.sum) / (processedFlows + currentFlowing.length)
    new CFBCVertex(id, degree, newBC, sampleVertices, flows, processedFlows + currentFlowing.length)
  }

  def addNewFlow(flow: CFBCFlow) =
    new CFBCVertex(id, degree, bc, sampleVertices.filterNot(_ == flow.dst), (vertexFlows :+ flow, neighboursFlows), processedFlows)

  def updateFlows(fls: Array[CFBCFlow]) =
    new CFBCVertex(id, degree, bc, sampleVertices, (fls, neighboursFlows), processedFlows)

  def removeFlows(toRemove: Seq[CFBCFlow]) = {
    val newFlows = vertexFlows.diff(toRemove).map(_.countdownVitality)
    new CFBCVertex(id, degree, bc, sampleVertices, (newFlows, neighboursFlows), processedFlows)
  }

  def applyNeighbourFlows(nbhFlows: Iterable[CFBCNeighbourFlow]) =
    new CFBCVertex(id, degree, bc, sampleVertices, (vertexFlows, nbhFlows), processedFlows)
}

object CFBCVertex extends Serializable {
  def apply(id: VertexId,
            degree: Int,
            bc: Double = 0.0,
            sampleVertices: Array[VertexId] = Array.empty,
            flows: (Array[CFBCFlow], Iterable[CFBCNeighbourFlow]) = (Array.empty, Iterable.empty)
           ): CFBCVertex = new CFBCVertex(id, degree, bc, sampleVertices, flows, 0)
} 
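
The betweenness value in CFBCVertex is a running average over processed flows; a small sketch with illustrative numbers:

val v  = CFBCVertex(id = 7L, degree = 3)   // bc = 0.0, processedFlows = 0
val v1 = v.updateBC(2.0)                   // (0 * 0.0 + 2.0) / 1 = 2.0, one flow processed
val v2 = v1.updateBC(Seq(1.0, 3.0))        // (1 * 2.0 + 4.0) / 3 = 2.0, three flows processed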
Example 37
Source File: CFBCNeighbourFlow.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.flow.struct

import org.apache.spark.graphx.VertexId


class CFBCNeighbourFlow(
                         val src: VertexId,
                         val dst: VertexId,
                         val sumOfPotential: Double,
                         val sumOfDifferences: Double,
                         val numberOfFlows: Int,
                         val allCompleted: Boolean,
                         val anyCompleted: Boolean) extends Serializable {

  val key = (src, dst)
}

object CFBCNeighbourFlow extends Serializable {
  def apply(src: VertexId,
            dst: VertexId,
            sumOfPotential: Double = .0,
            sumOfDifferences: Double = .0,
            numberOfFlows: Int = 0,
            allCompleted: Boolean = true,
            anyCompleted: Boolean = true
           ): CFBCNeighbourFlow = new CFBCNeighbourFlow(src, dst, sumOfPotential, sumOfDifferences, numberOfFlows, allCompleted, anyCompleted)

  def apply(key: (VertexId, VertexId)): CFBCNeighbourFlow = key match { case (src, dst) => apply(src, dst) }

  def apply(flows: Iterable[CFBCFlow], vertex: CFBCVertex): CFBCNeighbourFlow = {

    def aggregatePotential(vertexFlow: CFBCFlow)(acc: NeighbourFlowStats, flow: CFBCFlow) =
      NeighbourFlowStats.fromFlow(vertexFlow)(flow).merge(acc)

    def mergePotential(acc1: NeighbourFlowStats, acc2: NeighbourFlowStats) = acc1.merge(acc2)

    val (src, dst) = flows.headOption.map(_.key) match {
      case Some(k) => k
      case None => throw new RuntimeException("Empty flows!")
    }
    val aggregateFunc = aggregatePotential(vertex.getFlow((src, dst))) _
    val stats = flows.aggregate(NeighbourFlowStats.empty)(aggregateFunc, mergePotential)
    CFBCNeighbourFlow(src, dst, stats.potential, stats.sumPotentialDiff, flows.size, stats.allCompleted, stats.anyCompleted)
  }

  class NeighbourFlowStats( val potential: Double,
                            val sumPotentialDiff: Double,
                            val allCompleted: Boolean,
                            val anyCompleted: Boolean) extends Serializable {
    def merge(other: NeighbourFlowStats): NeighbourFlowStats = {
      NeighbourFlowStats(
        potential + other.potential,
        sumPotentialDiff + other.sumPotentialDiff,
        allCompleted && other.allCompleted,
        anyCompleted || other.anyCompleted)
    }
  }

  object NeighbourFlowStats extends Serializable {
    def apply(potential: Double, sumPotentialDiff: Double, allCompleted: Boolean, anyCompleted: Boolean): NeighbourFlowStats =
      new NeighbourFlowStats(potential, sumPotentialDiff, allCompleted, anyCompleted)

    def fromFlow(vertexFlow: CFBCFlow)(nbflow: CFBCFlow): NeighbourFlowStats =
      apply(nbflow.potential, Math.abs(nbflow.potential - vertexFlow.potential), nbflow.completed, nbflow.completed)

    def empty = apply(.0, .0, true, false)
  }
} 
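
A sketch of how several flows reported by a neighbour collapse into a single CFBCNeighbourFlow summary; the ids and potentials are made up, and the structs from the listings above are assumed to be on the classpath.

val vertex = CFBCVertex(id = 2L, degree = 2)
val incoming = Seq(
  CFBCFlow(1L, 5L, potential = 0.4),
  CFBCFlow(1L, 5L, potential = 0.6, completed = true))
val summary = CFBCNeighbourFlow(incoming, vertex)
// summary.sumOfPotential == 1.0, summary.numberOfFlows == 2,
// summary.allCompleted == false, summary.anyCompleted == true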
Example 38
Source File: EdmondsMessage.scala    From sparkling-graph   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.struct.messages

import org.apache.spark.graphx.VertexId


class EdmondsMessage(val preds: List[VertexId], val sigma: Int, val depth: Int) extends Serializable {
  def merge(other: EdmondsMessage): EdmondsMessage = {
    require(depth == other.depth)
    EdmondsMessage(preds ++ other.preds, sigma + other.sigma, depth)
  }
}

object EdmondsMessage extends Serializable {
  def apply(preds: List[VertexId], sigma: Int, depth: Int): EdmondsMessage =
    new EdmondsMessage(preds, sigma, depth)

  def empty = apply(List.empty, -1, -1)
}
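
EdmondsMessage.merge only combines messages that arrived at the same BFS depth; a minimal sketch with illustrative values:

val m1 = EdmondsMessage(List(1L, 2L), sigma = 3, depth = 2)
val m2 = EdmondsMessage(List(4L), sigma = 1, depth = 2)
val merged = m1.merge(m2)
// merged.preds == List(1L, 2L, 4L), merged.sigma == 4, merged.depth == 2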