org.apache.spark.graphx.VertexRDD Scala Examples

The following examples show how to use org.apache.spark.graphx.VertexRDD. Each example comes from an open-source project; the project and source file are noted above the code.
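Before the examples, a minimal sketch of where a VertexRDD typically comes from may help: every Graph exposes its vertex attributes as a VertexRDD, and per-vertex aggregations such as degrees return one as well. The tiny graph below is made up purely for illustration.

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexRDD}

object VertexRDDBasics {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[*]", "VertexRDDBasics")

    // hypothetical vertices (id, name) and weighted edges
    val vertices = sc.parallelize(Seq((1L, "a"), (2L, "b"), (3L, "c")))
    val edges = sc.parallelize(Seq(Edge(1L, 2L, 1.0), Edge(2L, 3L, 2.0)))
    val graph = Graph(vertices, edges)

    val verts: VertexRDD[String] = graph.vertices // the vertex attributes
    val degrees: VertexRDD[Int] = graph.degrees   // a derived per-vertex value
    degrees.collect().foreach(println)

    sc.stop()
  }
}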
Example 1
Source File: PipeClusteringStrongestPath.scala    From: sddf    License: GNU General Public License v3.0
package de.unihamburg.vsis.sddf.clustering

import scala.Iterator

import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.VertexRDD

import de.unihamburg.vsis.sddf.reading.Tuple


class PipeClusteringStrongestPath extends PipeClusteringTransitiveClosure {
  
  override def manipulateGraph(graph: Graph[Tuple, Double]): Graph[_, Double] = {

    val cGraph = graph.mapVertices((vid, tuple) => (vid, Double.MinPositiveValue))

    // attach the max adjacent edge attribute to each vertex
    // (mapReduceTriplets is deprecated in newer Spark; see the aggregateMessages sketch after this example)
    val verticesMaxEdgeAttributes: VertexRDD[Double] = cGraph.mapReduceTriplets(
      edge => {
        Iterator((edge.dstId, edge.attr), (edge.srcId, edge.attr))
      },
      (a: Double, b: Double) => math.max(a, b)
    )

    // join the resulting vertex attributes with the graph
    val maxGraph: Graph[(Tuple, Double), Double] =
      graph.outerJoinVertices(verticesMaxEdgeAttributes)((id, tuple, simOpt) =>
        simOpt match {
          case Some(sim) => (tuple, sim)
          case None      => (tuple, 0D)
        }
      )
      
    // remove edges whose attribute is less than both the src and dst maxima
    val resultGraph = maxGraph.subgraph(edge =>
      edge.attr >= edge.srcAttr._2 || edge.attr >= edge.dstAttr._2
    )
    resultGraph
  }

}

object PipeClusteringStrongestPath {
  
  def apply() = new PipeClusteringStrongestPath()

} 
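Note that mapReduceTriplets, used above, has been deprecated since Spark 1.2 in favor of aggregateMessages. A minimal sketch of the same max-adjacent-edge computation with the newer API, written as a standalone helper rather than the project's own code, could look like this:

import org.apache.spark.graphx.{Graph, VertexRDD}

// for any graph with Double edge attributes, attach to each vertex
// the maximum attribute among its incident edges
def maxAdjacentEdge[VD](g: Graph[VD, Double]): VertexRDD[Double] =
  g.aggregateMessages[Double](
    ctx => {
      // send the edge attribute to both endpoints
      ctx.sendToDst(ctx.attr)
      ctx.sendToSrc(ctx.attr)
    },
    math.max // keep the largest value per vertex
  )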
Example 2
Source File: SocialPageRankJob.scala    From: spark-graphx    License: GNU General Public License v3.0
package com.github.graphx.pagerank

import com.github.graphx.pregel.social.SocialGraph
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.graphx.VertexRDD

object SocialPageRankJob {

  
  // dynamic PageRank: iterate until the per-vertex change drops below the tolerance
  def ranks(socialGraph: SocialGraph, tolerance: Double): VertexRDD[Double] =
    socialGraph.graph.pageRank(tolerance).vertices

  // static PageRank: run a fixed number of iterations
  def static(socialGraph: SocialGraph, numIter: Int = 20): VertexRDD[Double] =
    socialGraph.graph.staticPageRank(numIter).vertices

  def handleResult(socialGraph: SocialGraph, ranks: VertexRDD[Double]) = {
    socialGraph.verts.join(ranks).map {
      case (_, (username, rank)) => (username, rank)
    }.sortBy({ case (_, rank) => rank }, ascending = false).take(10)
  }

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)
    val sc = new SparkContext("local[*]", "PageRank")

    val socialGraph: SocialGraph = new SocialGraph(sc)
    val TOLERANCE: Double = 0.0001

    import scala.compat.Platform.{EOL => D}
    val topUsersDynamically = handleResult(socialGraph, ranks(socialGraph, TOLERANCE)).mkString(D)
    val topUsersIterative = handleResult(socialGraph, static(socialGraph)).mkString(D)

    println(s"Top 10 users in network counted with TOLERANCE until convergence $TOLERANCE - $D $topUsersDynamically")
    println(s"Top 10 users in the network counted iteratively - $D $topUsersIterative")

    sc.stop()
  }
} 
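GraphX offers both PageRank variants used above on any Graph: pageRank(tol) iterates until every vertex's rank changes by less than tol between iterations, while staticPageRank(numIter) runs a fixed number of iterations. A self-contained sketch on a made-up three-vertex cycle:

import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Edge, Graph, VertexRDD}

object PageRankVariants {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[*]", "PageRankVariants")

    // hypothetical follower edges: 1 -> 2 -> 3 -> 1
    val edges = sc.parallelize(Seq(Edge(1L, 2L, 1), Edge(2L, 3L, 1), Edge(3L, 1L, 1)))
    val graph = Graph.fromEdges(edges, defaultValue = 0)

    val dynamic: VertexRDD[Double] = graph.pageRank(tol = 0.0001).vertices      // until convergence
    val fixed: VertexRDD[Double] = graph.staticPageRank(numIter = 20).vertices  // fixed iterations
    dynamic.join(fixed).collect().foreach(println)

    sc.stop()
  }
}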
Example 3
Source File: EdmondsBCAggregator.scala    From: sparkling-graph    License: BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds

import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.struct.EdmondsVertex
import org.apache.spark.graphx.{VertexRDD, _}


class EdmondsBCAggregator[ED] extends Serializable {

  def aggregate(graph: Graph[EdmondsVertex, ED], source: VertexId): Graph[EdmondsVertex, ED] = {
    // the deepest BFS level present in the graph
    val maxDepth = graph.vertices.aggregate(0)({ case (depth, (vId, vData)) => Math.max(vData.depth, depth) }, Math.max)

    var g = graph
    var oldGraph: Option[Graph[EdmondsVertex, ED]] = None

    // cache and materialize the first round of messages so the unpersist calls below are safe
    var messages = aggregateMessages(g, maxDepth).cache
    messages.count

    for (i <- (1 until maxDepth).reverse) {
      oldGraph = Some(g)

      g = applyMessages(g, messages).cache
      val oldMessages = messages
      messages = aggregateMessages(g, i).cache
      messages.count

      oldMessages.unpersist(false)
      oldGraph.foreach(_.unpersistVertices(false))
      oldGraph.foreach(_.edges.unpersist(false))
    }

    messages.unpersist(false)

//    println("Time of execution updateCentrality:" + ((finishTime - startTime) / 1000000) + " ms")
    //    val finishTime = System.nanoTime()
    g
  }

  private def aggregateMessages(graph: Graph[EdmondsVertex, ED], depth: Int) = graph.aggregateMessages[Double](
    edgeContext => {
      val sender = createAndSendMessage(edgeContext.toEdgeTriplet, depth) _
      sender(edgeContext.srcId, edgeContext.sendToDst)
      sender(edgeContext.dstId, edgeContext.sendToSrc)
    }, _ + _
  )

  private def createAndSendMessage(triplet: EdgeTriplet[EdmondsVertex, ED], depth: Int)(source: VertexId, f: (Double) => Unit) = {
    val attr = triplet.vertexAttr(source)
    if (attr.depth == depth) sendMessage(produceMessage(triplet)(source), f)
  }

  private def produceMessage(triplet: EdgeTriplet[EdmondsVertex, ED])(source: VertexId) = {
    val attr = triplet.vertexAttr(source)
    val otherAttr = triplet.otherVertexAttr(source)
    val delta = (otherAttr.sigma.toDouble / attr.sigma.toDouble) * (1.0 + attr.delta)
    if (attr.preds.contains(triplet.otherVertexId(source))) Some(delta) else None
  }

  private def sendMessage(message: Option[Double], f: (Double) => Unit) = message.foreach(f)

  private def applyMessages(graph: Graph[EdmondsVertex, ED], messages: VertexRDD[Double]) =
    graph.ops.joinVertices(messages)((vertexId, attr, delta) => {
      EdmondsVertex(attr.preds, attr.sigma, attr.depth, delta, delta)
    })
} 
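The cache / count / unpersist sequence in Example 3 is a common pattern in iterative GraphX code: the new generation is materialized before the previous one is released, so unpersisting never forces recomputation of a lineage that is still needed. A stripped-down sketch of the pattern, with a hypothetical step function standing in for one iteration of the real algorithm:

import org.apache.spark.graphx.Graph

object IterationPattern {
  // hypothetical one-iteration transformation
  def step[VD, ED](g: Graph[VD, ED]): Graph[VD, ED] = g

  def iterate[VD, ED](initial: Graph[VD, ED], iterations: Int): Graph[VD, ED] = {
    var g = initial.cache()
    for (_ <- 1 to iterations) {
      val next = step(g).cache()
      next.edges.count() // materialize before releasing the previous generation
      g.unpersistVertices(blocking = false)
      g.edges.unpersist(blocking = false)
      g = next
    }
    g
  }
}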
Example 4
Source File: Modularity.scala    From: sparkling-graph    License: BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.graph

import ml.sparkling.graph.api.operators.algorithms.community.CommunityDetection.ComponentID
import ml.sparkling.graph.api.operators.measures.{VertexDependentGraphMeasure, GraphIndependentMeasure}
import org.apache.spark.graphx.{EdgeTriplet, VertexRDD, Graph}
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag


object Modularity extends VertexDependentGraphMeasure[Double, ComponentID] {

  def compute[V <: ComponentID : ClassTag, E: ClassTag](graph: Graph[V, E]): Double = {
    val edgesNum = graph.numEdges.toDouble
    // per community, count (intra-community, inter-community) edge endpoints
    val edgesCounts: RDD[(V, (Int, Int))] = graph.triplets.flatMap { triplet =>
      if (triplet.srcAttr == triplet.dstAttr) {
        Iterator((triplet.srcAttr, (1, 0)), (triplet.srcAttr, (1, 0)))
      } else {
        Iterator((triplet.srcAttr, (0, 1)), (triplet.dstAttr, (0, 1)))
      }
    }
    edgesCounts.aggregateByKey((0, 0))(
      (agg: (Int, Int), data: (Int, Int)) => (agg, data) match {
        case ((a1, b1), (a2, b2)) => (a1 + a2, b1 + b2)
      },
      (agg1: (Int, Int), agg2: (Int, Int)) => (agg1, agg2) match {
        case ((a1, b1), (a2, b2)) => (a1 + a2, b1 + b2)
      }
    ).treeAggregate(0.0)(
      (agg: Double, data: (V, (Int, Int))) => data match {
        case (_, (edgesFull, edgesSome)) =>
          agg + edgesFull / (2.0 * edgesNum) - Math.pow((edgesSome + edgesFull) / (2.0 * edgesNum), 2)
      },
      (agg1, agg2) => agg1 + agg2
    )
  }

} 
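For reference, the computation in Example 4 corresponds to the standard (Newman) definition of modularity. With m the total number of edges, e_c the number of intra-community edges of community c, and d_c the total degree of community c:

$$Q = \sum_c \left[ \frac{e_c}{m} - \left( \frac{d_c}{2m} \right)^2 \right]$$

In the code, edgesFull counts each intra-community edge twice, so edgesFull / (2m) = e_c / m, and edgesFull + edgesSome equals d_c.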
Example 5
Source File: BetweennessEdmonds$Test.scala    From: sparkling-graph    License: BSD 2-Clause "Simplified" License
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds

import java.nio.file.Files

import ml.sparkling.graph.operators.MeasureTest
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.graphx.{Graph, VertexRDD}


class BetweennessEdmonds$Test(implicit sc: SparkContext) extends MeasureTest {
  val tempDir = Files.createTempDirectory("spark-checkpoint")

  override def beforeAll() = {
    sc.setCheckpointDir(tempDir.toAbsolutePath.toString)
  }

  override def afterAll() = {
    FileUtils.deleteDirectory(tempDir.toFile)
  }

  "Edmonds betweenness centrality for random graph" should "be correctly calculated" in {
    Given("graph")
    val filePath = getClass.getResource("/graphs/graph_ER_15")
    val graph: Graph[Int, Int] = loadGraph(filePath.toString)
    When("Computes betweenness")
    val result = EdmondsBC.computeBC(graph)
    Then("Should calculate betweenness correctly")
    val bcFile = getClass.getResource("/graphs/graph_ER_15_bc")
    val bcCorrectValues = sc.textFile(bcFile.getPath)
      .filter(_.nonEmpty)
      .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) })
      .sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    val bcValues = result.sortBy({ case (vId, data) => vId })
      .map({ case (vId, data) => data }).collect()
    bcCorrectValues.zip(bcValues).foreach({ case (a, b) =>
      a should be(b +- 1e-5)
    })

    result.unpersist(false)
  }

}