org.apache.spark.graphx.Edge Java Examples

The following examples show how to use org.apache.spark.graphx.Edge. They are drawn from open-source projects; the source file, project, and license are noted above each example.
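As a primer before the project examples, here is a minimal, self-contained sketch of the class itself: an Edge is simply a pair of long vertex ids plus an attribute, read from Java through the accessor methods shown below (the ids and the "follows" label are made up for illustration).

import org.apache.spark.graphx.Edge;

public class EdgeBasics {
    public static void main(String[] args) {
        // An Edge pairs a source and destination vertex id with an attribute.
        Edge<String> edge = new Edge<>(1L, 2L, "follows");

        System.out.println(edge.srcId()); // 1
        System.out.println(edge.dstId()); // 2
        System.out.println(edge.attr());  // follows
    }
}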
Example #1
Source File: GraphXGraphGenerator.java    From rya with Apache License 2.0
public Graph<RyaTypeWritable, RyaTypeWritable> createGraph(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    // Storage levels for the cached edge and vertex RDDs of the resulting graph
    StorageLevel edgeStorageLevel = StorageLevel.MEMORY_ONLY();
    StorageLevel vertexStorageLevel = StorageLevel.MEMORY_ONLY();
    // GraphX's Scala API needs runtime type evidence for the attribute types
    ClassTag<RyaTypeWritable> rtwTag = ClassTag$.MODULE$.apply(RyaTypeWritable.class);
    // Default attribute for vertices that appear only in edges
    RyaTypeWritable defaultVertexAttr = null;

    RDD<Tuple2<Object, RyaTypeWritable>> vertexRDD = getVertexRDD(sc, conf);

    // The input format yields (key, Edge) pairs; keep only the Edge values
    RDD<Tuple2<Object, Edge>> edgeRDD = getEdgeRDD(sc, conf);
    JavaRDD<Tuple2<Object, Edge>> jrddTuple = edgeRDD.toJavaRDD();
    JavaRDD<Edge<RyaTypeWritable>> jrdd = jrddTuple.map(tuple -> tuple._2);
    RDD<Edge<RyaTypeWritable>> goodERDD = JavaRDD.toRDD(jrdd);

    return Graph.apply(vertexRDD, goodERDD, defaultVertexAttr, edgeStorageLevel, vertexStorageLevel, rtwTag, rtwTag);
}
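Graph.apply is GraphX's Scala factory method: it takes the vertex RDD, the edge RDD, a default attribute for vertices referenced by edges but absent from the vertex RDD, storage levels for the edge and vertex RDDs, and ClassTags for the vertex and edge attribute types (here both RyaTypeWritable).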
 
Example #2
Source File: GraphXEdgeInputFormat.java    From rya with Apache License 2.0
/**
 * Load the next statement by converting the next Accumulo row to a
 * statement, and make the new (key,value) pair available for retrieval.
 *
 * @return true if another (key,value) pair was fetched and is ready to
 *         be retrieved, false if there was none.
 * @throws IOException
 *             if a row was loaded but could not be converted to a
 *             statement.
 */
@Override
public boolean nextKeyValue() throws IOException {
    if (!scannerIterator.hasNext()) {
        return false;
    }
    final Entry<Key, Value> entry = scannerIterator.next();
    ++numKeysRead;
    currentKey = entry.getKey();
    try {
        currentK = currentKey.getRow();
        // Reassemble the Accumulo key/value into a RyaStatement
        final RyaStatement stmt = this.ryaContext.deserializeTriple(this.tableLayout,
                new TripleRow(entry.getKey().getRow().getBytes(),
                        entry.getKey().getColumnFamily().getBytes(),
                        entry.getKey().getColumnQualifier().getBytes(),
                        entry.getKey().getTimestamp(),
                        entry.getKey().getColumnVisibility().getBytes(),
                        entry.getValue().get()));

        // Subject and object hash to vertex ids; the predicate becomes the edge attribute
        final long subHash = getVertexId(stmt.getSubject());
        final long objHash = getVertexId(stmt.getObject());
        final RyaTypeWritable rtw = new RyaTypeWritable();
        rtw.setRyaType(stmt.getPredicate());

        currentV = new Edge<RyaTypeWritable>(subHash, objHash, rtw);
    } catch (final TripleRowResolverException e) {
        throw new IOException(e);
    }
    return true;
}
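The getVertexId helper that hashes a subject or object into a long vertex id is defined elsewhere in GraphXEdgeInputFormat and is not shown here. A minimal sketch of what such a helper could look like, assuming Guava is on the classpath and that hashing the RyaType's string data is an acceptable id scheme (the real Rya implementation may differ):

import java.nio.charset.StandardCharsets;
import com.google.common.hash.Hashing;

// Hypothetical: derive a stable 64-bit vertex id from a RyaType's data string,
// so the same resource always maps to the same vertex.
private static long getVertexId(RyaType resource) {
    return Hashing.sha256()
            .hashString(resource.getData(), StandardCharsets.UTF_8)
            .asLong();
}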
 
Example #3
Source File: SparkGraphXKickoff.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {

    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
        .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
        .setAppName(APPLICATION_NAME);

    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);

    List<Tuple2<Object, String>> listOfVertex = new ArrayList<>();
    listOfVertex.add(new Tuple2<>(1L, "James"));
    listOfVertex.add(new Tuple2<>(2L, "Andy"));
    listOfVertex.add(new Tuple2<>(3L, "Ed"));
    listOfVertex.add(new Tuple2<>(4L, "Roger"));
    listOfVertex.add(new Tuple2<>(5L, "Tony"));

    List<Edge<String>> listOfEdge = new ArrayList<>();
    listOfEdge.add(new Edge<>(2, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 1, "Friend"));
    listOfEdge.add(new Edge<>(3, 2, "Colleague"));
    listOfEdge.add(new Edge<>(3, 5, "Partner"));
    listOfEdge.add(new Edge<>(4, 3, "Boss"));
    listOfEdge.add(new Edge<>(5, 2, "Partner"));

    JavaRDD<Tuple2<Object, String>> vertexRDD = javaSparkContext.parallelize(listOfVertex);
    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(listOfEdge);

    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    Graph<String, String> graph = Graph.apply(
        vertexRDD.rdd(),
        edgeRDD.rdd(),
        "",                           // default vertex attribute
        StorageLevel.MEMORY_ONLY(),   // edge storage level
        StorageLevel.MEMORY_ONLY(),   // vertex storage level
        stringTag,
        stringTag);

    // apply specific algorithms, such as PageRank

    graph.vertices().saveAsTextFile(VERTICES_FOLDER_PATH);
    graph.edges().saveAsTextFile(EDGES_FOLDER_PATH);

    javaSparkContext.close();
}
 
Example #4
Source File: AbsFunc7.java    From Apache-Spark-2x-for-Java-Developers with MIT License
@Override
public Integer apply(Edge<String> edge) {
    return edge.attr().length();
}
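This is the body of a class that, judging by its use, extends scala.runtime.AbstractFunction1 so it can be passed where GraphX's Scala API expects a Function1. A sketch of what the complete class and a call site might look like; the mapEdges usage and the graph variable are assumptions, not part of the original file:

import java.io.Serializable;
import org.apache.spark.graphx.Edge;
import scala.runtime.AbstractFunction1;

// Maps an edge to the length of its String attribute.
public class AbsFunc7 extends AbstractFunction1<Edge<String>, Integer> implements Serializable {
    @Override
    public Integer apply(Edge<String> edge) {
        return edge.attr().length();
    }
}

Usage, assuming an existing Graph<String, String> named graph (the implicit ClassTag becomes a trailing argument when calling from Java):

ClassTag<Integer> intTag = scala.reflect.ClassTag$.MODULE$.apply(Integer.class);
Graph<String, Integer> lengths = graph.mapEdges(new AbsFunc7(), intTag);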
 
Example #5
Source File: PropertyGraphExampleFromEdges.java    From Apache-Spark-2x-for-Java-Developers with MIT License
public static void main(String[] args) {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
    ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

    List<Edge<String>> edges = new ArrayList<>();
    edges.add(new Edge<String>(1, 2, "Friend"));
    edges.add(new Edge<String>(2, 3, "Advisor"));
    edges.add(new Edge<String>(1, 3, "Friend"));
    edges.add(new Edge<String>(4, 3, "colleague"));
    edges.add(new Edge<String>(4, 5, "Relative"));
    edges.add(new Edge<String>(2, 5, "BusinessPartners"));

    JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(edges);

    // Build the graph from edges alone; every vertex id referenced by an edge
    // is created implicitly with the default attribute "".
    Graph<String, String> graph = Graph.fromEdges(edgeRDD.rdd(), "",
            StorageLevel.MEMORY_ONLY(), StorageLevel.MEMORY_ONLY(), stringTag, stringTag);

    graph.vertices().toJavaRDD().collect().forEach(System.out::println);

    // graph.aggregateMessages(sendMsg, mergeMsg, tripletFields, classTag); see the sketch below
}
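The commented-out aggregateMessages call hints at GraphX's message-passing primitive. As a hedged sketch (not from the book's repository), here is one way to call it from Java to compute each vertex's in-degree on the graph built above; the helper class names SendOne and Sum are made up, and the Scala function wrappers must be serializable so Spark can ship them to executors:

import java.io.Serializable;
import org.apache.spark.graphx.EdgeContext;
import org.apache.spark.graphx.TripletFields;
import org.apache.spark.graphx.VertexRDD;
import scala.runtime.AbstractFunction1;
import scala.runtime.AbstractFunction2;
import scala.runtime.BoxedUnit;

// Sends the message 1 along every edge to its destination vertex.
class SendOne extends AbstractFunction1<EdgeContext<String, String, Integer>, BoxedUnit>
        implements Serializable {
    @Override
    public BoxedUnit apply(EdgeContext<String, String, Integer> ctx) {
        ctx.sendToDst(1);
        return BoxedUnit.UNIT;
    }
}

// Merges messages arriving at the same vertex by summing them.
class Sum extends AbstractFunction2<Integer, Integer, Integer> implements Serializable {
    @Override
    public Integer apply(Integer a, Integer b) {
        return a + b;
    }
}

Called on the Graph<String, String> from the example:

VertexRDD<Integer> inDegrees = graph.aggregateMessages(
        new SendOne(),
        new Sum(),
        TripletFields.None,    // neither vertex nor edge attributes are read
        scala.reflect.ClassTag$.MODULE$.apply(Integer.class));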
 
Example #6
Source File: GraphXGraphGenerator.java    From rya with Apache License 2.0
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException{
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real Accumulo, zookeepers are required; set them for consistency
    if (!mock) {
        conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk);
    }
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);

    Job job = Job.getInstance(conf, sc.appName());

    ClientConfiguration clientConfig = new ClientConfiguration()
            .with(ClientProperty.INSTANCE_NAME, instance)
            .with(ClientProperty.INSTANCE_ZK_HOST, zk);

    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);
    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
}
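The (key, Edge) pairs produced by this RDD are consumed by createGraph in Example #1, which drops the keys and passes the remaining Edge values to Graph.apply.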
 
Example #7
Source File: GraphXEdgeInputFormatTest.java    From rya with Apache License 2.0
@SuppressWarnings("rawtypes")
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaIRI("http://www.google.com"))
        .setPredicate(new RyaIRI("http://some_other_uri"))
        .setObject(new RyaIRI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();

    apiImpl.add(input);

    Job jobConf = Job.getInstance();

    GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    GraphXEdgeInputFormat.setInputTableName(jobConf, table);

    GraphXEdgeInputFormat.setScanIsolation(jobConf, false);
    GraphXEdgeInputFormat.setLocalIterators(jobConf, false);
    GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false);

    GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat();

    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());

    List<InputSplit> splits = inputFormat.getSplits(context);

    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));

    RecordReader ryaStatementRecordReader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<Edge> results = new ArrayList<Edge>();
    while(ryaStatementRecordReader.nextKeyValue()) {
        Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue();
        long srcId = writable.srcId();
        long destId = writable.dstId();
        RyaTypeWritable rtw = null;
        Object text = ryaStatementRecordReader.getCurrentKey();
        Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw);
        results.add(edge);

        System.out.println(text);
    }

    System.out.println(results.size());
    System.out.println(results);
    Assert.assertEquals(2, results.size());
}
 
Example #8
Source File: GraphXEdgeInputFormat.java    From rya with Apache License 2.0
/**
 * Instantiates a RecordReader for this InputFormat and a given task and
 * input split.
 *
 * @param split
 *            Defines the portion of the input this RecordReader is
 *            responsible for.
 * @param context
 *            The context of the task.
 * @return A RecordReader that can be used to fetch Edge values.
 */
@Override
public RecordReader<Object, Edge> createRecordReader(final InputSplit split,
		final TaskAttemptContext context) {
	return new RyaStatementRecordReader();
}