Java Code Examples for org.apache.tinkerpop.gremlin.TestHelper#makeTestDataDirectory()

The following examples show how to use org.apache.tinkerpop.gremlin.TestHelper#makeTestDataDirectory() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TinkerGraphTest.java    From tinkergraph-gremlin with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldPersistToGraphML() {
    final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGraphML.xml";
    final File f = new File(graphLocation);
    if (f.exists() && f.isFile()) f.delete();

    final Configuration conf = new BaseConfiguration();
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "graphml");
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation);
    final TinkerGraph graph = TinkerGraph.open(conf);
    TinkerFactory.generateModern(graph);
    graph.close();

    final TinkerGraph reloadedGraph = TinkerGraph.open(conf);
    IoTest.assertModernGraph(reloadedGraph, true, true);
    reloadedGraph.close();
}
 
Example 2
Source File: TinkerGraphTest.java    From tinkergraph-gremlin with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldPersistToGraphSON() {
    final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGraphSON.json";
    final File f = new File(graphLocation);
    if (f.exists() && f.isFile()) f.delete();

    final Configuration conf = new BaseConfiguration();
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "graphson");
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation);
    final TinkerGraph graph = TinkerGraph.open(conf);
    TinkerFactory.generateModern(graph);
    graph.close();

    final TinkerGraph reloadedGraph = TinkerGraph.open(conf);
    IoTest.assertModernGraph(reloadedGraph, true, false);
    reloadedGraph.close();
}
 
Example 3
Source File: TinkerGraphTest.java    From tinkergraph-gremlin with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldPersistToGryo() {
    final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGryo.kryo";
    final File f = new File(graphLocation);
    if (f.exists() && f.isFile()) f.delete();

    final Configuration conf = new BaseConfiguration();
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation);
    final TinkerGraph graph = TinkerGraph.open(conf);
    TinkerFactory.generateModern(graph);
    graph.close();

    final TinkerGraph reloadedGraph = TinkerGraph.open(conf);
    IoTest.assertModernGraph(reloadedGraph, true, false);
    reloadedGraph.close();
}
 
Example 4
Source File: TinkerGraphTest.java    From tinkergraph-gremlin with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldPersistToGryoAndHandleMultiProperties() {
    final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToGryoMulti.kryo";
    final File f = new File(graphLocation);
    if (f.exists() && f.isFile()) f.delete();

    final Configuration conf = new BaseConfiguration();
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, "gryo");
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation);
    final TinkerGraph graph = TinkerGraph.open(conf);
    TinkerFactory.generateTheCrew(graph);
    graph.close();

    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_DEFAULT_VERTEX_PROPERTY_CARDINALITY, VertexProperty.Cardinality.list.toString());
    final TinkerGraph reloadedGraph = TinkerGraph.open(conf);
    IoTest.assertCrewGraph(reloadedGraph, false);
    reloadedGraph.close();
}
 
Example 5
Source File: FileSystemStorageCheck.java    From tinkerpop with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldSupportDirectoryFileDistinction() throws Exception {
    // Make sure Spark is shut down before deleting its files and directories,
    // which are locked under Windows and fail the tests. See FileSystemStorageCheck
    graph.configuration().setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);

    final Storage storage = FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration()));
    final String directory1 = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "directory1");
    final String directory2 = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "directory2");
    for (int i = 0; i < 10; i++) {
        new File(directory1, "file1-" + i + ".txt.bz").createNewFile();
    }
    for (int i = 0; i < 5; i++) {
        new File(directory2, "file2-" + i + ".txt.bz").createNewFile();
    }
    super.checkFileDirectoryDistinction(storage, directory1, directory2);
    deleteDirectory(directory1);
    deleteDirectory(directory2);
}
 
Example 6
Source File: PersistedInputOutputRDDIntegrateTest.java    From tinkerpop with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldNotHaveDanglingPersistedComputeRDDs() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph graph = GraphFactory.open(configuration);
    ///
    assertEquals(6, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)).V().out().count().next().longValue());
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    //
    assertEquals(2, graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)).V().out().out().count().next().longValue());
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    ///////
    Spark.close();
}
 
Example 7
Source File: PersistedInputOutputRDDIntegrateTest.java    From tinkerpop with Apache License 2.0 6 votes vote down vote up
@Test
public void shouldNotPersistRDDAcrossJobs() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);  // because the spark context is NOT persisted, neither is the RDD
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    Spark.create("local[4]");
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
}
 
Example 8
Source File: TinkerGraphTest.java    From tinkergraph-gremlin with Apache License 2.0 5 votes vote down vote up
@Test
public void shouldPersistToAnyGraphFormat() {
    final String graphLocation = TestHelper.makeTestDataDirectory(TinkerGraphTest.class) + "shouldPersistToAnyGraphFormat.dat";
    final File f = new File(graphLocation);
    if (f.exists() && f.isFile()) f.delete();

    final Configuration conf = new BaseConfiguration();
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_FORMAT, TestIoBuilder.class.getName());
    conf.setProperty(TinkerGraph.GREMLIN_TINKERGRAPH_GRAPH_LOCATION, graphLocation);
    final TinkerGraph graph = TinkerGraph.open(conf);
    TinkerFactory.generateModern(graph);

    //Test write graph
    graph.close();
    assertEquals(TestIoBuilder.calledOnMapper, 1);
    assertEquals(TestIoBuilder.calledGraph, 1);
    assertEquals(TestIoBuilder.calledCreate, 1);

    try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(f))){
        os.write("dummy string".getBytes());
    } catch (Exception e) {
        e.printStackTrace();
    }

    //Test read graph
    final TinkerGraph readGraph = TinkerGraph.open(conf);
    assertEquals(TestIoBuilder.calledOnMapper, 1);
    assertEquals(TestIoBuilder.calledGraph, 1);
    assertEquals(TestIoBuilder.calledCreate, 1);
}
 
Example 9
Source File: FileSystemStorageCheck.java    From tinkerpop with Apache License 2.0 5 votes vote down vote up
@Test
@LoadGraphWith(LoadGraphWith.GraphData.MODERN)
public void shouldSupportCopyMethods() throws Exception {
    // Make sure Spark is shut down before deleting its files and directories,
    // which are locked under Windows and fail the tests. See FileSystemStorageCheck
    graph.configuration().setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);

    final Storage storage = FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration()));
    final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
    final String newOutputLocation = TestHelper.makeTestDataDirectory(FileSystemStorageCheck.class, "new-location-for-copy");
    // TestHelper creates the directory and we need it not to exist
    deleteDirectory(newOutputLocation);
    super.checkCopyMethods(storage, outputLocation, newOutputLocation, InputOutputHelper.getInputFormat((Class) Class.forName(graph.configuration().getString(Constants.GREMLIN_HADOOP_GRAPH_WRITER))), SequenceFileInputFormat.class);

}
 
Example 10
Source File: SparkContextStorageCheck.java    From tinkerpop with Apache License 2.0 5 votes vote down vote up
@Test
@LoadGraphWith(LoadGraphWith.GraphData.MODERN)
public void shouldSupportCopyMethods() throws Exception {
    final Storage storage = SparkContextStorage.open(graph.configuration());
    final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
    final String newOutputLocation = TestHelper.makeTestDataDirectory(this.getClass(), "new-location-for-copy");

    super.checkCopyMethods(storage, outputLocation, newOutputLocation, PersistedInputRDD.class, PersistedInputRDD.class);
}
 
Example 11
Source File: PersistedInputOutputRDDIntegrateTest.java    From tinkerpop with Apache License 2.0 5 votes vote down vote up
@Test
public void shouldPersistRDDBasedOnStorageLevel() throws Exception {
    Spark.create("local[4]");
    int counter = 0;
    for (final String storageLevel : Arrays.asList("MEMORY_ONLY", "DISK_ONLY", "MEMORY_ONLY_SER", "MEMORY_AND_DISK_SER")) {
        assertEquals(counter, Spark.getRDDs().size());
        assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
        counter++;
        final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
        final Configuration configuration = super.getBaseConfiguration();
        configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
        configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
        configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
        configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, storageLevel);
        configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
        configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
        Graph graph = GraphFactory.open(configuration);
        graph.compute(SparkGraphComputer.class)
                .result(GraphComputer.ResultGraph.NEW)
                .persist(GraphComputer.Persist.EDGES)
                .program(TraversalVertexProgram.build()
                        .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                                "gremlin-groovy",
                                "g.V().groupCount('m').by('name').out()").create(graph)).submit().get();
        ////////
        assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
        assertEquals(StorageLevel.fromString(storageLevel), Spark.getRDD(Constants.getGraphLocation(rddName)).getStorageLevel());
        assertEquals(counter, Spark.getRDDs().size());
        assertEquals(counter, Spark.getContext().getPersistentRDDs().size());
    }
    Spark.close();
}
 
Example 12
Source File: GryoSerializerIntegrateTest.java    From tinkerpop with Apache License 2.0 4 votes vote down vote up
@Test
public void shouldHaveAllRegisteredGryoSerializerClasses() throws Exception {
    // this is a stress test that ensures that when data is spilling to disk, persisted to an RDD, etc. the correct classes are registered with GryoSerializer.
    final TinkerGraph randomGraph = TinkerGraph.open();
    int totalVertices = 200000;
    TestHelper.createRandomGraph(randomGraph, totalVertices, 100);
    final String inputLocation = TestHelper.makeTestDataFile(GryoSerializerIntegrateTest.class,
                                                             UUID.randomUUID().toString(),
                                                             "random-graph.kryo");
    randomGraph.io(IoCore.gryo()).writeGraph(inputLocation);
    randomGraph.clear();
    randomGraph.close();

    final String outputLocation = TestHelper.makeTestDataDirectory(GryoSerializerIntegrateTest.class, UUID.randomUUID().toString());
    Configuration configuration = getBaseConfiguration();
    configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, inputLocation);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);
    Graph graph = GraphFactory.open(configuration);
    final GraphTraversal.Admin<Vertex, Map<Vertex, Collection<Vertex>>> traversal = graph.traversal().withComputer(SparkGraphComputer.class).V().group("m").<Map<Vertex, Collection<Vertex>>>cap("m").asAdmin();
    assertTrue(traversal.hasNext());
    assertEquals(traversal.next(), traversal.getSideEffects().get("m"));
    assertFalse(traversal.hasNext());
    assertTrue(traversal.getSideEffects().exists("m"));
    assertTrue(traversal.getSideEffects().get("m") instanceof Map);
    assertEquals(totalVertices, traversal.getSideEffects().<Map>get("m").size());

    configuration = getBaseConfiguration();
    configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, inputLocation);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "DISK_ONLY");
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "persisted-rdd");
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    graph = GraphFactory.open(configuration);
    assertEquals(totalVertices, graph.compute(SparkGraphComputer.class).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph().traversal().V().count().next().longValue());

    configuration = getBaseConfiguration();
    configuration.clearProperty(Constants.SPARK_SERIALIZER); // ensure proper default to GryoSerializer
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, "persisted-rdd");
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    graph = GraphFactory.open(configuration);
    assertEquals(totalVertices, graph.traversal().withComputer(SparkGraphComputer.class).V().count().next().longValue());

    configuration = getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, "persisted-rdd");
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
    configuration.setProperty(Constants.GREMLIN_SPARK_GRAPH_STORAGE_LEVEL, "MEMORY_ONLY"); // this should be ignored as you can't change the persistence level once created
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "MEMORY_AND_DISK");
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    graph = GraphFactory.open(configuration);
    assertEquals(totalVertices, graph.traversal().withComputer(SparkGraphComputer.class).V().count().next().longValue());
}
 
Example 13
Source File: PersistedInputOutputRDDIntegrateTest.java    From tinkerpop with Apache License 2.0 4 votes vote down vote up
@Test
public void shouldPersistRDDAcrossJobs() throws Exception {
    Spark.create("local[4]");
    final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final String rddName2 = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, UUID.randomUUID().toString());
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V().count()").create(graph)).submit().get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2);
    graph = GraphFactory.open(configuration);
    assertEquals(6, graph.traversal().withComputer(SparkGraphComputer.class).V().out().count().next().longValue());
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ///////
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V().count()").create(graph)).submit().get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2);
    graph = GraphFactory.open(configuration);
    assertEquals(6, graph.traversal().withComputer(SparkGraphComputer.class).V().out().count().next().longValue());
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ///////
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V().count()").create(graph)).submit().get();
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());
    ///////
    graph = GraphFactory.open(configuration);
    assertEquals(6, graph.traversal().withComputer(SparkGraphComputer.class).V().out().count().next().longValue());
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
}
 
Example 14
Source File: PersistedInputOutputRDDIntegrateTest.java    From tinkerpop with Apache License 2.0 4 votes vote down vote up
@Test
public void testComplexChain() throws Exception {
    Spark.create("local[4]");

    final String rddName = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD");
    final String rddName2 = TestHelper.makeTestDataDirectory(PersistedInputOutputRDDIntegrateTest.class, "testComplexChain", "graphRDD2");
    final Configuration configuration = super.getBaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);

    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(0, Spark.getContext().getPersistentRDDs().size());
    Graph graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.EDGES).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph();
    GraphTraversalSource g = graph.traversal();
    assertEquals(6l, g.V().count().next().longValue());
    assertEquals(6l, g.E().count().next().longValue());
    assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    ////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName2);
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.EDGES).mapReduce(PageRankMapReduce.build().create()).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph();
    g = graph.traversal();
    assertEquals(6l, g.V().count().next().longValue());
    assertEquals(6l, g.E().count().next().longValue());
    assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertTrue(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(3, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.VERTEX_PROPERTIES).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph();
    g = graph.traversal();
    assertEquals(6l, g.V().count().next().longValue());
    assertEquals(0l, g.E().count().next().longValue());
    assertEquals(6l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(2, Spark.getContext().getPersistentRDDs().size());
    ////
    graph = GraphFactory.open(configuration);
    graph = graph.compute(SparkGraphComputer.class).persist(GraphComputer.Persist.NOTHING).program(PageRankVertexProgram.build().iterations(2).create(graph)).submit().get().graph();
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    g = graph.traversal();
    assertEquals(0l, g.V().count().next().longValue());
    assertEquals(0l, g.E().count().next().longValue());
    assertEquals(0l, g.V().values(PageRankVertexProgram.PAGE_RANK).count().next().longValue());
    ////
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    assertFalse(Spark.hasRDD(Constants.getGraphLocation(rddName2)));
    assertFalse(Spark.hasRDD(Constants.getMemoryLocation(rddName2, PageRankMapReduce.DEFAULT_MEMORY_KEY)));
    assertEquals(1, Spark.getContext().getPersistentRDDs().size());
    Spark.close();
}
 
Example 15
Source File: LocalPropertyTest.java    From tinkerpop with Apache License 2.0 4 votes vote down vote up
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);

    // just a note that this value should have always been set to true, but from the initial commit was false.
    // interestingly the last assertion had always passed up to spark 2.3.x when it started to fail. apparently
    // that assertion should likely have never passed, so it stands to reason that there was a bug in spark in
    // 2.2.x that was resolved for 2.3.x....that's my story and i'm sticking to it.
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.NOTHING)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}