Java Code Examples for org.apache.spark.sql.Dataset#head()

The following examples show how to use org.apache.spark.sql.Dataset#head(). The original project and source file are noted above each example.
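As a quick orientation before the examples: head() returns the first row of a Dataset, while head(n) returns the first n rows. The sketch below illustrates both variants; the SparkSession named spark, the schema, and the values are assumptions made for illustration (imports from org.apache.spark.sql and org.apache.spark.sql.types are assumed), not part of any example project.

public void headVariantsSketch()
{
    // Build a tiny two-row DataFrame in memory (illustrative schema and data).
    StructType schema = new StructType()
        .add("id", DataTypes.IntegerType)
        .add("label", DataTypes.StringType);

    Dataset<Row> df = spark.createDataFrame(
        Arrays.asList(RowFactory.create(1, "a"), RowFactory.create(2, "b")),
        schema);

    // head() returns the first row (the Dataset's element type); it fails on an empty Dataset.
    Row first = df.head();

    // head(n) returns the first n rows; in Java the result must be cast to Row[]
    // because the Scala Array[T] return type erases to Object.
    Row[] firstTwo = (Row[]) df.head(2);

    System.out.println(first);
    System.out.println(Arrays.toString(firstTwo));
}

Both calls are actions: they trigger computation and pull rows to the driver, so prefer small values of n.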
Example 1
Source File: SpringSparkDemoApplication.java    From articles with Apache License 2.0
public void testSparkOperations()
{
    Dataset<Row> csvDataSet1 = spark.read().option("header", true).csv("./src/main/resources/employees1.csv");

    System.out.println("Schema of the CSV file:");
    csvDataSet1.printSchema();

    System.out.println("Columns in CSV file:");
    System.out.println(Arrays.toString(csvDataSet1.columns()));

    System.out.println("Total data set 1 count: " + csvDataSet1.count());

    System.out.println("First 5 rows:");
    Row[] head = (Row[]) csvDataSet1.head(5);
    System.out.println(Arrays.toString(head));

    Dataset<Row> csvDataSet2 = spark.read().option("header", true).csv("./src/main/resources/employees2.csv");

    System.out.println("Total data set 2 count: " + csvDataSet2.count());
    System.out.println();

    Dataset<Row> dataSetUnion = csvDataSet1.union(csvDataSet2);
    System.out.println("Total data set union count: " + dataSetUnion.count());
}
 
Example 2
Source File: ConceptMapsTest.java    From bunsen with Apache License 2.0
@Test
public void testLoadExpandedMappings() throws FHIRException {

  ConceptMap map = conceptMap("urn:cerner:map:testmap", "1");

  // Explicitly create a mapping dataset to simulate an ETL load from an external source.
  Mapping mapping = new Mapping();

  mapping.setConceptMapUri(map.getUrl());
  mapping.setConceptMapVersion(map.getVersion());
  mapping.setSourceValueSet("urn:source:valueset");
  mapping.setTargetValueSet("urn:target:valueset");
  mapping.setSourceSystem("urn:source:system");
  mapping.setSourceValue("urn:source:code:a");
  mapping.setTargetSystem("urn:target:system");
  mapping.setTargetValue("urn:target:code:1");

  Dataset<Mapping> mappings = spark.createDataset(Arrays.asList(mapping),
      ConceptMaps.getMappingEncoder());

  ConceptMaps maps = ConceptMaps.getEmpty(spark)
      .withExpandedMap(map, mappings);

  Dataset<Mapping> loadedMappings = maps.getMappings();

  Assert.assertEquals(1, loadedMappings.count());

  // head() returns the first element of the Dataset; here there is exactly one.
  Mapping loadedMapping = loadedMappings.head();

  Assert.assertEquals(mapping, loadedMapping);
}
 
Example 3
Source File: FhirEncodersTest.java    From bunsen with Apache License 2.0
@Test
public void testFromParquet() throws IOException {

  Path dirPath = Files.createTempDirectory("encoder_test");

  String path = dirPath.resolve("out.parquet").toString();

  // write().save() uses Spark's default data source (parquet) when no format is set.
  conditionsDataset.write().save(path);

  Dataset<Condition> ds = spark.read()
      .parquet(path)
      .as(encoders.of(Condition.class));

  Condition readCondition = ds.head();

  Assert.assertEquals(condition.getId(),
      readCondition.getId());
}
 
Example 4
Source File: FhirEncodersTest.java    From bunsen with Apache License 2.0
@Test
public void testFromRdd() {

  JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

  JavaRDD<Condition> conditionRdd = context.parallelize(ImmutableList.of(condition));

  Dataset<Condition> ds = spark.createDataset(conditionRdd.rdd(),
      encoders.of(Condition.class));

  Condition convertedCondition = ds.head();

  Assert.assertEquals(condition.getId(),
      convertedCondition.getId());
}