Java Code Examples for org.apache.spark.sql.Dataset#head()
The following examples show how to use org.apache.spark.sql.Dataset#head(). All of them are drawn from open source projects; you can go to the original project or source file by following the link above each example.
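Before the project examples, here is a minimal, self-contained sketch of the two head() overloads. The SparkSession setup and the CSV path are placeholders for illustration, not taken from any project below: head() returns the first row of the Dataset, while head(n) returns the first n rows.

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HeadSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("head-sketch")
            .master("local[*]")
            .getOrCreate();

        // Placeholder input; any CSV file with a header row works here.
        Dataset<Row> ds = spark.read()
            .option("header", true)
            .csv("/tmp/example.csv");

        // head() returns the first row; it is equivalent to first().
        Row firstRow = ds.head();
        System.out.println(firstRow);

        // head(n) returns the first n rows; from Java the result must be
        // cast, as explained under Example 1 below.
        Row[] firstThree = (Row[]) ds.head(3);
        System.out.println(Arrays.toString(firstThree));

        spark.stop();
    }
}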
Example 1
Source File: SpringSparkDemoApplication.java From articles with Apache License 2.0
public void testSparkOperations() {
    // Read the first CSV file, treating the first line as a header.
    Dataset<Row> csvDataSet1 = spark.read()
        .option("header", true)
        .csv("./src/main/resources/employees1.csv");

    System.out.println("Schema of the CSV file:");
    csvDataSet1.printSchema();

    System.out.println("Columns in CSV file:");
    System.out.println(Arrays.toString(csvDataSet1.columns()));

    System.out.println("Total data set 1 count: " + csvDataSet1.count());

    System.out.println("First 5 rows:");
    Row[] head = (Row[]) csvDataSet1.head(5);
    System.out.println(Arrays.toString(head));

    Dataset<Row> csvDataSet2 = spark.read()
        .option("header", true)
        .csv("./src/main/resources/employees2.csv");
    System.out.println("Total data set 2 count: " + csvDataSet2.count());
    System.out.println();

    // Combine both datasets; union() requires matching schemas.
    Dataset<Row> dataSetUnion = csvDataSet1.union(csvDataSet2);
    System.out.println("Total data set union count: " + dataSetUnion.count());
}
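A note on the cast above: the Scala signature of head(n) is Array[T], and with a generic element type that array erases to Object in the compiled method signature, so Java callers receive an Object and must cast it to Row[] themselves. head() with no argument does not have this problem, since it returns a single T directly.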
Example 2
Source File: ConceptMapsTest.java From bunsen with Apache License 2.0
@Test
public void testLoadExpandedMappings() throws FHIRException {
    ConceptMap map = conceptMap("urn:cerner:map:testmap", "1");

    // Explicitly create a mapping dataset to simulate an ETL load
    // from an external source.
    Mapping mapping = new Mapping();
    mapping.setConceptMapUri(map.getUrl());
    mapping.setConceptMapVersion(map.getVersion());
    mapping.setSourceValueSet("urn:source:valueset");
    mapping.setTargetValueSet("urn:target:valueset");
    mapping.setSourceSystem("urn:source:system");
    mapping.setSourceValue("urn:source:code:a");
    mapping.setTargetSystem("urn:target:system");
    mapping.setTargetValue("urn:target:code:1");

    Dataset<Mapping> mappings = spark.createDataset(
        Arrays.asList(mapping),
        ConceptMaps.getMappingEncoder());

    ConceptMaps maps = ConceptMaps.getEmpty(spark)
        .withExpandedMap(map, mappings);

    Dataset<Mapping> loadedMappings = maps.getMappings();

    Assert.assertEquals(1, loadedMappings.count());

    // head() returns the single mapping that was loaded.
    Mapping loadedMapping = loadedMappings.head();
    Assert.assertEquals(mapping, loadedMapping);
}
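Because loadedMappings is a typed Dataset&lt;Mapping&gt; rather than a Dataset&lt;Row&gt;, head() returns a Mapping bean directly, which is what allows the assertEquals comparison against the original object without any field-by-field extraction.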
Example 3
Source File: FhirEncodersTest.java From bunsen with Apache License 2.0
@Test
public void testFromParquet() throws IOException {
    Path dirPath = Files.createTempDirectory("encoder_test");
    String path = dirPath.resolve("out.parquet").toString();

    // Write the conditions dataset to Parquet, then read it back
    // as a strongly typed Dataset<Condition>.
    conditionsDataset.write().save(path);

    Dataset<Condition> ds = spark.read()
        .parquet(path)
        .as(encoders.of(Condition.class));

    Condition readCondition = ds.head();

    Assert.assertEquals(condition.getId(), readCondition.getId());
}
Example 4
Source File: FhirEncodersTest.java From bunsen with Apache License 2.0
@Test
public void testFromRdd() {
    JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

    // Build an RDD with a single Condition and convert it to a
    // typed Dataset using the FHIR encoder.
    JavaRDD<Condition> conditionRdd = context.parallelize(ImmutableList.of(condition));

    Dataset<Condition> ds = spark.createDataset(
        conditionRdd.rdd(),
        encoders.of(Condition.class));

    Condition convertedCondition = ds.head();

    Assert.assertEquals(condition.getId(), convertedCondition.getId());
}
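One caveat that applies to all of the examples above: head(), like its alias first(), throws a NoSuchElementException when the Dataset is empty. These tests guard against that implicitly, either by asserting count() first or by constructing the dataset from known input. In code where emptiness is possible, takeAsList(1) is a safer alternative, since it simply returns an empty list.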