Java Code Examples for org.apache.spark.sql.types.StructType#fieldNames()
The following examples show how to use
org.apache.spark.sql.types.StructType#fieldNames().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataFrames.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Create a datavec schema from a struct type.
 *
 * <p>Every field of the struct type becomes one column of the same name. The
 * column kind is chosen from the lower-cased type name of the field's data
 * type; the recognised names are {@code double}, {@code float}, {@code long},
 * {@code int}/{@code integer} and {@code string}.
 *
 * @param structType the struct type to create the schema from
 * @return the created schema
 * @throws RuntimeException if a field's type name is not one of the recognised names
 */
public static Schema fromStructType(StructType structType) {
    Schema.Builder builder = new Schema.Builder();
    StructField[] fields = structType.fields();
    String[] fieldNames = structType.fieldNames();
    for (int i = 0; i < fields.length; i++) {
        // Lower-case with a fixed locale: the case labels below are plain ASCII,
        // so the match must not depend on the JVM's default locale.
        String name = fields[i].dataType().typeName().toLowerCase(java.util.Locale.ROOT);
        switch (name) {
            case "double":
                builder.addColumnDouble(fieldNames[i]);
                break;
            case "float":
                builder.addColumnFloat(fieldNames[i]);
                break;
            case "long":
                builder.addColumnLong(fieldNames[i]);
                break;
            case "int":
            case "integer":
                builder.addColumnInteger(fieldNames[i]);
                break;
            case "string":
                builder.addColumnString(fieldNames[i]);
                break;
            default:
                throw new RuntimeException("Unknown type: " + name);
        }
    }
    return builder.build();
}
Example 2
Source File: MorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
@SuppressWarnings("serial") public static FlatMapFunction<Row, Row> morphlineMapper(final String morphlineFile, final String morphlineId, final StructType outputSchema, final boolean errorOnEmpty) { return new FlatMapFunction<Row, Row>() { @Override public Iterator<Row> call(Row row) throws Exception { // Retrieve the Command pipeline via ThreadLocal Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId); if (null == pipeline) { pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true); } // Convert each Row into a Record StructType inputSchema = row.schema(); if (null == inputSchema) { throw new RuntimeException("Row does not have an associated StructType schema"); } Record inputRecord = new Record(); String[] fieldNames = inputSchema.fieldNames(); // TODO : Confirm nested object conversion for (int i = 0; i < fieldNames.length; i++) { inputRecord.put(fieldNames[i], row.get(i)); } // Process each Record via the Command pipeline List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty); // Convert each Record into a new Row List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size()); for (Record record : outputRecords) { outputRows.add(MorphlineUtils.convertToRow(outputSchema, record)); } return outputRows.iterator(); } }; }
Example 3
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks morphlineMapper when the pipeline lookup already returns a pipeline:
 * the two-field mocked Row must reach executePipeline as a Record with two
 * fields, and an empty pipeline result must yield no output Rows.
 */
@Test
public void morphlineMapper(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations for the MorphlineUtils calls and the mocked row/schema
  new Expectations(MorphlineUtils.class) {{
    // The lookup returns the mocked pipeline, exactly once
    MorphlineUtils.getPipeline("file", "id"); result = pipeline; times = 1;
    // The pipeline is executed exactly once and produces no Records
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    // Two positional reads, returning "val1" and then "val2"
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  // The pipeline returned an empty list, so no Rows are expected
  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record handed to executePipeline and inspect its contents
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    assertEquals(2, record.getFields().size());
    assertEquals("val1", record.get("one").get(0));
  }};
}
Example 4
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks morphlineMapper when the pipeline lookup returns null: setPipeline
 * must be called once to supply the pipeline, after which the mocked Row is
 * expected to reach executePipeline as a Record with two fields and an empty
 * pipeline result must yield no output Rows.
 */
@Test
public void morphlineMapperNoPipeline(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations for the MorphlineUtils calls and the mocked row/schema
  new Expectations(MorphlineUtils.class) {{
    // The lookup finds nothing, exactly once ...
    MorphlineUtils.getPipeline("file", "id"); result = null; times = 1;
    // ... so the pipeline must be supplied through setPipeline, exactly once
    MorphlineUtils.setPipeline("file", "id", (MorphlineUtils.Collector) any, true); result = pipeline; times = 1;
    // The pipeline is executed exactly once and produces no Records
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    // Two positional reads, returning "val1" and then "val2"
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  // The pipeline returned an empty list, so no Rows are expected
  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record handed to executePipeline and inspect its contents
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    assertEquals(2, record.getFields().size());
    assertEquals("val1", record.get("one").get(0));
  }};
}
Example 5
Source File: RowUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Builds a new Row holding only the fields named by the subset schema.
 *
 * <p>Values are looked up in {@code row} by field name and stored in the
 * order in which the subset schema lists its field names.
 *
 * @param row the Row to read values from
 * @param subsetSchema the schema naming the fields to keep
 * @return a RowWithSchema carrying {@code subsetSchema} and the selected values
 */
public static Row subsetRow(Row row, StructType subsetSchema) {
  Object[] picked = new Object[subsetSchema.length()];
  String[] wanted = subsetSchema.fieldNames();

  for (int pos = 0; pos < wanted.length; pos++) {
    // Resolve the field by name in the source Row, then read it by index
    int sourceIndex = row.fieldIndex(wanted[pos]);
    picked[pos] = row.get(sourceIndex);
  }

  return new RowWithSchema(subsetSchema, picked);
}
Example 6
Source File: TestRowUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks RowUtils.toRowValue for a map-of-maps data type: a mocked Row whose
 * single field "outer" holds another Row with fields "field1" and "field2"
 * is expected to convert to a nested map {outer={field1=1, field2=2}}.
 */
@Test
public void testToRowValueMapRowNested(
    final @Mocked Row inputRow,
    final @Mocked StructType innerSchema,
    final @Mocked StructType outerSchema
) {
  // Target type: map of String to (map of String to Integer)
  DataType field = DataTypes.createMapType(DataTypes.StringType,
      DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
  );

  // Expected result, built inside-out
  Map<Object, Object> expectedInnerMap = Maps.newHashMap();
  expectedInnerMap.put("field1", 1);
  expectedInnerMap.put("field2", 2);

  Map<Object, Object> expectedOuterMap = Maps.newHashMap();
  expectedOuterMap.put("outer", expectedInnerMap);

  // The same mocked Row plays both the outer and the inner Row; the order of
  // the values given to returns(...) decides what each successive call yields
  new Expectations() {{
    // First schema() call yields the outer schema, the second the inner schema
    inputRow.schema(); returns(outerSchema, innerSchema);
    outerSchema.fieldNames(); result = new String[] {"outer"};
    innerSchema.fieldNames(); result = new String[] {"field1", "field2"};

    // First get(0) yields the nested Row (the mock itself), the second yields 1
    inputRow.get(0); returns(inputRow, 1);
    inputRow.get(1); result = 2;
  }};

  assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
Example 7
Source File: DataFrames.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Create a datavec schema from a struct type.
 *
 * <p>Every field of the struct type becomes one column of the same name. The
 * column kind is chosen from the lower-cased type name of the field's data
 * type; the recognised names are {@code double}, {@code float}, {@code long},
 * {@code int}/{@code integer} and {@code string}.
 *
 * @param structType the struct type to create the schema from
 * @return the created schema
 * @throws RuntimeException if a field's type name is not one of the recognised names
 */
public static Schema fromStructType(StructType structType) {
    Schema.Builder builder = new Schema.Builder();
    StructField[] fields = structType.fields();
    String[] fieldNames = structType.fieldNames();
    for (int i = 0; i < fields.length; i++) {
        // Lower-case with a fixed locale: the case labels below are plain ASCII,
        // so the match must not depend on the JVM's default locale.
        String name = fields[i].dataType().typeName().toLowerCase(java.util.Locale.ROOT);
        switch (name) {
            case "double":
                builder.addColumnDouble(fieldNames[i]);
                break;
            case "float":
                builder.addColumnFloat(fieldNames[i]);
                break;
            case "long":
                builder.addColumnLong(fieldNames[i]);
                break;
            case "int":
            case "integer":
                builder.addColumnInteger(fieldNames[i]);
                break;
            case "string":
                builder.addColumnString(fieldNames[i]);
                break;
            default:
                throw new RuntimeException("Unknown type: " + name);
        }
    }
    return builder.build();
}
Example 8
Source File: SchemaIntrospectionApp.java From net.jgp.labs.spark with Apache License 2.0 | 4 votes |
/**
 * Builds a small in-memory dataframe with scalar, array, struct and
 * array-of-struct columns, displays it, and then logs what can be read back
 * from its schema: the field names, the catalog string and each field's DDL.
 */
private void start() {
  SparkSession session = SparkSession.builder()
      .appName("Array to Dataframe (Dataset<Row>)")
      .master("local")
      .getOrCreate();

  // Struct type used for the "struct" column
  StructType nestedStruct = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("sid", DataTypes.IntegerType, false),
      DataTypes.createStructField("svalue", DataTypes.StringType, false) });

  // Struct type used for the elements of the "array-struct" column
  StructType arrayElementStruct = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("asid", DataTypes.IntegerType, false),
      DataTypes.createStructField("asvalue", DataTypes.StringType, false) });

  StructType schema = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("id", DataTypes.IntegerType, false),
      DataTypes.createStructField("value-s", DataTypes.StringType, false),
      DataTypes.createStructField("value-d", DataTypes.DoubleType, false),
      DataTypes.createStructField(
          "array", DataTypes.createArrayType(DataTypes.StringType, false), false),
      DataTypes.createStructField("struct", nestedStruct, false),
      DataTypes.createStructField(
          "array-struct", DataTypes.createArrayType(arrayElementStruct), false) });

  // Ten rows, each with its own struct value, two-element array and three sub-rows
  List<Row> data = new ArrayList<>();
  for (int rowNo = 0; rowNo < 10; rowNo++) {
    List<Row> nestedRows = new ArrayList<>();
    for (int subId = 1000; subId < 1003; subId++) {
      nestedRows.add(RowFactory.create(subId, "Sub " + subId));
    }

    Row structValue = RowFactory.create(rowNo * 5000, "Struct #" + rowNo);
    String[] arrayValue = { "v" + (rowNo * 100), "v" + (rowNo * 100 + 1) };

    data.add(RowFactory.create(
        rowNo, "Value " + rowNo, rowNo / 4.0, arrayValue, structValue, nestedRows));
  }

  Dataset<Row> df = session.createDataFrame(data, schema);
  df.show(false);
  df.printSchema();

  // Introspection: read the schema back from the dataframe
  StructType readSchema = df.schema();

  String[] names = readSchema.fieldNames();
  for (int idx = 0; idx < names.length; idx++) {
    log.info("Field #{}: '{}'", idx, names[idx]);
  }

  log.info("Catalog: '{}'", readSchema.catalogString());

  StructField[] readFields = readSchema.fields();
  for (int idx = 0; idx < readFields.length; idx++) {
    log.info("DDL for field #{}: '{}'", idx, readFields[idx].toDDL());
  }
}