Java Code Examples for org.apache.spark.sql.types.StructType#fieldNames()

The following examples show how to use org.apache.spark.sql.types.StructType#fieldNames(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: DataFrames.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a DataVec {@link Schema} with one column per field of the given Spark struct type.
 *
 * <p>Each field's data type name is lower-cased and mapped to a column kind:
 * {@code double}, {@code float}, {@code long}, {@code int}/{@code integer} and
 * {@code string}. Columns are added in field order and carry the field's name.
 *
 * @param structType the struct type to create the schema from
 * @return the created schema
 * @throws RuntimeException if a field has a type name other than the ones listed above
 */
public static Schema fromStructType(StructType structType) {
    Schema.Builder schemaBuilder = new Schema.Builder();
    StructField[] structFields = structType.fields();
    String[] columnNames = structType.fieldNames();

    for (int idx = 0; idx < structFields.length; idx++) {
        // The type name decides which kind of column is appended for this field.
        String typeName = structFields[idx].dataType().typeName().toLowerCase();

        if ("double".equals(typeName)) {
            schemaBuilder.addColumnDouble(columnNames[idx]);
        } else if ("float".equals(typeName)) {
            schemaBuilder.addColumnFloat(columnNames[idx]);
        } else if ("long".equals(typeName)) {
            schemaBuilder.addColumnLong(columnNames[idx]);
        } else if ("int".equals(typeName) || "integer".equals(typeName)) {
            schemaBuilder.addColumnInteger(columnNames[idx]);
        } else if ("string".equals(typeName)) {
            schemaBuilder.addColumnString(columnNames[idx]);
        } else {
            throw new RuntimeException("Unknown type: " + typeName);
        }
    }

    return schemaBuilder.build();
}
 
Example 2
Source File: MorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Spark flat-map function that runs every incoming {@link Row} through a
 * Morphline pipeline and emits the resulting Records as Rows of {@code outputSchema}.
 *
 * @param morphlineFile the morphline file used to look up (or set up) the pipeline
 * @param morphlineId the morphline id used to look up (or set up) the pipeline
 * @param outputSchema the schema each output Record is converted to
 * @param errorOnEmpty passed through unchanged to {@code MorphlineUtils.executePipeline}
 * @return the mapping function; it throws a {@link RuntimeException} for a Row without a schema
 */
@SuppressWarnings("serial")
public static FlatMapFunction<Row, Row> morphlineMapper(final String morphlineFile, final String morphlineId,
                                                        final StructType outputSchema, final boolean errorOnEmpty) {
  return new FlatMapFunction<Row, Row>() {
    @Override
    public Iterator<Row> call(Row row) throws Exception {
      // Reuse the pipeline registered for this file/id, or register a new one on first use
      Pipeline activePipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId);
      if (activePipeline == null) {
        activePipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true);
      }

      // A Row can only be turned into a Record when its schema names the fields
      StructType rowSchema = row.schema();
      if (rowSchema == null) {
        throw new RuntimeException("Row does not have an associated StructType schema");
      }

      // Copy every field, by position, into the Record under its field name
      // TODO : Confirm nested object conversion
      Record source = new Record();
      int position = 0;
      for (String fieldName : rowSchema.fieldNames()) {
        source.put(fieldName, row.get(position));
        position++;
      }

      // Run the Record through the pipeline
      List<Record> produced = MorphlineUtils.executePipeline(activePipeline, source, errorOnEmpty);

      // Turn each resulting Record back into a Row of the requested output schema
      List<Row> converted = Lists.newArrayListWithCapacity(produced.size());
      for (Record result : produced) {
        Row asRow = MorphlineUtils.convertToRow(outputSchema, result);
        converted.add(asRow);
      }

      return converted.iterator();
    }
  };
}
 
Example 3
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the function returned by {@code MorphlineUtils.morphlineMapper} when
 * {@code getPipeline} already returns a pipeline: the Row's two fields are copied into
 * a Record, that Record is passed to {@code executePipeline}, and no Rows come back
 * because the pipeline is stubbed to produce an empty list.
 */
@Test
public void morphlineMapper(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations for the MorphlineUtils statics and the mocked Row / StructType
  new Expectations(MorphlineUtils.class) {{
    // The pipeline lookup for ("file", "id") yields the mocked pipeline, exactly once
    MorphlineUtils.getPipeline("file", "id"); result = pipeline; times = 1;
    // Executing the pipeline yields an empty list of Records, exactly once
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    // Two field reads are expected; they return "val1" and then "val2"
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  // The same mocked schema is also passed as the output schema
  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record handed to executePipeline and inspect its contents
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    // One Record field per schema field name ("one" and "two")
    assertEquals(2, record.getFields().size());
    // The first value read from the Row is stored under the first field name
    assertEquals("val1", record.get("one").get(0));
  }};
}
 
Example 4
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the function returned by {@code MorphlineUtils.morphlineMapper} when
 * {@code getPipeline} returns {@code null}: {@code setPipeline} must be called once to
 * supply the pipeline, after which the Row's two fields are copied into a Record and
 * passed to {@code executePipeline}. No Rows come back because the pipeline is stubbed
 * to produce an empty list.
 */
@Test
public void morphlineMapperNoPipeline(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations for the MorphlineUtils statics and the mocked Row / StructType
  new Expectations(MorphlineUtils.class) {{
    // The pipeline lookup for ("file", "id") finds nothing, exactly once
    MorphlineUtils.getPipeline("file", "id"); result = null; times = 1;
    // ... so setPipeline is expected exactly once and supplies the mocked pipeline
    MorphlineUtils.setPipeline("file", "id", (MorphlineUtils.Collector) any, true); result = pipeline; times = 1;
    // Executing the pipeline yields an empty list of Records, exactly once
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    // Two field reads are expected; they return "val1" and then "val2"
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  // The same mocked schema is also passed as the output schema
  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record handed to executePipeline and inspect its contents
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    // One Record field per schema field name ("one" and "two")
    assertEquals(2, record.getFields().size());
    // The first value read from the Row is stored under the first field name
    assertEquals("val1", record.get("one").get(0));
  }};
}
 
Example 5
Source File: RowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Projects a row onto a subset schema.
 *
 * <p>For each field name of {@code subsetSchema}, in order, the value at that field's
 * index in {@code row} is copied into the result.
 *
 * @param row the row to read values from
 * @param subsetSchema the schema whose field names select the values to keep
 * @return a new {@code RowWithSchema} carrying {@code subsetSchema} and the selected values
 */
public static Row subsetRow(Row row, StructType subsetSchema) {
  Object[] selected = new Object[subsetSchema.length()];
  String[] wantedNames = subsetSchema.fieldNames();

  for (int pos = 0; pos < wantedNames.length; pos++) {
    // Resolve the field's position in the source row by name, then copy its value
    int sourceIndex = row.fieldIndex(wantedNames[pos]);
    selected[pos] = row.get(sourceIndex);
  }

  return new RowWithSchema(subsetSchema, selected);
}
 
Example 6
Source File: TestRowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code RowUtils.toRowValue} turns a Row holding a nested Row into a nested
 * map when the target type is a map of string to map of string to integer.
 *
 * <p>A single mocked Row plays both the outer and the inner row: its first
 * {@code schema()} call returns the outer schema and the second returns the inner one.
 */
@Test
public void testToRowValueMapRowNested(
    final @Mocked Row inputRow,
    final @Mocked StructType innerSchema,
    final @Mocked StructType outerSchema
) {
  // Target type: map<string, map<string, integer>>
  DataType field = DataTypes.createMapType(DataTypes.StringType,
      DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
  );

  // Expected result: {"outer": {"field1": 1, "field2": 2}}
  Map<Object, Object> expectedInnerMap = Maps.newHashMap();
  expectedInnerMap.put("field1", 1);
  expectedInnerMap.put("field2", 2);

  Map<Object, Object> expectedOuterMap = Maps.newHashMap();
  expectedOuterMap.put("outer", expectedInnerMap);

  new Expectations() {{
    // First schema() call yields the outer schema, the second the inner schema
    inputRow.schema(); returns(outerSchema, innerSchema);

    outerSchema.fieldNames(); result = new String[] {"outer"};
    innerSchema.fieldNames(); result = new String[] {"field1", "field2"};

    // get(0) first returns the row itself (the nested row), then the value 1
    inputRow.get(0); returns(inputRow, 1);
    inputRow.get(1); result = 2;
  }};

  assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
 
Example 7
Source File: DataFrames.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Spark struct type into a DataVec {@link Schema}.
 *
 * <p>Fields are visited in order. The lower-cased type name of each field selects the
 * column kind ({@code double}, {@code float}, {@code long}, {@code int}/{@code integer}
 * or {@code string}), and the column is named after the field.
 *
 * @param structType the struct type to create the schema from
 * @return the created schema
 * @throws RuntimeException if a field's type name is not one of the supported names
 */
public static Schema fromStructType(StructType structType) {
    Schema.Builder schemaBuilder = new Schema.Builder();
    StructField[] structFields = structType.fields();
    String[] columnNames = structType.fieldNames();

    for (int idx = 0; idx < structFields.length; idx++) {
        // Dispatch on the field's type name to pick the matching column kind.
        String typeName = structFields[idx].dataType().typeName().toLowerCase();

        if ("double".equals(typeName)) {
            schemaBuilder.addColumnDouble(columnNames[idx]);
        } else if ("float".equals(typeName)) {
            schemaBuilder.addColumnFloat(columnNames[idx]);
        } else if ("long".equals(typeName)) {
            schemaBuilder.addColumnLong(columnNames[idx]);
        } else if ("int".equals(typeName) || "integer".equals(typeName)) {
            schemaBuilder.addColumnInteger(columnNames[idx]);
        } else if ("string".equals(typeName)) {
            schemaBuilder.addColumnString(columnNames[idx]);
        } else {
            throw new RuntimeException("Unknown type: " + typeName);
        }
    }

    return schemaBuilder.build();
}
 
Example 8
Source File: SchemaIntrospectionApp.java    From net.jgp.labs.spark with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a ten-row dataframe with scalar, array, struct and array-of-struct columns,
 * shows it, prints its schema, and then logs the schema's field names, its catalog
 * string and the DDL of every field.
 */
private void start() {
  // Get or create a session that runs against a local master
  SparkSession spark = SparkSession.builder()
      .appName("Array to Dataframe (Dataset<Row>)")
      .master("local")
      .getOrCreate();

  // Type of the "struct" column
  StructType structColumnType = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("sid", DataTypes.IntegerType, false),
      DataTypes.createStructField("svalue", DataTypes.StringType, false) });

  // Element type of the "array-struct" column
  StructType arrayElementType = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("asid", DataTypes.IntegerType, false),
      DataTypes.createStructField("asvalue", DataTypes.StringType, false) });

  StructType schema = DataTypes.createStructType(new StructField[] {
      DataTypes.createStructField("id", DataTypes.IntegerType, false),
      DataTypes.createStructField("value-s", DataTypes.StringType, false),
      DataTypes.createStructField("value-d", DataTypes.DoubleType, false),
      DataTypes.createStructField(
          "array", DataTypes.createArrayType(DataTypes.StringType, false), false),
      DataTypes.createStructField("struct", structColumnType, false),
      DataTypes.createStructField(
          "array-struct", DataTypes.createArrayType(arrayElementType), false) });

  // Ten rows; each one carries a two-element string array, one struct and three sub-rows
  List<Row> rows = new ArrayList<>();
  for (int rowId = 0; rowId < 10; rowId++) {
    List<Row> structArray = new ArrayList<>();
    for (int subId = 1000; subId < 1003; subId++) {
      structArray.add(RowFactory.create(subId, "Sub " + subId));
    }

    Row structValue = RowFactory.create(rowId * 5000, "Struct #" + rowId);
    String[] stringArray = { "v" + (rowId * 100), "v" + (rowId * 100 + 1) };

    Row fullRow = RowFactory.create(
        rowId, "Value " + rowId, rowId / 4.0, stringArray, structValue, structArray);
    rows.add(fullRow);
  }

  Dataset<Row> df = spark.createDataFrame(rows, schema);
  df.show(false);
  df.printSchema();

  // Introspect the schema as reported by the dataframe itself
  StructType readSchema = df.schema();
  String[] fieldNames = readSchema.fieldNames();
  for (int idx = 0; idx < fieldNames.length; idx++) {
    log.info("Field #{}: '{}'", idx, fieldNames[idx]);
  }

  log.info("Catalog: '{}'", readSchema.catalogString());

  StructField[] fields = readSchema.fields();
  for (int idx = 0; idx < fields.length; idx++) {
    log.info("DDL for field #{}: '{}'", idx, fields[idx].toDDL());
  }
}