Java Code Examples for org.apache.spark.sql.Row#schema()

The following examples show how to use org.apache.spark.sql.Row#schema(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: MorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Spark {@link FlatMapFunction} that runs each input Row through a morphline pipeline.
 *
 * <p>For every Row the returned function copies the Row's fields into a {@code Record}
 * (keyed by schema field name), executes the pipeline on it, and converts every resulting
 * Record into a Row using {@code outputSchema}.
 *
 * @param morphlineFile morphline file used to look up (or create) the pipeline
 * @param morphlineId   morphline identifier used to look up (or create) the pipeline
 * @param outputSchema  schema passed to {@code convertToRow} for each output Record
 * @param errorOnEmpty  flag forwarded to {@code setPipeline} and {@code executePipeline}
 * @return a function mapping one input Row to zero or more output Rows
 */
@SuppressWarnings("serial")
public static FlatMapFunction<Row, Row> morphlineMapper(final String morphlineFile, final String morphlineId,
                                                        final StructType outputSchema, final boolean errorOnEmpty) {
  return new FlatMapFunction<Row, Row>() {
    @Override
    public Iterator<Row> call(Row row) throws Exception {
      // Look up the pipeline for this file/id; create one via setPipeline when none is returned
      Pipeline activePipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId);
      if (activePipeline == null) {
        activePipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true);
      }

      // The Row must carry its own schema, since field names come from it
      StructType rowSchema = row.schema();
      if (rowSchema == null) {
        throw new RuntimeException("Row does not have an associated StructType schema");
      }

      // Row -> Record, one entry per schema field
      // TODO : Confirm nested object conversion
      String[] names = rowSchema.fieldNames();
      Record source = new Record();
      int position = 0;
      while (position < names.length) {
        source.put(names[position], row.get(position));
        position++;
      }

      // Run the Record through the pipeline
      List<Record> produced = MorphlineUtils.executePipeline(activePipeline, source, errorOnEmpty);

      // Record -> Row, using the caller-supplied output schema
      List<Row> converted = Lists.newArrayListWithCapacity(produced.size());
      for (Record result : produced) {
        Row asRow = MorphlineUtils.convertToRow(outputSchema, result);
        converted.add(asRow);
      }

      return converted.iterator();
    }
  };
}
 
Example 2
Source File: RawTranslator.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Passes the message through unchanged in content: reads every field named in the message's
 * schema and repackages the values into a single new {@code RowWithSchema} with that schema.
 *
 * @param message the incoming row; its schema drives which fields are read
 * @return a singleton collection holding the rebuilt row
 */
@Override
public Iterable<Row> translate(Row message) {
  StructField[] schemaFields = message.schema().fields();
  Object[] copied = new Object[schemaFields.length];

  // Read each value by field name, in schema order
  for (int idx = 0; idx < schemaFields.length; idx++) {
    copied[idx] = message.getAs(schemaFields[idx].name());
  }

  Row rebuilt = new RowWithSchema(message.schema(), copied);
  return Collections.singleton(rebuilt);
}
 
Example 3
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the function returned by {@code MorphlineUtils.morphlineMapper} when
 * {@code getPipeline} already returns a pipeline: the Row's two fields should be copied into
 * the Record handed to {@code executePipeline}, and an empty pipeline result should produce
 * no output Rows.
 */
@Test
public void morphlineMapper(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations: pipeline lookup succeeds, execution returns an empty list,
  // and the mocked Row exposes a two-field schema with values "val1" and "val2".
  new Expectations(MorphlineUtils.class) {{
    MorphlineUtils.getPipeline("file", "id"); result = pipeline; times = 1;
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  // The pipeline returned no records, so no Rows are expected
  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record passed to executePipeline and check it mirrors the Row's fields
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    assertEquals(2, record.getFields().size());
    assertEquals("val1", record.get("one").get(0));
  }};
}
 
Example 4
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the function returned by {@code MorphlineUtils.morphlineMapper} when
 * {@code getPipeline} returns null: {@code setPipeline} is expected to be called exactly once
 * to supply the pipeline, after which the Row is converted and executed as usual.
 */
@Test
public void morphlineMapperNoPipeline(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations: lookup returns null, so setPipeline must provide the pipeline;
  // execution returns an empty list; the mocked Row exposes two fields.
  new Expectations(MorphlineUtils.class) {{
    MorphlineUtils.getPipeline("file", "id"); result = null; times = 1;
    MorphlineUtils.setPipeline("file", "id", (MorphlineUtils.Collector) any, true); result = pipeline; times = 1;
    MorphlineUtils.executePipeline(pipeline, (Record) any, true); result = Lists.newArrayList(); times = 1;
    row.schema(); result = schema;
    row.get(anyInt); returns("val1", "val2"); times = 2;
    schema.fieldNames(); result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> results = function.call(row);

  // The pipeline returned no records, so no Rows are expected
  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(results).size());

  // Capture the Record passed to executePipeline and check it mirrors the Row's fields
  new Verifications() {{
    Record record;
    MorphlineUtils.executePipeline(pipeline, record = withCapture(), true);
    assertEquals(2, record.getFields().size());
    assertEquals("val1", record.get("one").get(0));
  }};
}
 
Example 5
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the function returned by {@code MorphlineUtils.morphlineMapper} with a Row whose
 * {@code schema()} is null; the call is expected to fail with a {@link RuntimeException}.
 */
@Test (expected = RuntimeException.class)
public void morphlineMapperNoSchema(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  // Recorded expectations: a pipeline is found, but the Row reports no schema
  new Expectations(MorphlineUtils.class) {{
    MorphlineUtils.getPipeline("file", "id"); result = pipeline; times = 1;
    row.schema(); result = null;
  }};

  // The mocked schema is only used as the output schema argument here
  FlatMapFunction<Row, Row> function = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  function.call(row);
}
 
Example 6
Source File: RowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of {@code row} in which the value of {@code fieldName} is replaced.
 *
 * <p>The input row is not modified; a new {@code RowWithSchema} carrying the same schema is
 * returned. All other field values are copied by position.
 *
 * @param row         the row to copy; it must carry a schema
 * @param fieldName   name of the field whose value is replaced; resolved through
 *                    {@code row.fieldIndex}, which is expected to reject unknown names
 * @param replacement the new value for that field (may be null)
 * @return a new row with the same schema and the one value swapped
 */
public static Row set(Row row, String fieldName, Object replacement) {
  Object[] values = new Object[row.length()];
  final int fieldCount = row.schema().fields().length;

  // Resolve the target index once instead of on every iteration. The guard keeps the
  // previous behaviour for a zero-field row, where the lookup was never reached.
  if (fieldCount > 0) {
    final int targetIndex = row.fieldIndex(fieldName);

    for (int i = 0; i < fieldCount; i++) {
      values[i] = (i == targetIndex) ? replacement : row.get(i);
    }
  }

  return new RowWithSchema(row.schema(), values);
}
 
Example 7
Source File: TranslateFunction.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a raw message can be handed to the translator: it must carry a schema, and
 * {@code hasValueField} must accept it.
 *
 * @param message the raw message row to validate
 * @throws RuntimeException if the schema is missing or the value field check fails
 */
private void validateMessageSchema(Row message) {
  final boolean schemaMissing = message.schema() == null;
  if (schemaMissing) {
    throw new RuntimeException("Translator must be provided raw messages with an embedded schema");
  }

  if (hasValueField(message)) {
    return;
  }

  final String problem = "Translator must be provided raw messages with a '" +
      Translator.VALUE_FIELD_NAME + "' field";
  throw new RuntimeException(problem);
}
 
Example 8
Source File: TestRowUtils.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code RowUtils.toRowValue} for a map-of-maps target type when the input is a Row
 * that itself contains a nested Row: the expected result is an outer map with key "outer"
 * whose value is an inner map {"field1": 1, "field2": 2}.
 */
@Test
public void testToRowValueMapRowNested(
    final @Mocked Row inputRow,
    final @Mocked StructType innerSchema,
    final @Mocked StructType outerSchema
) {
  // Target type: map<string, map<string, int>>
  DataType field = DataTypes.createMapType(DataTypes.StringType,
      DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
  );

  Map<Object, Object> expectedInnerMap = Maps.newHashMap();
  expectedInnerMap.put("field1", 1);
  expectedInnerMap.put("field2", 2);

  Map<Object, Object> expectedOuterMap = Maps.newHashMap();
  expectedOuterMap.put("outer", expectedInnerMap);

  new Expectations() {{
    // The same mock plays both the outer and the nested row: the first schema() call
    // yields the outer schema, the second the inner one.
    inputRow.schema(); returns(outerSchema, innerSchema);

    outerSchema.fieldNames(); result = new String[] {"outer"};
    innerSchema.fieldNames(); result = new String[] {"field1", "field2"};

    // get(0) first returns the mock itself (the nested row), then the inner value 1
    inputRow.get(0); returns(inputRow, 1);
    inputRow.get(1); result = 2;
  }};

  assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
 
Example 9
Source File: ZooKeeperOutput.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the planned mutations to ZooKeeper, one znode per row.
 *
 * <p>For each row the mutation type is read and its field removed, the remaining fields are
 * split into key fields ({@code keyFieldNames}) and value fields, the key is resolved to a
 * znode path, and the znode is either deleted or has its data set to the serialized value
 * fields.
 *
 * @param planned the mutation rows to apply; at most 1000, each with a schema
 * @throws RuntimeException if more than 1000 rows are given, the ZooKeeper connection cannot
 *         be obtained, a row has no schema, or a row's mutation type is not DELETE or UPSERT
 * @throws Exception any other failure raised by the ZooKeeper calls or the helpers used here
 */
@Override
public void applyRandomMutations(List<Row> planned) throws Exception {
  if (planned.size() > 1000) {
    throw new RuntimeException(
        "ZooKeeper output does not support applying more than 1000 mutations at a time. " +
        "This is to prevent misuse of ZooKeeper as a regular data store. " +
        "Do not use ZooKeeper for storing anything more than small pieces of metadata.");
  }

  ZooKeeper zk;
  try {
    zk = connection.getZooKeeper();
  } catch (Exception e) {
    throw new RuntimeException("Could not connect to ZooKeeper output", e);
  }

  for (Row plan : planned) {
    if (plan.schema() == null) {
      throw new RuntimeException("Mutation row provided to ZooKeeper output must contain a schema");
    }

    // Capture the mutation type before its field is stripped from the row
    MutationType mutationType = PlannerUtils.getMutationType(plan);
    plan = PlannerUtils.removeMutationTypeField(plan);

    Row key = RowUtils.subsetRow(plan, SchemaUtils.subsetSchema(plan.schema(), keyFieldNames));
    String znode = znodesForFilter(zk, key).iterator().next(); // There can only be one znode per full key
    byte[] value = serializeRow(RowUtils.subsetRow(plan, SchemaUtils.subtractSchema(plan.schema(), keyFieldNames)));

    switch (mutationType) {
      case DELETE:
        // Version -1 matches any version of the znode
        zk.delete(znode, -1);
        break;
      case UPSERT:
        prepareZnode(zk, znode);
        zk.setData(znode, value, -1);
        break;
      default:
        // Report the captured type: by this point 'plan' no longer has its mutation type
        // field, so reading the type from it again would not give the original value.
        throw new RuntimeException("ZooKeeper output does not support mutation type: " + mutationType);
    }
  }
}
 
Example 10
Source File: EventTimeUpsertPlanner.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Plans the mutations for one key from the rows arriving for it and the rows already stored.
 *
 * <p>Only the latest arriving row by event time is considered (the list is sorted in reverse
 * event-time order when it has more than one element, and element 0 is taken). It is then
 * compared with the first existing row, if any:
 * <ul>
 *   <li>no existing row: plan an INSERT, with a surrogate key when one is configured;</li>
 *   <li>arriving is before existing: plan nothing;</li>
 *   <li>arriving is simultaneous with or after existing, and differs in the value fields:
 *       plan an UPDATE;</li>
 *   <li>otherwise: plan nothing.</li>
 * </ul>
 * When a last-updated field is configured it is appended to the arriving row and set to the
 * current system time for planned INSERTs and UPDATEs.
 *
 * <p>Note that {@code arrivingForKey} may be sorted in place.
 *
 * @param key            the key being planned; must carry a schema
 * @param arrivingForKey rows arriving for the key; element 0 is read, so it must be non-empty
 * @param existingForKey rows already stored for the key; only the first is used
 * @return the planned mutation rows, empty when nothing needs to change
 * @throws RuntimeException if the key, the selected arriving row, or the existing row has
 *         no schema
 */
@Override
public List<Row> planMutationsForKey(Row key, List<Row> arrivingForKey, List<Row> existingForKey) {
  resetCurrentSystemTime();

  if (key.schema() == null) {
    throw new RuntimeException("Key sent to event time upsert planner does not contain a schema");
  }

  List<Row> planned = Lists.newArrayList();

  // Put the latest arriving row (by event time) first
  if (arrivingForKey.size() > 1) {
    Collections.sort(arrivingForKey, Collections.reverseOrder(eventTimeModel));
  }
  Row arriving = arrivingForKey.get(0);

  if (arriving.schema() == null) {
    throw new RuntimeException("Arriving row sent to event time upsert planner does not contain a schema");
  }

  arriving = PlannerUtils.appendMutationTypeField(arriving);

  if (hasLastUpdatedField()) {
    arriving = lastUpdatedTimeModel.appendFields(arriving);
  }

  Row existing = null;
  if (!existingForKey.isEmpty()) {
    existing = existingForKey.get(0);

    // Validate the existing row itself; this check previously re-tested the arriving row,
    // so a schema-less existing row was never caught here.
    if (existing.schema() == null) {
      throw new RuntimeException("Existing row sent to event time upsert planner does not contain a schema");
    }
  }

  if (existing == null) {
    if (hasLastUpdatedField()) {
      arriving = lastUpdatedTimeModel.setCurrentSystemTime(arriving);
    }

    if (hasSurrogateKeyField()) {
      arriving = PlannerUtils.appendSurrogateKey(arriving, getSurrogateKeyFieldName());
    }

    planned.add(PlannerUtils.setMutationType(arriving, MutationType.INSERT));
  }
  else if (PlannerUtils.before(eventTimeModel, arriving, existing)) {
    // We do nothing because the arriving record is older than the existing record
  }
  else if ((PlannerUtils.simultaneous(eventTimeModel, arriving, existing) ||
            PlannerUtils.after(eventTimeModel, arriving, existing)) &&
           RowUtils.different(arriving, existing, valueFieldNames))
  {
    if (hasLastUpdatedField()) {
      arriving = lastUpdatedTimeModel.setCurrentSystemTime(arriving);
    }
    planned.add(PlannerUtils.setMutationType(arriving, MutationType.UPDATE));
  }

  return planned;
}
 
Example 11
Source File: TranslateFunction.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a row produced by the translator carries a schema.
 *
 * @param translationResult a row returned by the translator
 * @throws RuntimeException if the row's schema is null
 */
private void validateTranslatedSchema(Row translationResult) {
  final boolean hasSchema = translationResult.schema() != null;
  if (hasSchema) {
    return;
  }

  throw new RuntimeException("Translator must translate to rows with an embedded schema");
}