Java Code Examples for org.apache.beam.sdk.values.KV#getValue()

The following examples show how to use org.apache.beam.sdk.values.KV#getValue(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KVStringStringToKettleRowFn.java    From kettle-beam with Apache License 2.0 7 votes vote down vote up
/**
 * Converts one {@code KV<String,String>} input element into a Kettle row and emits it.
 *
 * <p>The key is written to field 0 and the value to field 1 of a row allocated with
 * {@code rowMeta.size()} slots. {@code inputCounter} is incremented once the element has been
 * read and {@code writtenCounter} once the row has been emitted.
 *
 * @param processContext supplies the input element and receives the converted row
 * @throws RuntimeException wrapping any exception raised during the conversion; the failure is
 *     also counted in {@code numErrors} and logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {

    KV<String,String> kv = processContext.element();
    inputCounter.inc();

    Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() );
    outputRow[ 0 ] = kv.getKey(); // String
    outputRow[ 1 ] = kv.getValue(); // String

    processContext.output( new KettleRow( outputRow ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    // The message names the element type actually handled here: a String key, not a Long one.
    String message = "Error in KV<String,String> to Kettle Row conversion function";
    LOG.error( message, e );
    throw new RuntimeException( message, e );
  }
}
 
Example 2
Source File: KVLongStringToHopRowFn.java    From hop with Apache License 2.0 6 votes vote down vote up
/**
 * Turns one {@code KV<Long,String>} input element into a Hop row and emits it.
 *
 * <p>Field 0 of the row receives the key and field 1 the value. Any failure is counted in
 * {@code numErrors}, logged, and rethrown wrapped in a {@link RuntimeException}.
 *
 * @param processContext supplies the input element and receives the converted row
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    final KV<Long,String> pair = processContext.element();
    inputCounter.inc();

    final Object[] fields = RowDataUtil.allocateRowData( rowMeta.size() );
    fields[ 0 ] = pair.getKey();
    fields[ 1 ] = pair.getValue();

    final HopRow hopRow = new HopRow( fields );
    processContext.output( hopRow );
    writtenCounter.inc();
  } catch ( Exception conversionFailure ) {
    numErrors.inc();
    LOG.error( "Error in KV<Long,String> to Hop Row conversion function", conversionFailure );
    throw new RuntimeException( "Error in KV<Long,String> to Hop Row conversion function", conversionFailure );
  }
}
 
Example 3
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a table schema from the row carried as the value of {@code destination}.
 *
 * <p>One field is created per cell of the row: its name is the first key of the cell, passed
 * through {@code checkHeaderName}, and its type is always {@code STRING}.
 *
 * @param destination pair whose value is the row the schema is derived from
 * @return a schema with one STRING field per cell of the row
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  final TableRow sourceRow = destination.getValue();
  final TableSchema result = new TableSchema();
  final List<TableFieldSchema> columns = new ArrayList<>();

  // Currently every BigQuery column is typed as STRING.
  for (Map<String, Object> cell : sourceRow.getF()) {
    final String rawHeader = cell.keySet().iterator().next();
    final TableFieldSchema column = new TableFieldSchema();
    columns.add(column.setName(checkHeaderName(rawHeader)).setType("STRING"));
  }

  result.setFields(columns);
  return result;
}
 
Example 4
Source File: SplunkEventWriter.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Buffers one incoming event and flushes the buffer once enough events have accumulated.
 *
 * <p>Each call adds the event to {@code bufferState}, stores the incremented count in
 * {@code countState}, and sets the timer to a relative offset of {@code DEFAULT_FLUSH_DELAY}
 * seconds. When the count reaches {@code batchCount}, {@code flush} is invoked.
 *
 * @param input keyed event to buffer; only the value is used
 * @param receiver passed through to {@code flush}
 * @param window not used by this method
 * @param bufferState bag state holding the buffered events
 * @param countState value state holding the number of buffered events
 * @param timer timer that is reset on every element
 * @throws IOException declared on the signature; presumably raised by {@code flush}
 */
@ProcessElement
public void processElement(
    @Element KV<Integer, SplunkEvent> input,
    OutputReceiver<SplunkWriteError> receiver,
    BoundedWindow window,
    @StateId(BUFFER_STATE_NAME) BagState<SplunkEvent> bufferState,
    @StateId(COUNT_STATE_NAME) ValueState<Long> countState,
    @TimerId(TIME_ID_NAME) Timer timer) throws IOException {

  // A missing count means nothing has been buffered yet.
  final Long previousCount = MoreObjects.<Long>firstNonNull(countState.read(), 0L);
  INPUT_COUNTER.inc();
  bufferState.add(input.getValue());

  final Long bufferedCount = previousCount + 1;
  countState.write(bufferedCount);
  timer.offset(Duration.standardSeconds(DEFAULT_FLUSH_DELAY)).setRelative();

  if (bufferedCount < batchCount) {
    return;
  }

  LOG.info("Flushing batch of {} events", bufferedCount);
  flush(receiver, bufferState, countState);
}
 
Example 5
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a schema from the row stored as the value of {@code destination}.
 *
 * <p>For every cell of the row, the first key of the cell is run through
 * {@code checkHeaderName} and used as the name of a {@code STRING} field.
 *
 * @param destination pair whose value is the row to inspect
 * @return schema containing one STRING field per cell
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  TableRow row = destination.getValue();
  TableSchema tableSchema = new TableSchema();
  List<TableFieldSchema> fieldSchemas = new ArrayList<>();

  // All BigQuery data types are currently set to STRING.
  for (TableCell cell : row.getF()) {
    String columnName = cell.keySet().iterator().next();
    TableFieldSchema fieldSchema =
        new TableFieldSchema().setName(checkHeaderName(columnName)).setType("STRING");
    fieldSchemas.add(fieldSchema);
  }

  tableSchema.setFields(fieldSchemas);
  return tableSchema;
}
 
Example 6
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a schema from the row carried as the value of {@code destination}.
 *
 * <p>Each cell of the row contributes one {@code STRING} field named after the first key of
 * that cell. The name is used as-is.
 *
 * @param destination pair whose value is the row the schema is derived from
 * @return schema with one STRING field per cell of the row
 */
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  final TableRow sourceRow = destination.getValue();
  final TableSchema result = new TableSchema();
  final List<TableFieldSchema> columns = new ArrayList<>();

  // Currently every BigQuery column is typed as STRING.
  // TODO: header names are deliberately not passed through checkHeaderName here; it is unclear
  // why other callers do so. Revisit if that normalization is added back.
  for (Map<String, Object> cell : sourceRow.getF()) {
    final String header = cell.keySet().iterator().next();
    columns.add(new TableFieldSchema().setName(header).setType("STRING"));
  }

  result.setFields(columns);
  return result;
}
 
Example 7
Source File: KVLongStringToKettleRowFn.java    From kettle-beam with Apache License 2.0 6 votes vote down vote up
/**
 * Emits a Kettle row built from one {@code KV<Long,String>} input element.
 *
 * <p>The key goes into field 0 and the value into field 1. On any exception the error counter is
 * incremented, the failure is logged, and a {@link RuntimeException} wrapping it is thrown.
 *
 * @param processContext supplies the input element and receives the converted row
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    final KV<Long,String> entry = processContext.element();
    inputCounter.inc();

    final int fieldCount = rowMeta.size();
    final Object[] rowData = RowDataUtil.allocateRowData( fieldCount );
    rowData[ 0 ] = entry.getKey();
    rowData[ 1 ] = entry.getValue();

    final KettleRow kettleRow = new KettleRow( rowData );
    processContext.output( kettleRow );
    writtenCounter.inc();
  } catch ( Exception failure ) {
    numErrors.inc();
    LOG.error( "Error in KV<Long,String> to Kettle Row conversion function", failure );
    throw new RuntimeException( "Error in KV<Long,String> to Kettle Row conversion function", failure );
  }
}
 
Example 8
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a schema from the row stored as the value of {@code destination}.
 *
 * <p>The first key of every cell becomes the unmodified name of a {@code STRING} field.
 *
 * @param destination pair whose value is the row to inspect
 * @return schema containing one STRING field per cell
 */
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  TableRow row = destination.getValue();
  TableSchema tableSchema = new TableSchema();
  List<TableFieldSchema> fieldSchemas = new ArrayList<>();

  // All BigQuery data types are currently set to STRING.
  // TODO: checkHeaderName is intentionally not applied to the header here, unlike elsewhere;
  // decide whether it should be added back.
  for (TableCell cell : row.getF()) {
    String header = cell.keySet().iterator().next();
    TableFieldSchema fieldSchema = new TableFieldSchema().setName(header).setType("STRING");
    fieldSchemas.add(fieldSchema);
  }

  tableSchema.setFields(fieldSchemas);
  return tableSchema;
}
 
Example 9
Source File: KVStringStringToHopRowFn.java    From hop with Apache License 2.0 6 votes vote down vote up
/**
 * Converts one {@code KV<String,String>} input element into a Hop row and emits it.
 *
 * <p>The key is written to field 0 and the value to field 1 of a row allocated with
 * {@code rowMeta.size()} slots. {@code inputCounter} is incremented once the element has been
 * read and {@code writtenCounter} once the row has been emitted.
 *
 * @param processContext supplies the input element and receives the converted row
 * @throws RuntimeException wrapping any exception raised during the conversion; the failure is
 *     also counted in {@code numErrors} and logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {

    KV<String,String> kv = processContext.element();
    inputCounter.inc();

    Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() );
    outputRow[ 0 ] = kv.getKey(); // String
    outputRow[ 1 ] = kv.getValue(); // String

    processContext.output( new HopRow( outputRow ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    // The message names the element type actually handled here: a String key, not a Long one.
    String message = "Error in KV<String,String> to Hop Row conversion function";
    LOG.error( message, e );
    throw new RuntimeException( message, e );
  }
}
 
Example 10
Source File: GroupByKeyAndWindowDoFnTransformTest.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code result} has the same key as {@code expected} and the same values,
 * ignoring the order in which the values appear.
 *
 * @param expected the expected key and values
 * @param result the key and values actually produced
 */
private void checkOutput(final KV<String, List<String>> expected, final KV<String, Iterable<String>> result) {

    // Keys must match exactly.
    assertEquals(expected.getKey(), result.getKey());

    // Both sides are copied and sorted so that only the contents are compared, not the order.
    final List<String> sortedExpected = new ArrayList<>(expected.getValue());
    Collections.sort(sortedExpected);

    final List<String> sortedActual = new ArrayList<>();
    for (final String item : result.getValue()) {
      sortedActual.add(item);
    }
    Collections.sort(sortedActual);

    assertEquals(sortedExpected, sortedActual);
  }
 
Example 11
Source File: PubsubMessageToTableRow.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Converts the message of a destination/message pair into a {@link TableRow} suitable for
 * {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}.
 *
 * <p>The {@code format} field is created through {@code createFormat()} the first time it is
 * found to be {@code null}. The table id passed to it is built from the project, dataset and
 * table ids of the destination's table reference.
 *
 * @param kv pair of the target table destination and the message to convert
 * @return the message content as a table row
 */
public TableRow kvToTableRow(KV<TableDestination, PubsubMessage> kv) {
  if (format == null) {
    format = createFormat();
  }

  final PubsubMessage pubsubMessage = kv.getValue();
  final TableReference tableRef = kv.getKey().getTableReference();
  final TableId destinationTable =
      TableId.of(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId());

  return Json.asTableRow(
      format.apply(
          destinationTable, pubsubMessage.getAttributeMap(), pubsubMessage.getPayload()));
}
 
Example 12
Source File: TextRowToMutation.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one CSV line into a mutation for the table named by the element key and emits it.
 *
 * <p>The element value is parsed with a CSV format assembled from the configured delimiter,
 * quote, escape, null-string and trailing-delimiter settings. Input that yields no record is
 * skipped silently; input that yields more than one record is rejected.
 *
 * @param c supplies the {@code KV<tableName, csvLine>} element and the side inputs, and receives
 *     the resulting mutation
 * @throws IOException if reading the CSV text or closing the parser fails
 * @throws RuntimeException if the line parses into more than one record, or if
 *     {@code parseRow} rejects the record with an {@link IllegalArgumentException}
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  KV<String, String> kv = c.element();
  String tableName = kv.getKey();
  Ddl ddl = c.sideInput(ddlView);
  Map<String, List<TableManifest.Column>> tableColumnsMap = c.sideInput(tableColumnsView);
  Table table = ddl.table(tableName);
  CSVFormat csvFormat =
      CSVFormat.newFormat(columnDelimiter.get())
          .withQuote(fieldQualifier.get())
          .withIgnoreEmptyLines(true)
          .withTrailingDelimiter(trailingDelimiter.get())
          .withEscape(escape.get())
          .withNullString(nullString.get());

  // The input string is one line, but CSVParser can process multiple lines, so exactly one
  // record is expected. The parser and reader are closed as soon as the records are read.
  List<CSVRecord> list;
  try (Reader in = new StringReader(kv.getValue());
      CSVParser parser = new CSVParser(in, csvFormat)) {
    list = parser.getRecords();
  }

  if (list.isEmpty()) {
    return;
  }
  if (list.size() > 1) {
    throw new RuntimeException("Unable to parse this row: " + c.element());
  }
  CSVRecord row = list.get(0);
  writeBuilder = Mutation.newInsertOrUpdateBuilder(table.name());
  try {
    c.output(parseRow(writeBuilder, row, table, tableColumnsMap.get(tableName)));
  } catch (IllegalArgumentException e) {
    throw new RuntimeException(
        String.format("Error to parseRow. row: %s, table: %s", row, table), e);
  }
}
 
Example 13
Source File: CSVContentProcessorDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Emits one keyed batch table per offset successfully claimed from the restriction tracker.
 *
 * <p>The element is a pair of a file name and the lines of that file. The first line is split
 * on commas to produce the headers; the remaining lines are the data rows. For each claimed
 * offset {@code i}, a range of data rows is selected via {@code createStartEnd}, turned into a
 * table with {@code Util.createDLPTable}, and emitted under the key {@code <fileName>_<i>} if
 * the table contains at least one row.
 *
 * @param c supplies the element and receives the output pairs
 * @param tracker restriction tracker whose offsets are claimed one at a time
 */
@ProcessElement
public void processElement(ProcessContext c, OffsetRangeTracker tracker) {
  // Start at the beginning of the current restriction and stop as soon as a claim is refused.
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    String fileName = c.element().getKey();
    String key = String.format("%s_%d", fileName, i);
    // Data rows are every line after the first one.
    List<String> rows = c.element().getValue().stream().skip(1).collect(Collectors.toList());
    // The first line holds the comma-separated header names.
    List<FieldId> headers =
        Arrays.stream(c.element().getValue().get(0).split(","))
            .map(header -> FieldId.newBuilder().setName(header).build())
            .collect(Collectors.toList());
    // NOTE(review): createStartEnd is not visible here; it presumably maps the claimed offset
    // to a 1-based, inclusive line range within rows -- confirm against its definition.
    KV<Integer, Integer> lineRange = createStartEnd(rows.size(), i);
    int startOfLine = lineRange.getKey();
    int endOfLine = lineRange.getValue();

    List<String> lines = new ArrayList<>();

    // Copies rows[startOfLine - 1 .. endOfLine - 1] into the batch.
    for (int index = startOfLine - 1; index < endOfLine; index++) {

      lines.add(rows.get(index));
    }
    Table batchData = Util.createDLPTable(headers, lines);

    // Empty batches are not emitted.
    if (batchData.getRowsCount() > 0) {
      LOG.info(
          "Current Restriction From: {}, Current Restriction To: {}, StartofLine: {}, End Of Line {}, BatchData {}",
          tracker.currentRestriction().getFrom(),
          tracker.currentRestriction().getTo(),
          startOfLine,
          endOfLine,
          batchData.getRowsCount());
      c.output(KV.of(key, batchData));
      lines.clear();
    }
  }
}
 
Example 14
Source File: BeamWordCount.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Formats a key/count pair as {@code "<key>: <count>"}.
 *
 * @param input pair to format
 * @return the key and the value joined by a colon and a space
 */
@Override
public String apply(final KV<String, Long> input) {
  final String word = input.getKey();
  final Long count = input.getValue();
  return word + ": " + count;
}
 
Example 15
Source File: WordCount.java    From deployment-examples with MIT License 4 votes vote down vote up
/**
 * Renders a key/count pair as text in the form {@code "<key>: <count>"}.
 *
 * @param input pair to render
 * @return the key, a colon and a space, then the value
 */
@Override
public String apply(KV<String, Long> input) {
  return new StringBuilder()
      .append(input.getKey())
      .append(": ")
      .append(input.getValue())
      .toString();
}
 
Example 16
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the row carried as the value of the given pair; the key is ignored.
 *
 * @param input pair of table id and row
 * @return the value of {@code input}
 */
@Override
public TableRow getTableRow(KV<TableId, TableRow> input) {
  final TableRow row = input.getValue();
  return row;
}
 
Example 17
Source File: ErrorConverters.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@code PubsubMessage} from a payload/attributes pair.
 *
 * <p>The key is encoded with {@code payloadCoder()} to form the message payload, and the value
 * is used as the message attributes.
 *
 * @param kv pair of the payload object and the attribute map
 * @return a message holding the encoded key and the given attributes
 */
@Override
public PubsubMessage apply(KV<T, Map<String, String>> kv) {
  final byte[] payload = encode(payloadCoder(), kv.getKey());
  final Map<String, String> attributes = kv.getValue();
  return new PubsubMessage(payload, attributes);
}
 
Example 18
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the table row from the given pair, disregarding its key.
 *
 * @param input pair of table id and row
 * @return the row stored as the value of {@code input}
 */
@Override
public TableRow getTableRow(KV<TableId, TableRow> input) {
  final TableRow tableRow = input.getValue();
  return tableRow;
}
 
Example 19
Source File: AssemblerFn.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Assembles one output row from a joined element of the form
 * {@code KV<key, KV<leftValue, rightValue>>} and emits it.
 *
 * <p>The output row is laid out as four consecutive sections: left values, left key, right key,
 * right values. A section whose governing side reports {@code allNull()} is left untouched and
 * only its slots are skipped. Both key sections are filled from the same key row and use
 * {@code leftKRowMeta} for their width.
 *
 * @param processContext supplies the joined element and receives the assembled row
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {

  try {

    final KV<HopRow, KV<HopRow, HopRow>> joined = processContext.element();
    final KV<HopRow, HopRow> sides = joined.getValue();

    final HopRow keyRow = joined.getKey();
    final HopRow leftRow = sides.getKey();
    final HopRow rightRow = sides.getValue();

    final Object[] assembled = RowDataUtil.allocateRowData( outputRowMeta.size() );
    int position = 0;

    // Section 1: the left values.
    if ( !leftRow.allNull() ) {
      for ( int i = 0; i < leftVRowMeta.size(); i++ ) {
        assembled[ position ] = leftRow.getRow()[ i ];
        position++;
      }
    } else {
      position += leftVRowMeta.size();
    }

    // Section 2: the left key, present only when the left side has values.
    if ( !leftRow.allNull() ) {
      for ( int i = 0; i < leftKRowMeta.size(); i++ ) {
        assembled[ position ] = keyRow.getRow()[ i ];
        position++;
      }
    } else {
      position += leftKRowMeta.size();
    }

    // Section 3: the right key, present only when the right side has values.
    if ( !rightRow.allNull() ) {
      for ( int i = 0; i < leftKRowMeta.size(); i++ ) {
        assembled[ position ] = keyRow.getRow()[ i ];
        position++;
      }
    } else {
      position += leftKRowMeta.size();
    }

    // Section 4: the right values.
    if ( !rightRow.allNull() ) {
      for ( int i = 0; i < rightVRowMeta.size(); i++ ) {
        assembled[ position ] = rightRow.getRow()[ i ];
        position++;
      }
    } else {
      position += rightVRowMeta.size();
    }

    processContext.output( new HopRow( assembled ) );
    writtenCounter.inc();

  } catch(Exception assemblyFailure) {
    errorCounter.inc();
    LOG.error( "Error assembling rows", assemblyFailure);
    throw new RuntimeException( "Error assembling output KV<row, KV<row, row>>", assemblyFailure );
  }
}
 
Example 20
Source File: WordCount.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Produces the text {@code "<key>: <count>"} for a key/count pair.
 *
 * @param input pair to format
 * @return the formatted pair
 */
@Override
public String apply(KV<String, Long> input) {
  return String.format("%s: %s", input.getKey(), input.getValue());
}