Java Code Examples for org.apache.beam.sdk.values.KV#getKey()

The following examples show how to use org.apache.beam.sdk.values.KV#getKey(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KVStringStringToKettleRowFn.java    From kettle-beam with Apache License 2.0 7 votes vote down vote up
/**
 * Converts an incoming {@code KV<String,String>} element into a two-field Kettle row.
 *
 * <p>The key is written to field 0 and the value to field 1 of a row allocated with
 * {@code RowDataUtil.allocateRowData( rowMeta.size() )}. {@code inputCounter} is incremented
 * after the element is read and {@code writtenCounter} after the row has been output.
 *
 * @param processContext context that supplies the element and receives the output row
 * @throws RuntimeException wrapping any exception raised during the conversion; before it is
 *     thrown, {@code numErrors} is incremented and the failure is logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {

    KV<String,String> kv = processContext.element();
    inputCounter.inc();

    Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() );
    outputRow[ 0 ] = kv.getKey(); // String
    outputRow[ 1 ] = kv.getValue(); // String

    processContext.output( new KettleRow( outputRow ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    // The message names the element type actually handled here (KV<String,String>)
    // so that log output points at the right conversion function.
    LOG.error( "Error in KV<String,String> to Kettle Row conversion function", e );
    throw new RuntimeException( "Error in KV<String,String> to Kettle Row conversion function", e );
  }
}
 
Example 2
Source File: KVLongStringToHopRowFn.java    From hop with Apache License 2.0 6 votes vote down vote up
/**
 * Turns a {@code KV<Long,String>} element into a Hop row with the key in field 0 and the
 * value in field 1.
 *
 * <p>{@code inputCounter} is incremented once the element has been read, and
 * {@code writtenCounter} once the row has been handed to the context.
 *
 * @param processContext context that supplies the element and receives the output row
 * @throws RuntimeException wrapping any exception raised while converting; {@code numErrors}
 *     is incremented and the failure is logged first
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    KV<Long,String> pair = processContext.element();
    inputCounter.inc();

    Object[] fields = RowDataUtil.allocateRowData( rowMeta.size() );
    fields[ 0 ] = pair.getKey();
    fields[ 1 ] = pair.getValue();
    HopRow hopRow = new HopRow( fields );

    processContext.output( hopRow );
    writtenCounter.inc();
  } catch ( Exception failure ) {
    numErrors.inc();
    LOG.error( "Error in KV<Long,String> to Hop Row conversion function", failure );
    throw new RuntimeException( "Error in KV<Long,String> to Hop Row conversion function", failure );
  }
}
 
Example 3
Source File: KVStringStringToHopRowFn.java    From hop with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an incoming {@code KV<String,String>} element into a two-field Hop row.
 *
 * <p>The key is written to field 0 and the value to field 1 of a row allocated with
 * {@code RowDataUtil.allocateRowData( rowMeta.size() )}. {@code inputCounter} is incremented
 * after the element is read and {@code writtenCounter} after the row has been output.
 *
 * @param processContext context that supplies the element and receives the output row
 * @throws RuntimeException wrapping any exception raised during the conversion; before it is
 *     thrown, {@code numErrors} is incremented and the failure is logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {

    KV<String,String> kv = processContext.element();
    inputCounter.inc();

    Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() );
    outputRow[ 0 ] = kv.getKey(); // String
    outputRow[ 1 ] = kv.getValue(); // String

    processContext.output( new HopRow( outputRow ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    // The message names the element type actually handled here (KV<String,String>)
    // so that log output points at the right conversion function.
    LOG.error( "Error in KV<String,String> to Hop Row conversion function", e );
    throw new RuntimeException( "Error in KV<String,String> to Hop Row conversion function", e );
  }
}
 
Example 4
Source File: KVLongStringToKettleRowFn.java    From kettle-beam with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a {@code KV<Long,String>} element onto a Kettle row: field 0 receives the key and
 * field 1 the value.
 *
 * <p>Counters: {@code inputCounter} after reading the element, {@code writtenCounter} after
 * emitting the row, {@code numErrors} when anything in between throws.
 *
 * @param processContext context that supplies the element and receives the output row
 * @throws RuntimeException wrapping the original exception after it has been logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    KV<Long,String> entry = processContext.element();
    inputCounter.inc();

    Object[] rowData = RowDataUtil.allocateRowData( rowMeta.size() );
    rowData[ 0 ] = entry.getKey();
    rowData[ 1 ] = entry.getValue();

    KettleRow kettleRow = new KettleRow( rowData );
    processContext.output( kettleRow );
    writtenCounter.inc();
  } catch ( Exception cause ) {
    numErrors.inc();
    LOG.error( "Error in KV<Long,String> to Kettle Row conversion function", cause );
    throw new RuntimeException( "Error in KV<Long,String> to Kettle Row conversion function", cause );
  }
}
 
Example 5
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the table destination for an element, passing the element's key as the first
 * constructor argument and a fixed description string as the second.
 *
 * @param destination key/value pair whose key identifies the target table
 * @return the newly created destination, after its table spec has been logged at debug level
 */
@Override
public TableDestination getTable(KV<String, TableRow> destination) {
  String tableSpec = destination.getKey();
  TableDestination tableDestination =
      new TableDestination(tableSpec, "pii-tokenized output data from dataflow");
  LOG.debug("Table Destination {}", tableDestination.getTableSpec());
  return tableDestination;
}
 
Example 6
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code TableDestination} from the key of the given pair together with a fixed
 * description, logs its table spec at debug level and returns it.
 *
 * @param destination pair whose key is used as the first constructor argument
 * @return the destination built from the key
 */
@Override
public TableDestination getTable(KV<String, TableRow> destination) {
  final String description = "pii-tokenized output data from dataflow";
  TableDestination target = new TableDestination(destination.getKey(), description);
  LOG.debug("Table Destination {}", target.getTableSpec());
  return target;
}
 
Example 7
Source File: BQDestination.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code TableDestination} from the key of the given pair and a fixed description,
 * logs its string form at debug level and returns it.
 *
 * @param destination pair whose key is used as the first constructor argument
 * @return the destination built from the key
 */
@Override
public TableDestination getTable(KV<String, List<String>> destination) {
  String tableSpec = destination.getKey();
  TableDestination result =
      new TableDestination(tableSpec, "pii-tokenized output data from dataflow");
  LOG.debug("Table Destination {}", result.toString());
  return result;
}
 
Example 8
Source File: CSVContentProcessorDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Emits one DLP table per offset successfully claimed from the tracker.
 *
 * <p>For every claimed offset the element's value list is split into a header (entry 0,
 * comma-separated) and data rows (all remaining entries). {@code createStartEnd} supplies a
 * start/end pair for the offset; rows at indices {@code start - 1} up to {@code end - 1} are
 * collected and turned into a table via {@code Util.createDLPTable}. Tables with at least one
 * row are output under the key {@code "<fileName>_<offset>"}.
 *
 * @param c context that supplies the element and receives the output
 * @param tracker restriction tracker; offsets are claimed starting at the restriction's
 *     {@code from} value until {@code tryClaim} returns false
 */
@ProcessElement
public void processElement(ProcessContext c, OffsetRangeTracker tracker) {
  long offset = tracker.currentRestriction().getFrom();
  while (tracker.tryClaim(offset)) {
    String sourceFile = c.element().getKey();
    String outputKey = String.format("%s_%d", sourceFile, offset);

    // Entry 0 is treated as the header line; everything after it is data.
    List<String> dataRows =
        c.element().getValue().stream().skip(1).collect(Collectors.toList());
    String headerLine = c.element().getValue().get(0);
    List<FieldId> headerFields =
        Arrays.stream(headerLine.split(","))
            .map(name -> FieldId.newBuilder().setName(name).build())
            .collect(Collectors.toList());

    KV<Integer, Integer> bounds = createStartEnd(dataRows.size(), offset);
    int firstLine = bounds.getKey();
    int lastLine = bounds.getValue();

    List<String> batchLines = new ArrayList<>();
    for (int rowIndex = firstLine - 1; rowIndex < lastLine; rowIndex++) {
      batchLines.add(dataRows.get(rowIndex));
    }

    Table dlpTable = Util.createDLPTable(headerFields, batchLines);
    if (dlpTable.getRowsCount() > 0) {
      LOG.info(
          "Current Restriction From: {}, Current Restriction To: {}, StartofLine: {}, End Of Line {}, BatchData {}",
          tracker.currentRestriction().getFrom(),
          tracker.currentRestriction().getTo(),
          firstLine,
          lastLine,
          dlpTable.getRowsCount());
      c.output(KV.of(outputKey, dlpTable));
      batchLines.clear();
    }

    ++offset;
  }
}
 
Example 9
Source File: ExportTransform.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a table entry for the input to the accumulator.
 *
 * <p>The entry's name is the input's key; its manifest file is whatever
 * {@code tableManifestFileName} returns for that name. The input's value is not read.
 *
 * @param accumulator list that is extended in place
 * @param input pair whose key is the table name
 * @return the same accumulator instance, now containing the new entry
 */
@Override
public List<Export.Table> addInput(List<Export.Table> accumulator, KV<String, String> input) {
  ExportProtos.Export.Table.Builder builder = ExportProtos.Export.Table.newBuilder();
  String name = input.getKey();
  accumulator.add(
      builder
          .setName(name)
          .setManifestFile(tableManifestFileName(name))
          .build());
  return accumulator;
}
 
Example 10
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the table destination from the key of the supplied pair, attaching a fixed
 * description string, and logs the resulting table spec at debug level.
 *
 * @param destination pair whose key is handed to the {@code TableDestination} constructor
 * @return the destination that was created
 */
@Override
public TableDestination getTable(KV<String, TableRow> destination) {
  String key = destination.getKey();
  TableDestination created =
      new TableDestination(key, "pii-tokenized output data from dataflow");
  LOG.debug("Table Destination {}", created.getTableSpec());
  return created;
}
 
Example 11
Source File: KeyValueToGenericRecordFn.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the records using {@link GenericRecordBuilder}.
 *
 * <p>The element's value becomes the {@code message} field. The {@code attributes} field is a
 * single-entry map from the element's key (or the empty string when the key is null) to the
 * value; when the value is null the map is empty. The {@code timestamp} field is the element
 * timestamp in milliseconds.
 *
 * @param c context that supplies the element and its timestamp and receives the record
 */
@ProcessElement
public void processElement(ProcessContext c) {

  KV<String, String> message = c.element();
  String attributeKey = message.getKey();
  String attributeValue = message.getValue();

  Map<String, String> attributeMap;

  if (attributeValue != null) {
    // A null key is represented by the empty string in the attribute map.
    String effectiveKey = attributeKey != null ? attributeKey : "";
    attributeMap = Collections.singletonMap(effectiveKey, attributeValue);
  } else {
    // Type-safe replacement for the raw Collections.EMPTY_MAP constant.
    attributeMap = Collections.emptyMap();
  }

  c.output(
      new GenericRecordBuilder(SCHEMA)
          .set("message", attributeValue)
          .set("attributes", attributeMap)
          .set("timestamp", c.timestamp().getMillis())
          .build());
}
 
Example 12
Source File: ImportTransform.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the files of one table manifest, choosing the validator by whether the import
 * directory matches {@code GcsPath.GCS_URI}.
 *
 * @param c context that supplies the (table name, manifest) element and is passed through to
 *     the validator
 */
@ProcessElement
public void processElement(ProcessContext c) {
  KV<String, TableManifest> element = c.element();
  String tableName = element.getKey();
  TableManifest tableManifest = element.getValue();

  if (!GcsPath.GCS_URI.matcher(importDirectory.get()).matches()) {
    validateLocalFiles(c, tableName, tableManifest);
    return;
  }
  validateGcsFiles(c, tableName, tableManifest);
}
 
Example 13
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a table destination named {@code "<dataset>.<table>"} from the {@code TableId} key of
 * the given pair, with a fixed description string.
 *
 * @param destination pair whose key supplies the dataset and table names
 * @return the destination for that table
 */
@Override
public TableDestination getTable(KV<TableId, TableRow> destination) {
  TableId id = destination.getKey();
  String qualifiedName = String.format("%s.%s", id.getDataset(), id.getTable());
  return new TableDestination(qualifiedName, "Name of table pulled from datafields");
}
 
Example 14
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the table id carried as the key of the given pair.
 *
 * @param input pair whose key is the table id
 * @return the key of {@code input}
 */
@Override
public TableId getTableId(KV<TableId, TableRow> input) {
  TableId tableId = input.getKey();
  return tableId;
}
 
Example 15
Source File: WordCount.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Formats a pair as {@code "<key>: <value>"}.
 *
 * @param input pair to format
 * @return the key and value joined by a colon and a space
 */
@Override
public String apply(KV<String, Long> input) {
  StringBuilder formatted = new StringBuilder();
  formatted.append(input.getKey());
  formatted.append(": ");
  formatted.append(input.getValue());
  return formatted.toString();
}
 
Example 16
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the table id from the given pair.
 *
 * @param input pair keyed by table id
 * @return the pair's key
 */
@Override
public TableId getTableId(KV<TableId, TableRow> input) {
  final TableId key = input.getKey();
  return key;
}
 
Example 17
Source File: WordCount.java    From deployment-examples with MIT License 4 votes vote down vote up
/**
 * Renders a pair as text in the form {@code "<key>: <value>"}.
 *
 * @param input pair to render
 * @return the rendered line
 */
@Override
public String apply(KV<String, Long> input) {
  String word = input.getKey();
  Long count = input.getValue();
  return word + ": " + count;
}
 
Example 18
Source File: BeamWordCount.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Produces the line {@code "<key>: <value>"} for a pair.
 *
 * @param input pair to convert
 * @return key and value separated by a colon and a space
 */
@Override
public String apply(final KV<String, Long> input) {
  final String keyText = String.valueOf(input.getKey());
  final String valueText = String.valueOf(input.getValue());
  return keyText.concat(": ").concat(valueText);
}
 
Example 19
Source File: AssemblerFn.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Assembles a single output row from an element of the form
 * {@code KV<key, KV<leftValue, rightValue>>}.
 *
 * <p>The output row is filled in four consecutive sections: left values, left key, right key
 * and right values. When the left value row reports {@code allNull()} the first two sections
 * are skipped (their slots are left as allocated); when the right value row reports
 * {@code allNull()} the last two sections are skipped. Both key sections copy from the same
 * key row and use {@code leftKRowMeta} for their width.
 *
 * @param processContext context that supplies the element and receives the assembled row
 * @throws RuntimeException wrapping any exception raised during assembly, after
 *     {@code errorCounter} has been incremented and the failure logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {

  try {

    KV<KettleRow, KV<KettleRow, KettleRow>> joined = processContext.element();
    KV<KettleRow, KettleRow> sides = joined.getValue();

    KettleRow keyRow = joined.getKey();
    KettleRow leftRow = sides.getKey();
    KettleRow rightRow = sides.getValue();

    Object[] assembled = RowDataUtil.allocateRowData( outputRowMeta.size() );
    int target = 0;

    // Section 1: the left values
    //
    if ( !leftRow.allNull() ) {
      for ( int field = 0; field < leftVRowMeta.size(); field++ ) {
        assembled[ target++ ] = leftRow.getRow()[ field ];
      }
    } else {
      target += leftVRowMeta.size();
    }

    // Section 2: the left key, only present when the left values are
    //
    if ( !leftRow.allNull() ) {
      for ( int field = 0; field < leftKRowMeta.size(); field++ ) {
        assembled[ target++ ] = keyRow.getRow()[ field ];
      }
    } else {
      target += leftKRowMeta.size();
    }

    // Section 3: the right key, only present when the right values are
    //
    if ( !rightRow.allNull() ) {
      for ( int field = 0; field < leftKRowMeta.size(); field++ ) {
        assembled[ target++ ] = keyRow.getRow()[ field ];
      }
    } else {
      target += leftKRowMeta.size();
    }

    // Section 4: the right values
    //
    if ( !rightRow.allNull() ) {
      for ( int field = 0; field < rightVRowMeta.size(); field++ ) {
        assembled[ target++ ] = rightRow.getRow()[ field ];
      }
    } else {
      target += rightVRowMeta.size();
    }

    KettleRow result = new KettleRow( assembled );
    processContext.output( result );
    writtenCounter.inc();

  } catch ( Exception failure ) {
    errorCounter.inc();
    LOG.error( "Error assembling rows", failure );
    throw new RuntimeException( "Error assembling output KV<row, KV<row, row>>", failure );
  }
}
 
Example 20
Source File: AssemblerFn.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Builds one Hop output row out of an element shaped as
 * {@code KV<key, KV<leftValue, rightValue>>}.
 *
 * <p>Fields are written in this order: left values, left key, right key, right values. The
 * two left-hand sections are skipped (slots left as allocated) when the left value row reports
 * {@code allNull()}; the two right-hand sections are skipped when the right value row does.
 * Both key sections read from the same key row and take their width from
 * {@code leftKRowMeta}.
 *
 * @param processContext context that supplies the element and receives the assembled row
 * @throws RuntimeException wrapping any exception raised during assembly, after
 *     {@code errorCounter} has been incremented and the failure logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {

  try {

    KV<HopRow, KV<HopRow, HopRow>> joined = processContext.element();
    KV<HopRow, HopRow> valuePair = joined.getValue();

    HopRow keyRow = joined.getKey();
    HopRow left = valuePair.getKey();
    HopRow right = valuePair.getValue();

    Object[] combined = RowDataUtil.allocateRowData( outputRowMeta.size() );
    int position = 0;

    // Left values come first
    //
    if ( !left.allNull() ) {
      for ( int n = 0; n < leftVRowMeta.size(); n++ ) {
        combined[ position++ ] = left.getRow()[ n ];
      }
    } else {
      position += leftVRowMeta.size();
    }

    // Left key follows, skipped together with the left values
    //
    if ( !left.allNull() ) {
      for ( int n = 0; n < leftKRowMeta.size(); n++ ) {
        combined[ position++ ] = keyRow.getRow()[ n ];
      }
    } else {
      position += leftKRowMeta.size();
    }

    // Right key next; no right key is written when the right value is all null
    //
    if ( !right.allNull() ) {
      for ( int n = 0; n < leftKRowMeta.size(); n++ ) {
        combined[ position++ ] = keyRow.getRow()[ n ];
      }
    } else {
      position += leftKRowMeta.size();
    }

    // Right values close the row
    //
    if ( !right.allNull() ) {
      for ( int n = 0; n < rightVRowMeta.size(); n++ ) {
        combined[ position++ ] = right.getRow()[ n ];
      }
    } else {
      position += rightVRowMeta.size();
    }

    HopRow result = new HopRow( combined );
    processContext.output( result );
    writtenCounter.inc();

  } catch ( Exception failure ) {
    errorCounter.inc();
    LOG.error( "Error assembling rows", failure );
    throw new RuntimeException( "Error assembling output KV<row, KV<row, row>>", failure );
  }
}