Java Code Examples for org.apache.beam.sdk.values.KV#of()

The following examples show how to use org.apache.beam.sdk.values.KV#of(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to reach its original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: CSVStreamingPipelineTest.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the initial restriction returned by {@code getInitialRestriction} for a
 * {@code CSVContentProcessorDoFn} constructed with a static value of 2.
 *
 * <p>A 4-line input is expected to produce a range from 1 to 3, and a 6-line input a range
 * from 1 to 4.
 */
@Test
public void testCSVStreamingInitialRestriction() {
  CSVContentProcessorDoFn csv =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));

  KV<String, List<String>> fourLines =
      KV.of("FileName", Arrays.asList("line1", "line2", "line3", "line4"));
  KV<String, List<String>> sixLines =
      KV.of("FileName", Arrays.asList("line1", "line2", "line3", "line4", "line5", "line6"));

  // assertEquals takes (expected, actual); keeping that order makes a failure message
  // report the expected and actual values the right way round.
  OffsetRange rangeResult1 = csv.getInitialRestriction(fourLines);
  assertEquals(1L, rangeResult1.getFrom());
  assertEquals(3L, rangeResult1.getTo());

  OffsetRange rangeResult2 = csv.getInitialRestriction(sixLines);
  assertEquals(1L, rangeResult2.getFrom());
  assertEquals(4L, rangeResult2.getTo());
}
 
Example 2
Source File: MultinomialLogisticRegression.java    From incubator-nemo with Apache License 2.0 6 votes vote down vote up
/**
 * Method for parsing lines of inputs.
 *
 * <p>The line is trimmed and split on runs of whitespace and on {@code ':'}. The first token is
 * parsed as the integer output; the following tokens are read in pairs, the first of each pair
 * as an integer index (stored minus one) and the second as a double value.
 *
 * @param input input line.
 * @return the parsed key-value pair, or {@code null} when the trimmed line is empty or starts
 *     with {@code '#'}.
 */
private KV<Integer, Pair<ArrayList<Integer>, ArrayList<Double>>> parseLine(final String input) {
  final String text = input.trim();
  if (text.isEmpty() || text.startsWith("#")) { // newline or comments
    return null;
  }

  final String[] split = text.split("\\s+|:");
  final Integer output = Integer.parseInt(split[0]);

  final int pairCount = split.length / 2;
  final ArrayList<Integer> indices = new ArrayList<>(pairCount);
  final ArrayList<Double> data = new ArrayList<>(pairCount);
  // A primitive counter avoids boxing/unboxing an Integer on every iteration; elements are
  // appended in order, which is equivalent to inserting at position i.
  for (int i = 0; i < pairCount; ++i) {
    indices.add(Integer.parseInt(split[2 * i + 1]) - 1);
    data.add(Double.parseDouble(split[2 * i + 2]));
  }

  return KV.of(output, Pair.of(indices, data));
}
 
Example 3
Source File: DataGeneratorReader.java    From scotty-window-processor with Apache License 2.0 6 votes vote down vote up
/**
 * Tries to generate the next element.
 *
 * <p>With a throughput limit of 0 an element is always generated. Otherwise an element is only
 * generated while the counter is below the limit and the current time is less than 1000 ms
 * after {@code lastTime}; once more than 1000 ms have passed, the counter and {@code lastTime}
 * are reset and this call still returns {@code false}.
 *
 * @return {@code true} if a new current element and timestamp were set, {@code false} otherwise
 */
@Override
public boolean advance() throws IOException {
    if (throughputLimit != 0) {
        // Generate with limit
        now = System.currentTimeMillis();
        final boolean mayEmit = this.counter < this.throughputLimit && now < this.lastTime + 1000;
        if (!mayEmit) {
            // Start a fresh counting window only after the previous one has fully elapsed.
            if (now > this.lastTime + 1000) {
                lastTime = now;
                counter = 0;
            }
            return false;
        }
    }

    // Shared emit path for both the limited and the unlimited case.
    this.counter++;
    this.current = KV.of(key, random.nextInt());
    this.currentTimestamp = this.source.timestampFn.apply(current.getValue());
    return true;
}
 
Example 4
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the destination for an element: a table name of the form
 * {@code project:dataset.table} paired with the element's {@code TableRow}.
 *
 * <p>The table part comes from the element's key with every character other than ASCII letters
 * and digits removed.
 *
 * @param element the windowed key/row pair being written
 * @return the table name as key and the element's row as value
 */
@Override
public KV<String, TableRow> getDestination(ValueInSingleWindow<KV<String, TableRow>> element) {
  final KV<String, TableRow> keyedRow = element.getValue();
  // Strip the file name to only the letters and numbers so that it is a valid BQ table id.
  // This must happen before formatting: applying the regex to the formatted string would also
  // delete the ':' and '.' separators and any punctuation in the project and dataset names.
  final String tableId = keyedRow.getKey().replaceAll("[^a-zA-Z0-9]", "");
  final String tableName = String.format("%s:%s.%s", projectId.get(), datasetName.get(), tableId);
  LOG.debug("Table Name {}", tableName);
  return KV.of(tableName, keyedRow.getValue());
}
 
Example 5
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Pairs the table id derived from {@code input} with the table row derived from {@code input}.
 *
 * @param input the row to map
 * @return the result of {@code getTableId(input)} as key and {@code getTableRow(input)} as value
 */
@Override
public KV<TableId, TableRow> getOutputObject(TableRow input) {
  // Arguments are evaluated left to right, so the table id is still resolved before the row.
  return KV.of(getTableId(input), getTableRow(input));
}
 
Example 6
Source File: CSVStreamingPipelineTest.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that splitting the range (2, 5) for a 6-line input, on a DoFn constructed with a
 * static value of 2, emits exactly three {@code OffsetRange} outputs.
 */
@Test
public void testSplitRestriction() {
  CSVContentProcessorDoFn doFn =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));
  OffsetRange restriction = new OffsetRange(2, 5);
  DoFn.OutputReceiver receiver = mock(DoFn.OutputReceiver.class);

  KV<String, List<String>> fileContents =
      KV.of("FileName", Arrays.asList("line1", "line2", "line3", "line4", "line5", "line6"));

  doFn.splitRestriction(fileContents, restriction, receiver);

  // One output call is expected per produced sub-range.
  verify(receiver, times(3)).output(any(OffsetRange.class));
}
 
Example 7
Source File: KettleKeyValueFn.java    From kettle-beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the incoming row into a key row and a value row and outputs them as a {@code KV}.
 *
 * <p>Fields are picked from the input row at the positions listed in {@code keyIndexes} and
 * {@code valueIndexes}. Any exception is counted, logged and rethrown as a
 * {@link RuntimeException} with the original exception as its cause.
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    // Fetch the element, count it as read, then unwrap its field array.
    final KettleRow source = processContext.element();
    readCounter.inc();
    final Object[] fields = source.getRow();

    // Key part: one slot per configured key index.
    final Object[] keyPart = RowDataUtil.allocateRowData( keyIndexes.length );
    int k = 0;
    while ( k < keyIndexes.length ) {
      keyPart[k] = fields[ keyIndexes[k] ];
      k++;
    }

    // Value part: one slot per configured value index.
    final Object[] valuePart = RowDataUtil.allocateRowData( valueIndexes.length );
    int v = 0;
    while ( v < valueIndexes.length ) {
      valuePart[v] = fields[ valueIndexes[v] ];
      v++;
    }

    final KettleRow keyKettleRow = new KettleRow( keyPart );
    final KettleRow valueKettleRow = new KettleRow( valuePart );
    processContext.output( KV.of( keyKettleRow, valueKettleRow ) );
  } catch ( Exception e ) {
    errorCounter.inc();
    LOG.error("Error splitting row into key and value", e);
    throw new RuntimeException( "Unable to split row into key and value", e );
  }
}
 
Example 8
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the destination for an element: a table name built as
 * {@code project:dataset.key} paired with the element's {@code TableRow}.
 *
 * @param element the windowed key/row pair being written
 * @return the formatted table name as key and the element's row as value
 */
@Override
public KV<String, TableRow> getDestination(ValueInSingleWindow<KV<String, TableRow>> element) {
  final KV<String, TableRow> keyedRow = element.getValue();
  final String tableName =
      String.format("%s:%s.%s", projectId.get(), datasetName.get(), keyedRow.getKey());
  LOG.debug("Table Name {}", tableName);
  return KV.of(tableName, keyedRow.getValue());
}
 
Example 9
Source File: HopKeyValueFn.java    From hop with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the incoming row into a key row and a value row and outputs them as a {@code KV}.
 *
 * <p>Fields are picked from the input row at the positions listed in {@code keyIndexes} and
 * {@code valueIndexes}. Any exception is counted, logged and rethrown as a
 * {@link RuntimeException} with the original exception as its cause.
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    // Fetch the element, count it as read, then unwrap its field array.
    final HopRow source = processContext.element();
    readCounter.inc();
    final Object[] fields = source.getRow();

    // Key part: one slot per configured key index.
    final Object[] keyPart = RowDataUtil.allocateRowData( keyIndexes.length );
    int k = 0;
    while ( k < keyIndexes.length ) {
      keyPart[k] = fields[ keyIndexes[k] ];
      k++;
    }

    // Value part: one slot per configured value index.
    final Object[] valuePart = RowDataUtil.allocateRowData( valueIndexes.length );
    int v = 0;
    while ( v < valueIndexes.length ) {
      valuePart[v] = fields[ valueIndexes[v] ];
      v++;
    }

    final HopRow keyHopRow = new HopRow( keyPart );
    final HopRow valueHopRow = new HopRow( valuePart );
    processContext.output( KV.of( keyHopRow, valueHopRow ) );
  } catch ( Exception e ) {
    errorCounter.inc();
    LOG.error("Error splitting row into key and value", e);
    throw new RuntimeException( "Unable to split row into key and value", e );
  }
}
 
Example 10
Source File: DataGeneratorReader.java    From scotty-window-processor with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader for {@code source}, optionally restoring state from a checkpoint.
 *
 * @param throughputLimit value stored as this reader's throughput limit
 * @param source the source this reader belongs to
 * @param mark checkpoint to restore the current element and start time from, or {@code null}
 *     to begin with a freshly generated element
 */
public DataGeneratorReader(int throughputLimit, DataGeneratorSource source, Checkpoint mark) {
    this.throughputLimit = throughputLimit;
    this.source = source;
    if (mark != null) {
        // Resume from the last element and start time recorded in the checkpoint.
        this.current = KV.of(mark.getLastEmittedKey(), mark.getLastEmittedValue());
        this.firstStarted = mark.getStartTime();
    } else {
        // Because we have not emitted an element yet, and start() calls advance, we need to
        // "un-advance" so that start() produces the correct output.
        this.current = KV.of(key, random.nextInt());
    }
}
 
Example 11
Source File: Max.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Combines two partial aggregates by keeping the larger of their values.
 *
 * @return a pair with the key of {@code partialAggregate1} and the maximum of both values
 */
@Override
public KV<Integer, Integer> combine(KV<Integer, Integer> partialAggregate1, KV<Integer, Integer> partialAggregate2) {
    final Integer resultKey = partialAggregate1.getKey();
    final int larger = Math.max(partialAggregate1.getValue(), partialAggregate2.getValue());
    return KV.of(resultKey, larger);
}
 
Example 12
Source File: TestUtils.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a row-mutation pair for {@code key} with an empty, mutable list of mutations.
 *
 * @param key row key, converted with {@code toByteString}
 * @return the converted key paired with a new empty {@link ArrayList}
 */
static KV<ByteString, Iterable<Mutation>> createBigtableRowMutations(String key) {
  final List<Mutation> noMutations = new ArrayList<>();
  final ByteString rowKey = toByteString(key);
  return KV.of(rowKey, noMutations);
}
 
Example 13
Source File: DynamicJdbcIOTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Maps the current row of {@code resultSet} to a pair of its {@code "id"} and {@code "name"}
 * columns.
 *
 * @param resultSet result set positioned on the row to read
 * @return the {@code id} column as key and the {@code name} column as value
 * @throws Exception if reading either column fails
 */
@Override
public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
  final int id = resultSet.getInt("id");
  final String name = resultSet.getString("name");
  return KV.of(id, name);
}
 
Example 14
Source File: Count.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Removes a previously added aggregate from the current one by subtracting its value.
 *
 * @return a pair with the key of {@code currentAggregate} and the difference of the two values
 */
@Override
public KV<Integer, Integer> invert(KV<Integer, Integer> currentAggregate, KV<Integer, Integer> toRemove) {
    final Integer resultKey = currentAggregate.getKey();
    final int remaining = currentAggregate.getValue() - toRemove.getValue();
    return KV.of(resultKey, remaining);
}
 
Example 15
Source File: Sum.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Removes a previously added aggregate from the current sum by subtracting its value.
 *
 * @return a pair with the key of {@code currentAggregate} and the difference of the two values
 */
@Override
public KV<Integer, Integer> invert( KV<Integer, Integer> currentAggregate,  KV<Integer, Integer> toRemove) {
    final Integer resultKey = currentAggregate.getKey();
    final int reducedSum = currentAggregate.getValue() - toRemove.getValue();
    return KV.of(resultKey, reducedSum);
}
 
Example 16
Source File: Mean.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Combines two partial mean aggregates by adding their {@code sum} and {@code count} fields.
 *
 * @return a pair with the key of {@code partialAggregate1} and a new {@code Pair} holding the
 *     combined sum and count
 */
@Override
public KV<Integer, Pair> combine(KV<Integer, Pair> partialAggregate1, KV<Integer, Pair> partialAggregate2) {
    final Integer resultKey = partialAggregate1.getKey();
    final Pair left = partialAggregate1.getValue();
    final Pair right = partialAggregate2.getValue();
    final Pair merged = new Pair(left.sum + right.sum, left.count + right.count);
    return KV.of(resultKey, merged);
}
 
Example 17
Source File: KafkaToBigQueryTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the {@link KafkaToBigQuery} pipeline end-to-end.
 *
 * <p>Two inputs are pushed through {@code MessageToTableRow}: a well-formed JSON message, which
 * must arrive on {@code TRANSFORM_OUT} with both dead-letter outputs empty, and a malformed
 * message, which must arrive on {@code UDF_DEADLETTER_OUT} with the other two outputs empty.
 *
 * <p>NOTE(review): both scenarios are applied to the same {@code pipeline} field, reuse the
 * transform name "ConvertMessageToTableRow", and {@code pipeline.run()} is invoked twice —
 * confirm this is intended for the test runner in use.
 */
@Test
public void testKafkaToBigQueryE2E() throws Exception {
  // Test input
  final String key = "{\"id\": \"1001\"}";
  final String badKey = "{\"id\": \"1002\"}";
  final String payload = "{\"ticker\": \"GOOGL\", \"price\": 1006.94}";
  // Deliberately malformed: the JSON object is never closed.
  final String badPayload = "{\"tickets\": \"AMZ\", \"proctor\": 007";
  final KV<String, String> message = KV.of(key, payload);
  final KV<String, String> badMessage = KV.of(badKey, badPayload);

  // NOTE(review): timestamp is not referenced again within this method.
  final Instant timestamp =
          new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant();

  // Coder for failsafe elements wrapping a String/String KV with a String payload.
  final FailsafeElementCoder<KV<String, String>, String> coder =
          FailsafeElementCoder.of(
                  KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), StringUtf8Coder.of());

  // Register the coder on the pipeline for its encoded type.
  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  KafkaToBigQuery.Options options =
          PipelineOptionsFactory.create().as(KafkaToBigQuery.Options.class);

  // Point the options at the JavaScript transform file and the function named "transform".
  options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH);
  options.setJavascriptTextTransformFunctionName("transform");

  // Build pipeline
  PCollectionTuple transformOut =
          pipeline
                  .apply(
                          "CreateInput",
                          Create.of(message)
                                  .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
                  .apply("ConvertMessageToTableRow", new MessageToTableRow(options));

  // Assert: nothing is dead-lettered and the first output row carries the payload's fields.
  PAssert.that(transformOut.get(KafkaToBigQuery.UDF_DEADLETTER_OUT)).empty();
  PAssert.that(transformOut.get(KafkaToBigQuery.TRANSFORM_DEADLETTER_OUT)).empty();
  PAssert.that(transformOut.get(KafkaToBigQuery.TRANSFORM_OUT))
          .satisfies(
                  collection -> {
                    TableRow result = collection.iterator().next();
                    assertThat(result.get("ticker"), is(equalTo("GOOGL")));
                    assertThat(result.get("price"), is(equalTo(1006.94)));
                    return null;
                  });

  // Execute pipeline
  pipeline.run();

  // Build pipeline with malformed payload
  PCollectionTuple badTransformOut =
          pipeline
                  .apply(
                          "CreateBadInput",
                          Create.of(badMessage)
                                  .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
                  .apply("ConvertMessageToTableRow", new MessageToTableRow(options));

  // Assert: the malformed message lands on the UDF dead-letter output, with the original KV as
  // its original payload and the raw bad payload string as its payload.
  PAssert.that(badTransformOut.get(KafkaToBigQuery.UDF_DEADLETTER_OUT))
          .satisfies(
                  collection -> {
                    FailsafeElement badResult = collection.iterator().next();
                    assertThat(badResult.getOriginalPayload(), is(equalTo(badMessage)));
                    assertThat(badResult.getPayload(), is(equalTo(badPayload)));
                    return null;
                  });
  PAssert.that(badTransformOut.get(KafkaToBigQuery.TRANSFORM_DEADLETTER_OUT)).empty();
  PAssert.that(badTransformOut.get(KafkaToBigQuery.TRANSFORM_OUT)).empty();


  // Execute pipeline
  pipeline.run();
}
 
Example 18
Source File: Sum.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Combines two partial sums by adding their values.
 *
 * @return a pair with the key of {@code partialAggregate1} and the sum of both values
 */
@Override
public  KV<Integer, Integer> combine( KV<Integer, Integer> partialAggregate1,  KV<Integer, Integer> partialAggregate2) {
    final Integer resultKey = partialAggregate1.getKey();
    final int total = partialAggregate1.getValue() + partialAggregate2.getValue();
    return KV.of(resultKey, total);
}
 
Example 19
Source File: Quantile.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Lifts an input tuple into a partial aggregate by wrapping its value in a new
 * {@code QuantileTreeMap} created with this function's {@code quantile}.
 *
 * @param inputTuple the keyed input value
 * @return a pair with the input's key and the newly created {@code QuantileTreeMap}
 */
@Override
public KV<Integer, QuantileTreeMap> lift(KV<Integer, Integer> inputTuple) {
    final Integer resultKey = inputTuple.getKey();
    final int sample = Math.toIntExact(inputTuple.getValue());
    return KV.of(resultKey, new QuantileTreeMap(sample, quantile));
}
 
Example 20
Source File: Quantile.java    From scotty-window-processor with Apache License 2.0 4 votes vote down vote up
/**
 * Copies a partial aggregate by cloning its {@code QuantileTreeMap}; the key is reused as is.
 *
 * @param partialAggregate the aggregate to copy
 * @return a pair with the same key and the result of calling {@code clone()} on the value
 */
@Override
public KV<Integer, QuantileTreeMap> clone(KV<Integer, QuantileTreeMap> partialAggregate) {
    final Integer resultKey = partialAggregate.getKey();
    final QuantileTreeMap copiedValue = partialAggregate.getValue().clone();
    return KV.of(resultKey, copiedValue);
}