org.apache.beam.sdk.values.KV Java Examples

The following examples show how to use org.apache.beam.sdk.values.KV, Beam's immutable key/value pair class. Each example is taken from an open-source project; the source file, project, and license are noted above it.
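Before the project samples, here is a minimal, self-contained sketch of the core KV API: KV.of to build a pair, and getKey()/getValue() to read it back. The pipeline, class name, and element values below are illustrative assumptions, not code from any of the projects that follow.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;

public class KvBasics {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    p.apply(Create.of(KV.of("alpha", 1L), KV.of("beta", 2L))) // KV.of builds an immutable pair
        .apply(ParDo.of(
            new DoFn<KV<String, Long>, String>() {
              @ProcessElement
              public void processElement(
                  @Element KV<String, Long> kv, OutputReceiver<String> out) {
                // Read both sides of the pair with getKey() / getValue().
                out.output(kv.getKey() + "=" + kv.getValue());
              }
            }));

    p.run().waitUntilFinish();
  }
}

When coder inference is not sufficient, an explicit KvCoder.of(keyCoder, valueCoder) can be attached to the PCollection, as several of the examples below do.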
Example #1
Source File: KVStringStringToKettleRowFn.java    From kettle-beam with Apache License 2.0
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {

    KV<String,String> kv = processContext.element();
    inputCounter.inc();

    Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() );
    outputRow[ 0 ] = kv.getKey(); // String
    outputRow[ 1 ] = kv.getValue(); // String

    processContext.output( new KettleRow( outputRow ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error in KV<Long,String> to Kettle Row conversion function", e );
    throw new RuntimeException( "Error in KV<Long,String> to Kettle Row conversion function", e );
  }
}
 
Example #2
Source File: ChangelogTableDynamicDestinations.java    From DataflowTemplates with Apache License 2.0
@Override
public TableSchema getSchema(String targetTable) {
  Map<String, KV<Schema, Schema>> schemaMap = this.sideInput(schemaMapView);
  KV<Schema, Schema> keyAndValueSchemas = schemaMap.get(targetTable);

  TableFieldSchema rowSchema = new TableFieldSchema()
      .setName("fullRecord")
      .setType("RECORD")
      .setMode("NULLABLE")   // This field is null for deletions
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getValue()).getFields());

  TableFieldSchema pkSchema = new TableFieldSchema()
      .setName("primaryKey")
      .setType("RECORD")
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getKey()).getFields());

  TableSchema changelogTableSchema = new TableSchema()
      .setFields(Arrays.asList(
          rowSchema,
          pkSchema,
          new TableFieldSchema().setName("operation").setType("STRING"),
          new TableFieldSchema().setName("timestampMs").setType("INT64"),
          new TableFieldSchema().setName("tableName").setType("STRING")));

  return changelogTableSchema;
}
 
Example #3
Source File: MergeStatementBuildingFn.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(
    ProcessContext c,
    @StateId("table_created") ValueState<Boolean> tableCreated) {
  KV<String, KV<Schema, Schema>> tableAndSchemas = c.element();

  // Start by actually fetching whether we created the table or not.
  if (!createdCache) {
    Boolean actuallyCreated = firstNonNull(tableCreated.read(), false);
    createdCache = actuallyCreated;
  }
  // Once we know for sure, issue the create-table action only if the table does not exist yet.
  if (!createdCache) {
    tableCreated.write(true);
    createdCache = true;
    c.output(KV.of(tableAndSchemas.getKey(),
        buildCreateTableAction(tableAndSchemas, projectId, replicaDatasetId)));
    this.tablesCreated.inc();
  }

  c.output(KV.of(tableAndSchemas.getKey(),
      buildMergeStatementAction(
          tableAndSchemas, projectId, changelogDatasetId, replicaDatasetId)));
  this.mergeStatementsIssued.inc();
}
 
Example #4
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0
@GetInitialRestriction
public OffsetRange getInitialRestriction(KV<String, ReadableFile> file) throws IOException {
  long totalBytes = file.getValue().getMetadata().sizeBytes();
  long totalSplit = 0;
  if (totalBytes < BATCH_SIZE) {
    totalSplit = 2;
  } else {
    totalSplit = totalSplit + (totalBytes / BATCH_SIZE);
    long remaining = totalBytes % BATCH_SIZE;
    if (remaining > 0) {
      totalSplit = totalSplit + 2;
    }
  }

  LOG.debug(
      "Total Bytes {} for File {} -Initial Restriction range from 1 to: {}",
      totalBytes,
      file.getKey(),
      totalSplit);
  return new OffsetRange(1, totalSplit);
}
 
Example #5
Source File: SplunkEventWriter.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(
    @Element KV<Integer, SplunkEvent> input,
    OutputReceiver<SplunkWriteError> receiver,
    BoundedWindow window,
    @StateId(BUFFER_STATE_NAME) BagState<SplunkEvent> bufferState,
    @StateId(COUNT_STATE_NAME) ValueState<Long> countState,
    @TimerId(TIME_ID_NAME) Timer timer) throws IOException {

  Long count = MoreObjects.<Long>firstNonNull(countState.read(), 0L);
  SplunkEvent event = input.getValue();
  INPUT_COUNTER.inc();
  bufferState.add(event);
  count += 1;
  countState.write(count);
  timer.offset(Duration.standardSeconds(DEFAULT_FLUSH_DELAY)).setRelative();

  if (count >= batchCount) {

    LOG.info("Flushing batch of {} events", count);
    flush(receiver, bufferState, countState);
  }
}
 
Example #6
Source File: WordCount.java    From incubator-nemo with Apache License 2.0
/**
 * Static method to generate the word count Beam pipeline.
 * @param options options for the pipeline.
 * @param inputFilePath the input file path.
 * @param outputFilePath the output file path.
 * @return the generated pipeline.
 */
static Pipeline generateWordCountPipeline(final PipelineOptions options,
                                                 final String inputFilePath, final String outputFilePath) {
  final Pipeline p = Pipeline.create(options);
  final PCollection<String> result = GenericSourceSink.read(p, inputFilePath)
    .apply(MapElements.<String, KV<String, Long>>via(new SimpleFunction<String, KV<String, Long>>() {
      @Override
      public KV<String, Long> apply(final String line) {
        final String[] words = line.split(" +");
        final String documentId = words[0] + "#" + words[1];
        final Long count = Long.parseLong(words[2]);
        return KV.of(documentId, count);
      }
    }))
    .apply(Sum.longsPerKey())
    .apply(MapElements.<KV<String, Long>, String>via(new SimpleFunction<KV<String, Long>, String>() {
      @Override
      public String apply(final KV<String, Long> kv) {
        return kv.getKey() + ": " + kv.getValue();
      }
    }));
  GenericSourceSink.write(result, outputFilePath);
  return p;
}
 
Example #7
Source File: ExportTransform.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {
  if (Objects.equals(c.element().getKey(), EMPTY_EXPORT_FILE)) {
    return;
  }
  Iterable<String> files = c.element().getValue();
  Iterator<String> it = files.iterator();
  boolean gcs = it.hasNext() && GcsPath.GCS_URI.matcher(it.next()).matches();
  TableManifest proto;
  if (gcs) {
    Iterable<GcsPath> gcsPaths = Iterables.transform(files, s -> GcsPath.fromUri(s));
    proto = buildGcsManifest(c, gcsPaths);
  } else {
    Iterable<Path> paths = Iterables.transform(files, s -> Paths.get(s));
    proto = buildLocalManifest(paths);
  }
  try {
    c.output(KV.of(c.element().getKey(), JsonFormat.printer().print(proto)));
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(e);
  }
}
 
Example #8
Source File: BeamModel.java    From streamingbook with Apache License 2.0
@ProcessElement
public void processElement(@Element KV<String, Integer> kv,
                           @Timestamp Instant timestamp,
                           BoundedWindow window,
                           PaneInfo pane,
                           OutputReceiver<String> output) {
    StringBuilder builder = new StringBuilder(String.format(
        "%s: %s:%-2d %s %-7s index=%d",
        Utils.formatWindow(window), kv.getKey(), kv.getValue(),
        Utils.formatTime(timestamp), pane.getTiming(), pane.getIndex()));
    if (pane.getNonSpeculativeIndex() > -1)
        builder.append(" onTimeIndex=" + pane.getNonSpeculativeIndex());
    if (pane.isFirst())
        builder.append(" isFirst");
    if (pane.isLast())
        builder.append(" isLast");
    output.output(builder.toString());
}
 
Example #9
Source File: KafkaIO.java    From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<KV<K, V>> input) {
  checkArgument(
      getProducerConfig().get(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) != null,
      "withBootstrapServers() is required");
  checkArgument(getTopic() != null, "withTopic() is required");
  checkArgument(getKeySerializer() != null, "withKeySerializer() is required");
  checkArgument(getValueSerializer() != null, "withValueSerializer() is required");

  if (isEOS()) {
    KafkaExactlyOnceSink.ensureEOSSupport();

    // TODO: Verify that the group_id does not have existing state stored on Kafka unless
    //       this is an upgrade. This avoids issues with the simple mistake of reusing a
    //       group_id across multiple runs or across multiple jobs. This is checked when the
    //       sink transform initializes while processing the output, but it might be better
    //       to check here to catch the mistake earlier.

    input.apply(new KafkaExactlyOnceSink<>(this));
  } else {
    input.apply(ParDo.of(new KafkaWriter<>(this)));
  }
  return PDone.in(input.getPipeline());
}
 
Example #10
Source File: KettleRowToKVStringStringFn.java    From kettle-beam with Apache License 2.0
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    KettleRow kettleRow = processContext.element();
    inputCounter.inc();

    String key = rowMeta.getString(kettleRow.getRow(), keyIndex);
    String value = rowMeta.getString(kettleRow.getRow(), valueIndex);

    processContext.output( KV.of( key, value ) );
    writtenCounter.inc();

  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error in KettleRow to KV<String,String> function", e );
    throw new RuntimeException( "Error in KettleRow to KV<String,String> function", e );
  }
}
 
Example #11
Source File: KafkaToBigQuery.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<KV<String, String>, String> failsafeElement = context.element();
  final KV<String, String> message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  // Build the table row
  final TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace());

  // Set the payload, falling back to empty strings when the key or value is missing.
  failedRow.set(
      "payloadString",
      "key: "
          + (message.getKey() == null ? "" : message.getKey())
          + " value: "
          + (message.getValue() == null ? "" : message.getValue()));
  context.output(failedRow);
}
 
Example #12
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    // Currently all BQ data types are set to String.
    fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example #13
Source File: RedisFeatureSink.java    From feast with Apache License 2.0
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  if (getRedisConfig() != null) {
    RedisClient redisClient =
        RedisClient.create(
            RedisURI.create(getRedisConfig().getHost(), getRedisConfig().getPort()));
    try {
      redisClient.connect();
    } catch (RedisConnectionException e) {
      throw new RuntimeException(
          String.format(
              "Failed to connect to Redis at host: '%s' port: '%d'. Please check that your Redis is running and accessible from Feast.",
              getRedisConfig().getHost(), getRedisConfig().getPort()),
          e);
    }
    redisClient.shutdown();
  } else if (getRedisClusterConfig() == null) {
    throw new RuntimeException(
        "Either a RedisConfig or a RedisClusterConfig must be provided to the Redis sink");
  }
  specsView = featureSetSpecs.apply(ParDo.of(new ReferenceToString())).apply(View.asMultimap());
  return featureSetSpecs.apply(Keys.create());
}
 
Example #14
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    // Currently all BQ data types are set to String.
    fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example #15
Source File: StatefulTeamScoreTest.java    From deployment-examples with MIT License
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 */
@Test
public void testScoreUpdatesOneTeam() {

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
 
Example #16
Source File: BigQueryMergerTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);

  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(Duration.standardMinutes(WINDOW_SIZE_MINUTES)));

  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
 
Example #17
Source File: BulkDecompressor.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  ResourceId inputFile = context.element().resourceId();

  // Output a record to the failure file if the file doesn't match a known compression.
  if (!Compression.AUTO.isCompressed(inputFile.toString())) {
    String errorMsg =
        String.format(UNCOMPRESSED_ERROR_MSG, inputFile.toString(), SUPPORTED_COMPRESSIONS);

    context.output(DEADLETTER_TAG, KV.of(inputFile.toString(), errorMsg));
  } else {
    try {
      ResourceId outputFile = decompress(inputFile);
      context.output(outputFile.toString());
    } catch (IOException e) {
      LOG.error(e.getMessage());
      context.output(DEADLETTER_TAG, KV.of(inputFile.toString(), e.getMessage()));
    }
  }
}
 
Example #18
Source File: CSVContentProcessorDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0
@GetInitialRestriction
public OffsetRange getInitialRestriction(KV<String, List<String>> contents) {

  this.numberOfRows = contents.getValue().size() - 1;
  int totalSplit = 0;
  totalSplit = this.numberOfRows / this.batchSize.get().intValue();
  int remaining = this.numberOfRows % this.batchSize.get().intValue();
  if (remaining > 0) {
    totalSplit = totalSplit + 2;

  } else {
    totalSplit = totalSplit + 1;
  }
  LOG.info("Initial Restriction range from 1 to: {}", totalSplit);
  return new OffsetRange(1, totalSplit);
}
 
Example #19
Source File: ExampleEchoPipelineTest.java    From deployment-examples with MIT License
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  try {
    // Our library takes a single command in position 0, which it echoes back in the result.
    SubProcessCommandLineArgs commands = new SubProcessCommandLineArgs();
    Command command = new Command(0, String.valueOf(c.element().getValue()));
    commands.putCommand(command);

    // The SubProcessKernel handles execution of the external process.
    SubProcessKernel kernel = new SubProcessKernel(configuration, binaryName);

    // Run the command and work through the results
    List<String> results = kernel.exec(commands);
    for (String s : results) {
      c.output(KV.of(c.element().getKey(), s));
    }
  } catch (Exception ex) {
    LOG.error("Error processing element ", ex);
    throw ex;
  }
}
 
Example #20
Source File: KeyValueToGenericRecordFnTest.java    From DataflowTemplates with Apache License 2.0
/** Test whether {@link KeyValueToGenericRecordFn} correctly maps the message. */
@Test
@Category(NeedsRunner.class)
public void testKeyValueToGenericRecordFn() throws Exception {

  // Create the test input.
  final String key = "Name";
  final String value = "Generic";
  final KV<String, String> message = KV.of(key, value);

  // Apply the ParDo.
  PCollection<GenericRecord> results =
      pipeline
          .apply(
              "CreateInput",
              Create.of(message)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply("GenericRecordCreation", ParDo.of(new KeyValueToGenericRecordFn()))
          .setCoder(AvroCoder.of(GenericRecord.class, KeyValueToGenericRecordFn.SCHEMA));

  // Assert on the results.
  PAssert.that(results)
      .satisfies(
          collection -> {
            GenericRecord result = collection.iterator().next();
            assertThat(result.get("message").toString(), is(equalTo(value)));
            assertThat(result.get("attributes").toString(), is(equalTo("{Name=Generic}")));
            return null;
          });
  // Run the pipeline.
  pipeline.run();
}
 
Example #21
Source File: TextImportTransformTest.java    From DataflowTemplates with Apache License 2.0
@Test(expected = PipelineExecutionException.class)
public void readImportManifestInvalidTable() throws Exception {
  Path f11 = Files.createTempFile("table1-file", "1");

  Path manifestFile = Files.createTempFile("import-manifest", ".json");
  Charset charset = Charset.forName("UTF-8");
  try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
    // An invalid JSON string (missing the closing "}").
    String jsonString =
        String.format(
            "{\"tables\": ["
                + "{\"table_name\": \"NON_EXIST_TABLE\","
                + "\"file_patterns\":[\"%s\"]}"
                + "]",
            f11.toString());
    writer.write(jsonString, 0, jsonString.length());
  } catch (IOException e) {
    e.printStackTrace();
  }

  ValueProvider<String> importManifest =
      ValueProvider.StaticValueProvider.of(manifestFile.toString());
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());

  PCollection<KV<String, String>> tableAndFiles =
      pipeline
          .apply("Read manifest file", new ReadImportManifest(importManifest))
          .apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));

  pipeline.run();
}
 
Example #22
Source File: KafkaIO.java    From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<V> input) {
  return input
      .apply(
          "Kafka values with default key",
          MapElements.via(
              new SimpleFunction<V, KV<K, V>>() {
                @Override
                public KV<K, V> apply(V element) {
                  return KV.of(null, element);
                }
              }))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
 
Example #23
Source File: BQDestination.java    From dlp-dataflow-deidentification with Apache License 2.0
@Override
public TableSchema getSchema(KV<String, List<String>> destination) {

  TableSchema schema = Util.getSchema(destination.getValue());
  LOG.debug("***Schema {}", schema.toString());
  return schema;
}
 
Example #24
Source File: ImportTransform.java    From DataflowTemplates with Apache License 2.0
public CreateTables(
    SpannerConfig spannerConfig,
    PCollectionView<List<KV<String, String>>> avroSchemasView,
    PCollectionView<Ddl> informationSchemaView,
    ValueProvider<Boolean> earlyIndexCreateFlag) {
  this.spannerConfig = spannerConfig;
  this.avroSchemasView = avroSchemasView;
  this.informationSchemaView = informationSchemaView;
  this.earlyIndexCreateFlag = earlyIndexCreateFlag;
}
 
Example #25
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
@Override
public TableDestination getTable(KV<String, TableRow> destination) {
  TableDestination dest =
      new TableDestination(destination.getKey(), "pii-tokenized output data from dataflow");
  LOG.debug("Table Destination {}", dest.getTableSpec());
  return dest;
}
 
Example #26
Source File: BeamModel.java    From streamingbook with Apache License 2.0
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    return input
        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
               .triggering(AfterWatermark.pastEndOfWindow()
                           .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                           .withLateFirings(AfterPane.elementCountAtLeast(1)))
               .withAllowedLateness(TWO_MINUTES)
               .accumulatingFiredPanes())
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #27
Source File: DynamicJdbcIOTest.java    From DataflowTemplates with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testRead() throws Exception {

  PCollection<KV<Integer, String>> rows =
      pipeline.apply(
          DynamicJdbcIO.<KV<Integer, String>>read()
              .withDataSourceConfiguration(
                  DynamicJdbcIO.DynamicDataSourceConfiguration.create(
                      ValueProvider.StaticValueProvider.of("org.apache.derby.jdbc.ClientDriver"),
                      ValueProvider.StaticValueProvider.of(
                          "jdbc:derby://localhost:" + port + "/target/beam")))
              .withQuery(
                  ValueProvider.StaticValueProvider.of("select name, id from " + readTableName))
              .withRowMapper(new TestRowMapper())
              .withCoder(KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of())));

  PAssert.thatSingleton(rows.apply("Count All", Count.globally()))
      .isEqualTo((long) EXPECTED_ROW_COUNT);

  List<KV<Integer, String>> expectedList =
      IntStream.range(0, EXPECTED_ROW_COUNT)
          .mapToObj(i -> KV.of(i, TEST_ROW_SUFFIX + "-" + i))
          .collect(Collectors.toList());

  PAssert.that(rows).containsInAnyOrder(expectedList);
  pipeline.run();
}
 
Example #28
Source File: BigStreamMapper.java    From DataflowTemplates with Apache License 2.0
@Override
public KV<TableId, TableRow> getOutputObject(TableRow input) {
  TableId tableId = getTableId(input);
  TableRow tableRow = getTableRow(input);

  return KV.of(tableId, tableRow);
}
 
Example #29
Source File: BeamModelTest.java    From streamingbook with Apache License 2.0
Create.TimestampedValues<KV<String, Integer>> createBatch() {
    return Create.timestamped(
        score("TeamX", 5, "12:00:26"),
        score("TeamX", 7, "12:02:26"),
        score("TeamX", 3, "12:03:39"),
        score("TeamX", 4, "12:04:19"),
        score("TeamX", 8, "12:03:06"),
        score("TeamX", 3, "12:06:39"),
        score("TeamX", 9, "12:01:26"),
        score("TeamX", 8, "12:07:26"),
        score("TeamX", 1, "12:07:46"));
}
 
Example #30
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
private List<String> getHeaders(List<KV<String, List<String>>> headerMap, String fileKey) {
  return headerMap.stream()
      .filter(map -> map.getKey().equalsIgnoreCase(fileKey))
      .findFirst()
      .map(e -> e.getValue())
      .orElse(null);
}