org.apache.beam.sdk.values.TypeDescriptors Java Examples

The following examples show how to use org.apache.beam.sdk.values.TypeDescriptors in real pipelines. Each example is taken from an open-source project; the source file and license are noted in its header.
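Most of these examples pass a TypeDescriptor to MapElements.into or FlatMapElements.into, because Java's type erasure prevents Beam from inferring a lambda's output type on its own. A minimal, self-contained sketch (the class name and input data are illustrative, not drawn from the projects below):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class TypeDescriptorsIntro {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Without the into(...) hint, Beam cannot infer KV<String, Integer>
    // from the lambda below because of type erasure.
    PCollection<KV<String, Integer>> wordLengths =
        pipeline
            .apply(Create.of("a", "bb", "ccc"))
            .apply(
                MapElements.into(
                        TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                    .via(word -> KV.of(word, word.length())));

    pipeline.run().waitUntilFinish();
  }
}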
Example #1
Source File: WordCount.java    From java-docs-samples with Apache License 2.0
public static void main(String[] args) {
  WordCountOptions options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(WordCountOptions.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("Read lines", TextIO.read().from(options.getInputFile()))
      .apply("Find words", FlatMapElements.into(TypeDescriptors.strings())
          .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
      .apply("Filter empty words", Filter.by((String word) -> !word.isEmpty()))
      .apply("Filter with substring", ParDo.of(new FilterWithSubstring(
          options.getWithSubstring(), options.getIsCaseSensitive())))
      .apply("Count words", Count.perElement())
      .apply("Format results", MapElements.into(TypeDescriptors.strings())
          .via((KV<String, Long> wordCount) -> wordCount.getKey() + ": " + wordCount.getValue()))
      .apply("Write results", TextIO.write().to(NestedValueProvider.of(
          options.getOutputBucket(),
          (String bucket) -> String.format("gs://%s/samples/dataflow/wordcount/outputs", bucket)
      )));
  pipeline.run();
}
 
Example #2
Source File: DistinctTest.java    From beam with Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example #3
Source File: TopPerKeyTest.java    From beam with Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<Triple<String, Long, Long>> result =
      TopPerKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .scoreBy(s -> 1L)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .accumulatingFiredPanes())
          .output();
  final TopPerKey tpk = (TopPerKey) TestUtils.getProducer(result);
  assertTrue(tpk.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) tpk.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.ACCUMULATING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example #4
Source File: FhirIOTest.java    From beam with Apache License 2.0
@Test
public void test_FhirIO_failedReads() {
  List<String> badMessageIDs = Arrays.asList("foo", "bar");
  FhirIO.Read.Result readResult =
      pipeline.apply(Create.of(badMessageIDs)).apply(FhirIO.readResources());

  PCollection<HealthcareIOError<String>> failed = readResult.getFailedReads();

  PCollection<String> resources = readResult.getResources();

  PCollection<String> failedMsgIds =
      failed.apply(
          MapElements.into(TypeDescriptors.strings()).via(HealthcareIOError::getDataResource));

  PAssert.that(failedMsgIds).containsInAnyOrder(badMessageIDs);
  PAssert.that(resources).empty();
  pipeline.run();
}
 
Example #5
Source File: HadoopFormatIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWritingDataFailInvalidValueType() {

  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<Text, Text>> data = new ArrayList<>();
  data.add(KV.of(new Text("key"), new Text("value")));
  TypeDescriptor<Text> textTypeDescriptor = new TypeDescriptor<Text>() {};
  PCollection<KV<Text, Text>> input =
      p.apply(Create.of(data))
          .setTypeDescriptor(TypeDescriptors.kvs(textTypeDescriptor, textTypeDescriptor));

  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(Text.class.getName());

  input.apply(
      "Write",
      HadoopFormatIO.<Text, Text>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));

  p.run().waitUntilFinish();
}
 
Example #6
Source File: DistinctTest.java    From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
}
 
Example #7
Source File: HL7v2IOTest.java    From beam with Apache License 2.0
@Test
public void test_HL7v2IO_failedReads() {
  List<String> badMessageIDs =
      Arrays.asList(
          "projects/a/locations/b/datasets/c/hl7V2Stores/d/messages/foo",
          "projects/a/locations/b/datasets/c/hl7V2Stores/d/messages/bar");
  HL7v2IO.Read.Result readResult =
      pipeline.apply(Create.of(badMessageIDs)).apply(HL7v2IO.getAll());

  PCollection<HealthcareIOError<String>> failed = readResult.getFailedReads();

  PCollection<HL7v2Message> messages = readResult.getMessages();

  PCollection<String> failedMsgIds =
      failed.apply(
          MapElements.into(TypeDescriptors.strings()).via(HealthcareIOError::getDataResource));

  PAssert.that(failedMsgIds).containsInAnyOrder(badMessageIDs);
  PAssert.that(messages).empty();
  pipeline.run();
}
 
Example #8
Source File: ParsePayloadTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testVersionInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  // printf '{"version":4}' | base64 -> eyJ2ZXJzaW9uIjo0fQ==
  String input = "{\"attributeMap\":" //
      + "{\"document_namespace\":\"telemetry\"" //
      + ",\"app_name\":\"Firefox\"" //
      + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\"" //
      + ",\"document_type\":\"main\"" //
      + "},\"payload\":\"eyJ2ZXJzaW9uIjo0fQ==\"}";

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.json.decode()).apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();

  // If we get a ValidationException here, it means we successfully extracted version from
  // the payload and found a valid schema; we expect the payload to not validate.
  PAssert.that(exceptions).containsInAnyOrder("org.everit.json.schema.ValidationException");

  pipeline.run();
}
 
Example #9
Source File: ParsePayloadTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testMetadataInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  String input = "{\"id\":null,\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\""
      + ",\"metadata\":{\"document_namespace\":\"test\",\"document_type\":\"test\""
      + ",\"document_version\":\"1\",\"geo\":{\"country\":\"FI\"}}}";

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.text.decode()) //
      .apply(ParsePayload.of(schemasLocation));

  PAssert.that(result.failures()).empty();

  final PCollection<Integer> attributeCounts = result.output().apply(MapElements
      .into(TypeDescriptors.integers()).via(message -> message.getAttributeMap().size()));
  PAssert.thatSingleton(attributeCounts).isEqualTo(5);

  final String expectedMain = "{\"id\":null}";
  final PCollection<String> main = result.output() //
      .apply("encodeTextMain", OutputFileFormat.text.encode());
  PAssert.thatSingleton(main).isEqualTo(expectedMain);

  pipeline.run();
}
 
Example #10
Source File: LimitPayloadSizeTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testLimit() {
  List<String> passingPayloads = ImmutableList.of("", "abcdefg",
      StringUtils.repeat("abcdefg", 50));
  List<String> failingPayloads = ImmutableList.of(StringUtils.repeat("abcdefghij", 51));

  WithFailures.Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(Iterables.concat(passingPayloads, failingPayloads))) //
      .apply(InputFileFormat.text.decode()) //
      .apply("LimitPayloadSize", LimitPayloadSize.toBytes(500));

  PAssert
      .that(result.output().apply("get success payload",
          MapElements.into(TypeDescriptors.strings()).via(m -> new String(m.getPayload())))) //
      .containsInAnyOrder(passingPayloads);
  PAssert
      .that(result.failures().apply("get failure payload",
          MapElements.into(TypeDescriptors.strings()).via(m -> new String(m.getPayload())))) //
      .containsInAnyOrder(failingPayloads);

  pipeline.run();
}
 
Example #11
Source File: HadoopFormatIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWritingDataFailInvalidKeyType() {

  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<String, Employee>> data = new ArrayList<>();
  data.add(KV.of("key", new Employee("name", "address")));
  PCollection<KV<String, Employee>> input =
      p.apply("CreateData", Create.of(data))
          .setTypeDescriptor(
              TypeDescriptors.kvs(
                  new TypeDescriptor<String>() {}, new TypeDescriptor<Employee>() {}));

  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(String.class.getName());

  input.apply(
      "Write",
      HadoopFormatIO.<String, Employee>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run().waitUntilFinish();
}
 
Example #12
Source File: SnsIOTest.java    From beam with Apache License 2.0
@Test
public void testCustomCoder() throws Exception {
  final PublishRequest request1 = createSampleMessage("my_first_message");

  final TupleTag<PublishResult> results = new TupleTag<>();
  final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess();
  final MockCoder mockCoder = new MockCoder();

  final PCollectionTuple snsWrites =
      p.apply(Create.of(request1))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withAWSClientsProvider(new Provider(amazonSnsSuccess))
                  .withResultOutputTag(results)
                  .withCoder(mockCoder));

  final PCollection<Long> publishedResultsSize =
      snsWrites
          .get(results)
          .apply(MapElements.into(TypeDescriptors.strings()).via(result -> result.getMessageId()))
          .apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(1L));
  p.run().waitUntilFinish();
  assertThat(mockCoder.captured).isNotNull();
}
 
Example #13
Source File: BigQueryMergerTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);

  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(Duration.standardMinutes(WINDOW_SIZE_MINUTES)));

  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
 
Example #14
Source File: BeamJdbcAvroSchema.java    From dbeam with Apache License 2.0
/** Generates an Avro schema by reading one row and exposes Beam metrics via a Beam PTransform. */
public static Schema createSchema(
    final Pipeline pipeline, final JdbcExportArgs args, final Connection connection)
    throws Exception {
  final long startTime = System.nanoTime();
  final Schema generatedSchema = generateAvroSchema(args, connection);
  final long elapsedTimeSchema = (System.nanoTime() - startTime) / 1000000;
  LOGGER.info("Elapsed time to schema {} seconds", elapsedTimeSchema / 1000.0);

  final Counter cnt =
      Metrics.counter(BeamJdbcAvroSchema.class.getCanonicalName(), "schemaElapsedTimeMs");
  pipeline
      .apply(
          "ExposeSchemaCountersSeed",
          Create.of(Collections.singletonList(0)).withType(TypeDescriptors.integers()))
      .apply(
          "ExposeSchemaCounters",
          MapElements.into(TypeDescriptors.integers())
              .via(
                  v -> {
                    cnt.inc(elapsedTimeSchema);
                    return v;
                  }));
  return generatedSchema;
}
 
Example #15
Source File: UserScoreTest.java    From beam with Apache License 2.0
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

  PAssert.that(extract).empty();

  p.run().waitUntilFinish();
}
 
Example #16
Source File: ExternalTest.java    From beam with Apache License 2.0
@Override
public Map<String, ExpansionService.TransformProvider> knownTransforms() {
  return ImmutableMap.of(
      TEST_URN_SIMPLE,
          spec -> MapElements.into(TypeDescriptors.strings()).via((String x) -> x + x),
      TEST_URN_LE,
          spec -> Filter.lessThanEq(Integer.parseInt(spec.getPayload().toStringUtf8())),
      TEST_URN_MULTI,
          spec ->
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          if (c.element() % 2 == 0) {
                            c.output(c.element());
                          } else {
                            c.output(odd, c.element());
                          }
                        }
                      })
                  .withOutputTags(even, TupleTagList.of(odd)));
}
 
Example #17
Source File: CountByKey.java    From beam with Apache License 2.0
@Override
public PCollection<KV<KeyT, Long>> expand(PCollectionList<InputT> inputs) {
  return ReduceByKey.named(getName().orElse(null))
      .of(PCollectionLists.getOnlyElement(inputs))
      .keyBy(getKeyExtractor(), getKeyType().orElse(null))
      .valueBy(v -> 1L, TypeDescriptors.longs())
      .combineBy(Sums.ofLongs())
      .applyIf(
          getWindow().isPresent(),
          builder -> {
            @SuppressWarnings("unchecked")
            final ReduceByKey.WindowByInternalBuilder<InputT, KeyT, Long> cast =
                (ReduceByKey.WindowByInternalBuilder) builder;
            return cast.windowBy(
                getWindow()
                    .orElseThrow(
                        () ->
                            new IllegalStateException(
                                "Unable to resolve windowing for CountByKey expansion.")));
          })
      .output();
}
 
Example #18
Source File: ReduceWindow.java    From beam with Apache License 2.0
private ReduceWindow(
    @Nullable String name,
    UnaryFunction<InputT, ValueT> valueExtractor,
    @Nullable TypeDescriptor<ValueT> valueType,
    VoidFunction<AccT> accumulatorFactory,
    BinaryFunction<AccT, ValueT, AccT> accumulate,
    CombinableBinaryFunction<AccT> mergeAccumulators,
    UnaryFunction<AccT, OutputT> outputFn,
    @Nullable TypeDescriptor<AccT> accumulatorType,
    @Nullable BinaryFunction<ValueT, ValueT, Integer> valueComparator,
    @Nullable Window<InputT> window,
    TypeDescriptor<OutputT> outputType) {
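  // Key every element by the constant byte B_ZERO so that the reduction is
  // applied once per window rather than per distinct key.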
  super(name, outputType, e -> B_ZERO, TypeDescriptors.bytes(), window);
  this.accumulatorFactory = requireNonNull(accumulatorFactory);
  this.accumulate = requireNonNull(accumulate);
  this.mergeAccumulators = requireNonNull(mergeAccumulators);
  this.outputFn = requireNonNull(outputFn);
  this.accumulatorType = accumulatorType;
  this.valueExtractor = requireNonNull(valueExtractor);
  this.valueType = valueType;
  this.valueComparator = valueComparator;
  this.reducer = null;
}
 
Example #19
Source File: ReduceByKeyTest.java    From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .combineBy(Sums.ofLongs())
          .windowBy(FixedWindows.of(Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();

  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);

  assertTrue(reduce.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final Window<? extends BoundedWindow> window = (Window) reduce.getWindow().get();
  assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), window.getWindowFn());
  assertEquals(DefaultTrigger.of(), WindowDesc.of(window).getTrigger());
  assertSame(
      AccumulationMode.DISCARDING_FIRED_PANES, WindowDesc.of(window).getAccumulationMode());
  assertFalse(reduce.getValueComparator().isPresent());
}
 
Example #20
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0
@Test
public void testWritingInvalidJsonsWithFailuresLambda() {
  WithFailures.Result<PCollection<String>, KV<MyPojo, String>> result =
      pipeline
          .apply(
              Create.of(Iterables.concat(POJOS, INVALID_POJOS))
                  .withCoder(SerializableCoder.of(MyPojo.class)))
          .apply(
              AsJsons.of(MyPojo.class)
                  .exceptionsInto(
                      TypeDescriptors.kvs(
                          TypeDescriptor.of(MyPojo.class), TypeDescriptors.strings()))
                  .exceptionsVia(
                      f -> KV.of(f.element(), f.exception().getClass().getCanonicalName())));
  result.output().setCoder(StringUtf8Coder.of());

  PAssert.that(result.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorFunctionHandler(result);

  pipeline.run();
}
 
Example #21
Source File: DynamicDestinations.java    From beam with Apache License 2.0
Coder<DestinationT> getDestinationCoderWithDefault(CoderRegistry registry)
    throws CannotProvideCoderException {
  Coder<DestinationT> destinationCoder = getDestinationCoder();
  if (destinationCoder != null) {
    return destinationCoder;
  }
  // If dynamicDestinations doesn't provide a coder, try to find it in the coder registry.
  TypeDescriptor<DestinationT> descriptor =
      extractFromTypeParameters(
          this,
          DynamicDestinations.class,
          new TypeDescriptors.TypeVariableExtractor<
              DynamicDestinations<T, DestinationT>, DestinationT>() {});
  try {
    return registry.getCoder(descriptor);
  } catch (CannotProvideCoderException e) {
    throw new CannotProvideCoderException(
        "Failed to infer coder for DestinationT from type "
            + descriptor
            + ", please provide it explicitly by overriding getDestinationCoder()",
        e);
  }
}
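The extractFromTypeParameters call above recovers the concrete binding of the DestinationT type variable from a subclass. A minimal sketch of the same pattern, following the TypeDescriptors javadoc (the Foo class here is illustrative):

import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.TypeDescriptors;

// Illustrative generic class that resolves its own type parameter at runtime.
class Foo<BarT> {
  TypeDescriptor<BarT> getBarTypeDescriptor() {
    return TypeDescriptors.extractFromTypeParameters(
        this,
        Foo.class,
        new TypeDescriptors.TypeVariableExtractor<Foo<BarT>, BarT>() {});
  }
}

The extraction only succeeds when BarT is bound by a concrete subtype, e.g. new Foo<String>() {}, an anonymous subclass that fixes BarT to String.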
 
Example #22
Source File: DataflowRunner.java    From beam with Apache License 2.0
@Override
public PCollection<T> expand(PCollection<ValueWithRecordId<T>> input) {
  return input
      .apply(
          WithKeys.of(
                  (ValueWithRecordId<T> value) ->
                      Arrays.hashCode(value.getId()) % NUM_RESHARD_KEYS)
              .withKeyType(TypeDescriptors.integers()))
      // Reshuffle will dedup based on ids in ValueWithRecordId by passing the data through
      // WindmillSink.
      .apply(Reshuffle.of())
      .apply(
          "StripIds",
          ParDo.of(
              new DoFn<KV<Integer, ValueWithRecordId<T>>, T>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(c.element().getValue().getValue());
                }
              }));
}
 
Example #23
Source File: BeamSqlBuiltinFunctionsIntegrationTestBase.java    From beam with Apache License 2.0
@Override
public PDone expand(PBegin begin) {
  PCollection<Boolean> result =
      begin
          .apply(Create.of(DUMMY_ROW).withRowSchema(DUMMY_SCHEMA))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));

  PAssert.that(result)
      .satisfies(
          input -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(input));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
 
Example #24
Source File: ReduceByKeyTest.java    From beam with Apache License 2.0
@Test
public void testBuild_sortedValues() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, List<Long>>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .reduceBy(s -> s.collect(Collectors.toList()))
          .withSortedValues(Long::compare)
          .windowBy(FixedWindows.of(Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();
  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);
  assertTrue(reduce.getValueComparator().isPresent());
}
 
Example #25
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0
@Test
public void testParsingInvalidJsonsWithFailuresLambda() {
  WithFailures.Result<PCollection<MyPojo>, KV<String, String>> result =
      pipeline
          .apply(Create.of(Iterables.concat(VALID_JSONS, INVALID_JSONS)))
          .apply(
              ParseJsons.of(MyPojo.class)
                  .exceptionsInto(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings()))
                  .exceptionsVia(
                      f -> KV.of(f.element(), f.exception().getClass().getCanonicalName())));
  result.output().setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(result.output()).containsInAnyOrder(POJOS);
  assertParsingWithErrorFunctionHandler(result);

  pipeline.run();
}
 
Example #26
Source File: SumByKeyTest.java    From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> counted =
      SumByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();
  final SumByKey sum = (SumByKey) TestUtils.getProducer(counted);
  assertTrue(sum.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) sum.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(Duration.millis(1000), windowDesc.getAllowedLateness());
}
 
Example #27
Source File: KeyByBigQueryTableDestination.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
public Result<PCollection<KV<TableDestination, PubsubMessage>>, PubsubMessage> expand(
    PCollection<PubsubMessage> messages) {
  return messages
      .apply(MapElements.into(TypeDescriptors.kvs(TypeDescriptor.of(TableDestination.class),
          TypeDescriptor.of(PubsubMessage.class))).via((PubsubMessage msg) -> {
            msg = PubsubConstraints.ensureNonNull(msg);
            return KV.of(getTableDestination(msg.getAttributeMap()), msg);
          }).exceptionsInto(TypeDescriptor.of(PubsubMessage.class))
          .exceptionsVia((WithFailures.ExceptionElement<PubsubMessage> ee) -> {
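            // Rethrow to dispatch on the exception type: IllegalArgumentException is
            // converted into a failure element; any other exception fails the bundle.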
            try {
              throw ee.exception();
            } catch (IllegalArgumentException e) {
              return FailureMessage.of(KeyByBigQueryTableDestination.class.getSimpleName(), //
                  ee.element(), //
                  ee.exception());
            }
          }));
}
 
Example #28
Source File: ReduceByKeyTest.java    From beam with Apache License 2.0
@Test
public void testBuild_CombineByStream() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .combineBy(s -> s.mapToLong(e -> e).sum())
          .output();
  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);
  assertNotNull(reduce.getReducer());
  assertFalse(reduce.isCombineFnStyle());
}
 
Example #29
Source File: HadoopFormatIO.java    From beam with Apache License 2.0
@Override
public PCollection<KV<Integer, KV<KeyT, ValueT>>> expand(PCollection<KV<KeyT, ValueT>> input) {
  return input
      .apply(
          "AssignTask",
          ParDo.of(new AssignTaskFn<KeyT, ValueT>(configView)).withSideInputs(configView))
      .setTypeDescriptor(
          TypeDescriptors.kvs(TypeDescriptors.integers(), input.getTypeDescriptor()))
      .apply("GroupByTaskId", GroupByKey.create())
      .apply("FlattenGroupedTasks", ParDo.of(new FlattenGroupedTasks<>()));
}
 
Example #30
Source File: ReduceByKeyTest.java    From beam with Apache License 2.0
/** Validates the output type of a `.reduceBy` operation on the global window. */
@Test
public void testReductionType0_outputValues() {
  execute(
      new AbstractTestCase<Integer, Set<Integer>>() {

        @Override
        protected List<Integer> getInput() {
          return Arrays.asList(1, 2, 3, 4, 5, 6, 7, 9);
        }

        @Override
        protected TypeDescriptor<Integer> getInputType() {
          return TypeDescriptors.integers();
        }

        @Override
        protected PCollection<Set<Integer>> getOutput(PCollection<Integer> input) {
          return ReduceByKey.of(input)
              .keyBy(e -> e % 2)
              .valueBy(e -> e)
              .reduceBy(s -> s.collect(Collectors.toSet()))
              .outputValues();
        }

        @Override
        public List<Set<Integer>> getUnorderedOutput() {
          return Arrays.asList(Sets.newHashSet(2, 4, 6), Sets.newHashSet(1, 3, 5, 7, 9));
        }
      });
}