org.apache.beam.sdk.coders.KvCoder Java Examples

The following examples show how to use org.apache.beam.sdk.coders.KvCoder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CombineValuesFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} wrapping an {@code ExtractOutputDoFn}, which consumes {@code KV<K,
 * AccumT>} and emits {@code KV<K, OutputT>}.
 *
 * <p>The input coder is only populated when the applied combine fn carries a {@link KvCoder};
 * otherwise {@code null} is passed through.
 *
 * @param combineFn applied combine function supplying the fn, coders, windowing strategy and side
 *     input views
 * @param sideInputReader reader handed to the {@code ExtractOutputDoFn}
 * @return the {@link DoFnInfo} describing the extract-output step
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, AccumT>, KV<K, OutputT>> extractOutputFn =
      new ExtractOutputDoFn<>(runner, sideInputReader);

  // Pair the original key coder with the accumulator coder when a KvCoder is available.
  KvCoder<K, AccumT> accumulatorKvCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder());

  return DoFnInfo.forFn(
      extractOutputFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      accumulatorKvCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #2
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code AsJsons} with the default {@code exceptionsVia()} handler to a mix of valid and
 * invalid POJOs, then checks the successful output and the reported failures.
 */
@Test
public void testWritingInvalidJsonsWithFailuresDefaultHandler() {
  Create.Values<MyPojo> mixedPojos =
      Create.of(Iterables.concat(POJOS, INVALID_POJOS))
          .withCoder(SerializableCoder.of(MyPojo.class));

  WithFailures.Result<PCollection<String>, KV<MyPojo, Map<String, String>>> outcome =
      pipeline.apply(mixedPojos).apply(AsJsons.of(MyPojo.class).exceptionsVia());

  // Both branches get explicit coders before any assertion is attached.
  outcome.output().setCoder(StringUtf8Coder.of());

  KvCoder<MyPojo, Map<String, String>> failureCoder =
      KvCoder.of(
          SerializableCoder.of(MyPojo.class),
          MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
  outcome.failures().setCoder(failureCoder);

  PAssert.that(outcome.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorMapHandler(outcome);

  pipeline.run();
}
 
Example #3
Source File: GroupNonMergingWindowsFunctionsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link GroupByKeyIterator} over five encoded items: values 1 and 2 under key
 * {@code "k1"} and values 3, 4 and 5 under key {@code "k2"}.
 *
 * @param window window passed to the item factory
 * @param winCoder window coder passed to the item factory
 * @param winStrategy windowing strategy; its window fn also supplies the window coder of the
 *     full windowed-value coder
 * @return an iterator over the encoded items
 */
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {

  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedKvCoder =
      WindowedValue.getFullCoder(kvCoder, winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> itemFactory =
      ItemFactory.forWindow(stringCoder, windowedKvCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> encodedItems =
      Arrays.asList(
          itemFactory.create("k1", 1),
          itemFactory.create("k1", 2),
          itemFactory.create("k2", 3),
          itemFactory.create("k2", 4),
          itemFactory.create("k2", 5));

  return new GroupByKeyIterator<>(
      encodedItems.iterator(), stringCoder, winStrategy, windowedKvCoder);
}
 
Example #4
Source File: SdkComponentsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that registering the same coder, or an equal one, yields the same non-empty id, that a
 * different coder yields a different id, and that both ids can be looked up in the components.
 */
@Test
public void registerCoder() throws IOException {
  Coder<?> nestedCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String coderId = components.registerCoder(nestedCoder);

  // Re-registering the very same instance returns the id already handed out.
  assertThat(components.registerCoder(nestedCoder), equalTo(coderId));
  assertThat(coderId, not(isEmptyOrNullString()));

  // A separately constructed but equal coder maps to that id as well.
  Coder<?> equivalentCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  assertThat(components.registerCoder(equivalentCoder), equalTo(coderId));

  Coder<?> unrelatedCoder = VarLongCoder.of();
  assertThat(components.registerCoder(unrelatedCoder), not(equalTo(coderId)));

  // Both lookups throw if the id is missing from the generated components.
  components.toComponents().getCodersOrThrow(coderId);
  components.toComponents().getCodersOrThrow(components.registerCoder(unrelatedCoder));
}
 
Example #5
Source File: CloningBundleFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds one grouped element to a keyed bundle and checks that the committed bundle holds an
 * element equal to it, but not the same instance, and keeps the PCollection and key.
 */
@Test
public void keyedBundleWorkingCoderSucceedsClonesOutput() {
  PCollection<Integer> ints = p.apply(Create.of(1, 3).withCoder(VarIntCoder.of()));

  PCollection<KV<String, Iterable<Integer>>> grouped =
      ints.apply(WithKeys.of("foo"))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .apply(GroupByKey.create());

  WindowedValue<KV<String, Iterable<Integer>>> fooElement =
      WindowedValue.valueInGlobalWindow(
          KV.<String, Iterable<Integer>>of("foo", ImmutableList.of(1, 3)));

  CommittedBundle<KV<String, Iterable<Integer>>> committed =
      factory
          .createKeyedBundle(StructuralKey.of("foo", StringUtf8Coder.of()), grouped)
          .add(fooElement)
          .commit(Instant.now());

  // Equal content...
  assertThat(committed.getElements(), containsInAnyOrder(fooElement));
  // ...but the stored value must be a different object from the one that was added.
  assertThat(
      Iterables.getOnlyElement(committed.getElements()).getValue(),
      not(theInstance(fooElement.getValue())));
  assertThat(committed.getPCollection(), equalTo(grouped));
  assertThat(committed.getKey(), equalTo(StructuralKey.of("foo", StringUtf8Coder.of())));
}
 
Example #6
Source File: Reify.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps each {@link TimestampedValue}, emitting the bare value under the same key at the
 * timestamp the wrapper carried.
 *
 * <p>The output coder is assembled from the input's key coder and the value coder nested inside
 * the input's {@link TimestampedValueCoder}.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, TimestampedValue<V>>> input) {
  KvCoder<K, TimestampedValue<V>> inputKvCoder =
      (KvCoder<K, TimestampedValue<V>>) input.getCoder();
  TimestampedValueCoder<V> timestampedCoder =
      (TimestampedValueCoder<V>) inputKvCoder.getValueCoder();

  DoFn<KV<K, TimestampedValue<V>>, KV<K, V>> unwrapFn =
      new DoFn<KV<K, TimestampedValue<V>>, KV<K, V>>() {
        @Override
        public Duration getAllowedTimestampSkew() {
          // Largest representable skew.
          return Duration.millis(Long.MAX_VALUE);
        }

        @ProcessElement
        public void processElement(
            @Element KV<K, TimestampedValue<V>> kv, OutputReceiver<KV<K, V>> r) {
          TimestampedValue<V> timestamped = kv.getValue();
          r.outputWithTimestamp(
              KV.of(kv.getKey(), timestamped.getValue()), timestamped.getTimestamp());
        }
      };

  return input
      .apply(ParDo.of(unwrapFn))
      .setCoder(KvCoder.of(inputKvCoder.getKeyCoder(), timestampedCoder.getValueCoder()));
}
 
Example #7
Source File: StreamingDataflowWorker.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the userland key coder, if any, from the coder used in the initial read step of a
 * stage. This encodes many assumptions about how the streaming execution context works.
 *
 * @param readCoder coder of the read step; must be a {@code WindowedValueCoder}
 * @return the key coder of the wrapped {@link KvCoder} or {@code FakeKeyedWorkItemCoder}, or
 *     {@code null} when the wrapped coder is neither
 * @throws RuntimeException if {@code readCoder} is not a {@code WindowedValueCoder}
 */
@Nullable
private Coder<?> extractKeyCoder(Coder<?> readCoder) {
  if (!(readCoder instanceof WindowedValueCoder)) {
    throw new RuntimeException(
        String.format(
            "Expected coder for streaming read to be %s, but received %s",
            WindowedValueCoder.class.getSimpleName(), readCoder));
  }

  // Note that TimerOrElementCoder is a backwards-compatibility class
  // that is really a FakeKeyedWorkItemCoder
  Coder<?> elementCoder = ((WindowedValueCoder<?>) readCoder).getValueCoder();

  if (elementCoder instanceof KvCoder<?, ?>) {
    KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) elementCoder;
    return kvCoder.getKeyCoder();
  }
  if (elementCoder instanceof WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) {
    return ((WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) elementCoder).getKeyCoder();
  }
  // Neither keyed shape: there is no key coder to report.
  return null;
}
 
Example #8
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the {@link SystemReduceFn} matching the transform being translated: a buffering fn for
 * {@link GroupByKey}, a combining fn for {@link Combine.PerKey}.
 *
 * @param transform the transform being translated
 * @param pipeline pipeline whose coder registry is used for the combining case
 * @param kvInputCoder coder of the transform's keyed input
 * @return the reduce fn for {@code transform}
 * @throws RuntimeException if {@code transform} is neither of the two supported kinds
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (!(transform instanceof Combine.PerKey)) {
    throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
  }

  final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> combineFn =
      ((Combine.PerKey) transform).getFn();
  return SystemReduceFn.combining(
      kvInputCoder.getKeyCoder(),
      AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder));
}
 
Example #9
Source File: WriteToGCSAvroTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that building {@link WriteToGCSAvro} with a {@code null} output directory raises an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  PCollection<KV<String, String>> input =
      pipeline.apply("CreateInput", Create.of(message).withCoder(messageCoder));

  input.apply(
      "WriteTextFile(s)",
      WriteToGCSAvro.newBuilder()
          .withOutputDirectory(null)
          .withOutputFilenamePrefix(AVRO_FILENAME_PREFIX)
          .setNumShards(NUM_SHARDS)
          .withTempLocation(FAKE_TEMP_LOCATION)
          .build());
  pipeline.run();
}
 
Example #10
Source File: ReduceFnTester.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link ReduceFnTester} for the given {@link WindowingStrategy} and {@link CombineFn},
 * deriving the {@link TriggerStateMachine} from the {@link Trigger} configured on the {@link
 * WindowingStrategy}.
 *
 * @param strategy windowing strategy whose trigger is translated into a state machine
 * @param combineFn combine function over {@code Integer} inputs
 * @param outputCoder coder for the combined output
 * @return the tester produced by the state-machine-based overload
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        CombineFn<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder)
        throws Exception {

  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  KvCoder<String, Integer> inputKvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  // Ensure that the CombineFn can be converted into an AppliedCombineFn
  AppliedCombineFn.withInputCoder(combineFn, coderRegistry, inputKvCoder);

  TriggerStateMachine stateMachine =
      TriggerStateMachines.stateMachineForTrigger(
          TriggerTranslation.toProto(strategy.getTrigger()));
  return combining(strategy, stateMachine, combineFn, outputCoder);
}
 
Example #11
Source File: DataflowSideInputHandlerFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a multimap side input handler built by {@link DataflowSideInputHandlerFactory}
 * returns an empty iterable for the key {@code "foo2"} in the global window.
 */
@Test
public void emptyResultForEmptyCollection() {
  ImmutableMap<String, SideInputReader> readersByTransform =
      ImmutableMap.<String, SideInputReader>builder()
          .put(TRANSFORM_ID, fakeSideInputReader)
          .build();

  ImmutableMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsBySideInput =
      ImmutableMap.<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>builder()
          .put(sideInputId, view)
          .build();

  DataflowSideInputHandlerFactory handlerFactory =
      DataflowSideInputHandlerFactory.of(readersByTransform, viewsBySideInput);

  KvCoder<String, Integer> elementCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  MultimapSideInputHandler<String, Integer, GlobalWindow> multimapHandler =
      handlerFactory.forMultimapSideInput(
          TRANSFORM_ID, SIDE_INPUT_NAME, elementCoder, GlobalWindow.Coder.INSTANCE);

  Iterable<Integer> values = multimapHandler.get("foo2", GlobalWindow.INSTANCE);
  assertThat(values, emptyIterable());
}
 
Example #12
Source File: WriteToGCSTextTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that building {@link WriteToGCSText} with a {@code null} temporary location raises an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutTempLocation() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withTempLocation(tempLocation) called with null input. ");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  PCollection<KV<String, String>> input =
      pipeline.apply("CreateInput", Create.of(message).withCoder(messageCoder));

  input.apply(
      "WriteTextFile(s)",
      WriteToGCSText.newBuilder()
          .withOutputDirectory(FAKE_DIR)
          .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
          .setNumShards(NUM_SHARDS)
          .withTempLocation(null)
          .build());
  pipeline.run();
}
 
Example #13
Source File: CombineTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the combine fn for its accumulator coder, using the pipeline's coder registry and the
 * value coder of the transform's single main input.
 *
 * @param combineFn combine function whose accumulator coder is wanted
 * @param transform applied {@link Combine.PerKey} whose main input must be encoded with a
 *     {@link KvCoder}
 * @return the accumulator coder reported by {@code combineFn}
 * @throws IOException if the combine fn cannot provide an accumulator coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<KV<K, InputT>>, ?, Combine.PerKey<K, InputT, ?>> transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, InputT>> keyedInput =
        (PCollection<KV<K, InputT>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    CoderRegistry coderRegistry = transform.getPipeline().getCoderRegistry();
    Coder<InputT> inputValueCoder = ((KvCoder<K, InputT>) keyedInput.getCoder()).getValueCoder();
    return combineFn.getAccumulatorCoder(coderRegistry, inputValueCoder);
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
 
Example #14
Source File: ToIsmRecordForMultimapDoFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@code ToIsmRecordForMultimapParDoFn} from the encoding stored on the cloud user fn.
 *
 * <p>The encoding must decode to an {@link IsmRecordCoder}; its first two coder arguments are
 * paired into the {@link KvCoder} handed to the new fn.
 *
 * @throws IllegalStateException (via {@code checkState}) if the decoded coder is not an {@link
 *     IsmRecordCoder}
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encoding =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> decodedCoder = CloudObjects.coderFromCloudObject(encoding);
  checkState(
      decodedCoder instanceof IsmRecordCoder,
      "Expected to received an instanceof an %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      decodedCoder);

  IsmRecordCoder<?> ismCoder = (IsmRecordCoder<?>) decodedCoder;
  Coder<?> firstArgument = ismCoder.getCoderArguments().get(0);
  Coder<?> secondArgument = ismCoder.getCoderArguments().get(1);
  return new ToIsmRecordForMultimapParDoFn(KvCoder.of(firstArgument, secondArgument));
}
 
Example #15
Source File: GroupNonMergingWindowsFunctions.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Groups values by a composite of key K and window W using Spark's
 * repartitionAndSortWithinPartitions. The stream, sorted by composite key, is then turned into
 * one key with an iterator over all of its values (via {@link GroupByKeyIterator}).
 *
 * <p>repartitionAndSortWithinPartitions is used because the values are streamed through an
 * iterator rather than all collected into memory at once, unlike GroupByKey (it minimizes memory
 * pressure).
 *
 * @param rdd windowed key-value input
 * @param keyCoder coder for the keys
 * @param valueCoder coder for the values
 * @param windowingStrategy strategy whose window fn supplies the window coder
 * @param partitioner partitioner passed to {@code getPartitioner} together with {@code rdd}
 * @return the grouped values per key and window
 */
static <K, V, W extends BoundedWindow>
    JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupByKeyAndWindow(
        JavaRDD<WindowedValue<KV<K, V>>> rdd,
        Coder<K> keyCoder,
        Coder<V> valueCoder,
        WindowingStrategy<?, W> windowingStrategy,
        Partitioner partitioner) {
  final Coder<W> winCoder = windowingStrategy.getWindowFn().windowCoder();
  FullWindowedValueCoder<KV<K, V>> fullKvCoder =
      WindowedValue.FullWindowedValueCoder.of(KvCoder.of(keyCoder, valueCoder), winCoder);

  // Move the window into the key; the whole windowed value travels as encoded bytes.
  JavaPairRDD<ByteArray, byte[]> keyedByWindow =
      bringWindowToKey(
          rdd, keyCoder, winCoder, element -> CoderHelpers.toByteArray(element, fullKvCoder));

  return keyedByWindow
      .repartitionAndSortWithinPartitions(getPartitioner(partitioner, rdd))
      .mapPartitions(
          partition ->
              new GroupByKeyIterator<>(partition, keyCoder, windowingStrategy, fullKvCoder))
      .filter(Objects::nonNull); // filter last null element from GroupByKeyIterator
}
 
Example #16
Source File: StatefulParDoEvaluatorFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds three elements under the same key, the last of them after the watermark has advanced,
 * through a global window with 2 ms of allowed lateness and checks the output of {@code
 * statefulConcat()}.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant epoch = Instant.ofEpochMilli(0);

  TestStream<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), epoch.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), epoch.plus(1)))
          .advanceWatermarkTo(epoch.plus(1))
          // Added after the watermark moved to epoch + 1, with a timestamp of epoch itself.
          .addElements(TimestampedValue.of(KV.of("", 3), epoch))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> windowed =
      pipeline
          .apply(events)
          .apply(
              Window.<KV<String, Integer>>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(2)));

  PCollection<String> concatenated = windowed.apply(ParDo.of(statefulConcat()));
  PAssert.that(concatenated).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
 
Example #17
Source File: StatefulTeamScoreTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} emits the
 * expected scores when every event belongs to a single team.
 */
@Test
public void testScoreUpdatesOneTeam() {

  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));
  TestStream<KV<String, GameActionInfo>> redTeamEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> scores =
      p.apply(redTeamEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String team = TestUser.RED_ONE.getTeam();

  PAssert.that(scores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(team, 100), KV.of(team, 200), KV.of(team, 401));

  p.run().waitUntilFinish();
}
 
Example #18
Source File: WriteToGCSTextTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that building {@link WriteToGCSText} with a {@code null} output directory raises an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  PCollection<KV<String, String>> input =
      pipeline.apply("CreateInput", Create.of(message).withCoder(messageCoder));

  input.apply(
      "WriteTextFile(s)",
      WriteToGCSText.newBuilder()
          .withOutputDirectory(null)
          .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
          .setNumShards(NUM_SHARDS)
          .withTempLocation(FAKE_TEMP_LOCATION)
          .build());
  pipeline.run();
}
 
Example #19
Source File: WriteFiles.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code FinalizeFn} to the incoming file results and returns a {@link WriteFilesResult}
 * exposing the per-destination output filenames.
 *
 * <p>The finalize step receives the transform's side inputs plus the shard-count view when one is
 * set.
 */
@Override
public WriteFilesResult<DestinationT> expand(
    PCollection<List<FileResult<DestinationT>>> input) {

  List<PCollectionView<?>> sideInputsForFinalize = Lists.newArrayList(getSideInputs());
  if (numShardsView != null) {
    sideInputsForFinalize.add(numShardsView);
  }

  PCollection<KV<DestinationT, String>> filenamesByDestination =
      input
          .apply("Finalize", ParDo.of(new FinalizeFn()).withSideInputs(sideInputsForFinalize))
          .setCoder(KvCoder.of(destinationCoder, StringUtf8Coder.of()))
          // Reshuffle the filenames to make sure they are observable downstream
          // only after each one is done finalizing.
          .apply(Reshuffle.viaRandomKey());

  TupleTag<KV<DestinationT, String>> filenamesTag =
      new TupleTag<>("perDestinationOutputFilenames");
  return WriteFilesResult.in(input.getPipeline(), filenamesTag, filenamesByDestination);
}
 
Example #20
Source File: BigQueryFeatureSink.java    From feast with Apache License 2.0 6 votes vote down vote up
/**
 * Generates a table schema per feature set, stores a multimap view of the schemas in {@code
 * schemasView}, and returns the feature set references.
 *
 * @param featureSetSpecs Feature set to be written
 * @return the keys of the generated schema collection
 */
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  PCollection<KV<FeatureSetReference, TableSchema>> tableSchemas =
      featureSetSpecs.apply(
          "GenerateTableSchema",
          ParDo.of(
              new FeatureSetSpecToTableSchema(
                  DatasetId.of(getProjectId(), getDatasetId()), getBQClient())));
  tableSchemas.setCoder(
      KvCoder.of(
          AvroCoder.of(FeatureSetReference.class),
          FeatureSetSpecToTableSchema.TableSchemaCoder.of()));

  // Keep the multimap view on the instance field.
  schemasView =
      tableSchemas
          .apply("ReferenceString", ParDo.of(new ReferenceToString()))
          .apply("View", View.asMultimap());

  return tableSchemas.apply("Ready", Keys.create());
}
 
Example #21
Source File: SortValues.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Retrieves the {@link Coder} for the secondary key-value pairs.
 *
 * <p>The input coder must be a {@link KvCoder} whose value coder is an {@link IterableCoder}
 * whose element coder is again a {@link KvCoder}; that innermost coder is returned.
 *
 * @throws IllegalStateException if any of the three coder layers has an unexpected type
 */
@SuppressWarnings("unchecked")
private static <PrimaryKeyT, SecondaryKeyT, ValueT>
    KvCoder<SecondaryKeyT, ValueT> getSecondaryKeyValueCoder(
        Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  // Layer 1: the outer coder.
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("SortValues requires its input to use KvCoder");
  }
  KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>> outerCoder =
      (KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>) inputCoder;

  // Layer 2: the coder for the iterable of secondary pairs.
  Coder<Iterable<KV<SecondaryKeyT, ValueT>>> valuesCoder = outerCoder.getValueCoder();
  if (!(valuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "SortValues requires the values be encoded with IterableCoder");
  }

  // Layer 3: the coder for each secondary pair.
  Coder<KV<SecondaryKeyT, ValueT>> pairCoder =
      ((IterableCoder<KV<SecondaryKeyT, ValueT>>) valuesCoder).getElemCoder();
  if (!(pairCoder instanceof KvCoder)) {
    throw new IllegalStateException(
        "SortValues requires the secondary key-value pairs to use KvCoder");
  }
  return (KvCoder<SecondaryKeyT, ValueT>) pairCoder;
}
 
Example #22
Source File: ReduceFnTester.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link ReduceFnTester} around a combining {@link SystemReduceFn} built from the given
 * {@link CombineFnWithContext}, using String keys and Integer inputs.
 *
 * @param strategy windowing strategy for the tester
 * @param triggerStateMachine trigger state machine for the tester
 * @param combineFn context-aware combine function over {@code Integer} inputs
 * @param outputCoder coder for the combined output
 * @param options pipeline options passed to the tester
 * @param sideInputReader side input reader passed to the tester
 * @return the configured tester
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        TriggerStateMachine triggerStateMachine,
        CombineFnWithContext<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder,
        PipelineOptions options,
        SideInputReader sideInputReader)
        throws Exception {
  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  KvCoder<String, Integer> inputKvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  AppliedCombineFn<String, Integer, AccumT, OutputT> appliedFn =
      AppliedCombineFn.withInputCoder(combineFn, coderRegistry, inputKvCoder);

  return new ReduceFnTester<>(
      strategy,
      triggerStateMachine,
      SystemReduceFn.combining(StringUtf8Coder.of(), appliedFn),
      outputCoder,
      options,
      sideInputReader);
}
 
Example #23
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sends the same three key-value pairs twice, separated by one minute of processing time, and
 * checks that {@link Deduplicate} with a representative-value fn emits each pair once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredRepresentativeValuesWithType() {
  Instant start = new Instant(0);
  KvCoder<Integer, String> elementCoder = KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of());

  TestStream<KV<Integer, String>> stream =
      TestStream.create(elementCoder)
          .advanceWatermarkTo(start)
          // First occurrence of each pair.
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          // The same pairs again, with later timestamps.
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> deduplicated =
      p.apply(stream)
          .apply(
              Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(deduplicated).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  p.run();
}
 
Example #24
Source File: OuterLeftJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects a {@link NullPointerException} when the right collection of a left outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinRightCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);
  PCollection<KV<String, Long>> left =
      p.apply(
          Create.of(leftListOfKv).withCoder(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));
  Join.leftOuterJoin(left, null, "");
}
 
Example #25
Source File: OuterLeftJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects a {@link NullPointerException} when the left collection of a left outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinLeftCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);
  PCollection<KV<String, String>> right =
      p.apply(
          Create.of(rightListOfKv)
              .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  Join.leftOuterJoin(null, right, "");
}
 
Example #26
Source File: WindowDoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link WindowDoFnOperator} over Long keys and values that sums values with {@code
 * Sum.ofLongs()} in one-minute fixed windows.
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> strategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));

  TupleTag<KV<Long, Long>> mainOutputTag = new TupleTag<>("main-output");

  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> summingReduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  // Input and output coders share the window coder taken from the strategy.
  Coder<IntervalWindow> intervalWindowCoder = strategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> keyedWorkItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), intervalWindowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> windowedInputCoder =
      WindowedValue.getFullCoder(keyedWorkItemCoder, intervalWindowCoder);
  FullWindowedValueCoder<KV<Long, Long>> windowedOutputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), intervalWindowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      summingReduceFn,
      "stepName",
      (Coder) windowedInputCoder,
      mainOutputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(mainOutputTag, windowedOutputCoder),
      strategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
 
Example #27
Source File: StreamingDataflowWorkerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles a three-instruction pipeline: a read from a {@code TestCountingSource}, the given
 * {@link DoFn}, and a sink.
 *
 * @param numMessagesPerShard total number of messages in each split of the unbounded source
 * @param doFn the function run on the elements produced by the read
 * @return the read, DoFn and sink instructions, in that order
 */
private List<ParallelInstruction> makeUnboundedSourcePipeline(
    int numMessagesPerShard, // Total number of messages in each split of the unbounded source.
    DoFn<ValueWithRecordId<KV<Integer, Integer>>, String> doFn)
    throws Exception {
  DataflowPipelineOptions options =
      PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
  options.setNumWorkers(1);

  // The same codec is set on both the source and the read output.
  CloudObject codec =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(
              ValueWithRecordId.ValueWithRecordIdCoder.of(
                  KvCoder.of(VarIntCoder.of(), VarIntCoder.of())),
              GlobalWindow.Coder.INSTANCE),
          /*sdkComponents=*/ null);

  ReadInstruction read =
      new ReadInstruction()
          .setSource(
              CustomSources.serializeToCloudSource(
                      new TestCountingSource(numMessagesPerShard), options)
                  .setCodec(codec));
  InstructionOutput readOutput =
      new InstructionOutput()
          .setName("read_output")
          .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
          .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
          .setCodec(codec);
  ParallelInstruction readInstruction =
      new ParallelInstruction()
          .setSystemName("Read")
          .setOriginalName("OriginalReadName")
          .setRead(read)
          .setOutputs(Arrays.asList(readOutput));

  return Arrays.asList(
      readInstruction,
      makeDoFnInstruction(doFn, 0, StringUtf8Coder.of(), WindowingStrategy.globalDefault()),
      makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE));
}
 
Example #28
Source File: OuterFullJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects a {@link NullPointerException} when the left collection of a full outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinLeftCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);
  PCollection<KV<String, String>> right =
      p.apply(
          Create.of(rightListOfKv)
              .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  Join.fullOuterJoin(null, right, "", "");
}
 
Example #29
Source File: OuterRightJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Expects a {@link NullPointerException} when the null-value of a right outer join is null. */
@Test(expected = NullPointerException.class)
public void testJoinNullValueIsNull() {
  p.enableAbandonedNodeEnforcement(false);
  PCollection<KV<String, Long>> left =
      p.apply("CreateLeft", Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));
  PCollection<KV<String, String>> right =
      p.apply(
          "CreateRight", Create.empty(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  Join.rightOuterJoin(left, right, null);
}
 
Example #30
Source File: GroupIntoBatches.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code GroupIntoBatchesDoFn} to the keyed input, passing it the batch size, the input's
 * allowed lateness, and the key and value coders taken from the input's {@link KvCoder}.
 *
 * @param input keyed input; its coder must be a {@link KvCoder}
 * @return the result of applying the batching {@link ParDo} to {@code input}
 * @throws IllegalArgumentException (via {@code checkArgument}) if the input coder is not a {@link
 *     KvCoder}
 */
@Override
public PCollection<KV<K, Iterable<InputT>>> expand(PCollection<KV<K, InputT>> input) {
  Duration allowedLateness = input.getWindowingStrategy().getAllowedLateness();

  checkArgument(
      input.getCoder() instanceof KvCoder,
      "coder specified in the input PCollection is not a KvCoder");
  // A parameterized cast plus the typed accessors replaces the raw KvCoder and the unchecked
  // casts on getCoderArguments().
  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  Coder<InputT> valueCoder = inputCoder.getValueCoder();

  return input.apply(
      ParDo.of(new GroupIntoBatchesDoFn<>(batchSize, allowedLateness, keyCoder, valueCoder)));
}