Java Code Examples for org.apache.beam.sdk.coders.KvCoder

The following examples show how to use org.apache.beam.sdk.coders.KvCoder. These examples are extracted from open-source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also explore related API usage in the sidebar.
Example 1
Source Project: beam   Source File: ReduceFnTester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link ReduceFnTester} that combines {@code Integer} inputs with the given
 * context-aware combine function, applied over a {@code KV<String, Integer>} input coder.
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        TriggerStateMachine triggerStateMachine,
        CombineFnWithContext<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder,
        PipelineOptions options,
        SideInputReader sideInputReader)
        throws Exception {
  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  KvCoder<String, Integer> kvInputCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  AppliedCombineFn<String, Integer, AccumT, OutputT> appliedFn =
      AppliedCombineFn.withInputCoder(combineFn, coderRegistry, kvInputCoder);

  return new ReduceFnTester<>(
      strategy,
      triggerStateMachine,
      SystemReduceFn.combining(StringUtf8Coder.of(), appliedFn),
      outputCoder,
      options,
      sideInputReader);
}
 
Example 2
Source Project: feast   Source File: BigQueryFeatureSink.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Generates a table schema for every incoming feature set spec, stores a multimap view derived
 * from those schemas in {@code schemasView}, and returns the keys of the generated schemas.
 *
 * @param featureSetSpecs Feature set to be written
 */
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  FeatureSetSpecToTableSchema specToSchemaFn =
      new FeatureSetSpecToTableSchema(
          DatasetId.of(getProjectId(), getDatasetId()), getBQClient());

  PCollection<KV<FeatureSetReference, TableSchema>> schemas =
      featureSetSpecs
          .apply("GenerateTableSchema", ParDo.of(specToSchemaFn))
          .setCoder(
              KvCoder.of(
                  AvroCoder.of(FeatureSetReference.class),
                  FeatureSetSpecToTableSchema.TableSchemaCoder.of()));

  // Keep a multimap view of the schemas on the instance.
  schemasView =
      schemas
          .apply("ReferenceString", ParDo.of(new ReferenceToString()))
          .apply("View", View.asMultimap());

  return schemas.apply("Ready", Keys.create());
}
 
Example 3
Source Project: deployment-examples   Source File: StatefulTeamScoreTest.java    License: MIT License 6 votes vote down vote up
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  // Five events produced by two users, followed by the final watermark.
  TestStream<KV<String, GameActionInfo>> teamEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> scoresPerTeam =
      p.apply(teamEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String expectedTeam = TestUser.RED_ONE.getTeam();

  PAssert.that(scoresPerTeam)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(
          KV.of(expectedTeam, 100), KV.of(expectedTeam, 200), KV.of(expectedTeam, 401));

  p.run().waitUntilFinish();
}
 
Example 4
Source Project: beam   Source File: StatefulParDoEvaluatorFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds three keyed elements, the last one timestamped behind the already advanced watermark,
 * through a global window with two milliseconds of allowed lateness and checks the concatenated
 * output of the stateful DoFn.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant epoch = Instant.ofEpochMilli(0);
  TestStream<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), epoch.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), epoch.plus(1)))
          .advanceWatermarkTo(epoch.plus(1))
          // This element's timestamp lies behind the watermark set just above.
          .addElements(TimestampedValue.of(KV.of("", 3), epoch))
          .advanceWatermarkToInfinity();
  Window<KV<String, Integer>> globalWithLateness =
      Window.<KV<String, Integer>>into(new GlobalWindows())
          .withAllowedLateness(Duration.millis(2));

  PCollection<KV<String, Integer>> windowed = pipeline.apply(events).apply(globalWithLateness);
  PCollection<String> concatenated = windowed.apply(ParDo.of(statefulConcat()));
  PAssert.that(concatenated).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
 
Example 5
Source Project: beam   Source File: ToIsmRecordForMultimapDoFnFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code ToIsmRecordForMultimapParDoFn} from the coder stored under the encoding
 * property of {@code cloudUserFn}. That coder must be an {@link IsmRecordCoder}; its first two
 * coder arguments are paired into a {@link KvCoder} for the new ParDoFn.
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> decodedCoder = CloudObjects.coderFromCloudObject(encodingSpec);
  checkState(
      decodedCoder instanceof IsmRecordCoder,
      "Expected to received an instanceof an %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      decodedCoder);

  IsmRecordCoder<?> ismCoder = (IsmRecordCoder<?>) decodedCoder;
  Coder<?> firstArgument = ismCoder.getCoderArguments().get(0);
  Coder<?> secondArgument = ismCoder.getCoderArguments().get(1);
  return new ToIsmRecordForMultimapParDoFn(KvCoder.of(firstArgument, secondArgument));
}
 
Example 6
Source Project: beam   Source File: CombineTranslation.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Asks {@code combineFn} for its accumulator coder, using the pipeline's coder registry and the
 * value coder of the transform's single main input.
 *
 * @throws IOException if the combine function cannot provide an accumulator coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<KV<K, InputT>>, ?, Combine.PerKey<K, InputT, ?>> transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, InputT>> keyedInput =
        (PCollection<KV<K, InputT>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    KvCoder<K, InputT> keyedInputCoder = (KvCoder<K, InputT>) keyedInput.getCoder();
    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(), keyedInputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
 
Example 7
Source Project: DataflowTemplates   Source File: WriteToGCSTextTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that configuring {@link WriteToGCSText} with a {@code null} output directory fails with
 * an {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(null) // the invalid setting under test
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());
  pipeline.run();
}
 
Example 8
Source Project: DataflowTemplates   Source File: WriteToGCSTextTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that configuring {@link WriteToGCSText} with a {@code null} temporary location fails
 * with an {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutTempLocation() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withTempLocation(tempLocation) called with null input. ");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSText.newBuilder()
              .withOutputDirectory(FAKE_DIR)
              .withOutputFilenamePrefix(TEXT_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(null) // the invalid setting under test
              .build());
  pipeline.run();
}
 
Example 9
Source Project: beam   Source File: ReduceFnTester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link ReduceFnTester} for the given {@link WindowingStrategy} and {@link CombineFn}.
 * The {@link TriggerStateMachine} is derived from the {@link Trigger} configured on the {@link
 * WindowingStrategy}.
 */
public static <W extends BoundedWindow, AccumT, OutputT>
    ReduceFnTester<Integer, OutputT, W> combining(
        WindowingStrategy<?, W> strategy,
        CombineFn<Integer, AccumT, OutputT> combineFn,
        Coder<OutputT> outputCoder)
        throws Exception {

  // Ensure that the CombineFn can be converted into an AppliedCombineFn; the result is unused.
  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  KvCoder<String, Integer> kvInputCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  AppliedCombineFn.withInputCoder(combineFn, coderRegistry, kvInputCoder);

  TriggerStateMachine triggerStateMachine =
      TriggerStateMachines.stateMachineForTrigger(
          TriggerTranslation.toProto(strategy.getTrigger()));
  return combining(strategy, triggerStateMachine, combineFn, outputCoder);
}
 
Example 10
Source Project: DataflowTemplates   Source File: WriteToGCSAvroTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that configuring {@link WriteToGCSAvro} with a {@code null} output directory fails with
 * an {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testWriteWithoutOutputDirectory() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withOutputDirectory(outputDirectory) called with null input.");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSAvro.newBuilder()
              .withOutputDirectory(null) // the invalid setting under test
              .withOutputFilenamePrefix(AVRO_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              .withTempLocation(FAKE_TEMP_LOCATION)
              .build());
  pipeline.run();
}
 
Example 11
Source Project: beam   Source File: GroupByKeyTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Selects the {@link SystemReduceFn} matching the transform: a buffering one for {@code
 * GroupByKey}, a combining one for {@code Combine.PerKey}. Any other transform is rejected with
 * a {@link RuntimeException}.
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
 
Example 12
Source Project: beam   Source File: CombineValuesFnFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps an {@code ExtractOutputDoFn} for the given combine function in a {@link DoFnInfo}. The
 * input coder pairs the key coder with the accumulator coder when the combine function has a
 * KV coder, and is {@code null} otherwise.
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, AccumT>, KV<K, OutputT>> extractOutputFn =
      new ExtractOutputDoFn<>(runner, sideInputReader);

  KvCoder<K, AccumT> accumulatorKvCoder =
      combineFn.getKvCoder() != null
          ? KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder())
          : null;

  return DoFnInfo.forFn(
      extractOutputFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      accumulatorKvCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 13
Source Project: beam   Source File: SortValues.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Retrieves the {@link Coder} for the secondary key-value pairs.
 *
 * <p>The input coder must be a {@link KvCoder} whose value coder is an {@link IterableCoder}
 * whose element coder is again a {@link KvCoder}; otherwise an {@link IllegalStateException} is
 * thrown.
 */
@SuppressWarnings("unchecked")
private static <PrimaryKeyT, SecondaryKeyT, ValueT>
    KvCoder<SecondaryKeyT, ValueT> getSecondaryKeyValueCoder(
        Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("SortValues requires its input to use KvCoder");
  }
  KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>> primaryCoder =
      (KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>) inputCoder;

  Coder<Iterable<KV<SecondaryKeyT, ValueT>>> valuesCoder = primaryCoder.getValueCoder();
  if (!(valuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "SortValues requires the values be encoded with IterableCoder");
  }

  Coder<KV<SecondaryKeyT, ValueT>> secondaryCoder =
      ((IterableCoder<KV<SecondaryKeyT, ValueT>>) valuesCoder).getElemCoder();
  if (!(secondaryCoder instanceof KvCoder)) {
    throw new IllegalStateException(
        "SortValues requires the secondary key-value pairs to use KvCoder");
  }
  return (KvCoder<SecondaryKeyT, ValueT>) secondaryCoder;
}
 
Example 14
Source Project: beam   Source File: GroupNonMergingWindowsFunctions.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a composite key from K and W and groups all values under that composite key using
 * Spark's repartitionAndSortWithinPartitions. The resulting stream, sorted by composite key, is
 * turned into keys with an iterator over all of their values (via {@link GroupByKeyIterator}).
 *
 * <p>repartitionAndSortWithinPartitions is used because the values are streamed through an
 * iterator instead of being collected into memory at once, unlike GroupByKey (it minimizes
 * memory pressure).
 */
static <K, V, W extends BoundedWindow>
    JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupByKeyAndWindow(
        JavaRDD<WindowedValue<KV<K, V>>> rdd,
        Coder<K> keyCoder,
        Coder<V> valueCoder,
        WindowingStrategy<?, W> windowingStrategy,
        Partitioner partitioner) {
  final Coder<W> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  FullWindowedValueCoder<KV<K, V>> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(KvCoder.of(keyCoder, valueCoder), windowCoder);

  JavaPairRDD<ByteArray, byte[]> keyedByWindow =
      bringWindowToKey(rdd, keyCoder, windowCoder, wv -> CoderHelpers.toByteArray(wv, wvCoder));
  JavaPairRDD<ByteArray, byte[]> sortedWithinPartitions =
      keyedByWindow.repartitionAndSortWithinPartitions(getPartitioner(partitioner, rdd));

  return sortedWithinPartitions
      .mapPartitions(it -> new GroupByKeyIterator<>(it, keyCoder, windowingStrategy, wvCoder))
      .filter(Objects::nonNull); // filter last null element from GroupByKeyIterator
}
 
Example 15
Source Project: beam   Source File: DataflowSideInputHandlerFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a multimap side input handler built by {@link DataflowSideInputHandlerFactory}
 * returns an empty iterable when asked for the key {@code "foo2"} in the global window.
 */
@Test
public void emptyResultForEmptyCollection() {
  ImmutableMap<String, SideInputReader> readersByTransform =
      ImmutableMap.<String, SideInputReader>builder()
          .put(TRANSFORM_ID, fakeSideInputReader)
          .build();

  ImmutableMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById =
      ImmutableMap.<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>builder()
          .put(sideInputId, view)
          .build();

  DataflowSideInputHandlerFactory handlerFactory =
      DataflowSideInputHandlerFactory.of(readersByTransform, viewsById);
  KvCoder<String, Integer> elementCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  MultimapSideInputHandler<String, Integer, GlobalWindow> multimapHandler =
      handlerFactory.forMultimapSideInput(
          TRANSFORM_ID, SIDE_INPUT_NAME, elementCoder, GlobalWindow.Coder.INSTANCE);

  Iterable<Integer> values = multimapHandler.get("foo2", GlobalWindow.INSTANCE);
  assertThat(values, emptyIterable());
}
 
Example 16
Source Project: beam   Source File: StreamingDataflowWorker.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the userland key coder, if any, from the coder used in the initial read step of a
 * stage. This encodes many assumptions about how the streaming execution context works.
 *
 * <p>Returns the key coder when the windowed value's inner coder is a {@link KvCoder} or a
 * {@code FakeKeyedWorkItemCoder}, and {@code null} for any other inner coder. A read coder that
 * is not a {@link WindowedValueCoder} is rejected with a {@link RuntimeException}.
 */
@Nullable
private Coder<?> extractKeyCoder(Coder<?> readCoder) {
  if (readCoder instanceof WindowedValueCoder) {
    Coder<?> elementCoder = ((WindowedValueCoder<?>) readCoder).getValueCoder();

    if (elementCoder instanceof KvCoder<?, ?>) {
      return ((KvCoder<?, ?>) elementCoder).getKeyCoder();
    }
    // Note that TimerOrElementCoder is a backwards-compatibility class
    // that is really a FakeKeyedWorkItemCoder
    if (elementCoder instanceof WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) {
      return ((WindmillKeyedWorkItem.FakeKeyedWorkItemCoder<?, ?>) elementCoder).getKeyCoder();
    }
    return null;
  }

  throw new RuntimeException(
      String.format(
          "Expected coder for streaming read to be %s, but received %s",
          WindowedValueCoder.class.getSimpleName(), readCoder));
}
 
Example 17
Source Project: beam   Source File: CloningBundleFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Commits one grouped element into a keyed bundle and checks that the bundle holds an equal
 * element whose value is a different instance, and that the bundle reports the expected
 * PCollection and key.
 */
@Test
public void keyedBundleWorkingCoderSucceedsClonesOutput() {
  PCollection<Integer> ints = p.apply(Create.of(1, 3).withCoder(VarIntCoder.of()));

  PCollection<KV<String, Iterable<Integer>>> grouped =
      ints.apply(WithKeys.of("foo"))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .apply(GroupByKey.create());
  WindowedValue<KV<String, Iterable<Integer>>> original =
      WindowedValue.valueInGlobalWindow(
          KV.<String, Iterable<Integer>>of("foo", ImmutableList.of(1, 3)));
  CommittedBundle<KV<String, Iterable<Integer>>> committed =
      factory
          .createKeyedBundle(StructuralKey.of("foo", StringUtf8Coder.of()), grouped)
          .add(original)
          .commit(Instant.now());

  assertThat(committed.getElements(), containsInAnyOrder(original));

  // The committed value must be equal to the added one without being the same instance.
  KV<String, Iterable<Integer>> committedValue =
      Iterables.getOnlyElement(committed.getElements()).getValue();
  assertThat(committedValue, not(theInstance(original.getValue())));

  assertThat(committed.getPCollection(), equalTo(grouped));
  assertThat(committed.getKey(), equalTo(StructuralKey.of("foo", StringUtf8Coder.of())));
}
 
Example 18
Source Project: beam   Source File: SdkComponentsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exercises coder registration on {@code components}: registering the same instance or an equal
 * coder again must return the original id, a different coder must return a different id, and
 * both ids are then looked up in the resulting components.
 */
@Test
public void registerCoder() throws IOException {
  Coder<?> coder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  String id = components.registerCoder(coder);
  // Registering the very same instance a second time must hand back the first id.
  assertThat(components.registerCoder(coder), equalTo(id));
  assertThat(id, not(isEmptyOrNullString()));
  // A separately constructed coder with the same structure must map to the same id.
  Coder<?> equalCoder =
      KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(SetCoder.of(ByteArrayCoder.of())));
  assertThat(components.registerCoder(equalCoder), equalTo(id));
  // A structurally different coder must receive its own id.
  Coder<?> otherCoder = VarLongCoder.of();
  assertThat(components.registerCoder(otherCoder), not(equalTo(id)));

  // Look both ids up in the built components; presumably these calls throw if an id is missing.
  components.toComponents().getCodersOrThrow(id);
  components.toComponents().getCodersOrThrow(components.registerCoder(otherCoder));
}
 
Example 19
Source Project: beam   Source File: JacksonTransformsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code AsJsons} with its default exception handling over valid and invalid POJOs, then
 * checks the successful output against {@code VALID_JSONS} and delegates the failure checks to
 * {@code assertWritingWithErrorMapHandler}.
 */
@Test
public void testWritingInvalidJsonsWithFailuresDefaultHandler() {
  PCollection<MyPojo> pojos =
      pipeline.apply(
          Create.of(Iterables.concat(POJOS, INVALID_POJOS))
              .withCoder(SerializableCoder.of(MyPojo.class)));
  WithFailures.Result<PCollection<String>, KV<MyPojo, Map<String, String>>> result =
      pojos.apply(AsJsons.of(MyPojo.class).exceptionsVia());

  // Set coders explicitly on both the output and the failure collections.
  result.output().setCoder(StringUtf8Coder.of());
  result
      .failures()
      .setCoder(
          KvCoder.of(
              SerializableCoder.of(MyPojo.class),
              MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));

  PAssert.that(result.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorMapHandler(result);

  pipeline.run();
}
 
Example 20
Source Project: beam   Source File: GroupNonMergingWindowsFunctionsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link GroupByKeyIterator} over five items in the given window: two for key {@code
 * "k1"} followed by three for key {@code "k2"}.
 */
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {

  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedKvCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
          winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> itemFactory =
      ItemFactory.forWindow(stringCoder, windowedKvCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> encodedItems =
      Arrays.asList(
          itemFactory.create("k1", 1),
          itemFactory.create("k1", 2),
          itemFactory.create("k2", 3),
          itemFactory.create("k2", 4),
          itemFactory.create("k2", 5));

  return new GroupByKeyIterator<>(
      encodedItems.iterator(), stringCoder, winStrategy, windowedKvCoder);
}
 
Example 21
Source Project: beam   Source File: Reify.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps the {@link TimestampedValue} of every KV, emitting the bare value with the timestamp
 * carried by the wrapper. The output coder reuses the input's key coder and the value coder
 * nested in the input's {@link TimestampedValueCoder}.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, TimestampedValue<V>>> input) {
  KvCoder<K, TimestampedValue<V>> inputKvCoder =
      (KvCoder<K, TimestampedValue<V>>) input.getCoder();
  TimestampedValueCoder<V> timestampedCoder =
      (TimestampedValueCoder<V>) inputKvCoder.getValueCoder();
  KvCoder<K, V> outputKvCoder =
      KvCoder.of(inputKvCoder.getKeyCoder(), timestampedCoder.getValueCoder());

  return input
      .apply(
          ParDo.of(
              new DoFn<KV<K, TimestampedValue<V>>, KV<K, V>>() {
                @Override
                public Duration getAllowedTimestampSkew() {
                  // Declare the largest skew a long of milliseconds can express.
                  return Duration.millis(Long.MAX_VALUE);
                }

                @ProcessElement
                public void processElement(
                    @Element KV<K, TimestampedValue<V>> kv, OutputReceiver<KV<K, V>> r) {
                  TimestampedValue<V> stamped = kv.getValue();
                  r.outputWithTimestamp(
                      KV.of(kv.getKey(), stamped.getValue()), stamped.getTimestamp());
                }
              }))
      .setCoder(outputKvCoder);
}
 
Example 22
Source Project: beam   Source File: WriteFiles.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code FinalizeFn} to the file results (with the configured side inputs plus the
 * shard-count view when present), reshuffles the resulting filenames, and wraps them in a
 * {@link WriteFilesResult}.
 */
@Override
public WriteFilesResult<DestinationT> expand(
    PCollection<List<FileResult<DestinationT>>> input) {

  List<PCollectionView<?>> sideInputsForFinalize = Lists.newArrayList(getSideInputs());
  if (numShardsView != null) {
    sideInputsForFinalize.add(numShardsView);
  }

  PCollection<KV<DestinationT, String>> finalizedFilenames =
      input
          .apply("Finalize", ParDo.of(new FinalizeFn()).withSideInputs(sideInputsForFinalize))
          .setCoder(KvCoder.of(destinationCoder, StringUtf8Coder.of()));
  // Reshuffle the filenames to make sure they are observable downstream
  // only after each one is done finalizing.
  PCollection<KV<DestinationT, String>> outputFilenames =
      finalizedFilenames.apply(Reshuffle.viaRandomKey());

  TupleTag<KV<DestinationT, String>> filenamesTag =
      new TupleTag<>("perDestinationOutputFilenames");
  return WriteFilesResult.in(input.getPipeline(), filenamesTag, outputFilenames);
}
 
Example 23
Source Project: beam   Source File: SplittableProcessFnFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes the user's {@link DoFnInfo} from {@code cloudUserFn}, decodes the restriction
 * coder (which must be a {@link KvCoder}), and returns a new {@link DoFnInfo} around a {@code
 * ProcessFn} built from both.
 */
@Override
public DoFnInfo<?, ?> getDoFnInfo(CloudObject cloudUserFn) throws Exception {
  DoFnInfo<?, ?> userDoFnInfo =
      (DoFnInfo<?, ?>)
          deserializeFromByteArray(
              getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "Serialized DoFnInfo");
  Coder restrictionAndStateCoder =
      coderFromCloudObject(
          fromSpec(getObject(cloudUserFn, WorkerPropertyNames.RESTRICTION_CODER)));
  checkState(
      restrictionAndStateCoder instanceof KvCoder,
      "Expected pair coder with restriction as key coder and watermark estimator state as value coder, but received %s.",
      restrictionAndStateCoder);

  // Key coder encodes the restriction, value coder the watermark estimator state.
  KvCoder restrictionAndStateKvCoder = (KvCoder) restrictionAndStateCoder;
  Coder restrictionCoder = restrictionAndStateKvCoder.getKeyCoder();
  Coder watermarkEstimatorStateCoder = restrictionAndStateKvCoder.getValueCoder();

  ProcessFn splittableProcessFn =
      new ProcessFn(
          userDoFnInfo.getDoFn(),
          userDoFnInfo.getInputCoder(),
          restrictionCoder,
          watermarkEstimatorStateCoder,
          userDoFnInfo.getWindowingStrategy());

  return DoFnInfo.forFn(
      splittableProcessFn,
      userDoFnInfo.getWindowingStrategy(),
      userDoFnInfo.getSideInputViews(),
      KeyedWorkItemCoder.of(
          ByteArrayCoder.of(),
          KvCoder.of(userDoFnInfo.getInputCoder(), restrictionCoder),
          userDoFnInfo.getWindowingStrategy().getWindowFn().windowCoder()),
      userDoFnInfo.getOutputCoders(),
      userDoFnInfo.getMainOutput(),
      userDoFnInfo.getDoFnSchemaInformation(),
      userDoFnInfo.getSideInputMapping());
}
 
Example 24
Source Project: beam   Source File: StreamingDataflowWorkerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a source -> {@code KeyTokenInvalidFn} -> sink instruction chain against a fake Windmill
 * server, offers two work items for the key {@code "key"}, and expects exactly one commit, for
 * the second work item. Skipped (returns early) when {@code streamingEngine} is set.
 */
@Test
public void testKeyTokenInvalidException() throws Exception {
  if (streamingEngine) {
    // TODO: This test needs to be adapted to work with streamingEngine=true.
    return;
  }
  KvCoder<String, String> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  // Source, the DoFn under test, and a sink, all using the same KV coder.
  List<ParallelInstruction> instructions =
      Arrays.asList(
          makeSourceInstruction(kvCoder),
          makeDoFnInstruction(new KeyTokenInvalidFn(), 0, kvCoder),
          makeSinkInstruction(kvCoder, 1));

  FakeWindmillServer server = new FakeWindmillServer(errorCollector);
  // The first work item is queued before the worker starts.
  server.addWorkToOffer(makeInput(0, 0, "key"));

  StreamingDataflowWorker worker =
      makeWorker(instructions, createTestingPipelineOptions(server), true /* publishCounters */);
  worker.start();

  // Wait until the first work item has been taken off the queue before offering the second.
  server.waitForEmptyWorkQueue();

  server.addWorkToOffer(makeInput(1, 0, "key"));

  Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);

  // Only the second work item (looked up under 1L) is expected to have produced a commit.
  assertEquals(makeExpectedOutput(1, 0, "key", "key").build(), result.get(1L));
  assertEquals(1, result.size());
}
 
Example 25
Source Project: beam   Source File: CoderTranslators.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a translator for {@link KvCoder}: its components are the key coder followed by the
 * value coder, and a {@link KvCoder} is rebuilt from the first two components.
 */
static CoderTranslator<KvCoder<?, ?>> kv() {
  return new SimpleStructuredCoderTranslator<KvCoder<?, ?>>() {
    @Override
    public List<? extends Coder<?>> getComponents(KvCoder<?, ?> from) {
      Coder<?> keyCoder = from.getKeyCoder();
      Coder<?> valueCoder = from.getValueCoder();
      return ImmutableList.of(keyCoder, valueCoder);
    }

    @Override
    public KvCoder<?, ?> fromComponents(List<Coder<?>> components) {
      Coder<?> keyCoder = components.get(0);
      Coder<?> valueCoder = components.get(1);
      return KvCoder.of(keyCoder, valueCoder);
    }
  };
}
 
Example 26
Source Project: beam   Source File: ViewTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a multimap side input whose key coder is a {@code NonDeterministicStringCoder}, looks
 * up each main-input word by its first letter, and checks the emitted (word, value) pairs.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testMultimapSideInputWithNonDeterministicKeyCoder() {

  final PCollectionView<Map<String, Iterable<Integer>>> valuesByLetter =
      pipeline
          .apply(
              "CreateSideInput",
              Create.of(KV.of("a", 1), KV.of("a", 1), KV.of("a", 2), KV.of("b", 3))
                  .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())))
          .apply(View.asMultimap());

  PCollection<KV<String, Integer>> wordsWithValues =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          // The side input is keyed by the first character of each word.
                          String firstLetter = c.element().substring(0, 1);
                          for (Integer value : c.sideInput(valuesByLetter).get(firstLetter)) {
                            c.output(KV.of(c.element(), value));
                          }
                        }
                      })
                  .withSideInputs(valuesByLetter));

  PAssert.that(wordsWithValues)
      .containsInAnyOrder(
          KV.of("apple", 1),
          KV.of("apple", 1),
          KV.of("apple", 2),
          KV.of("banana", 3),
          KV.of("blackberry", 3));

  pipeline.run();
}
 
Example 27
/**
 * Runs the shared sliding-windows combining check from {@code GroupAlsoByWindowProperties}
 * against a {@code CombiningGABWViaOutputBufferDoFnFactory} that sums longs per string key.
 */
@Test
public void testCombinesElementsInSlidingWindows() throws Exception {
  CombineFn<Long, ?, Long> sumFn = Sum.ofLongs();
  KvCoder<String, Long> kvInputCoder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  AppliedCombineFn<String, Long, ?, Long> appliedSumFn =
      AppliedCombineFn.withInputCoder(sumFn, CoderRegistry.createDefault(), kvInputCoder);

  GroupAlsoByWindowProperties.combinesElementsInSlidingWindows(
      new CombiningGABWViaOutputBufferDoFnFactory<>(StringUtf8Coder.of(), appliedSumFn), sumFn);
}
 
Example 28
Source Project: DataflowTemplates   Source File: KafkaIO.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Pairs every value with a {@code null} key, encodes the pairs with a {@code NullOnlyCoder} for
 * the key and the input's own coder for the value, and hands them to {@code kvWriteTransform}.
 */
@Override
public PDone expand(PCollection<V> input) {
  SimpleFunction<V, KV<K, V>> attachNullKey =
      new SimpleFunction<V, KV<K, V>>() {
        @Override
        public KV<K, V> apply(V element) {
          return KV.of(null, element);
        }
      };

  return input
      .apply("Kafka values with default key", MapElements.via(attachNullKey))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
 
Example 29
Source Project: beam   Source File: GroupingTablesTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Exercises a buffering grouping table with a maximum size of 1000: values are put under
 * several keys, and the test checks which groups have been emitted to the receiver after
 * selected puts and after the final flush.
 */
@Test
public void testBufferingGroupingTable() throws Exception {
  GroupingTableBase<String, String, List<String>> table =
      (GroupingTableBase<String, String, List<String>>)
          GroupingTables.buffering(
              new IdentityGroupingKeyCreator(), new KvPairInfo(),
              new StringPowerSizeEstimator(), new StringPowerSizeEstimator());
  table.setMaxSize(1000);
  TestOutputReceiver receiver =
      new TestOutputReceiver(
          KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())),
          NameContextsForTests.nameContextForTest());

  // After these four puts nothing is expected to have been emitted yet.
  table.put("A", "a", receiver);
  table.put("B", "b1", receiver);
  table.put("B", "b2", receiver);
  table.put("C", "c", receiver);
  assertThat(receiver.outputElems, empty());

  // Adding "cccc" under C is expected to emit C's group; presumably the estimated size of the
  // table exceeds the maximum at this point (the size estimators are not visible here).
  table.put("C", "cccc", receiver);
  assertThat(receiver.outputElems, hasItem((Object) KV.of("C", Arrays.asList("c", "cccc"))));

  // A put under the key "DDDD" is expected to be emitted right away.
  table.put("DDDD", "d", receiver);
  assertThat(receiver.outputElems, hasItem((Object) KV.of("DDDD", Arrays.asList("d"))));

  // After the flush, every group put so far must be present in the receiver.
  table.flush(receiver);
  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          KV.of("A", Arrays.asList("a")),
          KV.of("B", Arrays.asList("b1", "b2")),
          KV.of("C", Arrays.asList("c", "cccc")),
          KV.of("DDDD", Arrays.asList("d"))));
}
 
Example 30
Source Project: beam   Source File: GroupByKeyTranslatorBatch.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key transform in two steps: the input dataset is first grouped by key
 * only, then each group is passed through {@code GroupAlsoByWindowViaOutputBufferFn} with a
 * buffering {@link SystemReduceFn}. The resulting dataset is registered as the transform's
 * output.
 */
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform,
    TranslationContext context) {

  @SuppressWarnings("unchecked")
  final PCollection<KV<K, V>> inputPCollection = (PCollection<KV<K, V>>) context.getInput();
  Dataset<WindowedValue<KV<K, V>>> inputDataset = context.getDataset(inputPCollection);
  WindowingStrategy<?, ?> windowingStrategy = inputPCollection.getWindowingStrategy();
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputPCollection.getCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();

  // group by key only
  KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupedByKey =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // group also by windows
  KvCoder<K, Iterable<V>> groupedKvCoder = KvCoder.of(keyCoder, IterableCoder.of(valueCoder));
  WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> outputCoder =
      WindowedValue.FullWindowedValueCoder.of(
          groupedKvCoder, windowingStrategy.getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, Iterable<V>>>> groupedByWindow =
      groupedByKey.flatMapGroups(
          new GroupAlsoByWindowViaOutputBufferFn<>(
              windowingStrategy,
              new InMemoryStateInternalsFactory<>(),
              SystemReduceFn.buffering(valueCoder),
              context.getSerializableOptions()),
          EncoderHelpers.fromBeamCoder(outputCoder));

  context.putDataset(context.getOutput(), groupedByWindow);
}