Java Code Examples for org.apache.beam.sdk.coders.KvCoder#getValueCoder()

The following examples show how to use org.apache.beam.sdk.coders.KvCoder#getValueCoder(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: SortValues.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Digs the {@link KvCoder} of the secondary key-value pairs out of the input's {@link Coder}.
 *
 * <p>The input coder has to be a {@link KvCoder} whose value coder is an {@link IterableCoder}
 * whose element coder is, in turn, a {@link KvCoder}.
 *
 * @param inputCoder coder of the grouped input
 * @return the element coder of the grouped values, viewed as a {@link KvCoder}
 * @throws IllegalStateException if any of the three layers has an unexpected coder type
 */
@SuppressWarnings("unchecked")
private static <PrimaryKeyT, SecondaryKeyT, ValueT>
    KvCoder<SecondaryKeyT, ValueT> getSecondaryKeyValueCoder(
        Coder<KV<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>> inputCoder) {
  // Layer 1: the outer coder must be a KvCoder.
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("SortValues requires its input to use KvCoder");
  }
  Coder<Iterable<KV<SecondaryKeyT, ValueT>>> groupedValuesCoder =
      ((KvCoder<PrimaryKeyT, Iterable<KV<SecondaryKeyT, ValueT>>>) inputCoder).getValueCoder();

  // Layer 2: its value side must be an IterableCoder.
  if (!(groupedValuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "SortValues requires the values be encoded with IterableCoder");
  }
  Coder<KV<SecondaryKeyT, ValueT>> secondaryPairCoder =
      ((IterableCoder<KV<SecondaryKeyT, ValueT>>) groupedValuesCoder).getElemCoder();

  // Layer 3: the iterable's elements must themselves be encoded with a KvCoder.
  if (!(secondaryPairCoder instanceof KvCoder)) {
    throw new IllegalStateException(
        "SortValues requires the secondary key-value pairs to use KvCoder");
  }
  return (KvCoder<SecondaryKeyT, ValueT>) secondaryPairCoder;
}
 
Example 2
Source File: Reify.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Unwraps each {@code KV<K, TimestampedValue<V>>} into a {@code KV<K, V>} that is output with
 * the timestamp carried by the wrapped value.
 *
 * <p>The output coder is assembled from the key coder of the input and the value coder nested
 * inside the input's {@link TimestampedValueCoder}.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, TimestampedValue<V>>> input) {
  KvCoder<K, TimestampedValue<V>> inputKvCoder =
      (KvCoder<K, TimestampedValue<V>>) input.getCoder();
  TimestampedValueCoder<V> timestampedCoder =
      (TimestampedValueCoder<V>) inputKvCoder.getValueCoder();

  DoFn<KV<K, TimestampedValue<V>>, KV<K, V>> unwrapFn =
      new DoFn<KV<K, TimestampedValue<V>>, KV<K, V>>() {
        @Override
        public Duration getAllowedTimestampSkew() {
          // Largest skew expressible in milliseconds.
          return Duration.millis(Long.MAX_VALUE);
        }

        @ProcessElement
        public void processElement(
            @Element KV<K, TimestampedValue<V>> element, OutputReceiver<KV<K, V>> receiver) {
          KV<K, V> unwrapped = KV.of(element.getKey(), element.getValue().getValue());
          receiver.outputWithTimestamp(unwrapped, element.getValue().getTimestamp());
        }
      };

  KvCoder<K, V> unwrappedCoder =
      KvCoder.of(inputKvCoder.getKeyCoder(), timestampedCoder.getValueCoder());
  return input.apply(ParDo.of(unwrapFn)).setCoder(unwrappedCoder);
}
 
Example 3
Source File: Combine.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the coder of the ungrouped {@code KV<K, InputT>} elements from the coder of the grouped
 * {@code KV<K, Iterable<InputT>>} input.
 *
 * @param inputCoder coder of the grouped input; must be a {@link KvCoder} whose value coder is an
 *     {@link IterableCoder}
 * @return a {@link KvCoder} pairing the input's key coder with the iterable's element coder
 * @throws IllegalStateException if either coder has an unexpected type
 */
private KvCoder<K, InputT> getKvCoder(
    Coder<? extends KV<K, ? extends Iterable<InputT>>> inputCoder) {
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("Combine.GroupedValues requires its input to use KvCoder");
  }
  @SuppressWarnings({"unchecked", "rawtypes"})
  KvCoder<K, ? extends Iterable<InputT>> groupedCoder = (KvCoder) inputCoder;

  Coder<? extends Iterable<InputT>> groupedValuesCoder = groupedCoder.getValueCoder();
  if (!(groupedValuesCoder instanceof IterableCoder)) {
    throw new IllegalStateException(
        "Combine.GroupedValues requires its input values to use " + "IterableCoder");
  }
  @SuppressWarnings("unchecked")
  IterableCoder<InputT> iterableCoder = (IterableCoder<InputT>) groupedValuesCoder;

  return KvCoder.of(groupedCoder.getKeyCoder(), iterableCoder.getElemCoder());
}
 
Example 4
Source File: CombineTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asks {@code combineFn} for its accumulator coder, based on the element coder of the grouped
 * main input of {@code transform}.
 *
 * @param combineFn combine function whose accumulator coder is requested
 * @param transform applied {@code Combine.GroupedValues} transform; its single non-additional
 *     input is expected to use a {@link KvCoder} with an {@link IterableCoder} value coder
 * @return the accumulator coder reported by {@code combineFn}
 * @throws IOException if the combine function cannot provide an accumulator coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<
            PCollection<KV<K, Iterable<InputT>>>, ?, Combine.GroupedValues<K, InputT, ?>>
        transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, Iterable<InputT>>> groupedInput =
        (PCollection<KV<K, Iterable<InputT>>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    KvCoder<K, Iterable<InputT>> groupedCoder =
        (KvCoder<K, Iterable<InputT>>) groupedInput.getCoder();
    Coder<InputT> elementCoder =
        ((IterableCoder<InputT>) groupedCoder.getValueCoder()).getElemCoder();
    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(), elementCoder);
  } catch (CannotProvideCoderException e) {
    // Surface the failure as an IOException, keeping the original cause.
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
 
Example 5
Source File: GroupByKeyViaGroupByKeyOnly.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the primitive output collection of {@code KV<K, Iterable<V>>}, deriving its coder by
 * removing the {@code WindowedValue} layer from the input's value coder.
 *
 * <p>The input coder is cast, without prior type checks, to a {@link KvCoder} whose value coder
 * is an {@link IterableCoder} of a {@code WindowedValueCoder}.
 */
@Override
public PCollection<KV<K, Iterable<V>>> expand(
    PCollection<KV<K, Iterable<WindowedValue<V>>>> input) {
  @SuppressWarnings("unchecked")
  KvCoder<K, Iterable<WindowedValue<V>>> groupedCoder =
      (KvCoder<K, Iterable<WindowedValue<V>>>) input.getCoder();

  // Peel the value side layer by layer: Iterable -> WindowedValue -> V.
  IterableCoder<WindowedValue<V>> windowedValuesCoder =
      (IterableCoder<WindowedValue<V>>) groupedCoder.getValueCoder();
  WindowedValueCoder<V> windowedValueCoder =
      (WindowedValueCoder<V>) windowedValuesCoder.getElemCoder();

  // Re-wrap the bare value coder: V -> Iterable<V> -> KV<K, Iterable<V>>.
  Coder<Iterable<V>> plainValuesCoder = IterableCoder.of(windowedValueCoder.getValueCoder());
  Coder<KV<K, Iterable<V>>> resultCoder =
      KvCoder.of(groupedCoder.getKeyCoder(), plainValuesCoder);

  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), windowingStrategy, input.isBounded(), resultCoder);
}
 
Example 6
Source File: CoGroupByKey.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value coder for the given PCollection.
 *
 * @param pCollection collection whose coder must be a {@link KvCoder}
 * @return the value component of that {@link KvCoder}
 * @throws IllegalArgumentException if the collection's coder is not a {@link KvCoder}
 */
private <V> Coder<V> getValueCoder(PCollection<KV<K, V>> pCollection) {
  Coder<?> elementCoder = pCollection.getCoder();
  if (elementCoder instanceof KvCoder<?, ?>) {
    @SuppressWarnings("unchecked")
    KvCoder<K, V> kvCoder = (KvCoder<K, V>) elementCoder;
    return kvCoder.getValueCoder();
  }
  throw new IllegalArgumentException("PCollection does not use a KvCoder");
}
 
Example 7
Source File: GroupByKeyTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GroupByKey} into TSet operations and registers the resulting data set as
 * the output of {@code transform} in {@code context}.
 *
 * <p>The input collection's coder is cast to {@link KvCoder} without a prior type check.
 *
 * @param transform the {@code GroupByKey} being translated
 * @param context translation context providing the input data set and receiving the output
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  final KvCoder<K, V> coder = (KvCoder<K, V>) input.getCoder();
  Coder<K> inputKeyCoder = coder.getKeyCoder();
  // Raw WindowingStrategy: the cast of its WindowFn below is therefore unchecked.
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  // Coder for windowed values, built from the input's value coder and the WindowFn's window coder.
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  // Turn every element into a (byte[], byte[]) tuple; the function is given the key coder and
  // the windowed value coder (presumably to encode key and value -- confirm in MapToTupleFunction).
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  // Gather the tuples by key and map them back to KV<K, Iterable<WindowedValue<V>>>.
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
          keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  SystemReduceFnBuffering reduceFnBuffering = new SystemReduceFnBuffering(coder.getValueCoder());
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
          groupedbyKeyTset
              .direct()
              .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                  new GroupByWindowFunction(
                      windowingStrategy, reduceFnBuffering, context.getOptions()));
  // Publish the grouped data set under the transform's output collection.
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
 
Example 8
Source File: GroupByKeyTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key transform: the input dataset is first grouped by key only, then each
 * key's values are grouped by window, and the result is stored as the output dataset of
 * {@code context}.
 *
 * <p>The input collection's coder is cast to {@link KvCoder} without a prior type check.
 */
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform,
    TranslationContext context) {

  @SuppressWarnings("unchecked")
  final PCollection<KV<K, V>> source = (PCollection<KV<K, V>>) context.getInput();
  Dataset<WindowedValue<KV<K, V>>> sourceDataset = context.getDataset(source);
  WindowingStrategy<?, ?> strategy = source.getWindowingStrategy();

  KvCoder<K, V> sourceCoder = (KvCoder<K, V>) source.getCoder();
  Coder<K> keyCoder = sourceCoder.getKeyCoder();
  Coder<V> valueCoder = sourceCoder.getValueCoder();

  // Step 1: group by key only.
  KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> keyedGroups =
      sourceDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // Step 2: within each key, group also by windows.
  WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> groupedCoder =
      WindowedValue.FullWindowedValueCoder.of(
          KvCoder.of(keyCoder, IterableCoder.of(valueCoder)),
          strategy.getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, Iterable<V>>>> grouped =
      keyedGroups.flatMapGroups(
          new GroupAlsoByWindowViaOutputBufferFn<>(
              strategy,
              new InMemoryStateInternalsFactory<>(),
              SystemReduceFn.buffering(valueCoder),
              context.getSerializableOptions()),
          EncoderHelpers.fromBeamCoder(groupedCoder));

  context.putDataset(context.getOutput(), grouped);
}
 
Example 9
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key node of a portable pipeline into RDD operations and pushes the
 * grouped result onto {@code context} under the node's output id.
 *
 * <p>The input dataset is popped from {@code context} and cast to {@code BoundedDataset}; the
 * input's value coder is cast to {@link KvCoder}. Neither cast is preceded by a type check.
 *
 * @param transformNode the node being translated
 * @param pipeline pipeline proto whose components are used to look up the coder and the
 *     windowing strategy of the input
 * @param context translation context holding the datasets
 */
private static <K, V> void translateGroupByKey(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {

  RunnerApi.Components components = pipeline.getComponents();
  String inputId = getInputId(transformNode);
  Dataset inputDataset = context.popDataset(inputId);
  JavaRDD<WindowedValue<KV<K, V>>> inputRdd = ((BoundedDataset<KV<K, V>>) inputDataset).getRDD();
  WindowedValueCoder<KV<K, V>> inputCoder = getWindowedValueCoder(inputId, components);
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder();
  Coder<V> inputValueCoder = inputKvCoder.getValueCoder();
  // Raw WindowingStrategy: the WindowFn assignment below is unchecked.
  WindowingStrategy windowingStrategy = getWindowingStrategy(inputId, components);
  WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  // Only consumed by the group-by-key-only branch below.
  WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(inputValueCoder, windowFn.windowCoder());

  JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow;
  Partitioner partitioner = getPartitioner(context);
  if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) {
    // we can have a memory sensitive translation for non-merging windows
    groupedByKeyAndWindow =
        GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
            inputRdd, inputKeyCoder, inputValueCoder, windowingStrategy, partitioner);
  } else {
    // Two-step path: group by key first, then by window within each key.
    JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly =
        GroupCombineFunctions.groupByKeyOnly(inputRdd, inputKeyCoder, wvCoder, partitioner);
    // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
    groupedByKeyAndWindow =
        groupedByKeyOnly.flatMap(
            new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                windowingStrategy,
                new TranslationUtils.InMemoryStateInternalsFactory<>(),
                SystemReduceFn.buffering(inputValueCoder),
                context.serializablePipelineOptions));
  }
  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(groupedByKeyAndWindow));
}
 
Example 10
Source File: GroupingShuffleReader.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Populates {@code keyCoder}, {@code valueCoder} and, for sorted values, {@code
 * secondaryKeyCoder} from the coder of the windowed, key-grouped elements.
 *
 * @param coder expected to be a {@code WindowedValueCoder} wrapping a {@link KvCoder} whose value
 *     coder is an {@link IterableCoder}
 * @param valuesAreSorted when true, the iterable's element coder must itself be a {@link
 *     KvCoder}, which is split into the secondary key coder and the value coder
 * @throws Exception if one of the coder layers has an unexpected type
 */
private void initCoder(Coder<WindowedValue<KV<K, Iterable<V>>>> coder, boolean valuesAreSorted)
    throws Exception {
  if (!(coder instanceof WindowedValueCoder)) {
    throw new Exception("unexpected kind of coder for WindowedValue: " + coder);
  }
  Coder<KV<K, Iterable<V>>> groupedElemCoder =
      ((WindowedValueCoder<KV<K, Iterable<V>>>) coder).getValueCoder();
  if (!(groupedElemCoder instanceof KvCoder)) {
    throw new Exception(
        "unexpected kind of coder for elements read from "
            + "a key-grouping shuffle: "
            + groupedElemCoder);
  }

  @SuppressWarnings("unchecked")
  KvCoder<K, Iterable<V>> groupedKvCoder = (KvCoder<K, Iterable<V>>) groupedElemCoder;
  this.keyCoder = groupedKvCoder.getKeyCoder();

  Coder<Iterable<V>> groupedValuesCoder = groupedKvCoder.getValueCoder();
  if (!(groupedValuesCoder instanceof IterableCoder)) {
    throw new Exception(
        "unexpected kind of coder for values of KVs read from " + "a key-grouping shuffle");
  }
  Coder<V> perValueCoder = ((IterableCoder<V>) groupedValuesCoder).getElemCoder();

  if (!valuesAreSorted) {
    // Unsorted values: the iterable's element coder is the value coder itself.
    this.valueCoder = perValueCoder;
    return;
  }

  // Sorted values: each element is a (secondary key, value) pair.
  checkState(
      perValueCoder instanceof KvCoder,
      "unexpected kind of coder for elements read from a "
          + "key-grouping value sorting shuffle: %s",
      perValueCoder);
  @SuppressWarnings("rawtypes")
  KvCoder<?, ?> sortedPairCoder = (KvCoder) perValueCoder;
  this.secondaryKeyCoder = sortedPairCoder.getKeyCoder();
  this.valueCoder = sortedPairCoder.getValueCoder();
}
 
Example 11
Source File: MultiStepCombine.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expands the combine into three steps: a {@code ParDo} over {@code CombineInputs}, a {@code
 * GroupByKey}, and a final {@code MergeAndExtractAccumulatorOutput}.
 *
 * @throws IllegalArgumentException if the input does not use a {@link KvCoder}
 * @throws IllegalStateException if the combine function cannot provide an accumulator coder
 */
@Override
public PCollection<KV<K, OutputT>> expand(PCollection<KV<K, InputT>> input) {
  checkArgument(
      input.getCoder() instanceof KvCoder,
      "Expected input to have a %s of type %s, got %s",
      Coder.class.getSimpleName(),
      KvCoder.class.getSimpleName(),
      input.getCoder());
  KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = kvInputCoder.getKeyCoder();

  Coder<AccumT> accumCoder;
  try {
    accumCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), kvInputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    String message =
        String.format(
            "Could not construct an Accumulator Coder with the provided %s %s",
            CombineFn.class.getSimpleName(), combineFn);
    throw new IllegalStateException(message, e);
  }

  return input
      .apply(
          ParDo.of(
              new CombineInputs<>(
                  combineFn, input.getWindowingStrategy().getTimestampCombiner(), keyCoder)))
      .setCoder(KvCoder.of(keyCoder, accumCoder))
      .apply(GroupByKey.create())
      .apply(new MergeAndExtractAccumulatorOutput<>(combineFn, outputCoder));
}
 
Example 12
Source File: FnApiStateAccessor.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the value of a side input view for the side-input window that {@code window} maps to.
 *
 * <p>An accessor matching the side input's access pattern (iterable or multimap) is created, and
 * the view function's result is cached under a state key made of the transform id, the side
 * input id and the encoded window.
 *
 * @throws IllegalArgumentException if the view's tag is not a known side input
 * @throws IllegalStateException if the window cannot be encoded, the multimap coder is not a
 *     {@link KvCoder}, or the access pattern is unsupported
 */
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
  TupleTag<?> viewTag = view.getTagInternal();

  SideInputSpec spec = sideInputSpecMap.get(viewTag);
  checkArgument(spec != null, "Attempting to access unknown side input %s.", view);

  // Encode the side-input window that the main-input window maps to.
  ByteString.Output windowSink = ByteString.newOutput();
  try {
    spec.getWindowCoder().encode(spec.getWindowMappingFn().getSideInputWindow(window), windowSink);
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
  ByteString windowBytes = windowSink.toByteString();
  StateKey.Builder stateKeyBuilder = StateKey.newBuilder();
  Object accessor;

  switch (spec.getAccessPattern()) {
    case Materializations.ITERABLE_MATERIALIZATION_URN:
      stateKeyBuilder
          .getIterableSideInputBuilder()
          .setTransformId(ptransformId)
          .setSideInputId(viewTag.getId())
          .setWindow(windowBytes);
      accessor =
          new IterableSideInput<>(
              beamFnStateClient,
              processBundleInstructionId.get(),
              ptransformId,
              viewTag.getId(),
              windowBytes,
              spec.getCoder());
      break;

    case Materializations.MULTIMAP_MATERIALIZATION_URN:
      // Multimap access needs separate key and value coders, hence the KvCoder requirement.
      checkState(
          spec.getCoder() instanceof KvCoder,
          "Expected %s but received %s.",
          KvCoder.class,
          spec.getCoder().getClass());
      KvCoder<?, ?> entryCoder = (KvCoder) spec.getCoder();
      stateKeyBuilder
          .getMultimapSideInputBuilder()
          .setTransformId(ptransformId)
          .setSideInputId(viewTag.getId())
          .setWindow(windowBytes);
      accessor =
          new MultimapSideInput<>(
              beamFnStateClient,
              processBundleInstructionId.get(),
              ptransformId,
              viewTag.getId(),
              windowBytes,
              entryCoder.getKeyCoder(),
              entryCoder.getValueCoder());
      break;

    default:
      throw new IllegalStateException(
          String.format(
              "This SDK is only capable of dealing with %s materializations "
                  + "but was asked to handle %s for PCollectionView with tag %s.",
              ImmutableList.of(
                  Materializations.ITERABLE_MATERIALIZATION_URN,
                  Materializations.MULTIMAP_MATERIALIZATION_URN),
              spec.getAccessPattern(),
              viewTag));
  }

  // Apply the view function at most once per distinct state key.
  return (T)
      stateKeyObjectCache.computeIfAbsent(
          stateKeyBuilder.build(), unusedKey -> spec.getViewFn().apply(accessor));
}
 
Example 13
Source File: WindowGroupP.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the configuration and builds the flat-mapper that handles every incoming item.
 *
 * <p>The flat-mapper distinguishes four kinds of item: the completion marker, the try-process
 * marker, a {@code Watermark}, and anything else, which is treated as an encoded windowed
 * key-value pair.
 *
 * @param inputCoder coder used to decode incoming items; its value coder is cast to {@link
 *     KvCoder} without a prior type check
 */
private WindowGroupP(
    SerializablePipelineOptions pipelineOptions,
    WindowedValue.WindowedValueCoder<KV<K, V>> inputCoder,
    Coder outputCoder,
    WindowingStrategy<V, BoundedWindow> windowingStrategy,
    String ownerId) {
  this.pipelineOptions = pipelineOptions;
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  this.inputValueValueCoder = kvCoder.getValueCoder();
  this.outputCoder = outputCoder;
  this.windowingStrategy = windowingStrategy;
  this.ownerId = ownerId;

  this.flatMapper =
      flatMapper(
          received -> {
            if (COMPLETE_MARKER == received) {
              // Completion: move the watermark to the maximum timestamp.
              advanceWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
            } else if (TRY_PROCESS_MARKER == received) {
              // Advance processing time only after the minimum increment has elapsed.
              Instant now = Instant.now();
              long nowMillis = now.getMillis();
              if (nowMillis - lastProcessingTime > PROCESSING_TIME_MIN_INCREMENT) {
                lastProcessingTime = nowMillis;
                advanceProcessingTime(now);
              }
            } else if (received instanceof Watermark) {
              advanceWatermark(((Watermark) received).timestamp());
              appendableTraverser.append(received);
            } else {
              // Regular element: decode it and hand the value to the manager of its key.
              WindowedValue<KV<K, V>> decoded =
                  Utils.decodeWindowedValue((byte[]) received, inputCoder);
              KV<K, V> pair = decoded.getValue();
              K key = pair.getKey();
              Utils.ByteArrayKey encodedKey =
                  new Utils.ByteArrayKey(Utils.encode(key, kvCoder.getKeyCoder()));
              WindowedValue<V> valueOnly =
                  WindowedValue.of(
                      pair.getValue(),
                      decoded.getTimestamp(),
                      decoded.getWindows(),
                      decoded.getPane());
              keyManagers
                  .computeIfAbsent(encodedKey, unused -> new KeyManager(key))
                  .processElement(valueOnly);
            }
            return appendableTraverser;
          });
}
 
Example 14
Source File: CombinePerKeyTranslatorBatch.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a per-key combine: the input dataset is grouped by key, aggregated with an {@code
 * AggregatorCombiner}, and flattened back into windowed {@code KV<K, OutputT>} elements that are
 * stored as the output dataset of {@code context}.
 *
 * @throws RuntimeException if the combine function cannot provide an accumulator coder
 */
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TranslationContext context) {

  Combine.PerKey perKeyCombine = (Combine.PerKey) transform;
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, InputT>> input = (PCollection<KV<K, InputT>>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, OutputT>> output = (PCollection<KV<K, OutputT>>) context.getOutput();
  @SuppressWarnings("unchecked")
  final Combine.CombineFn<InputT, AccumT, OutputT> combineFn =
      (Combine.CombineFn<InputT, AccumT, OutputT>) perKeyCombine.getFn();
  WindowingStrategy<?, ?> strategy = input.getWindowingStrategy();

  Dataset<WindowedValue<KV<K, InputT>>> sourceDataset = context.getDataset(input);

  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  KvCoder<K, OutputT> outputKvCoder = (KvCoder<K, OutputT>) output.getCoder();
  Coder<OutputT> outputValueCoder = outputKvCoder.getValueCoder();

  KeyValueGroupedDataset<K, WindowedValue<KV<K, InputT>>> keyedGroups =
      sourceDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  Coder<AccumT> accumCoder;
  try {
    accumCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), inputKvCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  Dataset<Tuple2<K, Iterable<WindowedValue<OutputT>>>> combined =
      keyedGroups.agg(
          new AggregatorCombiner<K, InputT, AccumT, OutputT, BoundedWindow>(
                  combineFn, strategy, accumCoder, outputValueCoder)
              .toColumn());

  // Flatten each key's list of combined values, re-attaching the key to every element.
  WindowedValue.WindowedValueCoder<KV<K, OutputT>> windowedOutputCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKvCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, OutputT>>> flattened =
      combined.flatMap(
          (FlatMapFunction<
                  Tuple2<K, Iterable<WindowedValue<OutputT>>>, WindowedValue<KV<K, OutputT>>>)
              keyedValues -> {
                K key = keyedValues._1();
                List<WindowedValue<KV<K, OutputT>>> rekeyed = new ArrayList<>();
                for (WindowedValue<OutputT> combinedValue : keyedValues._2()) {
                  rekeyed.add(
                      WindowedValue.of(
                          KV.of(key, combinedValue.getValue()),
                          combinedValue.getTimestamp(),
                          combinedValue.getWindows(),
                          combinedValue.getPane()));
                }
                return rekeyed.iterator();
              },
          EncoderHelpers.fromBeamCoder(windowedOutputCoder));
  context.putDataset(output, flattened);
}
 
Example 15
Source File: ShuffleSink.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Derives the shuffle flags from {@code shuffleKind} and splits the element coder into the
 * coders needed for that kind of shuffle.
 *
 * <p>When elements are not sharded by key, every key/value/sort coder is set to null. Otherwise
 * the element coder must be a {@link KvCoder}; when values are sorted, its value coder must be a
 * {@link KvCoder} as well.
 *
 * @param coder coder of the windowed elements; cast to {@code WindowedValueCoder} without a
 *     prior type check
 * @throws Exception if a coder required to be a {@link KvCoder} is of another type
 */
private void initCoder(Coder<WindowedValue<T>> coder) throws Exception {
  // Flags per shuffle kind: (shardByKey, groupValues, sortValues).
  switch (shuffleKind) {
    case UNGROUPED:
      this.shardByKey = false;
      this.groupValues = false;
      this.sortValues = false;
      break;
    case PARTITION_KEYS:
      this.shardByKey = true;
      this.groupValues = false;
      this.sortValues = false;
      break;
    case GROUP_KEYS:
      this.shardByKey = true;
      this.groupValues = true;
      this.sortValues = false;
      break;
    case GROUP_KEYS_AND_SORT_VALUES:
      this.shardByKey = true;
      this.groupValues = true;
      this.sortValues = true;
      break;
    default:
      throw new AssertionError("unexpected shuffle kind");
  }

  this.windowedElemCoder = (WindowedValueCoder<T>) coder;
  this.elemCoder = windowedElemCoder.getValueCoder();

  if (!shardByKey) {
    // No key sharding: none of the component coders are used.
    this.keyCoder = null;
    this.valueCoder = null;
    this.sortKeyCoder = null;
    this.sortValueCoder = null;
    this.windowedValueCoder = null;
    return;
  }

  if (!(elemCoder instanceof KvCoder)) {
    throw new Exception(
        String.format(
            "Unexpected kind of coder for elements written to a key-grouping shuffle %s.",
            elemCoder));
  }
  KvCoder<?, ?> keyedElemCoder = (KvCoder<?, ?>) elemCoder;
  this.keyCoder = keyedElemCoder.getKeyCoder();
  this.valueCoder = keyedElemCoder.getValueCoder();

  if (!sortValues) {
    this.sortKeyCoder = null;
    this.sortValueCoder = null;
  } else {
    // TODO: Decide the representation of sort-keyed values.
    // For now, we'll just use KVs.
    if (!(valueCoder instanceof KvCoder)) {
      throw new Exception(
          String.format(
              "Unexpected kind of coder for values written to a value-sorting shuffle %s.",
              valueCoder));
    }
    KvCoder<?, ?> sortedPairCoder = (KvCoder<?, ?>) valueCoder;
    this.sortKeyCoder = sortedPairCoder.getKeyCoder();
    this.sortValueCoder = sortedPairCoder.getValueCoder();
  }

  if (!groupValues) {
    this.windowedValueCoder = this.windowedElemCoder.withValueCoder(this.valueCoder);
  } else {
    this.windowedValueCoder = null;
  }
}
 
Example 16
Source File: DataflowSideInputHandlerFactory.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a multimap side-input handler for the given side input of the given PTransform.
 *
 * <p>Error messages are passed to {@code checkState} as a template plus arguments, so they are
 * only formatted when a check actually fails.
 *
 * @param pTransformId id of the PTransform owning the side input; must be non-null and non-empty
 * @param sideInputId local name of the side input on that PTransform
 * @param elementCoder coder whose key and value coders are handed to the handler
 * @param windowCoder coder for the side input's windows
 * @return a handler backed by the side-input reader registered for {@code pTransformId}
 * @throws IllegalArgumentException if {@code pTransformId} is null or empty
 * @throws IllegalStateException if the PTransform or side input is unknown, the view does not use
 *     the multimap materialization, or the view's coder is not a {@link KvCoder}
 */
@Override
public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(
    String pTransformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
  checkArgument(
      pTransformId != null && pTransformId.length() > 0, "Expect a valid PTransform ID.");

  SideInputReader sideInputReader = ptransformIdToSideInputReader.get(pTransformId);
  checkState(sideInputReader != null, "Unknown PTransform '%s'", pTransformId);

  PCollectionView<Materializations.MultimapView<Object, Object>> view =
      (PCollectionView<Materializations.MultimapView<Object, Object>>)
          sideInputIdToPCollectionViewMap.get(
              RunnerApi.ExecutableStagePayload.SideInputId.newBuilder()
                  .setTransformId(pTransformId)
                  .setLocalName(sideInputId)
                  .build());
  checkState(
      view != null, "Unknown side input '%s' on PTransform '%s'", sideInputId, pTransformId);

  // Fetch the URN once; it is needed both for the check and for the failure message.
  String materializationUrn = view.getViewFn().getMaterialization().getUrn();
  checkState(
      Materializations.MULTIMAP_MATERIALIZATION_URN.equals(materializationUrn),
      "Unknown materialization for side input '%s' on PTransform '%s' with urn '%s'",
      sideInputId,
      pTransformId,
      materializationUrn);

  checkState(
      view.getCoderInternal() instanceof KvCoder,
      "Materialization of side input '%s' on PTransform '%s' expects %s but received %s.",
      sideInputId,
      pTransformId,
      KvCoder.class.getSimpleName(),
      view.getCoderInternal().getClass().getSimpleName());

  // The view's coder is only validated above; the handler is built from the supplied coder.
  return new DataflowMultimapSideInputHandler<>(
      sideInputReader,
      view,
      elementCoder.getKeyCoder(),
      elementCoder.getValueCoder(),
      windowCoder);
}