Java Code Examples for org.apache.beam.sdk.transforms.Combine#GroupedValues

The following examples show how to use org.apache.beam.sdk.transforms.Combine#GroupedValues. You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: CombineTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link FunctionSpec} describing an applied {@link Combine.GroupedValues}.
 *
 * <p>When the combine declares no side inputs, the spec carries the transform's URN and a
 * payload built from the combine function and its accumulator coder. Combines that do use side
 * inputs are translated as generic composites, which have a blank FunctionSpec, so {@code null}
 * is returned for them.
 *
 * @param transform the applied combine to translate
 * @param components the SDK components handed to the payload construction
 * @return the populated spec, or {@code null} if the combine has side inputs
 * @throws IOException if the accumulator coder extraction or payload construction fails with one
 */
@Override
public FunctionSpec translate(
    AppliedPTransform<?, ?, Combine.GroupedValues<?, ?, ?>> transform, SdkComponents components)
    throws IOException {
  Combine.GroupedValues<?, ?, ?> combine = transform.getTransform();

  if (!combine.getSideInputs().isEmpty()) {
    // Combines with side inputs are translated as generic composites, which have a blank
    // FunctionSpec.
    return null;
  }

  GlobalCombineFn<?, ?, ?> combineFn = combine.getFn();
  // The raw cast drops the wildcard type arguments so the generic helper can be invoked.
  Coder<?> accumulatorCoder = extractAccumulatorCoder(combineFn, (AppliedPTransform) transform);

  FunctionSpec.Builder spec = FunctionSpec.newBuilder();
  spec.setUrn(getUrn(combine));
  spec.setPayload(combinePayload(combineFn, accumulatorCoder, components).toByteString());
  return spec.build();
}
 
Example 2
Source File: CombineTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the accumulator coder of {@code combineFn} from the element coder of the transform's
 * single main input.
 *
 * <p>The main input is cast to a {@code PCollection<KV<K, Iterable<InputT>>>}; its coder is cast
 * to a {@link KvCoder} whose value coder is cast to an {@link IterableCoder}. The iterable's
 * element coder and the pipeline's coder registry are then handed to the combine function.
 *
 * @param combineFn the combine function whose accumulator coder is requested
 * @param transform the applied grouped-values combine providing the main input and pipeline
 * @return the accumulator coder reported by {@code combineFn}
 * @throws IOException if the combine function cannot provide an accumulator coder
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<
            PCollection<KV<K, Iterable<InputT>>>, ?, Combine.GroupedValues<K, InputT, ?>>
        transform)
    throws IOException {
  // Exactly one non-additional input is expected; getOnlyElement fails otherwise.
  @SuppressWarnings("unchecked")
  PCollection<KV<K, Iterable<InputT>>> groupedInput =
      (PCollection<KV<K, Iterable<InputT>>>)
          Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));

  KvCoder<K, Iterable<InputT>> groupedCoder =
      (KvCoder<K, Iterable<InputT>>) groupedInput.getCoder();
  Coder<InputT> elementCoder =
      ((IterableCoder<InputT>) groupedCoder.getValueCoder()).getElemCoder();

  try {
    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(), elementCoder);
  } catch (CannotProvideCoderException e) {
    // Surface the failure as the checked exception type this method declares.
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
 
Example 3
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code CombineGroupedValues} primitive into a "CombineValues" step.
 *
 * <p>The step receives the transform's input and the original combine's side inputs, the
 * accumulator coder of the applied combine function as an encoding input, a serialized-function
 * property, and the transform's output. When the pipeline's experiments contain
 * {@code "beam_fn_api"}, the serialized-function property is the PTransform id of the context's
 * current parent; otherwise it is the JSON string form of the serialized applied combine function.
 *
 * @param primitiveTransform the primitive wrapping the original grouped-values combine
 * @param context the translation context that supplies inputs, outputs, options and steps
 */
private <K, InputT, OutputT> void translateHelper(
    final CombineGroupedValues<K, InputT, OutputT> primitiveTransform,
    TranslationContext context) {
  Combine.GroupedValues<K, InputT, OutputT> combine = primitiveTransform.getOriginalCombine();
  StepTranslationContext step = context.addStep(primitiveTransform, "CombineValues");
  translateInputs(step, context.getInput(primitiveTransform), combine.getSideInputs(), context);

  AppliedCombineFn<? super K, ? super InputT, ?, OutputT> appliedFn =
      combine.getAppliedFn(
          context.getInput(primitiveTransform).getPipeline().getCoderRegistry(),
          context.getInput(primitiveTransform).getCoder(),
          context.getInput(primitiveTransform).getWindowingStrategy());
  step.addEncodingInput(appliedFn.getAccumulatorCoder());

  // The experiments list may be absent, in which case the Fn API path is not taken.
  List<String> experiments = context.getPipelineOptions().getExperiments();
  boolean useFnApi = experiments != null && experiments.contains("beam_fn_api");

  // Only the selected branch is evaluated, so the function is serialized only off the Fn API path.
  String serializedFn =
      useFnApi
          ? context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentParent())
          : byteArrayToJsonString(serializeToByteArray(appliedFn));
  step.addInput(PropertyNames.SERIALIZED_FN, serializedFn);

  step.addOutput(PropertyNames.OUTPUT, context.getOutput(primitiveTransform));
}
 
Example 4
Source File: CombineTranslation.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the URN used for {@link Combine.GroupedValues} transforms.
 *
 * @param transform the combine being translated; it is not inspected, the same constant is
 *     returned for every instance
 * @return {@code COMBINE_GROUPED_VALUES_TRANSFORM_URN}
 */
@Override
public String getUrn(Combine.GroupedValues<?, ?, ?> transform) {
  return COMBINE_GROUPED_VALUES_TRANSFORM_URN;
}
 
Example 5
Source File: TransformTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the evaluator that applies a {@link Combine.GroupedValues} to a bounded dataset.
 *
 * <p>The evaluator borrows the transform's dataset as an RDD of windowed {@code KV<K,
 * Iterable<InputT>>} elements and maps every element to a windowed {@code KV<K, OutputT>} by
 * applying the combine function to the grouped values. Timestamp, windows and pane of each
 * element are carried over unchanged. The mapped RDD is stored back as the transform's dataset.
 *
 * <p>NOTE(review): the key type argument of {@code Combine.GroupedValues} is declared here as
 * {@code KV<K, InputT>} while the RDD elements are keyed by {@code K}; the unchecked casts below
 * bridge that mismatch. Confirm against the callers before tightening the generics.
 *
 * @return a new evaluator instance
 */
private static <K, InputT, OutputT>
    TransformEvaluator<Combine.GroupedValues<KV<K, InputT>, InputT, OutputT>> combineGrouped() {
  return new TransformEvaluator<Combine.GroupedValues<KV<K, InputT>, InputT, OutputT>>() {
    @Override
    public void evaluate(
        Combine.GroupedValues<KV<K, InputT>, InputT, OutputT> transform,
        EvaluationContext context) {
      @SuppressWarnings("unchecked")
      CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> fnWithContext =
          (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>)
              CombineFnUtil.toFnWithContext(transform.getFn());

      // Used below only to build the per-element context handed to the combine function.
      final SparkCombineFn<KV<K, InputT>, InputT, ?, OutputT> keyedFn =
          SparkCombineFn.keyed(
              fnWithContext,
              context.getSerializableOptions(),
              TranslationUtils.getSideInputs(transform.getSideInputs(), context),
              context.getInput(transform).getWindowingStrategy());

      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, Iterable<InputT>>>> groupedRdd =
          ((BoundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform)).getRDD();

      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, OutputT>>> combinedRdd =
          groupedRdd.map(
              element -> {
                KV<K, Iterable<InputT>> grouped = element.getValue();
                K key = grouped.getKey();
                OutputT combined =
                    fnWithContext.apply(grouped.getValue(), keyedFn.ctxtForValue(element));
                return WindowedValue.of(
                    KV.of(key, combined),
                    element.getTimestamp(),
                    element.getWindows(),
                    element.getPane());
              });

      context.putDataset(transform, new BoundedDataset<>(combinedRdd));
    }

    @Override
    public String toNativeString() {
      return "map(new <fn>())";
    }
  };
}
 
Example 6
Source File: StreamingTransformTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the evaluator that applies a {@link Combine.GroupedValues} to an unbounded dataset.
 *
 * <p>The evaluator borrows the transform's dataset as a DStream of windowed {@code KV<K,
 * Iterable<InputT>>} elements and transforms each RDD of that stream by mapping it through
 * {@code TranslationUtils.CombineGroupedValues}. The keyed Spark combine function, including its
 * side inputs, is built inside the per-RDD callback from that RDD's Spark context. The resulting
 * stream is stored back as the transform's dataset together with the stream sources of the
 * borrowed dataset.
 *
 * @return a new evaluator instance
 */
private static <K, InputT, OutputT>
    TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>> combineGrouped() {
  return new TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>>() {
    @Override
    public void evaluate(
        final Combine.GroupedValues<K, InputT, OutputT> transform, EvaluationContext context) {
      // Values captured by the per-RDD callback below.
      final SerializablePipelineOptions serializableOptions = context.getSerializableOptions();
      final SparkPCollectionView views = context.getPViews();

      // get the applied combine function.
      PCollection<? extends KV<K, ? extends Iterable<InputT>>> input =
          context.getInput(transform);
      final WindowingStrategy<?, ?> strategy = input.getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> fnWithContext =
          (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>)
              CombineFnUtil.toFnWithContext(transform.getFn());

      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, Iterable<InputT>>> source =
          (UnboundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform);
      JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupedStream = source.getDStream();

      JavaDStream<WindowedValue<KV<K, OutputT>>> combinedStream =
          groupedStream.transform(
              batch -> {
                // Side inputs are resolved per RDD, using that RDD's own Spark context.
                SparkCombineFn<KV<K, InputT>, InputT, ?, OutputT> keyedFn =
                    SparkCombineFn.keyed(
                        fnWithContext,
                        serializableOptions,
                        TranslationUtils.getSideInputs(
                            transform.getSideInputs(),
                            new JavaSparkContext(batch.context()),
                            views),
                        strategy);
                return batch.map(new TranslationUtils.CombineGroupedValues<>(keyedFn));
              });

      context.putDataset(
          transform, new UnboundedDataset<>(combinedStream, source.getStreamSources()));
    }

    @Override
    public String toNativeString() {
      return "map(new <fn>())";
    }
  };
}
 
Example 7
Source File: DataflowRunner.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the {@link Combine.GroupedValues} held in this instance's {@code original} field.
 *
 * @return the value of {@code original}
 */
public Combine.GroupedValues<K, InputT, OutputT> getOriginalCombine() {
  return original;
}