Java Code Examples for org.apache.beam.sdk.transforms.Combine#CombineFn

The following examples show how to use org.apache.beam.sdk.transforms.Combine#CombineFn. Each example is extracted from an open source project; the source file and project license are listed above it.
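For orientation before the project examples, here is a minimal, self-contained sketch of a custom CombineFn, closely following the averaging pattern from the Beam programming guide. The class name AverageFn and its Accum accumulator are illustrative and not part of any project below; the accumulator is made Serializable so that Beam's default coder inference can fall back to SerializableCoder.

import java.io.Serializable;
import org.apache.beam.sdk.transforms.Combine;

public class AverageFn extends Combine.CombineFn<Integer, AverageFn.Accum, Double> {

  // Mutable accumulator holding the running sum and count.
  public static class Accum implements Serializable {
    long sum = 0;
    long count = 0;
  }

  @Override
  public Accum createAccumulator() {
    return new Accum();
  }

  @Override
  public Accum addInput(Accum accum, Integer input) {
    accum.sum += input;
    accum.count++;
    return accum;
  }

  @Override
  public Accum mergeAccumulators(Iterable<Accum> accums) {
    // Fold all partial accumulators into a fresh one.
    Accum merged = createAccumulator();
    for (Accum accum : accums) {
      merged.sum += accum.sum;
      merged.count += accum.count;
    }
    return merged;
  }

  @Override
  public Double extractOutput(Accum accum) {
    return accum.count == 0 ? 0.0 : ((double) accum.sum) / accum.count;
  }
}

Such a function would typically be applied with Combine.globally(new AverageFn()) on a PCollection<Integer>, or with Combine.perKey(new AverageFn()) on a keyed collection.
 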
Example 1
Source File: AggregatorCombiner.java    From beam with Apache License 2.0
public AggregatorCombiner(
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Coder<AccumT> accumulatorCoder,
    Coder<OutputT> outputCoder) {
  this.combineFn = combineFn;
  this.windowingStrategy = (WindowingStrategy<InputT, W>) windowingStrategy;
  this.timestampCombiner = windowingStrategy.getTimestampCombiner();
  this.accumulatorCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              accumulatorCoder, windowingStrategy.getWindowFn().windowCoder()));
  this.outputCoder =
      IterableCoder.of(
          WindowedValue.FullWindowedValueCoder.of(
              outputCoder, windowingStrategy.getWindowFn().windowCoder()));
}
 
Example 2
Source File: FlinkBroadcastStateInternals.java    From beam with Apache License 2.0
FlinkKeyedCombiningState(
    OperatorStateBackend flinkStateBackend,
    StateTag<CombiningState<InputT, AccumT, OutputT>> address,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder,
    FlinkBroadcastStateInternals<K2> flinkStateInternals) {
  super(flinkStateBackend, address.getId(), namespace, accumCoder);

  this.namespace = namespace;
  this.address = address;
  this.combineFn = combineFn;
  this.flinkStateInternals = flinkStateInternals;
}
 
Example 3
Source File: FlinkBroadcastStateInternals.java    From beam with Apache License 2.0
FlinkCombiningState(
    OperatorStateBackend flinkStateBackend,
    StateTag<CombiningState<InputT, AccumT, OutputT>> address,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder) {
  super(flinkStateBackend, address.getId(), namespace, accumCoder);

  this.namespace = namespace;
  this.address = address;
  this.combineFn = combineFn;
}
 
Example 4
Source File: SamzaStoreStateInternals.java    From beam with Apache License 2.0
protected SamzaAccumulatorCombiningState(
    StateNamespace namespace,
    StateTag<? extends State> address,
    Coder<AccumT> coder,
    Combine.CombineFn<InT, AccumT, OutT> combineFn) {
  super(namespace, address, coder);

  this.combineFn = combineFn;
}
 
Example 5
Source File: CombineValuesFnFactoryTest.java    From beam with Apache License 2.0
@Test
public void testCombineValuesFnAdd() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.ADD,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("a", Arrays.asList(5, 6, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("b", Arrays.asList(1, 3, 7))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("c", Arrays.asList(3, 6, 8, 9))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(3, 18))),
    WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 11))),
    WindowedValue.valueInGlobalWindow(KV.of("c", new CountSum(4, 26)))
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
 
Example 6
Source File: CombineValuesFnFactoryTest.java    From beam with Apache License 2.0
@Test
public void testCombineValuesFnMerge() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.MERGE,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of(
              "a", Arrays.asList(new CountSum(3, 6), new CountSum(2, 9), new CountSum(1, 12)))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of("b", Arrays.asList(new CountSum(2, 20), new CountSum(1, 1)))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(6, 27))),
    WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 21))),
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
 
Example 7
Source File: CombineValuesFnFactoryTest.java    From beam with Apache License 2.0
@Test
public void testCombineValuesFnExtract() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.EXTRACT,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(6, 27))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 21))));
  combineParDoFn.finishBundle();

  assertArrayEquals(
      new Object[] {
        WindowedValue.valueInGlobalWindow(KV.of("a", String.format("%.1f", 4.5))),
        WindowedValue.valueInGlobalWindow(KV.of("b", String.format("%.1f", 7.0)))
      },
      receiver.receivedElems.toArray());
}
 
Example 8
Source File: StateBinder.java    From beam with Apache License 2.0
<InputT, AccumT, OutputT> CombiningState<InputT, AccumT, OutputT> bindCombining(
    String id,
    StateSpec<CombiningState<InputT, AccumT, OutputT>> spec,
    Coder<AccumT> accumCoder,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn);
 
Example 9
Source File: StateSpec.java    From beam with Apache License 2.0
@Override
public ResultT dispatchCombining(Combine.CombineFn<?, ?, ?> combineFn, Coder<?> accumCoder) {
  return dispatchDefault();
}
 
Example 10
Source File: ParDoTranslation.java    From beam with Apache License 2.0
@VisibleForTesting
static StateSpec<?> fromProto(RunnerApi.StateSpec stateSpec, RehydratedComponents components)
    throws IOException {
  switch (stateSpec.getSpecCase()) {
    case READ_MODIFY_WRITE_SPEC:
      return StateSpecs.value(
          components.getCoder(stateSpec.getReadModifyWriteSpec().getCoderId()));
    case BAG_SPEC:
      return StateSpecs.bag(components.getCoder(stateSpec.getBagSpec().getElementCoderId()));
    case COMBINING_SPEC:
      FunctionSpec combineFnSpec = stateSpec.getCombiningSpec().getCombineFn();

      if (!combineFnSpec.getUrn().equals(CombineTranslation.JAVA_SERIALIZED_COMBINE_FN_URN)) {
        throw new UnsupportedOperationException(
            String.format(
                "Cannot create %s from non-Java %s: %s",
                StateSpec.class.getSimpleName(),
                Combine.CombineFn.class.getSimpleName(),
                combineFnSpec.getUrn()));
      }

      Combine.CombineFn<?, ?, ?> combineFn =
          (Combine.CombineFn<?, ?, ?>)
              SerializableUtils.deserializeFromByteArray(
                  combineFnSpec.getPayload().toByteArray(),
                  Combine.CombineFn.class.getSimpleName());

      // Rawtype coder cast because it is required to be a valid accumulator coder
      // for the CombineFn, by construction
      return StateSpecs.combining(
          (Coder) components.getCoder(stateSpec.getCombiningSpec().getAccumulatorCoderId()),
          combineFn);

    case MAP_SPEC:
      return StateSpecs.map(
          components.getCoder(stateSpec.getMapSpec().getKeyCoderId()),
          components.getCoder(stateSpec.getMapSpec().getValueCoderId()));

    case SET_SPEC:
      return StateSpecs.set(components.getCoder(stateSpec.getSetSpec().getElementCoderId()));

    case SPEC_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format("Unknown %s: %s", RunnerApi.StateSpec.class.getName(), stateSpec));
  }
}
 
Example 11
Source File: AsList.java    From DataflowTemplates with Apache License 2.0
public static <T> Combine.CombineFn<T, List<T>, List<T>> fn() {
  return new Impl<>();
}
 
Example 12
Source File: CombinePerKeyTranslatorBatch.java    From beam with Apache License 2.0
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TranslationContext context) {

  Combine.PerKey combineTransform = (Combine.PerKey) transform;
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, InputT>> input = (PCollection<KV<K, InputT>>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, OutputT>> output = (PCollection<KV<K, OutputT>>) context.getOutput();
  @SuppressWarnings("unchecked")
  final Combine.CombineFn<InputT, AccumT, OutputT> combineFn =
      (Combine.CombineFn<InputT, AccumT, OutputT>) combineTransform.getFn();
  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  Dataset<WindowedValue<KV<K, InputT>>> inputDataset = context.getDataset(input);

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  KvCoder<K, OutputT> outputKVCoder = (KvCoder<K, OutputT>) output.getCoder();
  Coder<OutputT> outputCoder = outputKVCoder.getValueCoder();

  KeyValueGroupedDataset<K, WindowedValue<KV<K, InputT>>> groupedDataset =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  Coder<AccumT> accumulatorCoder = null;
  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  Dataset<Tuple2<K, Iterable<WindowedValue<OutputT>>>> combinedDataset =
      groupedDataset.agg(
          new AggregatorCombiner<K, InputT, AccumT, OutputT, BoundedWindow>(
                  combineFn, windowingStrategy, accumulatorCoder, outputCoder)
              .toColumn());

  // expand the list into separate elements and put the key back into the elements
  WindowedValue.WindowedValueCoder<KV<K, OutputT>> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKVCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, OutputT>>> outputDataset =
      combinedDataset.flatMap(
          (FlatMapFunction<
                  Tuple2<K, Iterable<WindowedValue<OutputT>>>, WindowedValue<KV<K, OutputT>>>)
              tuple2 -> {
                K key = tuple2._1();
                Iterable<WindowedValue<OutputT>> windowedValues = tuple2._2();
                List<WindowedValue<KV<K, OutputT>>> result = new ArrayList<>();
                for (WindowedValue<OutputT> windowedValue : windowedValues) {
                  KV<K, OutputT> kv = KV.of(key, windowedValue.getValue());
                  result.add(
                      WindowedValue.of(
                          kv,
                          windowedValue.getTimestamp(),
                          windowedValue.getWindows(),
                          windowedValue.getPane()));
                }
                return result.iterator();
              },
          EncoderHelpers.fromBeamCoder(wvCoder));
  context.putDataset(output, outputDataset);
}
 
Example 13
Source File: SqlTransform.java    From beam with Apache License 2.0
static UdafDefinition of(String udafName, Combine.CombineFn combineFn) {
  return new AutoValue_SqlTransform_UdafDefinition(udafName, combineFn);
}
 
Example 14
Source File: UdfUdafProvider.java    From beam with Apache License 2.0
default Map<String, Combine.CombineFn> getUdafs() {
  return Collections.emptyMap();
}
 
Example 15
Source File: FlinkBatchTransformTranslators.java    From beam with Apache License 2.0
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkBatchTranslationContext context) {

  // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
  // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn

  DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));

  Combine.CombineFn<InputT, List<InputT>, List<InputT>> combineFn = new Concatenate<>();

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

  Coder<List<InputT>> accumulatorCoder;

  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            context.getInput(transform).getPipeline().getCoderRegistry(),
            inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  WindowingStrategy<?, ?> windowingStrategy =
      context.getInput(transform).getWindowingStrategy();

  TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
              windowingStrategy.getWindowFn().windowCoder()));

  Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
      inputDataSet.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, BoundedWindow> boundedStrategy =
      (WindowingStrategy<Object, BoundedWindow>) windowingStrategy;

  FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction =
      new FlinkPartialReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction =
      new FlinkReduceFunction<>(
          combineFn, boundedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  // Partially GroupReduce the values into the intermediate format AccumT (combine)
  String fullName = getCurrentTransformName(context);
  GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>>
      groupCombine =
          new GroupCombineOperator<>(
              inputGrouping,
              partialReduceTypeInfo,
              partialReduceFunction,
              "GroupCombine: " + fullName);

  Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
      groupCombine.groupBy(new KvKeySelector<>(inputCoder.getKeyCoder()));

  // Fully reduce the values and create output format VO
  GroupReduceOperator<WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>>
      outputDataSet =
          new GroupReduceOperator<>(
              intermediateGrouping, partialReduceTypeInfo, reduceFunction, fullName);

  context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
 
Example 16
Source File: ReduceByKeyTranslator.java    From beam with Apache License 2.0
private static <InputT, KeyT, ValueT, AccT, OutputT>
    Combine.CombineFn<ValueT, AccT, OutputT> asCombineFn(
        ReduceByKey<InputT, KeyT, ValueT, AccT, OutputT> operator) {

  @SuppressWarnings("unchecked")
  ReduceByKey<InputT, KeyT, ValueT, AccT, OutputT> cast = (ReduceByKey) operator;

  VoidFunction<AccT> accumulatorFactory = cast.getAccumulatorFactory();
  BinaryFunction<AccT, ValueT, AccT> accumulate = cast.getAccumulate();
  CombinableBinaryFunction<AccT> mergeAccumulators = cast.getMergeAccumulators();
  UnaryFunction<AccT, OutputT> outputFn = cast.getOutputFn();
  TypeDescriptor<AccT> accumulatorType = cast.getAccumulatorType();

  return new Combine.CombineFn<ValueT, AccT, OutputT>() {

    @Override
    public AccT createAccumulator() {
      return accumulatorFactory.apply();
    }

    @Override
    public Coder<AccT> getAccumulatorCoder(CoderRegistry registry, Coder<ValueT> inputCoder)
        throws CannotProvideCoderException {
      return registry.getCoder(accumulatorType);
    }

    @Override
    public AccT addInput(AccT mutableAccumulator, ValueT input) {
      return accumulate.apply(mutableAccumulator, input);
    }

    @Override
    public AccT mergeAccumulators(Iterable<AccT> accumulators) {
      AccT accumulated = null;
      for (AccT o : accumulators) {
        if (accumulated == null) {
          accumulated = o;
        } else {
          accumulated = mergeAccumulators.apply(accumulated, o);
        }
      }
      return accumulated;
    }

    @Override
    public OutputT extractOutput(AccT accumulator) {
      return outputFn.apply(accumulator);
    }
  };
}
 
Example 17
Source File: StateSpec.java    From beam with Apache License 2.0
ResultT dispatchCombining(Combine.CombineFn<?, ?, ?> combineFn, Coder<?> accumCoder); 
Example 18
Source File: NamedAggregators.java    From beam with Apache License 2.0
Combine.CombineFn<InputT, InterT, OutputT> getCombineFn(); 
Example 19
Source File: SqlTransform.java    From beam with Apache License 2.0
abstract Combine.CombineFn combineFn();
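 
Several of the examples above (the SqlTransform and UdfUdafProvider snippets) deal with exposing a CombineFn as a SQL user-defined aggregate function (UDAF). As a hedged sketch of how that registration is commonly wired up with Beam SQL, the snippet below assumes a PCollection<Row> named rows with a numeric score column and reuses the hypothetical AverageFn from the sketch near the top of this page; it would live inside a pipeline-construction method.

import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

// Register the CombineFn under the SQL name "my_avg" and use it in a query.
// PCOLLECTION is the implicit table name Beam SQL gives a single input PCollection.
PCollection<Row> averaged =
    rows.apply(
        SqlTransform.query("SELECT my_avg(score) AS avg_score FROM PCOLLECTION")
            .registerUdaf("my_avg", new AverageFn()));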