Java Code Examples for org.apache.beam.sdk.util.WindowedValue

The following examples show how to use org.apache.beam.sdk.util.WindowedValue. These examples are extracted from open source projects; the source project and source file are noted above each example.
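Before the individual examples, here is a minimal, self-contained sketch of the WindowedValue construction and inspection calls that recur throughout them. It is not taken from any project below, and the class name WindowedValueUsage is made up for illustration.

import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.joda.time.Instant;

public class WindowedValueUsage {
  public static void main(String[] args) {
    // Wrap a value in the global window.
    WindowedValue<String> simple = WindowedValue.valueInGlobalWindow("hello");

    // Wrap a value in the global window with an explicit timestamp.
    WindowedValue<String> timestamped =
        WindowedValue.timestampedValueInGlobalWindow("hello", new Instant(0L));

    // Fully specify value, timestamp, window, and pane info.
    WindowedValue<String> explicit =
        WindowedValue.of("hello", new Instant(0L), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);

    // Inspect the wrapped element and its windowing metadata.
    String value = explicit.getValue();
    Instant timestamp = explicit.getTimestamp();
    Iterable<? extends BoundedWindow> windows = explicit.getWindows();
    PaneInfo pane = explicit.getPane();

    // Replace the value while keeping the timestamp, windows, and pane.
    WindowedValue<Integer> relabeled = explicit.withValue(value.length());
  }
}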
Example 1
Source Project: beam   Source File: FlinkBatchPortablePipelineTranslator.java    License: Apache License 2.0
private static void translateImpulse(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  TypeInformation<WindowedValue<byte[]>> typeInformation =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
              context.getExecutionEnvironment(),
              new ImpulseInputFormat(),
              typeInformation,
              transform.getTransform().getUniqueName())
          .name("Impulse");

  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), dataSource);
}
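Example 1 builds a full windowed-value coder with WindowedValue.getFullCoder, which encodes each element together with its timestamp, windows, and pane info. A minimal sketch (not part of the project above) of round-tripping a value through such a coder, assuming the CoderUtils helpers from org.apache.beam.sdk.util, could look like this:

import org.apache.beam.sdk.coders.ByteArrayCoder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;

public class FullCoderRoundTrip {
  public static void main(String[] args) throws CoderException {
    WindowedValue.FullWindowedValueCoder<byte[]> coder =
        WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE);

    // Encode a windowed value to bytes and decode it back; timestamp, windows,
    // and pane info survive the round trip because the "full" coder includes them.
    WindowedValue<byte[]> original = WindowedValue.valueInGlobalWindow(new byte[] {1, 2, 3});
    byte[] encoded = CoderUtils.encodeToByteArray(coder, original);
    WindowedValue<byte[]> decoded = CoderUtils.decodeFromByteArray(coder, encoded);
  }
}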
 
Example 2
Source Project: beam   Source File: GroupByKeyOnlyEvaluatorFactory.java    License: Apache License 2.0
@Override
public TransformResult<KV<K, V>> finishBundle() {
  StepTransformResult.Builder resultBuilder = StepTransformResult.withoutHold(application);
  for (Map.Entry<StructuralKey<K>, List<WindowedValue<V>>> groupedEntry :
      groupingMap.entrySet()) {
    K key = groupedEntry.getKey().getKey();
    KeyedWorkItem<K, V> groupedKv =
        KeyedWorkItems.elementsWorkItem(key, groupedEntry.getValue());
    UncommittedBundle<KeyedWorkItem<K, V>> bundle =
        evaluationContext.createKeyedBundle(
            StructuralKey.of(key, keyCoder),
            (PCollection<KeyedWorkItem<K, V>>)
                Iterables.getOnlyElement(application.getOutputs().values()));
    bundle.add(WindowedValue.valueInGlobalWindow(groupedKv));
    resultBuilder.addOutput(bundle);
  }
  return resultBuilder.build();
}
 
Example 3
Source Project: beam   Source File: BoundedReadEvaluatorFactory.java    License: Apache License 2.0
@Override
public Collection<CommittedBundle<BoundedSourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform,
    int targetParallelism)
    throws Exception {
  BoundedSource<T> source = ReadTranslation.boundedSourceFromTransform(transform);
  long estimatedBytes = source.getEstimatedSizeBytes(options);
  long bytesPerBundle = estimatedBytes / targetParallelism;
  List<? extends BoundedSource<T>> bundles = source.split(bytesPerBundle, options);
  ImmutableList.Builder<CommittedBundle<BoundedSourceShard<T>>> shards =
      ImmutableList.builder();
  for (BoundedSource<T> bundle : bundles) {
    CommittedBundle<BoundedSourceShard<T>> inputShard =
        evaluationContext
            .<BoundedSourceShard<T>>createRootBundle()
            .add(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(bundle)))
            .commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    shards.add(inputShard);
  }
  return shards.build();
}
 
Example 4
Source Project: beam   Source File: BufferingDoFnRunnerTest.java    License: Apache License 2.0
private static BufferingDoFnRunner createBufferingDoFnRunner(
    int concurrentCheckpoints,
    List<BufferingDoFnRunner.CheckpointIdentifier> notYetAcknowledgeCheckpoints)
    throws Exception {
  DoFnRunner doFnRunner = Mockito.mock(DoFnRunner.class);
  OperatorStateBackend operatorStateBackend = Mockito.mock(OperatorStateBackend.class);

  // Setup not yet acknowledged checkpoint union list state
  ListState unionListState = Mockito.mock(ListState.class);
  Mockito.when(operatorStateBackend.getUnionListState(Mockito.any())).thenReturn(unionListState);
  Mockito.when(unionListState.get()).thenReturn(notYetAcknowledgeCheckpoints);

  // Setup buffer list state
  Mockito.when(operatorStateBackend.getListState(Mockito.any()))
      .thenReturn(Mockito.mock(ListState.class));

  return BufferingDoFnRunner.create(
      doFnRunner,
      "stable-input",
      StringUtf8Coder.of(),
      WindowedValue.getFullCoder(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE),
      operatorStateBackend,
      null,
      concurrentCheckpoints);
}
 
Example 5
private static <K, InputT, OutputT, W extends BoundedWindow>
    DoFnRunner<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> makeRunner(
        GroupAlsoByWindowDoFnFactory<K, InputT, OutputT> fnFactory,
        WindowingStrategy<?, W> windowingStrategy,
        TupleTag<KV<K, OutputT>> outputTag,
        DoFnRunners.OutputManager outputManager) {

  final StepContext stepContext = new TestStepContext(STEP_NAME);

  StateInternalsFactory<K> stateInternalsFactory = key -> stepContext.stateInternals();

  BatchGroupAlsoByWindowFn<K, InputT, OutputT> fn =
      fnFactory.forStrategy(windowingStrategy, stateInternalsFactory);

  return new GroupAlsoByWindowFnRunner<>(
      PipelineOptionsFactory.create(),
      fn,
      NullSideInputReader.empty(),
      outputManager,
      outputTag,
      stepContext);
}
 
Example 6
Source Project: beam   Source File: MultiStepCombine.java    License: Apache License 2.0
@Override
public void processElement(WindowedValue<KV<K, Iterable<AccumT>>> element) throws Exception {
  checkState(
      element.getWindows().size() == 1,
      "Expected inputs to %s to be in exactly one window. Got %s",
      MergeAccumulatorsAndExtractOutputEvaluator.class.getSimpleName(),
      element.getWindows().size());
  Iterable<AccumT> inputAccumulators = element.getValue().getValue();
  try {
    AccumT first = combineFn.createAccumulator();
    AccumT merged =
        combineFn.mergeAccumulators(
            Iterables.concat(
                Collections.singleton(first),
                inputAccumulators,
                Collections.singleton(combineFn.createAccumulator())));
    OutputT extracted = combineFn.extractOutput(merged);
    output.add(element.withValue(KV.of(element.getValue().getKey(), extracted)));
  } catch (Exception e) {
    throw UserCodeException.wrap(e);
  }
}
 
Example 7
Source Project: beam   Source File: IsmSideInputReaderTest.java    License: Apache License 2.0
/**
 * Note that it is important that the return value, if split, is only split on shard boundaries,
 * because it is expected that each shard id only appears in one source.
 *
 * <p>Each windowed value is expected to be within the same window.
 */
<K, V> Multimap<Integer, IsmRecord<WindowedValue<V>>> forMap(
    IsmRecordCoder<WindowedValue<V>> coder, ListMultimap<K, WindowedValue<V>> elements)
    throws Exception {

  Multimap<Integer, IsmRecord<WindowedValue<V>>> rval =
      TreeMultimap.create(Ordering.natural(), new IsmReaderTest.IsmRecordKeyComparator<>(coder));

  for (K key : elements.keySet()) {
    long i = 0;
    for (WindowedValue<V> value : elements.get(key)) {
      IsmRecord<WindowedValue<V>> record =
          IsmRecord.of(ImmutableList.of(key, windowOf(value), i), value);
      rval.put(coder.hash(record.getKeyComponents()), record);
      i += 1L;
    }
  }

  return rval;
}
 
Example 8
Source Project: incubator-nemo   Source File: BeamBoundedSourceVertex.java    License: Apache License 2.0
@Override
public List<Readable<WindowedValue<O>>> getReadables(final int desiredNumOfSplits) throws Exception {
  final List<Readable<WindowedValue<O>>> readables = new ArrayList<>();

  if (source != null) {
    LOG.info("estimate: {}", source.getEstimatedSizeBytes(null));
    LOG.info("desired: {}", desiredNumOfSplits);
    source.split(this.estimatedSizeBytes / desiredNumOfSplits, null)
      .forEach(boundedSource -> readables.add(new BoundedSourceReadable<>(boundedSource)));
    return readables;
  } else {
    // TODO #333: Remove SourceVertex#clearInternalStates
    final SourceVertex emptySourceVertex = new EmptyComponents.EmptySourceVertex("EMPTY");
    return emptySourceVertex.getReadables(desiredNumOfSplits);
  }
}
 
Example 9
Source Project: beam   Source File: TranslationContext.java    License: Apache License 2.0
/** The dummy stream created will only be used in Beam tests. */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
  final GenericSystemDescriptor dummySystem =
      new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
  final GenericInputDescriptor<OpMessage<String>> dummyInput =
      dummySystem.getInputDescriptor(id, new NoOpSerde<>());
  dummyInput.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);
  final Config config = new MapConfig(dummyInput.toConfig(), dummySystem.toConfig());
  final SystemFactory factory = new InMemorySystemFactory();
  final StreamSpec dummyStreamSpec = new StreamSpec(id, id, id, 1);
  factory.getAdmin(id, config).createStream(dummyStreamSpec);

  final SystemProducer producer = factory.getProducer(id, config, null);
  final SystemStream sysStream = new SystemStream(id, id);
  final Consumer<Object> sendFn =
      (msg) -> {
        producer.send(id, new OutgoingMessageEnvelope(sysStream, 0, null, msg));
      };
  final WindowedValue<String> windowedValue =
      WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());

  sendFn.accept(OpMessage.ofElement(windowedValue));
  sendFn.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
  sendFn.accept(new EndOfStreamMessage(null));
  return dummyInput;
}
 
Example 10
Source Project: beam   Source File: WindmillKeyedWorkItem.java    License: Apache License 2.0
@Override
public Iterable<WindowedValue<ElemT>> elementsIterable() {
  return FluentIterable.from(workItem.getMessageBundlesList())
      .transformAndConcat(Windmill.InputMessageBundle::getMessagesList)
      .transform(
          message -> {
            try {
              Instant timestamp =
                  WindmillTimeUtils.windmillToHarnessTimestamp(message.getTimestamp());
              Collection<? extends BoundedWindow> windows =
                  WindmillSink.decodeMetadataWindows(windowsCoder, message.getMetadata());
              PaneInfo pane = WindmillSink.decodeMetadataPane(message.getMetadata());

              InputStream inputStream = message.getData().newInput();
              ElemT value = valueCoder.decode(inputStream, Coder.Context.OUTER);
              return WindowedValue.of(value, timestamp, windows, pane);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          });
}
 
Example 11
Source Project: beam   Source File: BatchStatefulParDoOverrides.java    License: Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> fn = originalParDo.getFn();
  verifyFnIsStateful(fn);
  DataflowRunner.verifyDoFnSupportedBatch(fn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    return input.apply(Reshuffle.of()).apply(originalParDo);
  }

  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollection<OutputT>>
      statefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(fn)).withSideInputs(originalParDo.getSideInputs());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(statefulParDo);
}
 
Example 12
Source Project: beam   Source File: GroupNonMergingWindowsFunctionsTest.java    License: Apache License 2.0
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> winValCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
          winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> factory =
      ItemFactory.forWindow(keyCoder, winValCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> items =
      Arrays.asList(
          factory.create("k1", 1),
          factory.create("k1", 2),
          factory.create("k2", 3),
          factory.create("k2", 4),
          factory.create("k2", 5));
  return new GroupByKeyIterator<>(items.iterator(), keyCoder, winStrategy, winValCoder);
}
 
Example 13
Source Project: twister2   Source File: AssignWindowsFunction.java    License: Apache License 2.0
@Override
public void compute(Iterator<WindowedValue<T>> input, RecordCollector<WindowedValue<T>> output) {
  WindowedValue<T> element;
  try {
    while (input.hasNext()) {
      element = input.next();
      Collection<BoundedWindow> windows =
          windowFn.assignWindows(new Twister2AssignContext<>(windowFn, element));

      for (BoundedWindow window : windows) {
        output.collect(
            WindowedValue.of(
                element.getValue(), element.getTimestamp(), window, element.getPane()));
      }
    }
  } catch (Exception e) {
    LOG.info(e.getMessage());
  }
}
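Example 13 copies each element into one single-window WindowedValue per assigned window. The same explode-per-window pattern, pulled out as a hypothetical standalone helper (not part of the twister2 project above), might look like this:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.WindowedValue;

public class ExplodeWindows {
  // Hypothetical helper: produce one single-window WindowedValue per assigned window,
  // keeping the original value, timestamp, and pane info.
  static <T> List<WindowedValue<T>> explodeIntoWindows(
      WindowedValue<T> element, Collection<? extends BoundedWindow> assignedWindows) {
    List<WindowedValue<T>> out = new ArrayList<>();
    for (BoundedWindow window : assignedWindows) {
      out.add(
          WindowedValue.of(
              element.getValue(), element.getTimestamp(), window, element.getPane()));
    }
    return out;
  }
}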
 
Example 14
Source Project: beam   Source File: BoundedReadEvaluatorFactoryTest.java    License: Apache License 2.0
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());

  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
 
Example 15
Source Project: beam   Source File: ShuffleSinkFactoryTest.java    License: Apache License 2.0
void runTestCreateGroupingSortingShuffleSink(
    byte[] shuffleWriterConfig, Coder<?> keyCoder, Coder<?> sortKeyCoder, Coder<?> sortValueCoder)
    throws Exception {
  FullWindowedValueCoder<?> coder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, KvCoder.of(sortKeyCoder, sortValueCoder)),
          IntervalWindow.getCoder());
  ShuffleSink shuffleSink =
      runTestCreateShuffleSinkHelper(
          shuffleWriterConfig, "group_keys_and_sort_values", coder, coder);
  Assert.assertEquals(
      ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES, shuffleSink.shuffleKind);
  Assert.assertTrue(shuffleSink.shardByKey);
  Assert.assertTrue(shuffleSink.groupValues);
  Assert.assertTrue(shuffleSink.sortValues);
  Assert.assertEquals(keyCoder, shuffleSink.keyCoder);
  Assert.assertEquals(KvCoder.of(sortKeyCoder, sortValueCoder), shuffleSink.valueCoder);
  Assert.assertEquals(sortKeyCoder, shuffleSink.sortKeyCoder);
  Assert.assertEquals(sortValueCoder, shuffleSink.sortValueCoder);
  Assert.assertNull(shuffleSink.windowedValueCoder);
}
 
Example 16
private void translateImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> typeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  long shutdownAfterIdleSourcesMs = context.getPipelineOptions().getShutdownSourcesAfterIdleMs();
  SingleOutputStreamOperator<WindowedValue<byte[]>> source =
      context
          .getExecutionEnvironment()
          .addSource(new ImpulseSourceFunction(shutdownAfterIdleSourcesMs), "Impulse")
          .returns(typeInfo);

  context.addDataStream(Iterables.getOnlyElement(pTransform.getOutputsMap().values()), source);
}
 
Example 17
Source Project: beam   Source File: UnboundedDataset.java    License: Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void cache(String storageLevel, Coder<?> coder) {
  // we "force" MEMORY storage level in streaming
  if (!StorageLevel.fromString(storageLevel).equals(StorageLevel.MEMORY_ONLY_SER())) {
    LOG.warn(
        "Provided StorageLevel: {} is ignored for streams, using the default level: {}",
        storageLevel,
        StorageLevel.MEMORY_ONLY_SER());
  }
  // Caching can cause serialization, so we need to encode to bytes using the coder
  // more details in https://issues.apache.org/jira/browse/BEAM-2669
  Coder<WindowedValue<T>> wc = (Coder<WindowedValue<T>>) coder;
  this.dStream =
      dStream.map(CoderHelpers.toByteFunction(wc)).cache().map(CoderHelpers.fromByteFunction(wc));
}
 
Example 18
Source Project: beam   Source File: FnApiDoFnRunner.java    License: Apache License 2.0
private void processElementForWindowObservingSplitRestriction(
    WindowedValue<KV<InputT, KV<RestrictionT, WatermarkEstimatorStateT>>> elem) {
  currentElement = elem.withValue(elem.getValue().getKey());
  currentRestriction = elem.getValue().getValue().getKey();
  currentWatermarkEstimatorState = elem.getValue().getValue().getValue();
  try {
    Iterator<BoundedWindow> windowIterator =
        (Iterator<BoundedWindow>) elem.getWindows().iterator();
    while (windowIterator.hasNext()) {
      currentWindow = windowIterator.next();
      doFnInvoker.invokeSplitRestriction(processContext);
    }
  } finally {
    currentElement = null;
    currentRestriction = null;
    currentWatermarkEstimatorState = null;
    currentWindow = null;
  }

  // TODO(BEAM-10212): Support caching state data across bundle boundaries.
  this.stateAccessor.finalizeState();
}
 
Example 19
Source Project: beam   Source File: GroupCombineFunctions.java    License: Apache License 2.0
/**
 * An implementation of {@link
 * org.apache.beam.runners.core.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly} for the Spark runner.
 */
public static <K, V> JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupByKeyOnly(
    JavaRDD<WindowedValue<KV<K, V>>> rdd,
    Coder<K> keyCoder,
    WindowedValueCoder<V> wvCoder,
    @Nullable Partitioner partitioner) {
  // we use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle.
  JavaPairRDD<ByteArray, byte[]> pairRDD =
      rdd.map(new ReifyTimestampsAndWindowsFunction<>())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder));

  // If no partitioner is passed, the default group by key operation is called
  JavaPairRDD<ByteArray, Iterable<byte[]>> groupedRDD =
      (partitioner != null) ? pairRDD.groupByKey(partitioner) : pairRDD.groupByKey();

  return groupedRDD
      .mapToPair(CoderHelpers.fromByteFunctionIterable(keyCoder, wvCoder))
      .map(new TranslationUtils.FromPairFunction<>());
}
 
Example 20
Source Project: beam   Source File: SplittableDoFnOperator.java    License: Apache License 2.0
@Override
protected void fireTimer(TimerInternals.TimerData timer) {
  timerInternals.onFiredOrDeletedTimer(timer);
  if (timer.getDomain().equals(TimeDomain.EVENT_TIME)) {
    // ignore this, it can only be a state cleanup timer from StatefulDoFnRunner, and ProcessFn
    // does its own state cleanup and should never set event-time timers.
    return;
  }
  doFnRunner.processElement(
      WindowedValue.valueInGlobalWindow(
          KeyedWorkItems.timersWorkItem(
              (byte[]) keyedStateInternals.getKey(), Collections.singletonList(timer))));
}
 
Example 21
Source Project: incubator-nemo   Source File: CreateViewTransform.java    License: Apache License 2.0
@Override
public void onWatermark(final Watermark inputWatermark) {
  // If no data, just forwards the watermark
  if (windowListMap.size() == 0 && currentOutputWatermark < inputWatermark.getTimestamp()) {
    currentOutputWatermark = inputWatermark.getTimestamp();
    outputCollector.emitWatermark(inputWatermark);
    return;
  }

  final Iterator<Map.Entry<BoundedWindow, List<I>>> iterator = windowListMap.entrySet().iterator();
  long minOutputTimestampOfEmittedWindows = Long.MAX_VALUE;

  while (iterator.hasNext()) {
    final Map.Entry<BoundedWindow, List<I>> entry = iterator.next();
    if (entry.getKey().maxTimestamp().getMillis() <= inputWatermark.getTimestamp()) {
      // emit the windowed data if the watermark timestamp > the window max boundary
      final O output = viewFn.apply(new MultiView<>(entry.getValue()));
      outputCollector.emit(WindowedValue.of(
        output, entry.getKey().maxTimestamp(), entry.getKey(), PaneInfo.ON_TIME_AND_ONLY_FIRING));
      iterator.remove();

      minOutputTimestampOfEmittedWindows =
        Math.min(minOutputTimestampOfEmittedWindows, entry.getKey().maxTimestamp().getMillis());
    }
  }

  if (minOutputTimestampOfEmittedWindows != Long.MAX_VALUE
    && currentOutputWatermark < minOutputTimestampOfEmittedWindows) {
    // update current output watermark and emit to next operators
    currentOutputWatermark = minOutputTimestampOfEmittedWindows;
    outputCollector.emitWatermark(new Watermark(currentOutputWatermark));
  }
}
 
Example 22
Source Project: beam   Source File: AssignWindowsRunner.java    License: Apache License 2.0
@Override
public ThrowingFunction<WindowedValue<T>, WindowedValue<T>> forPTransform(
    String ptransformId, PTransform ptransform) throws IOException {
  checkArgument(
      PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN.equals(ptransform.getSpec().getUrn()));
  checkArgument(ptransform.getInputsCount() == 1, "Expected only one input");
  checkArgument(ptransform.getOutputsCount() == 1, "Expected only one output");
  WindowIntoPayload payload = WindowIntoPayload.parseFrom(ptransform.getSpec().getPayload());

  WindowFn<T, ?> windowFn =
      (WindowFn<T, ?>) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn());

  return AssignWindowsRunner.create(windowFn)::assignWindows;
}
 
Example 23
Source Project: beam   Source File: WatermarkManagerTest.java    License: Apache License 2.0
@SafeVarargs
private final <T> CommittedBundle<T> timestampedBundle(
    PCollection<T> pc, TimestampedValue<T>... values) {
  UncommittedBundle<T> bundle = bundleFactory.createBundle(pc);
  for (TimestampedValue<T> value : values) {
    bundle.add(
        WindowedValue.timestampedValueInGlobalWindow(value.getValue(), value.getTimestamp()));
  }
  return bundle.commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
}
 
Example 24
Source Project: beam   Source File: ExecutableStageDoFnOperatorTest.java    License: Apache License 2.0
/**
 * Creates an {@link ExecutableStageDoFnOperator}. Sets the runtime context to {@link
 * #runtimeContext}. The context factory is mocked to return {@link #stageContext} every time. The
 * behavior of the stage context itself is unchanged.
 */
@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(
    TupleTag<Integer> mainOutput,
    List<TupleTag<?>> additionalOutputs,
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory) {
  return getOperator(
      mainOutput,
      additionalOutputs,
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      null,
      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));
}
 
Example 25
Source Project: beam   Source File: FlinkBatchTransformTranslators.java    License: Apache License 2.0
@Override
public void translateNode(
    PTransform<PCollection<T>, PCollection<T>> transform,
    FlinkBatchTranslationContext context) {
  PValue input = context.getInput(transform);

  TypeInformation<WindowedValue<T>> resultTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);

  @SuppressWarnings("unchecked")
  final WindowingStrategy<T, ? extends BoundedWindow> windowingStrategy =
      (WindowingStrategy<T, ? extends BoundedWindow>)
          context.getOutput(transform).getWindowingStrategy();

  WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();

  FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
      new FlinkAssignWindows<>(windowFn);

  DataSet<WindowedValue<T>> resultDataSet =
      inputDataSet
          .flatMap(assignWindowsFunction)
          .name(context.getOutput(transform).getName())
          .returns(resultTypeInfo);

  context.setOutputDataSet(context.getOutput(transform), resultDataSet);
}
 
Example 26
Source Project: beam   Source File: SparkCombineFn.java    License: Apache License 2.0
@Override
void mergeWindows(SparkCombineFn<?, ?, AccumT, ?> fn) throws Exception {

  SparkCombineContext ctx = fn.ctxtForWindows(this.map.keySet());

  @SuppressWarnings("unchecked")
  WindowFn<Object, BoundedWindow> windowFn = (WindowFn) fn.windowingStrategy.getWindowFn();
  windowFn.mergeWindows(
      asMergeContext(
          windowFn,
          (a, b) -> fn.combineFn.mergeAccumulators(Lists.newArrayList(a, b), ctx),
          (toBeMerged, mergeResult) -> {
            Instant mergedInstant =
                fn.windowingStrategy
                    .getTimestampCombiner()
                    .merge(
                        mergeResult.getKey(),
                        toBeMerged.stream()
                            .map(w -> map.get(w).getTimestamp())
                            .collect(Collectors.toList()));
            toBeMerged.forEach(this.map::remove);
            this.map.put(
                mergeResult.getKey(),
                WindowedValue.of(
                    mergeResult.getValue(),
                    mergedInstant,
                    mergeResult.getKey(),
                    PaneInfo.NO_FIRING));
          },
          map));
}
 
Example 27
Source Project: beam   Source File: FlinkStreamingTransformTranslators.java    License: Apache License 2.0
@Override
public void collectWithTimestamp(
    WindowedValue<ValueWithRecordId<OutputT>> element, long timestamp) {
  OutputT originalValue = element.getValue().getValue();
  WindowedValue<OutputT> output =
      WindowedValue.of(
          originalValue, element.getTimestamp(), element.getWindows(), element.getPane());
  ctx.collectWithTimestamp(output, timestamp);
}
 
Example 28
Source Project: beam   Source File: TranslationUtils.java    License: Apache License 2.0
@SuppressFBWarnings(
    value = "NP_METHOD_PARAMETER_TIGHTENS_ANNOTATION",
    justification = "https://github.com/google/guava/issues/920")
@Override
public WindowedValue<KV<K, V>> apply(@Nonnull KV<K, WindowedValue<V>> kv) {
  return call(kv);
}
 
Example 29
Source Project: beam   Source File: DirectTransformExecutorTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("FutureReturnValueIgnored") // expected exception checked via completionCallback
public void processElementThrowsExceptionCallsback() throws Exception {
  final TransformResult<String> result =
      StepTransformResult.<String>withoutHold(downstreamProducer).build();
  final Exception exception = new Exception();
  TransformEvaluator<String> evaluator =
      new TransformEvaluator<String>() {
        @Override
        public void processElement(WindowedValue<String> element) throws Exception {
          throw exception;
        }

        @Override
        public TransformResult<String> finishBundle() throws Exception {
          return result;
        }
      };

  WindowedValue<String> foo = WindowedValue.valueInGlobalWindow("foo");
  CommittedBundle<String> inputBundle =
      bundleFactory.createBundle(created).add(foo).commit(Instant.now());
  when(registry.<String>forApplication(downstreamProducer, inputBundle)).thenReturn(evaluator);

  DirectTransformExecutor<String> executor =
      new DirectTransformExecutor<>(
          evaluationContext,
          registry,
          Collections.emptyList(),
          inputBundle,
          downstreamProducer,
          completionCallback,
          transformEvaluationState);
  Executors.newSingleThreadExecutor().submit(executor);

  evaluatorCompleted.await();

  assertThat(completionCallback.handledResult, is(nullValue()));
  assertThat(completionCallback.handledException, Matchers.<Throwable>equalTo(exception));
}
 
Example 30
Source Project: beam   Source File: IsmSideInputReader.java    License: Apache License 2.0
private MapOverReaders(
    W window,
    Function<KV<K, IsmReader<WindowedValue<V1>>.IsmPrefixReaderIterator>, V2> transform,
    List<IsmReader<WindowedValue<V1>>> readers,
    Coder<K> keyCoder,
    long size) {

  this.window = window;
  this.transform = transform;
  this.readers = readers;
  this.keyCoder = keyCoder;
  this.size = size;
}