org.apache.beam.sdk.transforms.windowing.BoundedWindow Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.BoundedWindow. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WriteFilesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the file name for one shard of a windowed write and resolves it as a file inside the
 * current directory of {@code baseFilename}.
 *
 * <p>The name is assembled as {@code <window prefix>-<shard>-of-<total><hint suffix><suffix>},
 * where both shard numbers are rendered with the {@code "0000"} decimal pattern.
 *
 * @param shardNumber number of the shard being named
 * @param numShards total number of shards
 * @param window window of the written data; it is cast to {@link IntervalWindow}
 * @param paneInfo not used by this implementation
 * @param outputFileHints source of the suggested filename suffix
 * @return the resolved file resource
 */
@Override
public ResourceId windowedFilename(
    int shardNumber,
    int numShards,
    BoundedWindow window,
    PaneInfo paneInfo,
    OutputFileHints outputFileHints) {
  DecimalFormat shardFormat = new DecimalFormat("0000");
  IntervalWindow interval = (IntervalWindow) window;
  String shardFileName =
      String.format(
          "%s-%s-of-%s%s%s",
          filenamePrefixForWindow(interval),
          shardFormat.format(shardNumber),
          shardFormat.format(numShards),
          outputFileHints.getSuggestedFilenameSuffix(),
          suffix);
  ResourceId directory = baseFilename.getCurrentDirectory();
  return directory.resolve(shardFileName, StandardResolveOptions.RESOLVE_FILE);
}
 
Example #2
Source File: SystemReduceFn.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Create a factory that produces {@link SystemReduceFn} instances that buffer all of the
 * input values in persistent state and produces an {@code Iterable<T>}.
 *
 * @param inputCoder coder for the input values; used to build the bag state tag named by
 *     {@code BUFFER_NAME}
 * @return a {@link SystemReduceFn} constructed over that bag state tag
 */
public static <K, T, W extends BoundedWindow>
    SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W> buffering(final Coder<T> inputCoder) {
  // Single bag-state tag shared by the constructor and both merge hooks below.
  final StateTag<BagState<T>> bufferTag =
      StateTags.makeSystemTagInternal(StateTags.bag(BUFFER_NAME, inputCoder));
  return new SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W>(bufferTag) {
    // Delegates prefetching of the buffer bags to StateMerging.
    @Override
    public void prefetchOnMerge(MergingStateAccessor<K, W> state) throws Exception {
      StateMerging.prefetchBags(state, bufferTag);
    }

    // Delegates merging of the buffer bags to StateMerging.
    @Override
    public void onMerge(OnMergeContext c) throws Exception {
      StateMerging.mergeBags(c.state(), bufferTag);
    }
  };
}
 
Example #3
Source File: BoundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the bundles returned by {@code InputProvider.getInitialInputs(longsProducer, 3)}: there
 * must be 3 or 4 of them, each holding exactly one shard that is in the global window and
 * stamped with {@link BoundedWindow#TIMESTAMP_MIN_VALUE}. The sources of all shards are then
 * handed to {@code SourceTestUtils.assertSourcesEqualReferenceSource} together with
 * {@code source}.
 */
@Test
public void getInitialInputsSplitsIntoBundles() throws Exception {
  // Every root bundle requested from the mocked context comes from the real bundle factory.
  when(context.createRootBundle()).thenAnswer(invocation -> bundleFactory.createRootBundle());
  Collection<CommittedBundle<?>> initialInputs =
      new BoundedReadEvaluatorFactory.InputProvider(context, options)
          .getInitialInputs(longsProducer, 3);

  // The requested count of 3 is allowed to yield either 3 or 4 bundles.
  assertThat(initialInputs, hasSize(allOf(greaterThanOrEqualTo(3), lessThanOrEqualTo(4))));

  Collection<BoundedSource<Long>> sources = new ArrayList<>();
  for (CommittedBundle<?> initialInput : initialInputs) {
    // Raw cast: the bundle's element type is erased to a wildcard at this point.
    Iterable<WindowedValue<BoundedSourceShard<Long>>> shards =
        (Iterable) initialInput.getElements();
    // Fails if a bundle holds anything other than exactly one shard.
    WindowedValue<BoundedSourceShard<Long>> shard = Iterables.getOnlyElement(shards);
    assertThat(shard.getWindows(), Matchers.contains(GlobalWindow.INSTANCE));
    assertThat(shard.getTimestamp(), equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
    sources.add(shard.getValue().getSource());
  }

  SourceTestUtils.assertSourcesEqualReferenceSource(
      source, (List<? extends BoundedSource<Long>>) sources, PipelineOptionsFactory.create());
}
 
Example #4
Source File: DoFnOperator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Binds every declared state spec of {@code doFn} to an {@code EarlyBinder} created over the
 * keyed state backend; does nothing unless both {@code keyCoder} and {@code doFn} are set.
 *
 * <p>When {@code doFnRunner} is a {@code StatefulDoFnRunner}, its system state tags are bound to
 * the same binder as well.
 *
 * @throws IllegalArgumentException declared for the reflective read of a state field
 * @throws IllegalAccessException declared for the reflective read of a state field
 */
private void earlyBindStateIfNeeded() throws IllegalArgumentException, IllegalAccessException {
  if (keyCoder == null || doFn == null) {
    return;
  }

  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  FlinkStateInternals.EarlyBinder earlyBinder =
      new FlinkStateInternals.EarlyBinder(getKeyedStateBackend());

  for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
    // The StateSpec instance lives in the DoFn field recorded by the declaration.
    Object fieldValue =
        signature.stateDeclarations().get(declaration.id()).field().get(doFn);
    StateSpec<?> spec = (StateSpec<?>) fieldValue;
    spec.bind(declaration.id(), earlyBinder);
  }

  if (!(doFnRunner instanceof StatefulDoFnRunner)) {
    return;
  }
  ((StatefulDoFnRunner<InputT, OutputT, BoundedWindow>) doFnRunner)
      .getSystemStateTags()
      .forEach(tag -> tag.getSpec().bind(tag.getId(), earlyBinder));
}
 
Example #5
Source File: GroupAlsoByWindowViaWindowSetNewDoFn.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Static factory that forwards all of its arguments, unchanged and in the same order, to the
 * {@code GroupAlsoByWindowViaWindowSetNewDoFn} constructor.
 *
 * @param strategy windowing strategy passed to the new DoFn
 * @param stateInternalsFactory state internals factory passed to the new DoFn
 * @param timerInternalsFactory timer internals factory passed to the new DoFn
 * @param sideInputReader side input reader passed to the new DoFn
 * @param reduceFn reduce function passed to the new DoFn
 * @param outputManager output manager passed to the new DoFn
 * @param mainTag main output tag passed to the new DoFn
 * @return the newly constructed DoFn
 */
public static <K, InputT, OutputT, W extends BoundedWindow>
    DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        WindowingStrategy<?, W> strategy,
        StateInternalsFactory<K> stateInternalsFactory,
        TimerInternalsFactory<K> timerInternalsFactory,
        SideInputReader sideInputReader,
        SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn,
        DoFnRunners.OutputManager outputManager,
        TupleTag<KV<K, OutputT>> mainTag) {
  return new GroupAlsoByWindowViaWindowSetNewDoFn<>(
      strategy,
      stateInternalsFactory,
      timerInternalsFactory,
      sideInputReader,
      reduceFn,
      outputManager,
      mainTag);
}
 
Example #6
Source File: DoFnOp.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Delivers a fired timer to the DoFn runner.
 *
 * <p>The window is taken from the timer's namespace, which is cast to a window namespace without
 * a prior type check. A {@code DoFnRunnerWithKeyedInternals} receives the keyed timer data
 * itself; otherwise the individual timer fields are passed to {@code pushbackFnRunner} with a
 * {@code null} third argument.
 *
 * @param keyedTimerData the timer to fire
 */
private void fireTimer(KeyedTimerData<?> keyedTimerData) {
  final TimerInternals.TimerData timer = keyedTimerData.getTimerData();
  LOG.debug("Firing timer {}", timer);

  // NOTE: not sure why this is safe, but DoFnOperator makes this assumption
  final StateNamespaces.WindowNamespace windowNamespace =
      (StateNamespaces.WindowNamespace) timer.getNamespace();
  final BoundedWindow window = windowNamespace.getWindow();

  if (!(fnRunner instanceof DoFnRunnerWithKeyedInternals)) {
    pushbackFnRunner.onTimer(
        timer.getTimerId(),
        timer.getTimerFamilyId(),
        null,
        window,
        timer.getTimestamp(),
        timer.getOutputTimestamp(),
        timer.getDomain());
    return;
  }

  // Need to pass in the keyed TimerData here
  ((DoFnRunnerWithKeyedInternals) fnRunner).onTimer(keyedTimerData, window);
}
 
Example #7
Source File: TimestampPolicyFactory.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the timestamp to use for {@code record}, which is always the current watermark.
 *
 * <p>If the record's timestamp type is {@code LOG_APPEND_TIME}, the watermark is first set to the
 * record's timestamp. A record with any other timestamp type leaves the watermark untouched,
 * except while the watermark still equals {@link BoundedWindow#TIMESTAMP_MIN_VALUE}, in which
 * case the call fails.
 *
 * @param context not used by this implementation
 * @param record the Kafka record being read
 * @return the current watermark after the record has been taken into account
 * @throws IllegalStateException if the record is not of type {@code LOG_APPEND_TIME} and the
 *     watermark still equals {@link BoundedWindow#TIMESTAMP_MIN_VALUE}
 */
@Override
public Instant getTimestampForRecord(PartitionContext context, KafkaRecord<K, V> record) {
  if (record.getTimestampType().equals(KafkaTimestampType.LOG_APPEND_TIME)) {
    currentWatermark = new Instant(record.getTimestamp());
  } else if (currentWatermark.equals(BoundedWindow.TIMESTAMP_MIN_VALUE)) {
    // This is the first record and it does not have LOG_APPEND_TIME.
    // Most likely the topic is not configured correctly.
    // This branch is only reached when the type is NOT LOG_APPEND_TIME, so the message
    // must say so.
    throw new IllegalStateException(
        String.format(
            "LogAppendTimePolicy policy is enabled in reader, but Kafka record's timestamp type "
                + "is not LogAppendTime. Most likely it is not enabled on Kafka for the topic "
                + "'%s'. Actual timestamp type is '%s'.",
            record.getTopic(), record.getTimestampType()));
  }
  return currentWatermark;
}
 
Example #8
Source File: TimerReceiverFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a receiver that turns each incoming {@code Timer} into one {@code TimerData} per
 * window of that timer and passes the pair to {@code timerDataConsumer}.
 *
 * <p>When the timer's clear bit is set, both the fire and hold timestamps of the generated
 * {@code TimerData} are replaced by {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 *
 * @param transformId id of the transform owning the timer; part of the spec lookup key
 * @param timerFamilyId id of the timer family; part of the spec lookup key
 * @return the receiver described above
 */
public <K> FnDataReceiver<Timer<K>> create(String transformId, String timerFamilyId) {
  // NOTE(review): if no spec is registered for this pair the lookup presumably yields null and
  // the receiver would only fail on its first element — confirm against the map's population.
  final ProcessBundleDescriptors.TimerSpec timerSpec =
      transformAndTimerIdToSpecMap.get(KV.of(transformId, timerFamilyId));

  return receivedElement -> {
    // The message argument is the element being null-checked, so whenever this message is
    // produced it is formatted with a null value.
    Timer timer =
        checkNotNull(
            receivedElement, "Received null Timer from SDK harness: %s", receivedElement);
    LOG.debug("Timer received: {}", timer);
    // Raw Timer type: windows are seen as Object here and cast to BoundedWindow per element.
    for (Object window : timer.getWindows()) {
      StateNamespace namespace = StateNamespaces.window(windowCoder, (BoundedWindow) window);
      TimerInternals.TimerData timerData =
          TimerInternals.TimerData.of(
              encodeToTimerDataTimerId(timerSpec.transformId(), timerSpec.timerId()),
              namespace,
              timer.getClearBit() ? BoundedWindow.TIMESTAMP_MAX_VALUE : timer.getFireTimestamp(),
              timer.getClearBit() ? BoundedWindow.TIMESTAMP_MAX_VALUE : timer.getHoldTimestamp(),
              timerSpec.getTimerSpec().getTimeDomain());
      timerDataConsumer.accept(timer, timerData);
    }
  };
}
 
Example #9
Source File: StatefulDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Advances the input watermark of {@code timerInternals} and then drains its event-time timers,
 * passing each removed timer to {@code toTrigger.onTimer} until none is returned.
 *
 * @param timerInternals in-memory timers whose watermark is advanced
 * @param newInputWatermark the watermark to advance to
 * @param toTrigger runner that receives every removed timer
 * @throws Exception declared because the calls made here may throw
 */
private static void advanceInputWatermark(
    InMemoryTimerInternals timerInternals, Instant newInputWatermark, DoFnRunner<?, ?> toTrigger)
    throws Exception {
  timerInternals.advanceInputWatermark(newInputWatermark);
  for (TimerInternals.TimerData fired = timerInternals.removeNextEventTimer();
      fired != null;
      fired = timerInternals.removeNextEventTimer()) {
    StateNamespace firedNamespace = fired.getNamespace();
    // Only window namespaces are supported; anything else fails the precondition.
    checkArgument(firedNamespace instanceof StateNamespaces.WindowNamespace);
    BoundedWindow firedWindow = ((StateNamespaces.WindowNamespace) firedNamespace).getWindow();
    toTrigger.onTimer(
        fired.getTimerId(),
        fired.getTimerFamilyId(),
        null,
        firedWindow,
        fired.getTimestamp(),
        fired.getOutputTimestamp(),
        fired.getDomain());
  }
}
 
Example #10
Source File: FlinkStreamingSideInputHandlerFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an {@link IterableSideInputHandler} for the side input identified by
 * {@code transformId} and {@code sideInputId}.
 *
 * <p>The side input's view is looked up in {@code sideInputToCollection}; a missing entry fails
 * the {@code checkArgument} precondition. The returned handler reads the view's contents per
 * window from {@code runnerHandler}.
 *
 * @param transformId id of the transform that consumes the side input
 * @param sideInputId local name of the side input
 * @param elementCoder coder returned by the handler's {@code elementCoder()}
 * @param windowCoder not used by this implementation
 * @return the handler described above
 */
@Override
public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(
    String transformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {

  PCollectionView collectionNode =
      sideInputToCollection.get(
          SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build());
  checkArgument(collectionNode != null, "No side input for %s/%s", transformId, sideInputId);

  return new IterableSideInputHandler<V, W>() {
    // Fails the checkNotNull precondition when the runner handler has no iterable for the
    // requested window.
    @Override
    public Iterable<V> get(W window) {
      return checkNotNull(
          (Iterable<V>) runnerHandler.getIterable(collectionNode, window),
          "Element processed by SDK before side input is ready");
    }

    @Override
    public Coder<V> elementCoder() {
      return elementCoder;
    }
  };
}
 
Example #11
Source File: StreamingDataflowWorkerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code ParallelInstruction} whose write step takes output 0 of the instruction at
 * {@code producerIndex} and sends it to a sink specified as a {@code WindmillSink} cloud object
 * carrying {@code streamId}.
 *
 * @param streamId value stored under {@code "stream_id"} in the sink spec
 * @param coder element coder combined with {@code windowCoder} into the sink's codec
 * @param producerIndex index of the instruction producing the sink's input
 * @param windowCoder window coder combined with {@code coder} into the sink's codec
 * @return the assembled instruction, named with the default sink system and original names
 */
private ParallelInstruction makeSinkInstruction(
    String streamId,
    Coder<?> coder,
    int producerIndex,
    Coder<? extends BoundedWindow> windowCoder) {
  CloudObject spec = CloudObject.forClass(WindmillSink.class);
  addString(spec, "stream_id", streamId);
  return new ParallelInstruction()
      .setSystemName(DEFAULT_SINK_SYSTEM_NAME)
      .setOriginalName(DEFAULT_SINK_ORIGINAL_NAME)
      .setWrite(
          new WriteInstruction()
              .setInput(
                  new InstructionInput()
                      .setProducerInstructionIndex(producerIndex)
                      .setOutputNum(0))
              .setSink(
                  new Sink()
                      .setSpec(spec)
                      // The codec is the full windowed-value coder, converted to a cloud object.
                      .setCodec(
                          CloudObjects.asCloudObject(
                              WindowedValue.getFullCoder(coder, windowCoder),
                              /*sdkComponents=*/ null))));
}
 
Example #12
Source File: WatermarkManagerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the watermark manager a bundle for the {@code filtered} producer that was never listed
 * among the bundles of an earlier {@code updateWatermarks} call, and checks that after
 * {@code refreshAll()} the input watermark of {@code filtered} equals {@code new Instant(22)},
 * the timestamp shared by both elements.
 */
@Test
public void updateWatermarkWithCompletedElementsNotPending() {
  WindowedValue<Integer> first = WindowedValue.timestampedValueInGlobalWindow(1, new Instant(22));
  CommittedBundle<Integer> createdBundle =
      bundleFactory.createBundle(createdInts).add(first).commit(clock.now());

  WindowedValue<Integer> second =
      WindowedValue.timestampedValueInGlobalWindow(2, new Instant(22));
  // Same PCollection and timestamp as createdBundle, but this bundle is only ever passed to the
  // manager in the second update below.
  CommittedBundle<Integer> neverCreatedBundle =
      bundleFactory.createBundle(createdInts).add(second).commit(clock.now());

  // First update: only createdBundle is reported, for the createdInts producer.
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      graph.getProducer(createdInts),
      null,
      Collections.<CommittedBundle<?>>singleton(createdBundle),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Second update: neverCreatedBundle is reported for the filtered producer, together with an
  // emptied copy of itself and no further bundles.
  manager.updateWatermarks(
      neverCreatedBundle,
      TimerUpdate.empty(),
      graph.getProducer(filtered),
      neverCreatedBundle.withElements(Collections.emptyList()),
      Collections.emptyList(),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  manager.refreshAll();
  TransformWatermarks filteredWms = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(filteredWms.getInputWatermark(), equalTo(new Instant(22L)));
}
 
Example #13
Source File: StatefulParDoEvaluatorFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Static factory that wraps the given transform, key and window in the AutoValue-generated
 * implementation of {@code AppliedPTransformOutputKeyAndWindow}.
 *
 * @param transform the applied stateful ParDo transform
 * @param key the structural key
 * @param w the window
 * @return a new value holding the three arguments
 */
static <K, InputT, OutputT> AppliedPTransformOutputKeyAndWindow<K, InputT, OutputT> create(
    AppliedPTransform<
            PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>,
            PCollectionTuple,
            StatefulParDo<K, InputT, OutputT>>
        transform,
    StructuralKey<K> key,
    BoundedWindow w) {
  return new AutoValue_StatefulParDoEvaluatorFactory_AppliedPTransformOutputKeyAndWindow<>(
      transform, key, w);
}
 
Example #14
Source File: AbstractFlinkCombineRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the accumulator itself as the output; no extraction is performed here.
 *
 * @param key not used by this implementation
 * @param accumulator the accumulator, returned unchanged
 * @param options not used by this implementation
 * @param sideInputReader not used by this implementation
 * @param windows not used by this implementation
 * @return {@code accumulator}
 */
@Override
public AccumT extractOutput(
    K key,
    AccumT accumulator,
    PipelineOptions options,
    SideInputReader sideInputReader,
    Collection<? extends BoundedWindow> windows) {
  return accumulator;
}
 
Example #15
Source File: SparkGroupAlsoByWindowViaOutputBufferFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given output in a {@link WindowedValue} with the supplied timestamp, windows and
 * pane, and appends it to {@code outputs}.
 *
 * @param output the grouped key/values pair to emit
 * @param timestamp timestamp of the emitted value
 * @param windows windows the emitted value belongs to
 * @param pane pane info of the emitted value
 */
@Override
public void outputWindowedValue(
    KV<K, Iterable<V>> output,
    Instant timestamp,
    Collection<? extends BoundedWindow> windows,
    PaneInfo pane) {
  WindowedValue<KV<K, Iterable<V>>> windowedOutput =
      WindowedValue.of(output, timestamp, windows, pane);
  outputs.add(windowedOutput);
}
 
Example #16
Source File: DataflowSideInputHandlerFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the side input for {@code window} as a multimap view and returns the result of its
 * no-argument {@code get()}.
 *
 * <p>The window is first converted with {@code windowCoder.structuralValue} and cast to
 * {@link BoundedWindow} before it is used for the lookup.
 *
 * @param window window whose side input contents are requested
 * @return the iterable returned by the multimap view
 */
@Override
public Iterable<K> get(W window) {
  BoundedWindow lookupWindow = (BoundedWindow) windowCoder.structuralValue(window);
  Materializations.MultimapView<K, V> multimapView =
      (Materializations.MultimapView<K, V>) sideInputReader.get(view, lookupWindow);
  return multimapView.get();
}
 
Example #17
Source File: StaticWindows.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code StaticWindows} over the given, non-empty set of windows.
 *
 * @param coder coder for the windows; cast unchecked to {@code Coder<BoundedWindow>}
 * @param windows the windows; must not be empty, and are cast unchecked to
 *     {@code Iterable<BoundedWindow>}
 * @return the new {@code StaticWindows}, constructed with {@code false} as its last argument
 */
public static <W extends BoundedWindow> StaticWindows of(Coder<W> coder, Iterable<W> windows) {
  checkArgument(!Iterables.isEmpty(windows), "Input windows to StaticWindows may not be empty");
  // The suppression covers the three unchecked casts in this single declaration.
  @SuppressWarnings("unchecked")
  StaticWindows windowFn =
      new StaticWindows(
          WindowSupplier.of((Coder<BoundedWindow>) coder, (Iterable<BoundedWindow>) windows),
          (Coder<BoundedWindow>) coder,
          // NOTE(review): presumably the onlyExisting flag — confirm against the constructor.
          false);
  return windowFn;
}
 
Example #18
Source File: WindowMergingFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the map function that {@code WindowMergingFnRunner} creates for a {@code Sessions}
 * window fn with a 5 ms gap over five interval windows, and checks that the output keeps the
 * input key, reports two windows as unmerged, and reports exactly one merge whose result is
 * {@code [7, 11)} built from the other three windows.
 */
@Test
public void testWindowMergingWithMergingWindowFn() throws Exception {
  ThrowingFunction<
          KV<Object, Iterable<BoundedWindow>>,
          KV<
              Object,
              KV<Iterable<BoundedWindow>, Iterable<KV<BoundedWindow, Iterable<BoundedWindow>>>>>>
      mapFunction =
          WindowMergingFnRunner.createMapFunctionForPTransform(
              "ptransformId",
              createMergeTransformForWindowFn(Sessions.withGapDuration(Duration.millis(5L))));

  // The windows [9, 11), [10, 10) and [7, 10) are expected to be merged into the single window
  // [7, 11) asserted below. The windows at 1 and 20 should remain in the unmerged set.
  BoundedWindow[] expectedToBeMerged =
      new BoundedWindow[] {
        new IntervalWindow(new Instant(9L), new Instant(11L)),
        new IntervalWindow(new Instant(10L), new Instant(10L)),
        new IntervalWindow(new Instant(7L), new Instant(10L))
      };
  Iterable<BoundedWindow> expectedToBeUnmerged =
      Sets.newHashSet(
          new IntervalWindow(new Instant(1L), new Instant(1L)),
          new IntervalWindow(new Instant(20L), new Instant(20L)));
  // The input holds all five windows under the key "abc".
  KV<Object, Iterable<BoundedWindow>> input =
      KV.of(
          "abc",
          ImmutableList.<BoundedWindow>builder()
              .add(expectedToBeMerged)
              .addAll(expectedToBeUnmerged)
              .build());

  KV<Object, KV<Iterable<BoundedWindow>, Iterable<KV<BoundedWindow, Iterable<BoundedWindow>>>>>
      output = mapFunction.apply(input);
  assertEquals(input.getKey(), output.getKey());
  // Output value: key = unmerged windows, value = (merge result, merged source windows) pairs.
  assertEquals(expectedToBeUnmerged, output.getValue().getKey());
  KV<BoundedWindow, Iterable<BoundedWindow>> mergedOutput =
      Iterables.getOnlyElement(output.getValue().getValue());
  assertEquals(new IntervalWindow(new Instant(7L), new Instant(11L)), mergedOutput.getKey());
  assertThat(mergedOutput.getValue(), containsInAnyOrder(expectedToBeMerged));
}
 
Example #19
Source File: SamzaTimerInternalsFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Private constructor: stores the key coder, timer registry and boundedness flag, starts with an
 * empty {@code TreeSet} of event-time timers, and creates the {@code SamzaTimerState} from the
 * remaining three arguments.
 *
 * @param keyCoder coder for timer keys
 * @param timerRegistry scheduler kept as the timer registry
 * @param timerStateId id passed to {@code SamzaTimerState}
 * @param nonKeyedStateInternalsFactory state internals factory passed to {@code SamzaTimerState}
 * @param windowCoder window coder passed to {@code SamzaTimerState}
 * @param isBounded boundedness flag stored on the factory
 */
private SamzaTimerInternalsFactory(
    Coder<K> keyCoder,
    Scheduler<KeyedTimerData<K>> timerRegistry,
    String timerStateId,
    SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory,
    Coder<BoundedWindow> windowCoder,
    IsBounded isBounded) {
  this.keyCoder = keyCoder;
  this.timerRegistry = timerRegistry;
  this.eventTimeTimers = new TreeSet<>();
  this.state = new SamzaTimerState(timerStateId, nonKeyedStateInternalsFactory, windowCoder);
  this.isBounded = isBounded;
}
 
Example #20
Source File: StreamingModeExecutionContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Always rejects the call: this implementation does not support writing PCollectionView data
 * and ignores all of its arguments.
 *
 * @throws IllegalStateException on every invocation
 * @throws IOException never thrown by this implementation; declared by the signature only
 */
@Override
public <T, W extends BoundedWindow> void writePCollectionViewData(
    TupleTag<?> tag,
    Iterable<T> data,
    Coder<Iterable<T>> dataCoder,
    W window,
    Coder<W> windowCoder)
    throws IOException {
  throw new IllegalStateException("User DoFns cannot write PCollectionView data");
}
 
Example #21
Source File: StatefulDoFnRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Records that an element was dropped for lateness: increments the
 * {@code droppedDueToLateness} counter and emits a {@link WindowTracing} debug message with the
 * element's timestamp, its window and the current input watermark.
 *
 * @param value the dropped element
 * @param window the window the element was dropped from
 */
private void reportDroppedElement(WindowedValue<InputT> value, BoundedWindow window) {
  droppedDueToLateness.inc();

  final String traceFormat =
      "StatefulDoFnRunner.processElement: Dropping element at {}; window:{} "
          + "since too far behind inputWatermark:{}";
  WindowTracing.debug(
      traceFormat,
      value.getTimestamp(),
      window,
      stepContext.timerInternals().currentInputWatermarkTime());
}
 
Example #22
Source File: WatermarkCallbackExecutor.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a callback that fires one millisecond after the given window expires, where expiry is
 * the window's maximum timestamp plus the strategy's allowed lateness.
 *
 * @param window the window whose expiration is awaited
 * @param strategy windowing strategy that supplies the allowed lateness
 * @param callback action to run once the firing time is reached
 * @return the new {@code WatermarkCallback}
 */
public static <W extends BoundedWindow> WatermarkCallback afterWindowExpiration(
    BoundedWindow window, WindowingStrategy<?, W> strategy, Runnable callback) {
  Instant windowExpiry = window.maxTimestamp().plus(strategy.getAllowedLateness());
  // Fire one milli past the end of the window. This ensures that all window expiration
  // timers are delivered first
  return new WatermarkCallback(windowExpiry.plus(1L), callback);
}
 
Example #23
Source File: DoFnOperator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Records the timestamp of the incoming watermark as the current side-input watermark.
 *
 * <p>Once that timestamp reaches {@link BoundedWindow#TIMESTAMP_MAX_VALUE}, all pushed-back data
 * is emitted and {@code processWatermark1} is re-run with the current input watermark.
 *
 * @param mark the incoming watermark
 * @throws Exception declared because the calls made here may throw
 */
@Override
public final void processWatermark2(Watermark mark) throws Exception {
  final long sideInputTimestamp = mark.getTimestamp();
  currentSideInputWatermark = sideInputTimestamp;

  if (sideInputTimestamp < BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) {
    return;
  }

  // this means we will never see any more side input
  emitAllPushedBackData();

  // maybe output a new watermark
  processWatermark1(new Watermark(currentInputWatermark));
}
 
Example #24
Source File: WindowDoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@code WindowDoFnOperator} under test: one-minute fixed windows, a combining
 * {@code SystemReduceFn} over {@code Sum.ofLongs()}, var-long coders for keys and values, and a
 * single output tagged {@code "main-output"}.
 *
 * @return the configured operator
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));

  TupleTag<KV<Long, Long>> outputTag = new TupleTag<>("main-output");

  // Combining reduce function: sums longs per key.
  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> reduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  // Input and output coders both use the window coder of the windowing strategy.
  Coder<IntervalWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> workItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), windowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> inputCoder =
      WindowedValue.getFullCoder(workItemCoder, windowCoder);
  FullWindowedValueCoder<KV<Long, Long>> outputCoder =
      WindowedValue.getFullCoder(KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), windowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      reduceFn,
      "stepName",
      // Raw cast of the input coder to the parameter type expected by the constructor.
      (Coder) inputCoder,
      outputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
      windowingStrategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
 
Example #25
Source File: ValueInSingleWindow.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a {@code ValueInSingleWindow} by reading, in this order, the timestamp, the window,
 * the pane info and finally the value from the stream.
 *
 * <p>Only the value is decoded with the supplied {@code context}; the other three components
 * use the single-argument {@code decode}.
 *
 * @param inStream stream to read from
 * @param context coder context applied to the value
 * @return the decoded value together with its timestamp, window and pane
 * @throws IOException if reading from the stream fails
 */
@Override
public ValueInSingleWindow<T> decode(InputStream inStream, Context context) throws IOException {
  // The read order below must stay exactly as is: the components are consumed sequentially.
  Instant decodedTimestamp = InstantCoder.of().decode(inStream);
  BoundedWindow decodedWindow = windowCoder.decode(inStream);
  PaneInfo decodedPane = PaneInfo.PaneInfoCoder.INSTANCE.decode(inStream);
  T decodedValue = valueCoder.decode(inStream, context);
  return new AutoValue_ValueInSingleWindow<>(
      decodedValue, decodedTimestamp, decodedWindow, decodedPane);
}
 
Example #26
Source File: GroupByKeyTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GroupByKey} into TSet operations: the input is mapped to byte-array
 * tuples, gathered by key, mapped back to windowed values per key, and finally grouped by
 * window with a buffering {@code SystemReduceFn}. The resulting TSet is registered as the data
 * set of the transform's output.
 *
 * @param transform the GroupByKey being translated
 * @param context translation context supplying the input, its data set and the output
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
  Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
  // Raw WindowingStrategy; its window fn is cast unchecked to the BoundedWindow form below.
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  // Coder for the windowed values, built from the input's value coder and window coder.
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
      keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
      groupedbyKeyTset
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy,
                  SystemReduceFn.buffering(coder.getValueCoder())));
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
 
Example #27
Source File: StaticWindows.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns windows to an element.
 *
 * <p>When {@code onlyExisting} is not set, the result of {@code getWindows()} is returned.
 * Otherwise the element's current window must be one of the provided windows and is returned as
 * a singleton collection.
 *
 * @param c assignment context giving access to the element's current window
 * @return the windows assigned to the element
 * @throws Exception declared by the signature
 */
@Override
public Collection<BoundedWindow> assignWindows(AssignContext c) throws Exception {
  if (!onlyExisting) {
    return getWindows();
  }

  // Fails the precondition if the element's window is not among the provided windows.
  checkArgument(
      windows.get().contains(c.window()),
      "Tried to assign windows to an element that is not already windowed into a provided "
          + "window when onlyExisting is set to true");
  return Collections.singleton(c.window());
}
 
Example #28
Source File: PubsubUnboundedSource.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the current watermark estimate of this reader.
 *
 * <p>If the Pubsub client reports EOF and nothing is left in {@code notYetRead}, the result is
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}. Otherwise {@code lastWatermarkMsSinceEpoch} is
 * either moved to the current time, moved to the minimum of the read and unread timestamp
 * minima, or left as it was, and is then recorded in the min/max watermark trackers and
 * returned.
 *
 * @return the watermark as an {@link Instant}
 */
@Override
public Instant getWatermark() {
  if (pubsubClient.get().isEOF() && notYetRead.isEmpty()) {
    // For testing only: Advance the watermark to the end of time to signal
    // the test is complete.
    return BoundedWindow.TIMESTAMP_MAX_VALUE;
  }

  // NOTE: We'll allow the watermark to go backwards. The underlying runner is responsible
  // for aggregating all reported watermarks and ensuring the aggregate is latched.
  // If we attempt to latch locally then it is possible a temporary starvation of one reader
  // could cause its estimated watermark to fast forward to current system time. Then when
  // the reader resumes its watermark would be unable to resume tracking.
  // By letting the underlying runner latch we avoid any problems due to localized starvation.
  long nowMsSinceEpoch = now();
  long readMin = minReadTimestampMsSinceEpoch.get(nowMsSinceEpoch);
  long unreadMin = minUnreadTimestampMsSinceEpoch.get();
  // Long.MAX_VALUE is what both minima are compared against to detect "nothing tracked".
  if (readMin == Long.MAX_VALUE
      && unreadMin == Long.MAX_VALUE
      && lastReceivedMsSinceEpoch >= 0
      && nowMsSinceEpoch > lastReceivedMsSinceEpoch + SAMPLE_PERIOD.getMillis()) {
    // We don't currently have any unread messages pending, we have not had any messages
    // read for a while, and we have not received any new messages from Pubsub for a while.
    // Advance watermark to current time.
    // TODO: Estimate a timestamp lag.
    lastWatermarkMsSinceEpoch = nowMsSinceEpoch;
  } else if (minReadTimestampMsSinceEpoch.isSignificant()
      || minUnreadTimestampMsSinceEpoch.isSignificant()) {
    // Take minimum of the timestamps in all unread messages and recently read messages.
    lastWatermarkMsSinceEpoch = Math.min(readMin, unreadMin);
  }
  // else: We're not confident enough to estimate a new watermark. Stick with the old one.
  // Both trackers record the watermark that is about to be returned.
  minWatermarkMsSinceEpoch.add(nowMsSinceEpoch, lastWatermarkMsSinceEpoch);
  maxWatermarkMsSinceEpoch.add(nowMsSinceEpoch, lastWatermarkMsSinceEpoch);
  return new Instant(lastWatermarkMsSinceEpoch);
}
 
Example #29
Source File: StreamingPCollectionViewWriterParDoFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the contents of one windowed element as PCollectionView data through
 * {@code stepContext}.
 *
 * <p>The element is cast to a windowed iterable and must belong to exactly one window; that
 * window and the iterable are written under {@code viewTag}.
 *
 * @param element the incoming element, expected to be a {@code WindowedValue<Iterable<Object>>}
 * @throws Exception declared because the calls made here may throw
 */
@Override
public void processElement(Object element) throws Exception {
  WindowedValue<Iterable<Object>> windowedContents = (WindowedValue<Iterable<Object>>) element;
  // Fails unless the element is in exactly one window.
  BoundedWindow onlyWindow = Iterables.getOnlyElement(windowedContents.getWindows());
  Iterable<Object> contents = windowedContents.getValue();

  stepContext.writePCollectionViewData(
      viewTag, contents, IterableCoder.of(elemCoder), onlyWindow, windowCoder);
}
 
Example #30
Source File: Read.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Clamps a timestamp to the range from {@link BoundedWindow#TIMESTAMP_MIN_VALUE} to
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}, both inclusive.
 *
 * @param timestamp the timestamp to clamp
 * @return the nearest bound if {@code timestamp} lies outside the range, otherwise
 *     {@code timestamp} itself
 */
private Instant ensureTimestampWithinBounds(Instant timestamp) {
  if (timestamp.isBefore(BoundedWindow.TIMESTAMP_MIN_VALUE)) {
    return BoundedWindow.TIMESTAMP_MIN_VALUE;
  }
  if (timestamp.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
    return BoundedWindow.TIMESTAMP_MAX_VALUE;
  }
  return timestamp;
}