org.apache.beam.sdk.transforms.windowing.BoundedWindow Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.BoundedWindow.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StreamingDataflowWorkerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link ParallelInstruction} whose write step sends output 0 of a producer
 * instruction to a {@link WindmillSink}.
 *
 * @param streamId value stored under the {@code "stream_id"} key of the sink spec
 * @param coder element coder combined with {@code windowCoder} into the sink's codec
 * @param producerIndex index of the instruction producing the sink's input
 * @param windowCoder window coder combined with {@code coder} into the sink's codec
 * @return the instruction, labelled with the default sink system and original names
 */
private ParallelInstruction makeSinkInstruction(
    String streamId,
    Coder<?> coder,
    int producerIndex,
    Coder<? extends BoundedWindow> windowCoder) {
  CloudObject sinkSpec = CloudObject.forClass(WindmillSink.class);
  addString(sinkSpec, "stream_id", streamId);

  CloudObject sinkCodec =
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(coder, windowCoder), /*sdkComponents=*/ null);

  WriteInstruction write =
      new WriteInstruction()
          .setInput(
              new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0))
          .setSink(new Sink().setSpec(sinkSpec).setCodec(sinkCodec));

  return new ParallelInstruction()
      .setSystemName(DEFAULT_SINK_SYSTEM_NAME)
      .setOriginalName(DEFAULT_SINK_ORIGINAL_NAME)
      .setWrite(write);
}
Example #2
Source File: FlinkStreamingSideInputHandlerFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a handler that serves the iterable side input registered for the given transform
 * and local side input name.
 *
 * @param transformId id of the transform consuming the side input
 * @param sideInputId local name of the side input within that transform
 * @param elementCoder coder reported by the returned handler's {@code elementCoder()}
 * @param windowCoder coder for the side input's windows (not used by this implementation)
 * @return a handler that reads the side input through {@code runnerHandler}
 * @throws IllegalArgumentException if no side input is registered under the given ids
 */
@Override
public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(
    String transformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
  SideInputId lookupKey =
      SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build();
  PCollectionView view = sideInputToCollection.get(lookupKey);
  checkArgument(view != null, "No side input for %s/%s", transformId, sideInputId);

  return new IterableSideInputHandler<V, W>() {
    @Override
    public Iterable<V> get(W window) {
      Iterable<V> values = (Iterable<V>) runnerHandler.getIterable(view, window);
      // A null result is treated as the side input not being ready yet.
      return checkNotNull(values, "Element processed by SDK before side input is ready");
    }

    @Override
    public Coder<V> elementCoder() {
      return elementCoder;
    }
  };
}
Example #3
Source File: GroupAlsoByWindowViaWindowSetNewDoFn.java From beam with Apache License 2.0 | 6 votes |
/**
 * Static factory for {@link GroupAlsoByWindowViaWindowSetNewDoFn}; forwards every argument to
 * the constructor unchanged and in the same order.
 *
 * @return a new {@code GroupAlsoByWindowViaWindowSetNewDoFn} built from the given arguments
 */
public static <K, InputT, OutputT, W extends BoundedWindow>
    DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        WindowingStrategy<?, W> strategy,
        StateInternalsFactory<K> stateInternalsFactory,
        TimerInternalsFactory<K> timerInternalsFactory,
        SideInputReader sideInputReader,
        SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn,
        DoFnRunners.OutputManager outputManager,
        TupleTag<KV<K, OutputT>> mainTag) {
  return new GroupAlsoByWindowViaWindowSetNewDoFn<>(
      strategy,
      stateInternalsFactory,
      timerInternalsFactory,
      sideInputReader,
      reduceFn,
      outputManager,
      mainTag);
}
Example #4
Source File: DoFnOperator.java From beam with Apache License 2.0 | 6 votes |
/**
 * Binds every state spec declared on {@code doFn} (and, for a {@link StatefulDoFnRunner}, every
 * system state tag) to an {@link FlinkStateInternals.EarlyBinder} backed by the keyed state
 * backend. Does nothing when either {@code keyCoder} or {@code doFn} is null.
 *
 * @throws IllegalAccessException if a state field of the DoFn cannot be read reflectively
 */
private void earlyBindStateIfNeeded() throws IllegalArgumentException, IllegalAccessException {
  if (keyCoder == null || doFn == null) {
    return;
  }

  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  FlinkStateInternals.EarlyBinder binder =
      new FlinkStateInternals.EarlyBinder(getKeyedStateBackend());

  for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
    // Read the StateSpec instance out of the DoFn's declared state field.
    StateSpec<?> stateSpec =
        (StateSpec<?>) signature.stateDeclarations().get(declaration.id()).field().get(doFn);
    stateSpec.bind(declaration.id(), binder);
  }

  if (doFnRunner instanceof StatefulDoFnRunner) {
    ((StatefulDoFnRunner<InputT, OutputT, BoundedWindow>) doFnRunner)
        .getSystemStateTags()
        .forEach(systemTag -> systemTag.getSpec().bind(systemTag.getId(), binder));
  }
}
Example #5
Source File: SystemReduceFn.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link SystemReduceFn} that buffers all input values in a bag state named
 * {@code BUFFER_NAME} and produces an {@code Iterable<T>}.
 *
 * @param inputCoder coder used for the buffered values
 * @return a reduce fn whose merge hooks prefetch and merge the underlying bags
 */
public static <K, T, W extends BoundedWindow>
    SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W> buffering(final Coder<T> inputCoder) {
  final StateTag<BagState<T>> bagTag =
      StateTags.makeSystemTagInternal(StateTags.bag(BUFFER_NAME, inputCoder));

  return new SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W>(bagTag) {
    @Override
    public void prefetchOnMerge(MergingStateAccessor<K, W> state) throws Exception {
      StateMerging.prefetchBags(state, bagTag);
    }

    @Override
    public void onMerge(OnMergeContext c) throws Exception {
      StateMerging.mergeBags(c.state(), bagTag);
    }
  };
}
Example #6
Source File: BoundedReadEvaluatorFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Requests initial inputs with a target of 3 and checks that 3 or 4 bundles come back, each
 * holding exactly one shard in the global window at the minimum timestamp, and that the shard
 * sources together are equivalent to the reference source.
 */
@Test
public void getInitialInputsSplitsIntoBundles() throws Exception {
  when(context.createRootBundle()).thenAnswer(invocation -> bundleFactory.createRootBundle());

  BoundedReadEvaluatorFactory.InputProvider provider =
      new BoundedReadEvaluatorFactory.InputProvider(context, options);
  Collection<CommittedBundle<?>> bundles = provider.getInitialInputs(longsProducer, 3);

  assertThat(bundles, hasSize(allOf(greaterThanOrEqualTo(3), lessThanOrEqualTo(4))));

  Collection<BoundedSource<Long>> splitSources = new ArrayList<>();
  for (CommittedBundle<?> bundle : bundles) {
    Iterable<WindowedValue<BoundedSourceShard<Long>>> bundleShards =
        (Iterable) bundle.getElements();
    WindowedValue<BoundedSourceShard<Long>> onlyShard = Iterables.getOnlyElement(bundleShards);
    assertThat(onlyShard.getWindows(), Matchers.contains(GlobalWindow.INSTANCE));
    assertThat(onlyShard.getTimestamp(), equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
    splitSources.add(onlyShard.getValue().getSource());
  }

  SourceTestUtils.assertSourcesEqualReferenceSource(
      source, (List<? extends BoundedSource<Long>>) splitSources, PipelineOptionsFactory.create());
}
Example #7
Source File: DoFnOp.java From beam with Apache License 2.0 | 6 votes |
private void fireTimer(KeyedTimerData<?> keyedTimerData) { final TimerInternals.TimerData timer = keyedTimerData.getTimerData(); LOG.debug("Firing timer {}", timer); final StateNamespace namespace = timer.getNamespace(); // NOTE: not sure why this is safe, but DoFnOperator makes this assumption final BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow(); if (fnRunner instanceof DoFnRunnerWithKeyedInternals) { // Need to pass in the keyed TimerData here ((DoFnRunnerWithKeyedInternals) fnRunner).onTimer(keyedTimerData, window); } else { pushbackFnRunner.onTimer( timer.getTimerId(), timer.getTimerFamilyId(), null, window, timer.getTimestamp(), timer.getOutputTimestamp(), timer.getDomain()); } }
Example #8
Source File: TimestampPolicyFactory.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Override public Instant getTimestampForRecord(PartitionContext context, KafkaRecord<K, V> record) { if (record.getTimestampType().equals(KafkaTimestampType.LOG_APPEND_TIME)) { currentWatermark = new Instant(record.getTimestamp()); } else if (currentWatermark.equals(BoundedWindow.TIMESTAMP_MIN_VALUE)) { // This is the first record and it does not have LOG_APPEND_TIME. // Most likely the topic is not configured correctly. throw new IllegalStateException( String.format( "LogAppendTimePolicy policy is enabled in reader, but Kafka record's timestamp type " + "is LogAppendTime. Most likely it is not enabled on Kafka for the topic '%s'. " + "Actual timestamp type is '%s'.", record.getTopic(), record.getTimestampType())); } return currentWatermark; }
Example #9
Source File: WriteFilesTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the output file name for one shard of a window, of the form
 * {@code <windowPrefix>-<shard>-of-<numShards><suggestedSuffix><suffix>} with both shard
 * numbers formatted as at least four digits, and resolves it as a file in the current
 * directory of {@code baseFilename}.
 *
 * @param window the window being written; cast to {@link IntervalWindow}
 * @param paneInfo pane information (not used by this implementation)
 * @return the resolved file resource
 */
@Override
public ResourceId windowedFilename(
    int shardNumber,
    int numShards,
    BoundedWindow window,
    PaneInfo paneInfo,
    OutputFileHints outputFileHints) {
  DecimalFormat shardFormat = new DecimalFormat("0000");
  IntervalWindow interval = (IntervalWindow) window;

  String name =
      String.format(
          "%s-%s-of-%s%s%s",
          filenamePrefixForWindow(interval),
          shardFormat.format(shardNumber),
          shardFormat.format(numShards),
          outputFileHints.getSuggestedFilenameSuffix(),
          suffix);

  ResourceId resolved =
      baseFilename.getCurrentDirectory().resolve(name, StandardResolveOptions.RESOLVE_FILE);
  return resolved;
}
Example #10
Source File: TimerReceiverFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a receiver that converts each timer received for the given transform and timer
 * family into one {@link TimerInternals.TimerData} per window and hands it, with the original
 * timer, to {@code timerDataConsumer}.
 *
 * <p>When the timer's clear bit is set, both the fire and hold timestamps are replaced with
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 *
 * @param transformId id of the transform the timers belong to
 * @param timerFamilyId id of the timer family within that transform
 * @return the receiver for incoming timers
 */
public <K> FnDataReceiver<Timer<K>> create(String transformId, String timerFamilyId) {
  final ProcessBundleDescriptors.TimerSpec timerSpec =
      transformAndTimerIdToSpecMap.get(KV.of(transformId, timerFamilyId));

  return receivedElement -> {
    Timer timer =
        checkNotNull(
            receivedElement, "Received null Timer from SDK harness: %s", receivedElement);
    LOG.debug("Timer received: {}", timer);

    for (Object window : timer.getWindows()) {
      final boolean cleared = timer.getClearBit();
      StateNamespace windowNamespace =
          StateNamespaces.window(windowCoder, (BoundedWindow) window);
      TimerInternals.TimerData timerData =
          TimerInternals.TimerData.of(
              encodeToTimerDataTimerId(timerSpec.transformId(), timerSpec.timerId()),
              windowNamespace,
              cleared ? BoundedWindow.TIMESTAMP_MAX_VALUE : timer.getFireTimestamp(),
              cleared ? BoundedWindow.TIMESTAMP_MAX_VALUE : timer.getHoldTimestamp(),
              timerSpec.getTimerSpec().getTimeDomain());
      timerDataConsumer.accept(timer, timerData);
    }
  };
}
Example #11
Source File: StatefulDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Advances the input watermark of {@code timerInternals} and fires, on {@code toTrigger}, every
 * event timer that {@code removeNextEventTimer()} then returns, passing a null key.
 *
 * @param timerInternals in-memory timers whose watermark is advanced
 * @param newInputWatermark the new input watermark
 * @param toTrigger runner that receives the fired timers
 * @throws IllegalArgumentException if a timer's namespace is not a window namespace
 */
private static void advanceInputWatermark(
    InMemoryTimerInternals timerInternals, Instant newInputWatermark, DoFnRunner<?, ?> toTrigger)
    throws Exception {
  timerInternals.advanceInputWatermark(newInputWatermark);

  for (TimerInternals.TimerData fired = timerInternals.removeNextEventTimer();
      fired != null;
      fired = timerInternals.removeNextEventTimer()) {
    StateNamespace firedNamespace = fired.getNamespace();
    checkArgument(firedNamespace instanceof StateNamespaces.WindowNamespace);
    BoundedWindow firedWindow =
        ((StateNamespaces.WindowNamespace) firedNamespace).getWindow();
    toTrigger.onTimer(
        fired.getTimerId(),
        fired.getTimerFamilyId(),
        null,
        firedWindow,
        fired.getTimestamp(),
        fired.getOutputTimestamp(),
        fired.getDomain());
  }
}
Example #12
Source File: StreamingModeExecutionContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Looks up a side input value, consulting the per-tag, per-window cache first and falling back
 * to {@code stateFetcher} on a miss. Successful fetches are recorded in the cache.
 *
 * <p>If the side input was not ready, throws {@code IllegalStateException} if the state is
 * {@literal CACHED_IN_WORKITEM} or returns null otherwise.
 *
 * <p>If the side input was ready and null, returns {@literal Optional.absent()}. If the side
 * input was ready and non-null, returns an {@code Optional} holding the value.
 */
@Nullable
private <T> Optional<T> fetchSideInput(
    PCollectionView<T> view,
    BoundedWindow sideInputWindow,
    String stateFamily,
    StateFetcher.SideInputState state,
    Supplier<Closeable> scopedReadStateSupplier) {
  Map<BoundedWindow, Object> windowCache = sideInputCache.get(view.getTagInternal());
  if (windowCache == null) {
    windowCache = new HashMap<>();
    sideInputCache.put(view.getTagInternal(), windowCache);
  }

  // Cache hit: a stored null means the side input was ready but empty.
  if (windowCache.containsKey(sideInputWindow)) {
    @SuppressWarnings("unchecked")
    T cached = (T) windowCache.get(sideInputWindow);
    return Optional.fromNullable(cached);
  }

  if (state == StateFetcher.SideInputState.CACHED_IN_WORKITEM) {
    throw new IllegalStateException(
        "Expected side input to be cached. Tag: " + view.getTagInternal().getId());
  }

  Optional<T> result =
      stateFetcher.fetchSideInput(
          view, sideInputWindow, stateFamily, state, scopedReadStateSupplier);
  // A null result means "not ready"; only cache results that were actually fetched.
  if (result != null) {
    windowCache.put(sideInputWindow, result.orNull());
  }
  return result;
}
Example #13
Source File: SimplePushbackSideInputDoFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Advances both watermarks to the maximum timestamp, processes one element at timestamp 0 in
 * the window {@code [0, WINDOW_SIZE)}, and asserts that the stateful runner's
 * dropped-due-to-lateness counter reads 1.
 */
@Test
@Category({ValidatesRunner.class})
public void testLateDroppingForStatefulDoFnRunner() throws Exception {
  MetricsContainerImpl metrics = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(metrics);

  timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
  timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));

  PushbackSideInputDoFnRunner runner =
      createRunner(statefulRunner, ImmutableList.of(singletonView));
  runner.startBundle();

  when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
      .thenReturn(true);

  IntervalWindow elementWindow = new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
  WindowedValue<Integer> lateElement =
      WindowedValue.of(
          1, new Instant(0), ImmutableList.of(elementWindow), PaneInfo.ON_TIME_AND_ONLY_FIRING);
  runner.processElementInReadyWindows(lateElement);

  long dropped =
      metrics
          .getCounter(
              MetricName.named(
                  StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER))
          .getCumulative();
  assertEquals(1L, dropped);

  runner.finishBundle();
}
Example #14
Source File: SimpleDoFnRunner.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a timer bound to a window and state namespace. The timer family id is set to the
 * empty string; every other field is taken from the corresponding argument.
 */
public TimerInternalsTimer(
    BoundedWindow window,
    StateNamespace namespace,
    String timerId,
    TimerSpec spec,
    Instant elementInputTimestamp,
    TimerInternals timerInternals) {
  // Identity of the timer.
  this.timerId = timerId;
  this.timerFamilyId = "";
  this.spec = spec;

  // Where the timer lives.
  this.window = window;
  this.namespace = namespace;

  // Context used when the timer is set.
  this.elementInputTimestamp = elementInputTimestamp;
  this.timerInternals = timerInternals;
}
Example #15
Source File: GlobalCombineFnRunners.java From beam with Apache License 2.0 | 5 votes |
/**
 * Adds {@code input} to {@code accumulator} by delegating to the wrapped {@code combineFn}.
 * The options, side input reader and windows are not used.
 *
 * @return the accumulator returned by {@code combineFn.addInput}
 */
@Override
public AccumT addInput(
    AccumT accumulator,
    InputT input,
    PipelineOptions options,
    SideInputReader sideInputReader,
    Collection<? extends BoundedWindow> windows) {
  AccumT updated = combineFn.addInput(accumulator, input);
  return updated;
}
Example #16
Source File: StateRequestHandlers.java From beam with Apache License 2.0 | 5 votes |
/**
 * Asks {@code handlerFactory} for the bag user state handler described by {@code cacheKey},
 * passing its transform id, user state id and key, value and window coders.
 *
 * @param cacheKey the spec identifying the user state
 * @return the handler produced by the factory
 */
private <K, V, W extends BoundedWindow> BagUserStateHandler<K, V, W> createHandler(
    BagUserStateSpec cacheKey) {
  BagUserStateHandler<K, V, W> handler =
      handlerFactory.forUserState(
          cacheKey.transformId(),
          cacheKey.userStateId(),
          cacheKey.keyCoder(),
          cacheKey.valueCoder(),
          cacheKey.windowCoder());
  return handler;
}
Example #17
Source File: GlobalCombineFnRunners.java From beam with Apache License 2.0 | 5 votes |
/**
 * Compacts {@code accumulator} by delegating to the wrapped {@code combineFn}. The options,
 * side input reader and windows are not used.
 *
 * @return the accumulator returned by {@code combineFn.compact}
 */
@Override
public AccumT compact(
    AccumT accumulator,
    PipelineOptions options,
    SideInputReader sideInputReader,
    Collection<? extends BoundedWindow> windows) {
  AccumT compacted = combineFn.compact(accumulator);
  return compacted;
}
Example #18
Source File: DataflowSideInputHandlerFactory.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns the keys of the multimap side input for {@code window}. The window is first
 * converted with {@code windowCoder.structuralValue} before being used for the lookup.
 *
 * @param window the window whose side input is requested
 * @return the result of {@code get()} on the multimap view read from {@code sideInputReader}
 */
@Override
public Iterable<K> get(W window) {
  BoundedWindow lookupWindow = (BoundedWindow) windowCoder.structuralValue(window);
  Materializations.MultimapView<K, V> multimap =
      (Materializations.MultimapView<K, V>) sideInputReader.get(view, lookupWindow);
  return multimap.get();
}
Example #19
Source File: SimpleParDoFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Fires a user timer on {@code fnRunner} when the DoFn signature declares either the timer's
 * id or its timer family id; timers matching neither are ignored.
 *
 * @param timer the timer to deliver; its namespace is cast to {@link WindowNamespace}
 */
private void processUserTimer(TimerData timer) throws Exception {
  boolean declared =
      fnSignature.timerDeclarations().containsKey(timer.getTimerId())
          || fnSignature.timerFamilyDeclarations().containsKey(timer.getTimerFamilyId());
  if (!declared) {
    return;
  }

  BoundedWindow timerWindow = ((WindowNamespace) timer.getNamespace()).getWindow();
  fnRunner.onTimer(
      timer.getTimerId(),
      timer.getTimerFamilyId(),
      this.stepContext.stateInternals().getKey(),
      timerWindow,
      timer.getTimestamp(),
      timer.getOutputTimestamp(),
      timer.getDomain());
}
Example #20
Source File: NonEmptyPanes.java From beam with Apache License 2.0 | 5 votes |
/**
 * Chooses the {@link NonEmptyPanes} implementation for a windowing strategy:
 * {@code DiscardingModeNonEmptyPanes} when the strategy's mode is
 * {@code DISCARDING_FIRED_PANES}, {@code GeneralNonEmptyPanes} otherwise.
 *
 * @param strategy windowing strategy whose accumulation mode is inspected
 * @param reduceFn reduce fn handed to the discarding-mode implementation
 */
static <K, W extends BoundedWindow> NonEmptyPanes<K, W> create(
    WindowingStrategy<?, W> strategy, ReduceFn<K, ?, ?, W> reduceFn) {
  if (strategy.getMode() != AccumulationMode.DISCARDING_FIRED_PANES) {
    return new GeneralNonEmptyPanes<>();
  }
  return new DiscardingModeNonEmptyPanes<>(reduceFn);
}
Example #21
Source File: StaticWindows.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link StaticWindows} over a fixed, non-empty set of windows.
 *
 * @param coder coder for the supplied windows
 * @param windows the windows to use; must contain at least one element
 * @return a {@code StaticWindows} built with its final constructor argument set to
 *     {@code false}
 * @throws IllegalArgumentException if {@code windows} is empty
 */
public static <W extends BoundedWindow> StaticWindows of(Coder<W> coder, Iterable<W> windows) {
  checkArgument(!Iterables.isEmpty(windows), "Input windows to StaticWindows may not be empty");

  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> boundedCoder = (Coder<BoundedWindow>) coder;
  @SuppressWarnings("unchecked")
  Iterable<BoundedWindow> boundedWindows = (Iterable<BoundedWindow>) windows;

  return new StaticWindows(
      WindowSupplier.of(boundedCoder, boundedWindows), boundedCoder, false);
}
Example #22
Source File: StreamingModeExecutionContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Always rejects the write: this context does not allow user DoFns to write PCollectionView
 * data.
 *
 * @throws IllegalStateException on every call
 */
@Override
public <T, W extends BoundedWindow> void writePCollectionViewData(
    TupleTag<?> tag,
    Iterable<T> data,
    Coder<Iterable<T>> dataCoder,
    W window,
    Coder<W> windowCoder)
    throws IOException {
  throw new IllegalStateException("User DoFns cannot write PCollectionView data");
}
Example #23
Source File: ValueInSingleWindow.java From beam with Apache License 2.0 | 5 votes |
/**
 * Decodes a {@link ValueInSingleWindow} by reading, in this order, the timestamp, the window,
 * the pane info and finally the value from {@code inStream}.
 *
 * @param inStream stream positioned at an encoded value
 * @param context coder context; passed only to the value coder
 * @return the decoded value with its timestamp, window and pane
 * @throws IOException if any component cannot be read
 */
@Override
public ValueInSingleWindow<T> decode(InputStream inStream, Context context) throws IOException {
  // The read order below must match the encoded layout; do not reorder.
  Instant decodedTimestamp = InstantCoder.of().decode(inStream);
  BoundedWindow decodedWindow = windowCoder.decode(inStream);
  PaneInfo decodedPane = PaneInfo.PaneInfoCoder.INSTANCE.decode(inStream);
  T decodedValue = valueCoder.decode(inStream, context);

  return new AutoValue_ValueInSingleWindow<>(
      decodedValue, decodedTimestamp, decodedWindow, decodedPane);
}
Example #24
Source File: Read.java From beam with Apache License 2.0 | 5 votes |
/**
 * Clamps {@code timestamp} to the range
 * [{@link BoundedWindow#TIMESTAMP_MIN_VALUE}, {@link BoundedWindow#TIMESTAMP_MAX_VALUE}].
 *
 * @param timestamp the timestamp to clamp
 * @return the nearest bound if {@code timestamp} lies outside the range, otherwise
 *     {@code timestamp} itself
 */
private Instant ensureTimestampWithinBounds(Instant timestamp) {
  if (timestamp.isBefore(BoundedWindow.TIMESTAMP_MIN_VALUE)) {
    return BoundedWindow.TIMESTAMP_MIN_VALUE;
  }
  if (timestamp.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
    return BoundedWindow.TIMESTAMP_MAX_VALUE;
  }
  return timestamp;
}
Example #25
Source File: StatefulParDoEvaluatorFactory.java From beam with Apache License 2.0 | 5 votes |
/**
 * Static factory for the AutoValue implementation of
 * {@code AppliedPTransformOutputKeyAndWindow}, bundling a stateful ParDo transform with a key
 * and a window.
 *
 * @param transform the applied stateful ParDo transform
 * @param key the structural key
 * @param w the window
 */
static <K, InputT, OutputT> AppliedPTransformOutputKeyAndWindow<K, InputT, OutputT> create(
    AppliedPTransform<
            PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>,
            PCollectionTuple,
            StatefulParDo<K, InputT, OutputT>>
        transform,
    StructuralKey<K> key,
    BoundedWindow w) {
  return new AutoValue_StatefulParDoEvaluatorFactory_AppliedPTransformOutputKeyAndWindow<>(
      transform, key, w);
}
Example #26
Source File: StreamingPCollectionViewWriterParDoFn.java From beam with Apache License 2.0 | 5 votes |
/**
 * Writes the iterable carried by {@code element} as PCollectionView data for {@code viewTag}
 * through the step context.
 *
 * @param element a {@code WindowedValue<Iterable<Object>>}; it must be in exactly one window,
 *     otherwise {@code Iterables.getOnlyElement} fails
 */
@Override
public void processElement(Object element) throws Exception {
  WindowedValue<Iterable<Object>> windowedElements = (WindowedValue<Iterable<Object>>) element;
  BoundedWindow onlyWindow = Iterables.getOnlyElement(windowedElements.getWindows());

  stepContext.writePCollectionViewData(
      viewTag,
      windowedElements.getValue(),
      IterableCoder.of(elemCoder),
      onlyWindow,
      windowCoder);
}
Example #27
Source File: GroupByKeyTranslatorBatch.java From twister2 with Apache License 2.0 | 5 votes |
@Override public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) { PCollection<KV<K, V>> input = context.getInput(transform); BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input); final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder(); Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder(); WindowingStrategy windowingStrategy = input.getWindowingStrategy(); WindowFn<KV<K, V>, BoundedWindow> windowFn = (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn(); final WindowedValue.WindowedValueCoder<V> wvCoder = WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder()); KeyedTSet<byte[], byte[]> keyedTSet = inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder)); // todo add support for a partition function to be specified, this would use // todo keyedPartition function instead of KeyedGather ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>> groupedbyKeyTset = keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder)); // --- now group also by window. ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>> outputTset = groupedbyKeyTset .direct() .<WindowedValue<KV<K, Iterable<V>>>>flatmap( new GroupByWindowFunction( windowingStrategy, SystemReduceFn.buffering(coder.getValueCoder()))); PCollection output = context.getOutput(transform); context.setOutputDataSet(output, outputTset); }
Example #28
Source File: StatefulDoFnRunner.java From beam with Apache License 2.0 | 5 votes |
/**
 * Records that an element was dropped for lateness: increments the
 * {@code droppedDueToLateness} counter and emits a window-tracing debug message with the
 * element timestamp, the window and the current input watermark.
 *
 * @param value the dropped element
 * @param window the window the element was dropped from
 */
private void reportDroppedElement(WindowedValue<InputT> value, BoundedWindow window) {
  droppedDueToLateness.inc();

  WindowTracing.debug(
      "StatefulDoFnRunner.processElement: Dropping element at {}; window:{} "
          + "since too far behind inputWatermark:{}",
      value.getTimestamp(),
      window,
      stepContext.timerInternals().currentInputWatermarkTime());
}
Example #29
Source File: WatermarkCallbackExecutor.java From beam with Apache License 2.0 | 5 votes |
public static <W extends BoundedWindow> WatermarkCallback afterWindowExpiration( BoundedWindow window, WindowingStrategy<?, W> strategy, Runnable callback) { // Fire one milli past the end of the window. This ensures that all window expiration // timers are delivered first Instant firingAfter = window.maxTimestamp().plus(strategy.getAllowedLateness()).plus(1L); return new WatermarkCallback(firingAfter, callback); }
Example #30
Source File: WindowDoFnOperatorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link WindowDoFnOperator} for tests: one-minute fixed windows, a summing-longs
 * combining reduce fn, var-long coders for keys and values, and a single main output tagged
 * {@code "main-output"}.
 *
 * @return the configured operator
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> strategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));
  TupleTag<KV<Long, Long>> mainOutput = new TupleTag<>("main-output");

  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> sumReduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  Coder<IntervalWindow> intervalWindowCoder = strategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> keyedWorkItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), intervalWindowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> windowedInputCoder =
      WindowedValue.getFullCoder(keyedWorkItemCoder, intervalWindowCoder);
  FullWindowedValueCoder<KV<Long, Long>> windowedOutputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), intervalWindowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      sumReduceFn,
      "stepName",
      (Coder) windowedInputCoder,
      mainOutput,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(mainOutput, windowedOutputCoder),
      strategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}