org.apache.beam.sdk.util.WindowedValue Java Examples

The following examples show how to use org.apache.beam.sdk.util.WindowedValue. They are drawn from open-source projects; the source file, originating project, and license are noted above each example.
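
As a quick orientation before the individual examples, the sketch below exercises the factory methods and accessors that recur throughout them (valueInGlobalWindow, timestampedValueInGlobalWindow, of, getValue, getTimestamp, getWindows, getPane, withValue). The class and variable names are illustrative only, and the sketch assumes a recent Apache Beam SDK plus joda-time on the classpath.

import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.joda.time.Duration;
import org.joda.time.Instant;

// Illustrative class name; not part of any of the projects below.
public class WindowedValueBasics {
  public static void main(String[] args) {
    // Element in the global window, minimum timestamp, no pane firing.
    WindowedValue<String> global = WindowedValue.valueInGlobalWindow("a");

    // Element in the global window with an explicit timestamp.
    WindowedValue<String> timestamped =
        WindowedValue.timestampedValueInGlobalWindow("b", new Instant(42L));

    // Element placed in a single interval window with explicit pane metadata.
    Instant start = new Instant(0L);
    IntervalWindow window = new IntervalWindow(start, start.plus(Duration.standardMinutes(1)));
    WindowedValue<String> windowed = WindowedValue.of("c", start, window, PaneInfo.NO_FIRING);

    // Accessors used throughout the examples below.
    System.out.println(global.getValue());          // a
    System.out.println(timestamped.getTimestamp()); // 1970-01-01T00:00:00.042Z
    System.out.println(windowed.getWindows());      // the single interval window
    System.out.println(windowed.getPane());         // the no-firing pane info

    // withValue swaps the payload but keeps timestamp, windows, and pane.
    WindowedValue<Integer> relabeled = windowed.withValue(3);
    System.out.println(relabeled.getTimestamp().equals(windowed.getTimestamp())); // true
  }
}
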
Example #1
Source File: GroupCombineFunctions.java    From beam with Apache License 2.0
/**
 * An implementation of {@link
 * org.apache.beam.runners.core.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly} for the Spark runner.
 */
public static <K, V> JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupByKeyOnly(
    JavaRDD<WindowedValue<KV<K, V>>> rdd,
    Coder<K> keyCoder,
    WindowedValueCoder<V> wvCoder,
    @Nullable Partitioner partitioner) {
  // we use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle.
  JavaPairRDD<ByteArray, byte[]> pairRDD =
      rdd.map(new ReifyTimestampsAndWindowsFunction<>())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder));

  // If no partitioner is passed, the default group by key operation is called
  JavaPairRDD<ByteArray, Iterable<byte[]>> groupedRDD =
      (partitioner != null) ? pairRDD.groupByKey(partitioner) : pairRDD.groupByKey();

  return groupedRDD
      .mapToPair(CoderHelpers.fromByteFunctionIterable(keyCoder, wvCoder))
      .map(new TranslationUtils.FromPairFunction<>());
}
 
Example #2
Source File: BatchGroupAlsoByWindowReshuffleDoFnTest.java    From beam with Apache License 2.0
private static <K, InputT, OutputT, W extends BoundedWindow>
    DoFnRunner<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> makeRunner(
        GroupAlsoByWindowDoFnFactory<K, InputT, OutputT> fnFactory,
        WindowingStrategy<?, W> windowingStrategy,
        TupleTag<KV<K, OutputT>> outputTag,
        DoFnRunners.OutputManager outputManager) {

  final StepContext stepContext = new TestStepContext(STEP_NAME);

  StateInternalsFactory<K> stateInternalsFactory = key -> stepContext.stateInternals();

  BatchGroupAlsoByWindowFn<K, InputT, OutputT> fn =
      fnFactory.forStrategy(windowingStrategy, stateInternalsFactory);

  return new GroupAlsoByWindowFnRunner<>(
      PipelineOptionsFactory.create(),
      fn,
      NullSideInputReader.empty(),
      outputManager,
      outputTag,
      stepContext);
}
 
Example #3
Source File: BufferingDoFnRunnerTest.java    From beam with Apache License 2.0
private static BufferingDoFnRunner createBufferingDoFnRunner(
    int concurrentCheckpoints,
    List<BufferingDoFnRunner.CheckpointIdentifier> notYetAcknowledgeCheckpoints)
    throws Exception {
  DoFnRunner doFnRunner = Mockito.mock(DoFnRunner.class);
  OperatorStateBackend operatorStateBackend = Mockito.mock(OperatorStateBackend.class);

  // Setup not yet acknowledged checkpoint union list state
  ListState unionListState = Mockito.mock(ListState.class);
  Mockito.when(operatorStateBackend.getUnionListState(Mockito.any())).thenReturn(unionListState);
  Mockito.when(unionListState.get()).thenReturn(notYetAcknowledgeCheckpoints);

  // Setup buffer list state
  Mockito.when(operatorStateBackend.getListState(Mockito.any()))
      .thenReturn(Mockito.mock(ListState.class));

  return BufferingDoFnRunner.create(
      doFnRunner,
      "stable-input",
      StringUtf8Coder.of(),
      WindowedValue.getFullCoder(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE),
      operatorStateBackend,
      null,
      concurrentCheckpoints);
}
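
Example #3 builds a WindowedValue.FullWindowedValueCoder via WindowedValue.getFullCoder, a pattern that also appears in Examples #11, #14, #15, #18, and #30. The following sketch (illustrative names, and assuming the org.apache.beam.sdk.util.CoderUtils helper is available) round-trips a value through that coder to show that the windowing metadata is encoded along with the payload.

import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;

// Illustrative class name, not taken from the Beam code base.
public class FullWindowedValueCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // The full coder serializes the value together with its timestamp, windows, and pane.
    FullWindowedValueCoder<Integer> coder =
        WindowedValue.getFullCoder(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE);

    WindowedValue<Integer> original = WindowedValue.valueInGlobalWindow(7);
    byte[] bytes = CoderUtils.encodeToByteArray(coder, original);
    WindowedValue<Integer> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    // The value and its windowing metadata should survive the round trip.
    System.out.println(decoded.equals(original));
  }
}
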
 
Example #4
Source File: BoundedReadEvaluatorFactory.java    From beam with Apache License 2.0
@Override
public Collection<CommittedBundle<BoundedSourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform,
    int targetParallelism)
    throws Exception {
  BoundedSource<T> source = ReadTranslation.boundedSourceFromTransform(transform);
  long estimatedBytes = source.getEstimatedSizeBytes(options);
  long bytesPerBundle = estimatedBytes / targetParallelism;
  List<? extends BoundedSource<T>> bundles = source.split(bytesPerBundle, options);
  ImmutableList.Builder<CommittedBundle<BoundedSourceShard<T>>> shards =
      ImmutableList.builder();
  for (BoundedSource<T> bundle : bundles) {
    CommittedBundle<BoundedSourceShard<T>> inputShard =
        evaluationContext
            .<BoundedSourceShard<T>>createRootBundle()
            .add(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(bundle)))
            .commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    shards.add(inputShard);
  }
  return shards.build();
}
 
Example #5
Source File: GroupByKeyOnlyEvaluatorFactory.java    From beam with Apache License 2.0
@Override
public TransformResult<KV<K, V>> finishBundle() {
  StepTransformResult.Builder resultBuilder = StepTransformResult.withoutHold(application);
  for (Map.Entry<StructuralKey<K>, List<WindowedValue<V>>> groupedEntry :
      groupingMap.entrySet()) {
    K key = groupedEntry.getKey().getKey();
    KeyedWorkItem<K, V> groupedKv =
        KeyedWorkItems.elementsWorkItem(key, groupedEntry.getValue());
    UncommittedBundle<KeyedWorkItem<K, V>> bundle =
        evaluationContext.createKeyedBundle(
            StructuralKey.of(key, keyCoder),
            (PCollection<KeyedWorkItem<K, V>>)
                Iterables.getOnlyElement(application.getOutputs().values()));
    bundle.add(WindowedValue.valueInGlobalWindow(groupedKv));
    resultBuilder.addOutput(bundle);
  }
  return resultBuilder.build();
}
 
Example #6
Source File: MultiStepCombine.java    From beam with Apache License 2.0
@Override
public void processElement(WindowedValue<KV<K, Iterable<AccumT>>> element) throws Exception {
  checkState(
      element.getWindows().size() == 1,
      "Expected inputs to %s to be in exactly one window. Got %s",
      MergeAccumulatorsAndExtractOutputEvaluator.class.getSimpleName(),
      element.getWindows().size());
  Iterable<AccumT> inputAccumulators = element.getValue().getValue();
  try {
    AccumT first = combineFn.createAccumulator();
    AccumT merged =
        combineFn.mergeAccumulators(
            Iterables.concat(
                Collections.singleton(first),
                inputAccumulators,
                Collections.singleton(combineFn.createAccumulator())));
    OutputT extracted = combineFn.extractOutput(merged);
    output.add(element.withValue(KV.of(element.getValue().getKey(), extracted)));
  } catch (Exception e) {
    throw UserCodeException.wrap(e);
  }
}
 
Example #7
Source File: IsmSideInputReaderTest.java    From beam with Apache License 2.0
/**
 * Note that it is important that the return value, if split, is only split on shard boundaries,
 * because each shard id is expected to appear in only one source.
 *
 * <p>Each windowed value is expected to be within the same window.
 */
<K, V> Multimap<Integer, IsmRecord<WindowedValue<V>>> forMap(
    IsmRecordCoder<WindowedValue<V>> coder, ListMultimap<K, WindowedValue<V>> elements)
    throws Exception {

  Multimap<Integer, IsmRecord<WindowedValue<V>>> rval =
      TreeMultimap.create(Ordering.natural(), new IsmReaderTest.IsmRecordKeyComparator<>(coder));

  for (K key : elements.keySet()) {
    long i = 0;
    for (WindowedValue<V> value : elements.get(key)) {
      IsmRecord<WindowedValue<V>> record =
          IsmRecord.of(ImmutableList.of(key, windowOf(value), i), value);
      rval.put(coder.hash(record.getKeyComponents()), record);
      i += 1L;
    }
  }

  return rval;
}
 
Example #8
Source File: BeamBoundedSourceVertex.java    From incubator-nemo with Apache License 2.0
@Override
public List<Readable<WindowedValue<O>>> getReadables(final int desiredNumOfSplits) throws Exception {
  final List<Readable<WindowedValue<O>>> readables = new ArrayList<>();

  if (source != null) {
    LOG.info("estimate: {}", source.getEstimatedSizeBytes(null));
    LOG.info("desired: {}", desiredNumOfSplits);
    source.split(this.estimatedSizeBytes / desiredNumOfSplits, null)
      .forEach(boundedSource -> readables.add(new BoundedSourceReadable<>(boundedSource)));
    return readables;
  } else {
    // TODO #333: Remove SourceVertex#clearInternalStates
    final SourceVertex emptySourceVertex = new EmptyComponents.EmptySourceVertex("EMPTY");
    return emptySourceVertex.getReadables(desiredNumOfSplits);
  }
}
 
Example #9
Source File: TranslationContext.java    From beam with Apache License 2.0
/** The dummy stream created will only be used in Beam tests. */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
  final GenericSystemDescriptor dummySystem =
      new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
  final GenericInputDescriptor<OpMessage<String>> dummyInput =
      dummySystem.getInputDescriptor(id, new NoOpSerde<>());
  dummyInput.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);
  final Config config = new MapConfig(dummyInput.toConfig(), dummySystem.toConfig());
  final SystemFactory factory = new InMemorySystemFactory();
  final StreamSpec dummyStreamSpec = new StreamSpec(id, id, id, 1);
  factory.getAdmin(id, config).createStream(dummyStreamSpec);

  final SystemProducer producer = factory.getProducer(id, config, null);
  final SystemStream sysStream = new SystemStream(id, id);
  final Consumer<Object> sendFn =
      (msg) -> {
        producer.send(id, new OutgoingMessageEnvelope(sysStream, 0, null, msg));
      };
  final WindowedValue<String> windowedValue =
      WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());

  sendFn.accept(OpMessage.ofElement(windowedValue));
  sendFn.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
  sendFn.accept(new EndOfStreamMessage(null));
  return dummyInput;
}
 
Example #10
Source File: WindmillKeyedWorkItem.java    From beam with Apache License 2.0
@Override
public Iterable<WindowedValue<ElemT>> elementsIterable() {
  return FluentIterable.from(workItem.getMessageBundlesList())
      .transformAndConcat(Windmill.InputMessageBundle::getMessagesList)
      .transform(
          message -> {
            try {
              Instant timestamp =
                  WindmillTimeUtils.windmillToHarnessTimestamp(message.getTimestamp());
              Collection<? extends BoundedWindow> windows =
                  WindmillSink.decodeMetadataWindows(windowsCoder, message.getMetadata());
              PaneInfo pane = WindmillSink.decodeMetadataPane(message.getMetadata());

              InputStream inputStream = message.getData().newInput();
              ElemT value = valueCoder.decode(inputStream, Coder.Context.OUTER);
              return WindowedValue.of(value, timestamp, windows, pane);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          });
}
 
Example #11
Source File: GroupNonMergingWindowsFunctionsTest.java    From beam with Apache License 2.0
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> winValCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
          winStrategy.getWindowFn().windowCoder());

  ItemFactory<String, Integer, W> factory =
      ItemFactory.forWindow(keyCoder, winValCoder, winCoder, window);
  List<Tuple2<ByteArray, byte[]>> items =
      Arrays.asList(
          factory.create("k1", 1),
          factory.create("k1", 2),
          factory.create("k2", 3),
          factory.create("k2", 4),
          factory.create("k2", 5));
  return new GroupByKeyIterator<>(items.iterator(), keyCoder, winStrategy, winValCoder);
}
 
Example #12
Source File: AssignWindowsFunction.java    From twister2 with Apache License 2.0
@Override
public void compute(Iterator<WindowedValue<T>> input, RecordCollector<WindowedValue<T>> output) {
  WindowedValue<T> element;
  try {
    while (input.hasNext()) {
      element = input.next();
      Collection<BoundedWindow> windows =
          windowFn.assignWindows(new Twister2AssignContext<>(windowFn, element));

      for (BoundedWindow window : windows) {
        output.collect(
            WindowedValue.of(
                element.getValue(), element.getTimestamp(), window, element.getPane()));
      }
    }
  } catch (Exception e) {
    LOG.info(e.getMessage());
  }
}
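
Example #12 fans a multi-window element out into one output per assigned window by looping over getWindows(). WindowedValue itself exposes explodeWindows() for the same fan-out; the hypothetical sketch below (illustrative names, not Twister2 code) shows the two views of the same element.

import java.util.Arrays;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.joda.time.Duration;
import org.joda.time.Instant;

// Illustrative class name.
public class ExplodeWindowsSketch {
  public static void main(String[] args) {
    Instant ts = new Instant(0L);
    BoundedWindow w1 = new IntervalWindow(ts, ts.plus(Duration.standardMinutes(1)));
    BoundedWindow w2 = new IntervalWindow(ts, ts.plus(Duration.standardMinutes(2)));

    // One logical element assigned to two windows at once.
    WindowedValue<String> multi =
        WindowedValue.of("x", ts, Arrays.asList(w1, w2), PaneInfo.NO_FIRING);

    // explodeWindows yields one single-window WindowedValue per assigned window,
    // which is what the loop in Example #12 produces by hand.
    for (WindowedValue<String> single : multi.explodeWindows()) {
      System.out.println(single.getWindows()); // a one-element collection each time
    }
  }
}
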
 
Example #13
Source File: BoundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());

  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
 
Example #14
Source File: ShuffleSinkFactoryTest.java    From beam with Apache License 2.0
void runTestCreateGroupingSortingShuffleSink(
    byte[] shuffleWriterConfig, Coder<?> keyCoder, Coder<?> sortKeyCoder, Coder<?> sortValueCoder)
    throws Exception {
  FullWindowedValueCoder<?> coder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, KvCoder.of(sortKeyCoder, sortValueCoder)),
          IntervalWindow.getCoder());
  ShuffleSink shuffleSink =
      runTestCreateShuffleSinkHelper(
          shuffleWriterConfig, "group_keys_and_sort_values", coder, coder);
  Assert.assertEquals(
      ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES, shuffleSink.shuffleKind);
  Assert.assertTrue(shuffleSink.shardByKey);
  Assert.assertTrue(shuffleSink.groupValues);
  Assert.assertTrue(shuffleSink.sortValues);
  Assert.assertEquals(keyCoder, shuffleSink.keyCoder);
  Assert.assertEquals(KvCoder.of(sortKeyCoder, sortValueCoder), shuffleSink.valueCoder);
  Assert.assertEquals(sortKeyCoder, shuffleSink.sortKeyCoder);
  Assert.assertEquals(sortValueCoder, shuffleSink.sortValueCoder);
  Assert.assertNull(shuffleSink.windowedValueCoder);
}
 
Example #15
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0
private void translateImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> typeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  long shutdownAfterIdleSourcesMs = context.getPipelineOptions().getShutdownSourcesAfterIdleMs();
  SingleOutputStreamOperator<WindowedValue<byte[]>> source =
      context
          .getExecutionEnvironment()
          .addSource(new ImpulseSourceFunction(shutdownAfterIdleSourcesMs), "Impulse")
          .returns(typeInfo);

  context.addDataStream(Iterables.getOnlyElement(pTransform.getOutputsMap().values()), source);
}
 
Example #16
Source File: UnboundedDataset.java    From beam with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void cache(String storageLevel, Coder<?> coder) {
  // we "force" MEMORY storage level in streaming
  if (!StorageLevel.fromString(storageLevel).equals(StorageLevel.MEMORY_ONLY_SER())) {
    LOG.warn(
        "Provided StorageLevel: {} is ignored for streams, using the default level: {}",
        storageLevel,
        StorageLevel.MEMORY_ONLY_SER());
  }
  // Caching can cause serialization, so we need to encode to bytes
  // more details in https://issues.apache.org/jira/browse/BEAM-2669
  Coder<WindowedValue<T>> wc = (Coder<WindowedValue<T>>) coder;
  this.dStream =
      dStream.map(CoderHelpers.toByteFunction(wc)).cache().map(CoderHelpers.fromByteFunction(wc));
}
 
Example #17
Source File: FnApiDoFnRunner.java    From beam with Apache License 2.0
private void processElementForWindowObservingSplitRestriction(
    WindowedValue<KV<InputT, KV<RestrictionT, WatermarkEstimatorStateT>>> elem) {
  currentElement = elem.withValue(elem.getValue().getKey());
  currentRestriction = elem.getValue().getValue().getKey();
  currentWatermarkEstimatorState = elem.getValue().getValue().getValue();
  try {
    Iterator<BoundedWindow> windowIterator =
        (Iterator<BoundedWindow>) elem.getWindows().iterator();
    while (windowIterator.hasNext()) {
      currentWindow = windowIterator.next();
      doFnInvoker.invokeSplitRestriction(processContext);
    }
  } finally {
    currentElement = null;
    currentRestriction = null;
    currentWatermarkEstimatorState = null;
    currentWindow = null;
  }

  // TODO(BEAM-10212): Support caching state data across bundle boundaries.
  this.stateAccessor.finalizeState();
}
 
Example #18
Source File: FlinkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0
private static void translateImpulse(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  TypeInformation<WindowedValue<byte[]>> typeInformation =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
              context.getExecutionEnvironment(),
              new ImpulseInputFormat(),
              typeInformation,
              transform.getTransform().getUniqueName())
          .name("Impulse");

  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), dataSource);
}
 
Example #19
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> fn = originalParDo.getFn();
  verifyFnIsStateful(fn);
  DataflowRunner.verifyDoFnSupportedBatch(fn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    return input.apply(Reshuffle.of()).apply(originalParDo);
  }

  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollection<OutputT>>
      statefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(fn)).withSideInputs(originalParDo.getSideInputs());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(statefulParDo);
}
 
Example #20
Source File: SparkAssignWindowFn.java    From beam with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public WindowedValue<T> call(WindowedValue<T> windowedValue) throws Exception {
  final BoundedWindow boundedWindow = Iterables.getOnlyElement(windowedValue.getWindows());
  final T element = windowedValue.getValue();
  final Instant timestamp = windowedValue.getTimestamp();
  Collection<W> windows =
      ((WindowFn<T, W>) fn)
          .assignWindows(
              ((WindowFn<T, W>) fn).new AssignContext() {
                @Override
                public T element() {
                  return element;
                }

                @Override
                public Instant timestamp() {
                  return timestamp;
                }

                @Override
                public BoundedWindow window() {
                  return boundedWindow;
                }
              });
  return WindowedValue.of(element, timestamp, windows, PaneInfo.NO_FIRING);
}
 
Example #21
Source File: IsmSideInputReader.java    From beam with Apache License 2.0
@Override
public boolean contains(Object o) {
  if (!(o instanceof Entry)) {
    return false;
  }
  @SuppressWarnings("unchecked")
  Entry<K, ?> entry = (Entry<K, ?>) o;
  try {
    // We find the reader iterator which contains the key/window prefix.
    // For maps, this yields only one record. For multimaps, this is a valid
    // prefix reader iterator.
    List<IsmReader<WindowedValue<V1>>.IsmPrefixReaderIterator> readerIterators =
        findAndStartReaders(readers, ImmutableList.of(entry.getKey(), window));
    if (readerIterators.isEmpty()) {
      return false;
    }

    // Only one such reader iterator is expected.
    IsmReader<WindowedValue<V1>>.IsmPrefixReaderIterator readerIterator =
        Iterables.getOnlyElement(readerIterators);

    return Objects.equal(
        entry.getValue(), transform.apply(KV.of(entry.getKey(), readerIterator)));
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
 
Example #22
Source File: OutputTagFilter.java    From twister2 with Apache License 2.0
@Override
public void compute(Iterator<RawUnionValue> input, RecordCollector<WindowedValue<OT>> output) {
  RawUnionValue temp;
  while (input.hasNext()) {
    temp = input.next();
    if (temp.getUnionTag() == tag) {
      output.collect((WindowedValue<OT>) temp.getValue());
    }
  }
}
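
Example #22 filters a multi-output union stream by tag and unwraps the payload back into a WindowedValue. A minimal sketch of the tagging convention it relies on, using Beam's RawUnionValue as in the example (class and variable names are illustrative):

import org.apache.beam.sdk.transforms.join.RawUnionValue;
import org.apache.beam.sdk.util.WindowedValue;

// Illustrative class name.
public class UnionTagSketch {
  public static void main(String[] args) {
    // A WindowedValue wrapped with union tag 2, roughly as a multi-output
    // translation would emit it.
    RawUnionValue tagged = new RawUnionValue(2, WindowedValue.valueInGlobalWindow("x"));

    // The filter in Example #22 keeps only values whose tag matches and casts the payload.
    if (tagged.getUnionTag() == 2) {
      @SuppressWarnings("unchecked")
      WindowedValue<String> unwrapped = (WindowedValue<String>) tagged.getValue();
      System.out.println(unwrapped.getValue()); // x
    }
  }
}
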
 
Example #23
Source File: WatermarkManagerTest.java    From beam with Apache License 2.0
@SafeVarargs
private final <T> CommittedBundle<T> timestampedBundle(
    PCollection<T> pc, TimestampedValue<T>... values) {
  UncommittedBundle<T> bundle = bundleFactory.createBundle(pc);
  for (TimestampedValue<T> value : values) {
    bundle.add(
        WindowedValue.timestampedValueInGlobalWindow(value.getValue(), value.getTimestamp()));
  }
  return bundle.commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
}
 
Example #24
Source File: DoFnOp.java    From beam with Apache License 2.0
@Override
public DoFnRunners.OutputManager create(OpEmitter<OutT> emitter) {
  return new DoFnRunners.OutputManager() {
    @Override
    public <T> void output(TupleTag<T> tupleTag, WindowedValue<T> windowedValue) {
      // With only one input we know that T is of type OutT.
      @SuppressWarnings("unchecked")
      final WindowedValue<OutT> retypedWindowedValue = (WindowedValue<OutT>) windowedValue;
      emitter.emitElement(retypedWindowedValue);
    }
  };
}
 
Example #25
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0
private static <T> void translateReshuffle(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {
  String inputId = getInputId(transformNode);
  WindowedValueCoder<T> coder = getWindowedValueCoder(inputId, pipeline.getComponents());
  JavaRDD<WindowedValue<T>> inRDD = ((BoundedDataset<T>) context.popDataset(inputId)).getRDD();
  JavaRDD<WindowedValue<T>> reshuffled = GroupCombineFunctions.reshuffle(inRDD, coder);
  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(reshuffled));
}
 
Example #26
Source File: SplittableDoFnOperator.java    From beam with Apache License 2.0
@Override
protected void fireTimer(TimerInternals.TimerData timer) {
  timerInternals.onFiredOrDeletedTimer(timer);
  if (timer.getDomain().equals(TimeDomain.EVENT_TIME)) {
    // Ignore this; it can only be a state-cleanup timer from StatefulDoFnRunner, and ProcessFn
    // does its own state cleanup and should never set event-time timers.
    return;
  }
  doFnRunner.processElement(
      WindowedValue.valueInGlobalWindow(
          KeyedWorkItems.timersWorkItem(
              (byte[]) keyedStateInternals.getKey(), Collections.singletonList(timer))));
}
 
Example #27
Source File: SourceRDD.java    From beam with Apache License 2.0
@Override
public scala.collection.Iterator<WindowedValue<T>> compute(
    final Partition split, final TaskContext context) {
  final MetricsContainer metricsContainer = metricsAccum.value().getContainer(stepName);

  @SuppressWarnings("unchecked")
  final BoundedSource.BoundedReader<T> reader = createReader((SourcePartition<T>) split);

  final Iterator<WindowedValue<T>> readerIterator =
      new ReaderToIteratorAdapter<>(metricsContainer, reader);

  return new InterruptibleIterator<>(context, JavaConversions.asScalaIterator(readerIterator));
}
 
Example #28
Source File: ReduceFnTester.java    From beam with Apache License 2.0
@Override
public void outputWindowedValue(
    KV<String, OutputT> output,
    Instant timestamp,
    Collection<? extends BoundedWindow> windows,
    PaneInfo pane) {
  // Copy the output value (using coders) before capturing it.
  KV<String, OutputT> copy =
      SerializableUtils.ensureSerializableByCoder(
          KvCoder.of(StringUtf8Coder.of(), outputCoder), output, "outputForWindow");
  WindowedValue<KV<String, OutputT>> value = WindowedValue.of(copy, timestamp, windows, pane);
  outputs.add(value);
}
 
Example #29
Source File: WatermarkManagerTest.java    From beam with Apache License 2.0
@SafeVarargs
private final <T> CommittedBundle<T> multiWindowedBundle(PCollection<T> pc, T... values) {
  UncommittedBundle<T> bundle = bundleFactory.createBundle(pc);
  Collection<BoundedWindow> windows =
      ImmutableList.of(
          GlobalWindow.INSTANCE,
          new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(0)));
  for (T value : values) {
    bundle.add(
        WindowedValue.of(value, BoundedWindow.TIMESTAMP_MIN_VALUE, windows, PaneInfo.NO_FIRING));
  }
  return bundle.commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
}
 
Example #30
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
/**
 * Creates an {@link ExecutableStageDoFnOperator}. Sets the runtime context to {@link
 * #runtimeContext}. The context factory is mocked to return {@link #stageContext} every time. The
 * behavior of the stage context itself is unchanged.
 */
@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(
    TupleTag<Integer> mainOutput,
    List<TupleTag<?>> additionalOutputs,
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory) {
  return getOperator(
      mainOutput,
      additionalOutputs,
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      null,
      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));
}