Java Code Examples for org.apache.beam.sdk.util.WindowedValue#getFullCoder()

The following examples show how to use org.apache.beam.sdk.util.WindowedValue#getFullCoder(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: ShuffleSinkFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a shuffle sink through {@code runTestCreateShuffleSinkHelper} using the
 * {@code "partition_keys"} kind and checks the fields the resulting sink exposes.
 *
 * @param shuffleWriterConfig opaque writer configuration forwarded to the helper
 * @param keyCoder coder expected to end up in {@code ShuffleSink.keyCoder}
 * @param valueCoder coder expected to end up in {@code ShuffleSink.valueCoder}
 */
void runTestCreatePartitioningShuffleSink(
    byte[] shuffleWriterConfig, Coder<?> keyCoder, Coder<?> valueCoder) throws Exception {
  // One windowed KV coder is handed to the helper for both of its coder arguments.
  FullWindowedValueCoder<?> windowedKvCoder =
      WindowedValue.getFullCoder(KvCoder.of(keyCoder, valueCoder), IntervalWindow.getCoder());
  ShuffleSink sink =
      runTestCreateShuffleSinkHelper(
          shuffleWriterConfig, "partition_keys", windowedKvCoder, windowedKvCoder);

  // Shuffle kind and behavioral flags.
  Assert.assertEquals(ShuffleSink.ShuffleKind.PARTITION_KEYS, sink.shuffleKind);
  Assert.assertTrue(sink.shardByKey);
  Assert.assertFalse(sink.groupValues);
  Assert.assertFalse(sink.sortValues);

  // Component coders extracted from the KV coder.
  Assert.assertEquals(keyCoder, sink.keyCoder);
  Assert.assertEquals(valueCoder, sink.valueCoder);
  FullWindowedValueCoder<?> expectedWindowedValueCoder =
      FullWindowedValueCoder.of(valueCoder, IntervalWindow.getCoder());
  Assert.assertEquals(expectedWindowedValueCoder, sink.windowedValueCoder);

  // No sort coders are expected for this shuffle kind.
  Assert.assertNull(sink.sortKeyCoder);
  Assert.assertNull(sink.sortValueCoder);
}
 
Example 2
Source File: TranslationUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a lookup from every output {@link TupleTag} to the full windowed-value coder of the
 * collection registered under that tag.
 *
 * <p>Each map value is cast to {@link PCollection}; its element coder is combined with the
 * window coder of its windowing strategy via {@code WindowedValue.getFullCoder}.
 *
 * @param outputs - A map of tuple tags and pcollections
 * @return mapping between TupleTag and a coder
 */
public static Map<TupleTag<?>, Coder<WindowedValue<?>>> getTupleTagCoders(
    Map<TupleTag<?>, PValue> outputs) {
  Map<TupleTag<?>, Coder<WindowedValue<?>>> tagToCoder = new HashMap<>(outputs.size());

  outputs.forEach(
      (tag, value) -> {
        // Every output value is treated as a PCollection.
        PCollection<?> collection = (PCollection<?>) value;
        Coder<?> elementCoder = collection.getCoder();
        Coder<? extends BoundedWindow> windowCoder =
            collection.getWindowingStrategy().getWindowFn().windowCoder();
        // The double cast erases the element type so all coders fit one map value type.
        @SuppressWarnings("unchecked")
        Coder<WindowedValue<?>> fullCoder =
            (Coder<WindowedValue<?>>)
                (Coder<?>) WindowedValue.getFullCoder(elementCoder, windowCoder);
        tagToCoder.put(tag, fullCoder);
      });

  return tagToCoder;
}
 
Example 3
Source File: ShuffleSinkFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a shuffle sink through {@code runTestCreateShuffleSinkHelper} using the
 * {@code "group_keys_and_sort_values"} kind and checks the fields the resulting sink exposes.
 *
 * @param shuffleWriterConfig opaque writer configuration forwarded to the helper
 * @param keyCoder coder expected to end up in {@code ShuffleSink.keyCoder}
 * @param sortKeyCoder coder expected to end up in {@code ShuffleSink.sortKeyCoder}
 * @param sortValueCoder coder expected to end up in {@code ShuffleSink.sortValueCoder}
 */
void runTestCreateGroupingSortingShuffleSink(
    byte[] shuffleWriterConfig, Coder<?> keyCoder, Coder<?> sortKeyCoder, Coder<?> sortValueCoder)
    throws Exception {
  // Element shape is KV<key, KV<sortKey, sortValue>> in interval windows.
  FullWindowedValueCoder<?> windowedCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, KvCoder.of(sortKeyCoder, sortValueCoder)),
          IntervalWindow.getCoder());
  ShuffleSink sink =
      runTestCreateShuffleSinkHelper(
          shuffleWriterConfig, "group_keys_and_sort_values", windowedCoder, windowedCoder);

  // Shuffle kind and behavioral flags.
  Assert.assertEquals(ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES, sink.shuffleKind);
  Assert.assertTrue(sink.shardByKey);
  Assert.assertTrue(sink.groupValues);
  Assert.assertTrue(sink.sortValues);

  // Component coders extracted from the nested KV coder.
  Assert.assertEquals(keyCoder, sink.keyCoder);
  KvCoder<?, ?> expectedValueCoder = KvCoder.of(sortKeyCoder, sortValueCoder);
  Assert.assertEquals(expectedValueCoder, sink.valueCoder);
  Assert.assertEquals(sortKeyCoder, sink.sortKeyCoder);
  Assert.assertEquals(sortValueCoder, sink.sortValueCoder);

  // No windowed value coder is expected for this shuffle kind.
  Assert.assertNull(sink.windowedValueCoder);
}
 
Example 4
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the transform with the given id into a Flink source built from
 * {@code ImpulseSourceFunction} and registers the resulting stream under the transform's
 * single output.
 *
 * @param id id of the transform to look up in the pipeline components
 * @param pipeline pipeline proto containing the transform
 * @param context translation context providing the execution environment and options
 */
private void translateImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  // The source emits byte[] values in the global window.
  TypeInformation<WindowedValue<byte[]>> outputTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  long idleShutdownMs = context.getPipelineOptions().getShutdownSourcesAfterIdleMs();
  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context
          .getExecutionEnvironment()
          .addSource(new ImpulseSourceFunction(idleShutdownMs), "Impulse")
          .returns(outputTypeInfo);

  // getOnlyElement fails unless the transform has exactly one output.
  String outputCollectionId =
      Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputCollectionId, impulseStream);
}
 
Example 5
Source File: CoderTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a translator that decomposes a {@link FullWindowedValueCoder} into its value and
 * window coders (in that order) and rebuilds it from the same two-element component list.
 */
static CoderTranslator<FullWindowedValueCoder<?>> fullWindowedValue() {
  return new SimpleStructuredCoderTranslator<FullWindowedValueCoder<?>>() {
    @Override
    public List<? extends Coder<?>> getComponents(FullWindowedValueCoder<?> from) {
      Coder<?> valueCoder = from.getValueCoder();
      Coder<?> windowCoder = from.getWindowCoder();
      return ImmutableList.of(valueCoder, windowCoder);
    }

    @Override
    public FullWindowedValueCoder<?> fromComponents(List<Coder<?>> components) {
      // Index 0 is the value coder, index 1 the window coder, mirroring getComponents.
      Coder<?> valueCoder = components.get(0);
      Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) components.get(1);
      return WindowedValue.getFullCoder(valueCoder, windowCoder);
    }
  };
}
 
Example 6
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link DoFnOperator} around an identity DoFn whose {@code @FinishBundle} hook emits
 * the string {@code "finishBundle"} into the global window.
 *
 * <p>The operator is configured with parallelism 4, a value-only input coder, a single main
 * output tag, no side inputs and the global default windowing strategy.
 *
 * @return the configured operator
 */
private static DoFnOperator getOperatorForCleanupInspection() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setParallelism(4);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          // Emit a marker element when the bundle finishes.
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  // Diamond instead of a raw type so the tag and coder are checked against <String>.
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  return new DoFnOperator<>(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      null,
      null,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example 7
Source File: FlinkStreamingTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns Flink type information for the windowed elements of {@code collection}, backed by
 * the collection's element coder combined with the window coder of its windowing strategy.
 *
 * @param collection the collection whose elements are described
 * @return coder-based type information for {@code WindowedValue<T>}
 */
@SuppressWarnings("unchecked")
public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
  Coder<WindowedValue<T>> fullCoder =
      WindowedValue.getFullCoder(
          collection.getCoder(),
          collection.getWindowingStrategy().getWindowFn().windowCoder());
  return new CoderTypeInformation<>(fullCoder);
}
 
Example 8
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the transform with the given id into a Flink source built from
 * {@code StreamingImpulseSource} and registers the resulting stream under the transform's
 * single output.
 *
 * <p>The transform payload is parsed as JSON; {@code interval_ms} defaults to 100 and
 * {@code message_count} defaults to 0 when absent.
 *
 * @param id id of the transform to look up in the pipeline components
 * @param pipeline pipeline proto containing the transform
 * @param context translation context providing the execution environment
 * @throws RuntimeException if the payload cannot be read as JSON
 */
private void translateStreamingImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  // The source emits byte[] values in the global window.
  TypeInformation<WindowedValue<byte[]>> outputTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  // Only the JSON parse can raise IOException; the field lookups happen afterwards.
  final JsonNode config;
  try {
    config =
        new ObjectMapper().readTree(impulseTransform.getSpec().getPayload().toByteArray());
  } catch (IOException e) {
    throw new RuntimeException("Failed to parse configuration for streaming impulse", e);
  }
  int intervalMillis = config.path("interval_ms").asInt(100);
  int messageCount = config.path("message_count").asInt(0);

  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context
          .getExecutionEnvironment()
          .addSource(
              new StreamingImpulseSource(intervalMillis, messageCount),
              StreamingImpulseSource.class.getSimpleName())
          .returns(outputTypeInfo);

  // getOnlyElement fails unless the transform has exactly one output.
  String outputCollectionId =
      Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputCollectionId, impulseStream);
}
 
Example 9
Source File: FlinkBatchTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns Flink type information for windowed values encoded with {@code coder} and the window
 * coder of {@code windowingStrategy}.
 *
 * @param coder element coder
 * @param windowingStrategy strategy whose window function supplies the window coder
 * @return coder-based type information for {@code WindowedValue<T>}
 */
<T> TypeInformation<WindowedValue<T>> getTypeInfo(
    Coder<T> coder, WindowingStrategy<?, ?> windowingStrategy) {
  Coder<WindowedValue<T>> fullCoder =
      WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());
  return new CoderTypeInformation<>(fullCoder);
}
 
Example 10
Source File: IsmReaderFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that separate {@code IsmReaderFactory} instances hand back the same reader object for
 * the same file and a different reader object for a different file.
 */
@Test
public void testFactoryReturnsCachedInstance() throws Exception {
  Coder<?> windowedIsmCoder =
      WindowedValue.getFullCoder(
          IsmRecordCoder.of(
              1, 0, ImmutableList.<Coder<?>>of(StringUtf8Coder.of()), VarLongCoder.of()),
          GlobalWindow.Coder.INSTANCE);

  String firstPath = tmpFolder.newFile().getPath();
  String secondPath = tmpFolder.newFile().getPath();

  // Baseline reader for the first file.
  @SuppressWarnings("rawtypes")
  IsmReader<?> baselineReader =
      (IsmReader)
          new IsmReaderFactory()
              .create(
                  createSpecForFilename(firstPath),
                  windowedIsmCoder,
                  options,
                  executionContext,
                  operationContext);

  // A fresh factory asked for the same file must return the identical instance.
  Object readerForSameFile =
      new IsmReaderFactory()
          .create(
              createSpecForFilename(firstPath),
              windowedIsmCoder,
              options,
              executionContext,
              operationContext);
  assertSame(baselineReader, readerForSameFile);

  // A fresh factory asked for another file must return a different instance.
  Object readerForOtherFile =
      new IsmReaderFactory()
          .create(
              createSpecForFilename(secondPath),
              windowedIsmCoder,
              options,
              executionContext,
              operationContext);
  assertNotSame(baselineReader, readerForOtherFile);
}
 
Example 11
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Records the current transform as the producer of {@code value} and registers the output
 * under {@code name} with a windowed-value coder wrapping the collection's element coder.
 *
 * @param name name of the output
 * @param value the collection being output
 */
@Override
public void addOutput(String name, PCollection<?> value) {
  translator.producers.put(value, translator.currentTransform);

  // Combine the element coder with the window coder of the collection's windowing strategy.
  Coder<?> elementCoder = value.getCoder();
  Coder<?> windowedCoder =
      WindowedValue.getFullCoder(
          elementCoder, value.getWindowingStrategy().getWindowFn().windowCoder());

  addOutput(name, value, windowedCoder);
}
 
Example 12
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a transform from {@code KV<K, InputT>} to {@code KV<K, Iterable<InputT>>} into a
 * Flink keyed stream processed by a {@code WindowDoFnOperator}.
 *
 * <p>Steps: wrap each input element as a {@code SingletonKeyedWorkItem}, key the stream with a
 * {@code WorkItemKeySelector}, run a buffering {@code SystemReduceFn} inside the operator, and
 * register the operator's output stream as the transform's output.
 *
 * @param transform the transform being translated
 * @param context translation context supplying inputs, outputs, coders and options
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> input = context.getInput(transform);

  // Unchecked narrowing of the window type parameter to BoundedWindow.
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

  // The input coder is cast to KvCoder; this fails at runtime for any other coder type.
  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

  // Coder for the per-element work items, built from the key, value and window coders.
  SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputKvCoder.getKeyCoder(),
          inputKvCoder.getValueCoder(),
          input.getWindowingStrategy().getWindowFn().windowCoder());

  DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

  // Work items are themselves wrapped in WindowedValue on the stream.
  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedWorkItemCoder =
          WindowedValue.getFullCoder(
              workItemCoder, input.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  // Convert each KV element into a keyed work item.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
      inputDataStream
          .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  // Raw-typed selector handed to the operator constructor below.
  // NOTE(review): keyBy below builds a second, identically constructed selector instead of
  // reusing this one - confirm whether that is intentional.
  WorkItemKeySelector keySelector = new WorkItemKeySelector<>(inputKvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedWorkItemStream =
          workItemStream.keyBy(new WorkItemKeySelector<>(inputKvCoder.getKeyCoder()));

  // Buffering reduce function over the input value coder; it yields Iterable<InputT>.
  SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(inputKvCoder.getValueCoder());

  // Coder and type information for the transform's output collection.
  Coder<WindowedValue<KV<K, Iterable<InputT>>>> outputCoder =
      context.getWindowedInputCoder(context.getOutput(transform));
  TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  TupleTag<KV<K, Iterable<InputT>>> mainTag = new TupleTag<>("main output");

  // The transform name doubles as the operator name and uid below.
  String fullName = getCurrentTransformName(context);
  WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          fullName,
          (Coder) windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          inputKvCoder.getKeyCoder(),
          keySelector);

  // our operator expects WindowedValue<KeyedWorkItem> while our input stream
  // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
  @SuppressWarnings("unchecked")
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream =
      keyedWorkItemStream
          .transform(fullName, outputTypeInfo, (OneInputStreamOperator) doFnOperator)
          .uid(fullName);

  context.setOutputDataStream(context.getOutput(transform), outDataStream);
}
 
Example 13
Source File: PartitioningShuffleReaderTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips {@code expected} through a PARTITION_KEYS {@code ShuffleSink} and a
 * {@code PartitioningShuffleReader}, checking record counts, reported sizes, iterator
 * end-of-stream behavior and that the values read back equal the values written.
 *
 * @param expected the windowed key/value pairs to write and then read back
 */
private void runTestReadFromShuffle(List<WindowedValue<KV<Integer, String>>> expected)
    throws Exception {
  Coder<WindowedValue<KV<Integer, String>>> windowedKvCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()),
          IntervalWindow.getCoder());

  BatchModeExecutionContext batchContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");

  // ---- Write phase: push every expected value through a PARTITION_KEYS sink. ----
  ShuffleSink<KV<Integer, String>> sink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          ShuffleSink.ShuffleKind.PARTITION_KEYS,
          windowedKvCoder,
          batchContext,
          TestOperationContext.create());

  TestShuffleWriter recordingWriter = new TestShuffleWriter();

  List<Long> sizesReportedBySink = new ArrayList<>();
  try (Sink.SinkWriter<WindowedValue<KV<Integer, String>>> sinkWriter =
      sink.writer(recordingWriter, "dataset")) {
    for (WindowedValue<KV<Integer, String>> element : expected) {
      sizesReportedBySink.add(sinkWriter.add(element));
    }
  }
  List<ShuffleEntry> writtenRecords = recordingWriter.getRecords();
  Assert.assertEquals(expected.size(), writtenRecords.size());
  Assert.assertEquals(recordingWriter.getSizes(), sizesReportedBySink);

  // ---- Read phase: feed the written records to a PartitioningShuffleReader. ----
  PartitioningShuffleReader<Integer, String> reader =
      new PartitioningShuffleReader<>(
          PipelineOptionsFactory.create(),
          null,
          null,
          null,
          windowedKvCoder,
          batchContext,
          TestOperationContext.create());
  ExecutorTestUtils.TestReaderObserver sizeObserver =
      new ExecutorTestUtils.TestReaderObserver(reader);

  TestShuffleReader entrySource = new TestShuffleReader();
  List<Integer> recordLengths = new ArrayList<>();
  for (ShuffleEntry entry : writtenRecords) {
    recordLengths.add(entry.length());
    entrySource.addEntry(entry);
  }

  List<WindowedValue<KV<Integer, String>>> readBack = new ArrayList<>();
  Assert.assertFalse(entrySource.isClosed());
  try (PartitioningShuffleReaderIterator<Integer, String> iterator =
      reader.iterator(entrySource)) {
    boolean hasCurrent = iterator.start();
    while (hasCurrent) {
      readBack.add(iterator.getCurrent());
      hasCurrent = iterator.advance();
    }
    // Once exhausted, advance() keeps returning false and getCurrent() must throw.
    Assert.assertFalse(iterator.advance());
    try {
      iterator.getCurrent();
      Assert.fail("should have failed");
    } catch (NoSuchElementException ignored) {
      // As expected.
    }
  }
  // Closing the iterator must close the underlying shuffle reader.
  Assert.assertTrue(entrySource.isClosed());

  Assert.assertEquals(expected, readBack);
  Assert.assertEquals(recordLengths, sizeObserver.getActualSizes());
}
 
Example 14
Source File: GroupingShuffleReaderTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a {@code GroupingShuffleReader} iterator over five single-key entries, requests a
 * dynamic split after the third key, and checks the consumed parallelism reported by the
 * iterator's progress after each step.
 */
@Test
public void testConsumedParallelism() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(options, "testStage");
  final int kFirstShard = 0;

  // Populate the fake shuffle reader with kNumRecords entries, one per integer key i.
  TestShuffleReader shuffleReader = new TestShuffleReader();
  final int kNumRecords = 5;
  for (int i = 0; i < kNumRecords; ++i) {
    byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i);
    ShuffleEntry entry =
        new ShuffleEntry(fabricatePosition(kFirstShard, i), key, EMPTY_BYTE_ARRAY, key);
    shuffleReader.addEntry(entry);
  }

  TestOperationContext operationContext = TestOperationContext.create();
  GroupingShuffleReader<Integer, Integer> groupingShuffleReader =
      new GroupingShuffleReader<>(
          options,
          null,
          null,
          null,
          WindowedValue.getFullCoder(
              KvCoder.of(
                  BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
              IntervalWindow.getCoder()),
          context,
          operationContext,
          ShuffleReadCounterFactory.INSTANCE,
          false /* do not sort values */);

  assertFalse(shuffleReader.isClosed());
  try (GroupingShuffleReaderIterator<Integer, Integer> iter =
      groupingShuffleReader.iterator(shuffleReader)) {

    // Iterator hasn't started; consumed parallelism is 0.
    assertEquals(0.0, consumedParallelismFromProgress(iter.getProgress()), 0);

    // The only way to set a stop *position* in tests is via a split. To do that,
    // we must call start() first.

    // Should return entry at key 0.
    assertTrue(iter.start());

    // Iterator just started; consumed parallelism is 0.
    // NOTE(review): this check reads the value via readerProgressToCloudProgress(...) while
    // the others use consumedParallelismFromProgress(...) - presumably equivalent; confirm.
    assertEquals(
        0.0,
        readerProgressToCloudProgress(iter.getProgress()).getConsumedParallelism().getValue(),
        0);
    // Request a split at the immediate successor of key 2's position; it must be accepted.
    assertNotNull(
        iter.requestDynamicSplit(
            splitRequestAtPosition(
                makeShufflePosition(
                    fabricatePosition(kFirstShard, 2).immediateSuccessor().getPosition()))));
    // Split does not affect consumed parallelism; consumed parallelism is still 0.
    assertEquals(0.0, consumedParallelismFromProgress(iter.getProgress()), 0);

    // Should return entry at key 1.
    assertTrue(iter.advance());
    assertEquals(1.0, consumedParallelismFromProgress(iter.getProgress()), 0);

    // Should return entry at key 2 (last key, because the stop position
    // is its immediate successor.) Consumed parallelism increments by one to 2.
    assertTrue(iter.advance());
    assertEquals(2.0, consumedParallelismFromProgress(iter.getProgress()), 0);

    // Iterator advanced by one and consumes one more split point (total consumed: 3).
    assertFalse(iter.advance());
    assertEquals(3.0, consumedParallelismFromProgress(iter.getProgress()), 0);
  }
  // Closing the iterator must close the underlying shuffle reader.
  assertTrue(shuffleReader.isClosed());
}
 
Example 15
Source File: FlinkStreamingTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the full windowed-value coder for {@code collection}: its element coder combined
 * with the window coder of its windowing strategy.
 *
 * @param collection the collection whose windowed coder is requested
 * @return coder for {@code WindowedValue<T>}
 */
public <T> Coder<WindowedValue<T>> getWindowedInputCoder(PCollection<T> collection) {
  Coder<WindowedValue<T>> windowedCoder =
      WindowedValue.getFullCoder(
          collection.getCoder(),
          collection.getWindowingStrategy().getWindowFn().windowCoder());
  return windowedCoder;
}
 
Example 16
Source File: IsmSideInputReaderTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a windowed list side input from two Ism sources on {@code NUM_THREADS} concurrent
 * tasks and checks that every task sees the expected contents and the very same cached list
 * instances.
 */
@Test
public void testListInWindow() throws Exception {
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);
  IsmRecordCoder<WindowedValue<Long>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.of(INTERVAL_WINDOW_CODER, BigEndianLongCoder.of()),
          windowedLongCoder);

  // Three batches of indexed elements, one batch per window queried below.
  final List<KV<Long, WindowedValue<Long>>> firstBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(12, 10)),
          KV.of(1L, valueInIntervalWindow(22, 10)),
          KV.of(2L, valueInIntervalWindow(32, 10)));
  final List<KV<Long, WindowedValue<Long>>> secondBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(42, 20)),
          KV.of(1L, valueInIntervalWindow(52, 20)),
          KV.of(2L, valueInIntervalWindow(62, 20)));
  final List<KV<Long, WindowedValue<Long>>> thirdBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(42L, 30)),
          KV.of(1L, valueInIntervalWindow(52L, 30)),
          KV.of(2L, valueInIntervalWindow(62L, 30)));

  final PCollectionView<List<Long>> view =
      Pipeline.create()
          .apply(Create.empty(VarLongCoder.of()))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(View.asList());

  // The first two batches share one source file; the third gets its own.
  Source combinedSource =
      initInputFile(fromKvsForList(concat(firstBatch, secondBatch)), recordCoder);
  Source separateSource = initInputFile(fromKvsForList(thirdBatch), recordCoder);

  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), combinedSource, separateSource);

  // The task is stateless, so the same instance is submitted NUM_THREADS times.
  Callable<Map<BoundedWindow, List<Long>>> readAndVerify =
      () -> {
        // Store a strong reference to the returned value so that the logical reference
        // cache is not cleared for this test.
        List<Long> firstValues = reader.get(view, intervalWindow(10));
        List<Long> secondValues = reader.get(view, intervalWindow(20));
        List<Long> thirdValues = reader.get(view, intervalWindow(30));

        verifyList(toValueList(firstBatch), firstValues);
        verifyList(toValueList(secondBatch), secondValues);
        verifyList(toValueList(thirdBatch), thirdValues);

        // Assert that the same value reference was returned showing that it was cached.
        assertSame(firstValues, reader.get(view, intervalWindow(10)));
        assertSame(secondValues, reader.get(view, intervalWindow(20)));
        assertSame(thirdValues, reader.get(view, intervalWindow(30)));

        // Also verify when requesting a window that is not part of the side input
        assertEquals(Collections.EMPTY_LIST, reader.get(view, intervalWindow(40)));

        return ImmutableMap.<BoundedWindow, List<Long>>of(
            intervalWindow(10), firstValues,
            intervalWindow(20), secondValues,
            intervalWindow(30), thirdValues);
      };
  List<Callable<Map<BoundedWindow, List<Long>>>> tasks = new ArrayList<>();
  for (int i = 0; i < NUM_THREADS; ++i) {
    tasks.add(readAndVerify);
  }

  List<Future<Map<BoundedWindow, List<Long>>>> results =
      pipelineOptions.getExecutorService().invokeAll(tasks);
  Map<BoundedWindow, List<Long>> reference = results.get(0).get();
  // Assert that all threads got back the same reference
  for (Future<Map<BoundedWindow, List<Long>>> result : results) {
    assertEquals(reference, result.get());
    Map<BoundedWindow, List<Long>> perTask = result.get();
    for (Map.Entry<BoundedWindow, List<Long>> entry : perTask.entrySet()) {
      assertSame(reference.get(entry.getKey()), entry.getValue());
    }
  }
}
 
Example 17
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the counter state of a stateful DoFn survives a snapshot/restore cycle of the
 * test harness.
 *
 * <p>The DoFn increments a counter for every element and outputs the element only when the
 * counter equals the element's value. Two elements are processed before the snapshot, so
 * after restore the elements with values 4 and 5 can only match if the counter resumed at 2.
 */
@Test
public void testStateRestore() throws Exception {
  DoFn<KV<String, Long>, KV<String, Long>> filterElementsEqualToCountFn =
      new DoFn<KV<String, Long>, KV<String, Long>>() {

        // Counter state holding the number of elements seen so far.
        @StateId("counter")
        private final StateSpec<ValueState<Long>> counterSpec =
            StateSpecs.value(VarLongCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId("counter") ValueState<Long> count) {
          // Unset state reads as null and is treated as 0.
          long currentCount = Optional.ofNullable(count.read()).orElse(0L);
          currentCount = currentCount + 1;
          count.write(currentCount);

          // Emit only when the running count matches the element's value.
          KV<String, Long> currentElement = context.element();
          if (currentCount == currentElement.getValue()) {
            context.output(currentElement);
          }
        }
      };

  WindowingStrategy<Object, GlobalWindow> windowingStrategy = WindowingStrategy.globalDefault();

  TupleTag<KV<String, Long>> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, Long> coder = KvCoder.of(keyCoder, VarLongCoder.of());

  // The same windowed KV coder is passed twice to createTestHarness below.
  FullWindowedValueCoder<KV<String, Long>> kvCoder =
      WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<ByteBuffer> keyCoderInfo =
      new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of());

  OneInputStreamOperatorTestHarness<
          WindowedValue<KV<String, Long>>, WindowedValue<KV<String, Long>>>
      testHarness =
          createTestHarness(
              windowingStrategy,
              filterElementsEqualToCountFn,
              kvCoder,
              kvCoder,
              keyCoder,
              outputTag,
              keyCoderInfo,
              keySelector);
  testHarness.open();

  // Two elements before the snapshot: counter reaches 2, neither matches value 100.
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
  testHarness.close();

  // Build a second harness and initialize it from the snapshot before opening it.
  testHarness =
      createTestHarness(
          windowingStrategy,
          filterElementsEqualToCountFn,
          kvCoder,
          kvCoder,
          keyCoder,
          outputTag,
          keyCoderInfo,
          keySelector);
  testHarness.initializeState(snapshot);
  testHarness.open();

  // after restore: counter = 2
  // The next four elements bring the counter to 3, 4, 5 and 6 respectively.
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 4L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));

  // Only the elements whose value equals the counter (4 and 5) are emitted.
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow(KV.of("a", 4L)),
          WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));

  testHarness.close();
}
 
Example 18
Source File: GroupingShuffleReaderTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes every key/value pair in {@code input} through a grouping {@code ShuffleSink} and
 * returns the shuffle entries captured by the test writer.
 *
 * @param input keys paired with the list of values to write for that key
 * @param sortValues selects GROUP_KEYS_AND_SORT_VALUES when true, GROUP_KEYS otherwise
 * @return the records recorded by the {@code TestShuffleWriter}
 */
private List<ShuffleEntry> writeShuffleEntries(
    List<KV<Integer, List<KV<Integer, Integer>>>> input, boolean sortValues) throws Exception {
  Coder<WindowedValue<KV<Integer, KV<Integer, Integer>>>> windowedElemCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(
              BigEndianIntegerCoder.of(),
              KvCoder.of(BigEndianIntegerCoder.of(), BigEndianIntegerCoder.of())),
          IntervalWindow.getCoder());

  BatchModeExecutionContext batchContext =
      BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "STAGE");

  // Pick the grouping flavor requested by the caller.
  final ShuffleKind shuffleKind;
  if (sortValues) {
    shuffleKind = ShuffleKind.GROUP_KEYS_AND_SORT_VALUES;
  } else {
    shuffleKind = ShuffleKind.GROUP_KEYS;
  }
  ShuffleSink<KV<Integer, KV<Integer, Integer>>> sink =
      new ShuffleSink<>(
          PipelineOptionsFactory.create(),
          null,
          shuffleKind,
          windowedElemCoder,
          batchContext,
          TestOperationContext.create());

  TestShuffleWriter recordingWriter = new TestShuffleWriter();

  int writtenCount = 0;
  List<Long> sizesReportedBySink = new ArrayList<>();
  try (Sink.SinkWriter<WindowedValue<KV<Integer, KV<Integer, Integer>>>> sinkWriter =
      sink.writer(recordingWriter, "dataset")) {
    for (KV<Integer, List<KV<Integer, Integer>>> group : input) {
      Integer key = group.getKey();
      for (KV<Integer, Integer> value : group.getValue()) {
        ++writtenCount;
        // Each pair is written as its own windowed element.
        WindowedValue<KV<Integer, KV<Integer, Integer>>> element =
            WindowedValue.of(
                KV.of(key, value), timestamp, Lists.newArrayList(window), PaneInfo.NO_FIRING);
        sizesReportedBySink.add(sinkWriter.add(element));
      }
    }
  }

  // One shuffle record per written pair, with sizes matching what the sink reported.
  List<ShuffleEntry> writtenRecords = recordingWriter.getRecords();
  assertEquals(writtenCount, writtenRecords.size());
  assertEquals(recordingWriter.getSizes(), sizesReportedBySink);
  return writtenRecords;
}
 
Example 19
Source File: IsmSideInputReaderTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a windowed iterable side input from two Ism sources on {@code NUM_THREADS} concurrent
 * tasks and checks that every task sees the expected contents and the very same cached
 * iterable instances.
 */
@Test
public void testIterableInWindow() throws Exception {
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);
  IsmRecordCoder<WindowedValue<Long>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.of(INTERVAL_WINDOW_CODER, BigEndianLongCoder.of()),
          windowedLongCoder);

  // Three batches of indexed elements, one batch per window queried below.
  final List<KV<Long, WindowedValue<Long>>> firstBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(12, 10)),
          KV.of(1L, valueInIntervalWindow(22, 10)),
          KV.of(2L, valueInIntervalWindow(32, 10)));
  final List<KV<Long, WindowedValue<Long>>> secondBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(42, 20)),
          KV.of(1L, valueInIntervalWindow(52, 20)),
          KV.of(2L, valueInIntervalWindow(62, 20)));
  final List<KV<Long, WindowedValue<Long>>> thirdBatch =
      Arrays.asList(
          KV.of(0L, valueInIntervalWindow(42L, 30)),
          KV.of(1L, valueInIntervalWindow(52L, 30)),
          KV.of(2L, valueInIntervalWindow(62L, 30)));

  final PCollectionView<Iterable<Long>> view =
      Pipeline.create()
          .apply(Create.empty(VarLongCoder.of()))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(View.asIterable());

  // The first two batches share one source file; the third gets its own.
  Source combinedSource =
      initInputFile(fromKvsForList(concat(firstBatch, secondBatch)), recordCoder);
  Source separateSource = initInputFile(fromKvsForList(thirdBatch), recordCoder);

  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), combinedSource, separateSource);

  // The task is stateless, so the same instance is submitted NUM_THREADS times.
  Callable<Map<BoundedWindow, Iterable<Long>>> readAndVerify =
      () -> {
        // Store a strong reference to the returned value so that the logical reference
        // cache is not cleared for this test.
        Iterable<Long> firstValues = reader.get(view, intervalWindow(10));
        Iterable<Long> secondValues = reader.get(view, intervalWindow(20));
        Iterable<Long> thirdValues = reader.get(view, intervalWindow(30));

        verifyIterable(toValueList(firstBatch), firstValues);
        verifyIterable(toValueList(secondBatch), secondValues);
        verifyIterable(toValueList(thirdBatch), thirdValues);

        // Assert that the same value reference was returned showing that it was cached.
        assertSame(firstValues, reader.get(view, intervalWindow(10)));
        assertSame(secondValues, reader.get(view, intervalWindow(20)));
        assertSame(thirdValues, reader.get(view, intervalWindow(30)));
        return ImmutableMap.<BoundedWindow, Iterable<Long>>of(
            intervalWindow(10), firstValues,
            intervalWindow(20), secondValues,
            intervalWindow(30), thirdValues);
      };
  List<Callable<Map<BoundedWindow, Iterable<Long>>>> tasks = new ArrayList<>();
  for (int i = 0; i < NUM_THREADS; ++i) {
    tasks.add(readAndVerify);
  }

  List<Future<Map<BoundedWindow, Iterable<Long>>>> results =
      pipelineOptions.getExecutorService().invokeAll(tasks);
  Map<BoundedWindow, Iterable<Long>> reference = results.get(0).get();
  // Assert that all threads got back the same reference
  for (Future<Map<BoundedWindow, Iterable<Long>>> result : results) {
    assertEquals(reference, result.get());
    Map<BoundedWindow, Iterable<Long>> perTask = result.get();
    for (Map.Entry<BoundedWindow, Iterable<Long>> entry : perTask.entrySet()) {
      assertSame(reference.get(entry.getKey()), entry.getValue());
    }
  }
}
 
Example 20
Source File: ShuffleSinkFactoryTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the ungrouping shuffle sink check with a one-byte writer config and a windowed string
 * coder passed for both coder arguments.
 */
@Test
public void testCreateUngroupingShuffleSink() throws Exception {
  FullWindowedValueCoder<?> windowedStringCoder =
      WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());
  byte[] shuffleWriterConfig = new byte[] {(byte) 0xE1};
  runTestCreateUngroupingShuffleSink(
      shuffleWriterConfig, windowedStringCoder, windowedStringCoder);
}