org.apache.beam.runners.flink.FlinkPipelineOptions Java Examples

The following examples show how to use org.apache.beam.runners.flink.FlinkPipelineOptions. The originating project, source file, and license are noted above each example.
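Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the pattern they all share: obtaining a FlinkPipelineOptions instance from PipelineOptionsFactory, or viewing an existing PipelineOptions as FlinkPipelineOptions via as(). The option values chosen here are illustrative only; the setters and getters used are the ones that appear in the examples that follow.

import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class FlinkOptionsSketch {
  public static void main(String[] args) {
    // Create FlinkPipelineOptions directly.
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setParallelism(4);
    options.setMaxBundleSize(10L);
    options.setCheckpointingInterval(1000L);

    // Or view any existing PipelineOptions as FlinkPipelineOptions; both views
    // are backed by the same underlying options object.
    PipelineOptions generic = PipelineOptionsFactory.create();
    FlinkPipelineOptions flinkOptions = generic.as(FlinkPipelineOptions.class);
    System.out.println("metrics enabled: " + (!flinkOptions.getDisableMetrics()));
  }
}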
Example #1
Source File: ReaderInvocationUtil.java    From beam with Apache License 2.0
public ReaderInvocationUtil(
    String stepName, PipelineOptions options, FlinkMetricContainer container) {
  FlinkPipelineOptions flinkPipelineOptions = options.as(FlinkPipelineOptions.class);
  this.stepName = stepName;
  this.enableMetrics = !flinkPipelineOptions.getDisableMetrics();
  this.container = container;
}
 
Example #2
Source File: UnboundedSourceWrapperTest.java    From beam with Apache License 2.0
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  TestCountingSource source = new TestCountingSource(20).withoutSplitting();

  UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> sourceWrapper =
      new UnboundedSourceWrapper<>("noReader", options, source, 2);

  StreamingRuntimeContext mock = Mockito.mock(StreamingRuntimeContext.class);
  Mockito.when(mock.getNumberOfParallelSubtasks()).thenReturn(1);
  Mockito.when(mock.getExecutionConfig()).thenReturn(new ExecutionConfig());
  Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(0);
  sourceWrapper.setRuntimeContext(mock);

  sourceWrapper.open(new Configuration());

  String metricContainerFieldName = "metricContainer";
  FlinkMetricContainer monitoredContainer =
      Mockito.spy(
          (FlinkMetricContainer)
              Whitebox.getInternalState(sourceWrapper, metricContainerFieldName));
  Whitebox.setInternalState(sourceWrapper, metricContainerFieldName, monitoredContainer);

  sourceWrapper.close();
  Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
}
 
Example #3
Source File: WindowDoFnOperatorTest.java    From beam with Apache License 2.0
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));

  TupleTag<KV<Long, Long>> outputTag = new TupleTag<>("main-output");

  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> reduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  Coder<IntervalWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> workItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), windowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> inputCoder =
      WindowedValue.getFullCoder(workItemCoder, windowCoder);
  FullWindowedValueCoder<KV<Long, Long>> outputCoder =
      WindowedValue.getFullCoder(KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), windowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      reduceFn,
      "stepName",
      (Coder) inputCoder,
      outputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
      windowingStrategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
 
Example #4
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
private static DoFnOperator getOperatorForCleanupInspection() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setParallelism(4);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  return new DoFnOperator<>(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      null,
      null,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #5
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
private <K, InT, OutT>
    OneInputStreamOperatorTestHarness<WindowedValue<InT>, WindowedValue<OutT>> createTestHarness(
        WindowingStrategy<Object, ?> windowingStrategy,
        DoFn<InT, OutT> fn,
        FullWindowedValueCoder<InT> inputCoder,
        FullWindowedValueCoder<OutT> outputCoder,
        Coder<?> keyCoder,
        TupleTag<OutT> outputTag,
        TypeInformation<K> keyCoderInfo,
        KeySelector<WindowedValue<InT>, K> keySelector)
        throws Exception {
  DoFnOperator<InT, OutT> doFnOperator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          keyCoder /* key coder */,
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  return new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, keyCoderInfo);
}
 
Example #6
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void nonKeyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
 
Example #7
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void nonKeyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.globalDefault(),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
 
Example #8
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {

  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          coder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  testHarness.close();
}
 
Example #9
Source File: NonMergingGroupByKeyTest.java    From beam with Apache License 2.0
@Test
public void testEnabledReIterationDoesNotThrowAnException() {
  final Pipeline p = FlinkTestPipeline.createForBatch();
  p.getOptions().as(FlinkPipelineOptions.class).setReIterableGroupByKeyResult(true);
  p.apply(Create.of(Arrays.asList(KV.of("a", 1), KV.of("b", 2), KV.of("c", 3))))
      .apply(GroupByKey.create())
      .apply(ParDo.of(new ReiterateDoFn<>()));
  final PipelineResult.State state = p.run().waitUntilFinish();
  Assert.assertEquals(PipelineResult.State.DONE, state);
}
 
Example #10
Source File: FlinkExecutableStageFunction.java    From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
  // Register standard file systems.
  FileSystems.setDefaultPipelineOptions(options);
  executableStage = ExecutableStage.fromPayload(stagePayload);
  runtimeContext = getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);
  // TODO: Wire this into the distributed cache and make it pluggable.
  stageContext = contextFactory.get(jobInfo);
  stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
  // NOTE: It's safe to reuse the state handler between partitions because each partition uses the
  // same backing runtime context and broadcast variables. We use checkState below to catch errors
  // in backward-incompatible Flink changes.
  stateRequestHandler =
      getStateRequestHandler(
          executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);
  progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
          metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
          metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
        }
      };
}
 
Example #11
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
public UnboundedSourceWrapper(
    String stepName,
    PipelineOptions pipelineOptions,
    UnboundedSource<OutputT, CheckpointMarkT> source,
    int parallelism)
    throws Exception {
  this.stepName = stepName;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.isConvertedBoundedSource =
      source instanceof UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter;

  if (source.requiresDeduping()) {
    LOG.warn("Source {} requires deduping but Flink runner doesn't support this yet.", source);
  }

  Coder<CheckpointMarkT> checkpointMarkCoder = source.getCheckpointMarkCoder();
  if (checkpointMarkCoder == null) {
    LOG.info("No CheckpointMarkCoder specified for this source. Won't create snapshots.");
    checkpointCoder = null;
  } else {

    Coder<? extends UnboundedSource<OutputT, CheckpointMarkT>> sourceCoder =
        (Coder) SerializableCoder.of(new TypeDescriptor<UnboundedSource>() {});

    checkpointCoder = KvCoder.of(sourceCoder, checkpointMarkCoder);
  }

  // get the splits early. we assume that the generated splits are stable,
  // this is necessary so that the mapping of state to source is correct
  // when restoring
  splitSources = source.split(parallelism, pipelineOptions);

  FlinkPipelineOptions options = pipelineOptions.as(FlinkPipelineOptions.class);
  idleTimeoutMs = options.getShutdownSourcesAfterIdleMs();
}
 
Example #12
Source File: PipelineOptionsTableGenerator.java    From beam with Apache License 2.0
/**
 * Returns the extracted list of options via reflection on FlinkPipelineOptions. Options are
 * returned sorted in alphabetical order since Java does not guarantee any consistent order on the
 * class methods.
 */
private static List<Option> extractOptions(boolean isPython) {
  List<Option> options = new ArrayList<>();
  for (Method method : FlinkPipelineOptions.class.getDeclaredMethods()) {
    String name;
    String description;
    String defaultValue = null;
    name = method.getName();
    if (name.matches("^(get|is).*")) {
      name = name.replaceFirst("^(get|is)", "");

      if (isPython) {
        name = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name);
      } else {
        name = Character.toLowerCase(name.charAt(0)) + name.substring(1);
      }

      Description descriptionAnnotation = method.getAnnotation(Description.class);
      if (descriptionAnnotation == null) {
        throw new RuntimeException(
            "All pipeline options should have a description. Please add one for " + name);
      }
      description = descriptionAnnotation.value();

      Optional<String> defaultValueFromAnnotation = getDefaultValueFromAnnotation(method);
      if (defaultValueFromAnnotation.isPresent()) {
        defaultValue = defaultValueFromAnnotation.get();
      }

      options.add(new Option(name, description, defaultValue));
    }
  }
  options.sort(Comparator.comparing(option -> option.name));
  return options;
}
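As a quick illustration of the name conversion performed above, here is a sketch of the two branches, using the real getMaxBundleSize getter as input: strip the get/is prefix, then convert to lower-underscore for Python or lower-camel for Java. CaseFormat here is Guava's (Beam uses a vendored copy, but the behavior is the same).

import com.google.common.base.CaseFormat;

public class OptionNameConversionSketch {
  public static void main(String[] args) {
    String name = "getMaxBundleSize".replaceFirst("^(get|is)", ""); // -> "MaxBundleSize"

    // Python branch: UPPER_CAMEL -> LOWER_UNDERSCORE.
    String pythonName = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name);
    System.out.println(pythonName); // max_bundle_size

    // Java branch: lower-case the first character.
    String javaName = Character.toLowerCase(name.charAt(0)) + name.substring(1);
    System.out.println(javaName); // maxBundleSize
  }
}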
 
Example #13
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  Supplier<DoFnOperator<String, String>> doFnOperatorSupplier =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn(),
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              options,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> doFnOperator = doFnOperatorSupplier.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // This callback will be executed in the snapshotState function in the course of
  // finishing the currently active bundle. Everything emitted in the callback should
  // be buffered and not sent downstream.
  doFnOperator.setBundleFinishedCallback(
      () -> {
        try {
          // Clear this early for the test here because we want to finish the bundle from within
          // the callback, which would otherwise cause infinite recursion
          doFnOperator.setBundleFinishedCallback(null);
          testHarness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow("trigger another bundle")));
          doFnOperator.invokeFinishBundle();
          testHarness.processElement(
              new StreamRecord<>(
                  WindowedValue.valueInGlobalWindow(
                      "check that the previous element is not flushed")));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("regular element")));
  testHarness.close();

  // Restore
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness2 =
      new OneInputStreamOperatorTestHarness<>(doFnOperatorSupplier.get());

  testHarness2.initializeState(snapshot);
  testHarness2.open();

  testHarness2.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("after restore")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness2.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed"),
          WindowedValue.valueInGlobalWindow("after restore")));
}
 
Example #14
Source File: KettleBeamPipelineExecutor.java    From kettle-beam with Apache License 2.0
public Pipeline getPipeline( TransMeta transMeta, BeamJobConfig config ) throws KettleException {

    try {

      if ( StringUtils.isEmpty( config.getRunnerTypeName() ) ) {
        throw new KettleException( "You need to specify a runner type, one of : " + RunnerType.values().toString() );
      }
      PipelineOptions pipelineOptions = null;
      VariableSpace space = transMeta;

      RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( config.getRunnerTypeName() ) );
      switch ( runnerType ) {
        case Direct:
          pipelineOptions = PipelineOptionsFactory.create();
          break;
        case DataFlow:
          DataflowPipelineOptions dfOptions = PipelineOptionsFactory.as( DataflowPipelineOptions.class );
          configureDataFlowOptions( config, dfOptions, space );
          pipelineOptions = dfOptions;
          break;
        case Spark:
          SparkPipelineOptions sparkOptions;
          if (sparkContext!=null) {
            SparkContextOptions sparkContextOptions = PipelineOptionsFactory.as( SparkContextOptions.class );
            sparkContextOptions.setProvidedSparkContext( sparkContext );
            sparkOptions = sparkContextOptions;
          } else {
            sparkOptions = PipelineOptionsFactory.as( SparkPipelineOptions.class );
          }
          configureSparkOptions( config, sparkOptions, space, transMeta.getName() );
          pipelineOptions = sparkOptions;
          break;
        case Flink:
          FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as( FlinkPipelineOptions.class );
          configureFlinkOptions( config, flinkOptions, space );
          pipelineOptions = flinkOptions;
          break;
        default:
          throw new KettleException( "Sorry, this isn't implemented yet" );
      }

      configureStandardOptions( config, transMeta.getName(), pipelineOptions, space );

      setVariablesInTransformation( config, transMeta );

      TransMetaPipelineConverter converter;
      if (stepPluginClasses!=null && xpPluginClasses!=null) {
        converter = new TransMetaPipelineConverter( transMeta, metaStore, stepPluginClasses, xpPluginClasses, jobConfig );
      } else {
        converter = new TransMetaPipelineConverter( transMeta, metaStore, config.getPluginsToStage(), jobConfig );
      }
      Pipeline pipeline = converter.createPipeline( pipelineOptions );

      // Also set the pipeline options...
      //
      FileSystems.setDefaultPipelineOptions(pipelineOptions);

      return pipeline;
    } catch ( Exception e ) {
      throw new KettleException( "Error configuring local Beam Engine", e );
    }

  }
 
Example #15
Source File: UnboundedSourceWrapperTest.java    From beam with Apache License 2.0
private static void testSourceDoesNotShutdown(boolean shouldHaveReaders) throws Exception {
  final int parallelism = 2;
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // Make sure we do not shut down
  options.setShutdownSourcesAfterIdleMs(Long.MAX_VALUE);

  TestCountingSource source = new TestCountingSource(20).withoutSplitting();

  UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> sourceWrapper =
      new UnboundedSourceWrapper<>("noReader", options, source, parallelism);

  StreamingRuntimeContext mock = Mockito.mock(StreamingRuntimeContext.class);
  if (shouldHaveReaders) {
    // Since the source can't be split, the first subtask index will read everything
    Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(0);
  } else {
    // Set up the RuntimeContext such that this instance won't receive any readers
    Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(parallelism - 1);
  }

  Mockito.when(mock.getNumberOfParallelSubtasks()).thenReturn(parallelism);
  Mockito.when(mock.getExecutionConfig()).thenReturn(new ExecutionConfig());
  ProcessingTimeService timerService = Mockito.mock(ProcessingTimeService.class);
  Mockito.when(timerService.getCurrentProcessingTime()).thenReturn(Long.MAX_VALUE);
  Mockito.when(mock.getProcessingTimeService()).thenReturn(timerService);

  sourceWrapper.setRuntimeContext(mock);
  sourceWrapper.open(new Configuration());

  SourceFunction.SourceContext sourceContext = Mockito.mock(SourceFunction.SourceContext.class);
  Object checkpointLock = new Object();
  Mockito.when(sourceContext.getCheckpointLock()).thenReturn(checkpointLock);
  // Initialize the source context early to avoid concurrency issues with its initialization
  // in the run method and the onProcessingTime call on the wrapper.
  sourceWrapper.setSourceContext(sourceContext);

  sourceWrapper.open(new Configuration());
  assertThat(sourceWrapper.getLocalReaders().isEmpty(), is(!shouldHaveReaders));

  Thread thread =
      new Thread(
          () -> {
            try {
              sourceWrapper.run(sourceContext);
            } catch (Exception e) {
              LOG.error("Error while running UnboundedSourceWrapper", e);
            }
          });

  try {
    thread.start();
    // Wait to see if the wrapper shuts down immediately in case it doesn't have readers
    if (!shouldHaveReaders) {
      // The expected state is for finalizeSource to sleep instead of exiting
      while (true) {
        StackTraceElement[] callStack = thread.getStackTrace();
        if (callStack.length >= 2
            && "sleep".equals(callStack[0].getMethodName())
            && "finalizeSource".equals(callStack[1].getMethodName())) {
          break;
        }
        Thread.sleep(10);
      }
    }
    // Source should still be running even if there are no readers
    assertThat(sourceWrapper.isRunning(), is(true));
    synchronized (checkpointLock) {
      // Trigger emission of the watermark by updating processing time.
      // The actual processing time value does not matter.
      sourceWrapper.onProcessingTime(42);
    }
    // Source should still be running even when watermark is at max
    assertThat(sourceWrapper.isRunning(), is(true));
    assertThat(thread.isAlive(), is(true));
    sourceWrapper.cancel();
  } finally {
    thread.interrupt();
    // try to join but also don't mask exceptions with test timeout
    thread.join(1000);
  }
  assertThat(thread.isAlive(), is(false));
}
 
Example #16
Source File: UnboundedSourceWrapperTest.java    From beam with Apache License 2.0
@Test(timeout = 30_000)
public void testValueEmission() throws Exception {
  final int numElementsPerShard = 20;
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  final long[] numElementsReceived = {0L};
  final int[] numWatermarksReceived = {0};

  // this source will emit exactly NUM_ELEMENTS for each parallel reader,
  // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
  // elements later.
  TestCountingSource source =
      new TestCountingSource(numElementsPerShard).withFixedNumSplits(numSplits);

  for (int subtaskIndex = 0; subtaskIndex < numTasks; subtaskIndex++) {
    UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
        new UnboundedSourceWrapper<>("stepName", options, source, numTasks);

    // the source wrapper will only request as many splits as there are tasks and the source
    // will create at most numSplits splits
    assertEquals(numSplits, flinkWrapper.getSplitSources().size());

    StreamSource<
            WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>,
            UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark>>
        sourceOperator = new StreamSource<>(flinkWrapper);

    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>
        testHarness =
            new AbstractStreamOperatorTestHarness<>(
                sourceOperator,
                numTasks /* max parallelism */,
                numTasks /* parallelism */,
                subtaskIndex /* subtask index */);

    // The testing timer service is synchronous, so we must configure a watermark interval
    // > 0, otherwise we can loop infinitely due to a timer always becoming ready after
    // it has been set.
    testHarness.getExecutionConfig().setAutoWatermarkInterval(10L);
    testHarness.setProcessingTime(System.currentTimeMillis());
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    Thread processingTimeUpdateThread = startProcessingTimeUpdateThread(testHarness);

    try {
      testHarness.open();
      StreamSources.run(
          sourceOperator,
          testHarness.getCheckpointLock(),
          new TestStreamStatusMaintainer(),
          new Output<StreamRecord<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>>() {
            private boolean hasSeenMaxWatermark = false;

            @Override
            public void emitWatermark(Watermark watermark) {
              // we get this when there is no more data
              // it can happen that we get the max watermark several times, so guard against
              // this
              if (!hasSeenMaxWatermark
                  && watermark.getTimestamp()
                      >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) {
                numWatermarksReceived[0]++;
                hasSeenMaxWatermark = true;
              }
            }

            @Override
            public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> streamRecord) {
              collect((StreamRecord) streamRecord);
            }

            @Override
            public void emitLatencyMarker(LatencyMarker latencyMarker) {}

            @Override
            public void collect(
                StreamRecord<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>
                    windowedValueStreamRecord) {
              numElementsReceived[0]++;
            }

            @Override
            public void close() {}
          });
    } finally {
      processingTimeUpdateThread.interrupt();
      processingTimeUpdateThread.join();
    }
  }
  // verify that we get the expected count across all subtasks
  assertEquals(numElementsPerShard * numSplits, numElementsReceived[0]);
  // and that we get as many final watermarks as there are subtasks
  assertEquals(numTasks, numWatermarksReceived[0]);
}
 
Example #17
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(
    TupleTag<Integer> mainOutput,
    List<TupleTag<?>> additionalOutputs,
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory,
    WindowingStrategy windowingStrategy,
    @Nullable Coder keyCoder,
    Coder windowedInputCoder) {

  FlinkExecutableStageContextFactory contextFactory =
      Mockito.mock(FlinkExecutableStageContextFactory.class);
  when(contextFactory.get(any())).thenReturn(stageContext);

  final ExecutableStagePayload stagePayload;
  if (keyCoder != null) {
    stagePayload = this.stagePayloadWithUserState;
  } else {
    stagePayload = this.stagePayload;
  }

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          windowedInputCoder,
          Collections.emptyMap(),
          mainOutput,
          additionalOutputs,
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          stagePayload,
          jobInfo,
          contextFactory,
          createOutputMap(mainOutput, additionalOutputs),
          windowingStrategy,
          keyCoder,
          keyCoder != null ? new KvToByteBufferKeySelector<>(keyCoder) : null);

  Whitebox.setInternalState(operator, "stateRequestHandler", stateRequestHandler);
  return operator;
}
 
Example #18
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testSerialization() {
  WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder =
      WindowedValue.getValueOnlyCoder(VarIntCoder.of());

  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  TupleTag<Integer> additionalOutput = new TupleTag<>("additional-output");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(
              additionalOutput,
              new OutputTag<>(additionalOutput.getId(), TypeInformation.of(Integer.class)))
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput, 1)
          .build();

  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds);

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          WindowedValue.getValueOnlyCoder(VarIntCoder.of()),
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput),
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          options,
          stagePayload,
          jobInfo,
          FlinkExecutableStageContextFactory.getInstance(),
          createOutputMap(mainOutput, ImmutableList.of(additionalOutput)),
          WindowingStrategy.globalDefault(),
          null,
          null);

  ExecutableStageDoFnOperator<Integer, Integer> clone = SerializationUtils.clone(operator);
  assertNotNull(clone);
  assertNotEquals(operator, clone);
}
 
Example #19
Source File: DoFnOperator.java    From beam with Apache License 2.0
/** Constructor for DoFnOperator. */
public DoFnOperator(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    Coder<WindowedValue<InputT>> inputWindowedCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<OutputT> outputManagerFactory,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    PipelineOptions options,
    Coder<?> keyCoder,
    KeySelector<WindowedValue<InputT>, ?> keySelector,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.stepName = stepName;
  this.windowedInputCoder = inputWindowedCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputTagMapping = sideInputTagMapping;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputManagerFactory = outputManagerFactory;

  setChainingStrategy(ChainingStrategy.ALWAYS);

  this.keyCoder = keyCoder;
  this.keySelector = keySelector;

  this.timerCoder =
      TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());

  FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);

  this.maxBundleSize = flinkOptions.getMaxBundleSize();
  Preconditions.checkArgument(maxBundleSize > 0, "Bundle size must be at least 1");
  this.maxBundleTimeMills = flinkOptions.getMaxBundleTimeMills();
  Preconditions.checkArgument(maxBundleTimeMills > 0, "Bundle time must be at least 1");
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;

  this.requiresStableInput =
      // WindowDoFnOperator does not use a DoFn
      doFn != null
          && DoFnSignatures.getSignature(doFn.getClass()).processElement().requiresStableInput();

  if (requiresStableInput) {
    Preconditions.checkState(
        CheckpointingMode.valueOf(flinkOptions.getCheckpointingMode())
            == CheckpointingMode.EXACTLY_ONCE,
        "Checkpointing mode is not set to exactly once but @RequiresStableInput is used.");
    Preconditions.checkState(
        flinkOptions.getCheckpointingInterval() > 0,
        "No checkpointing configured but pipeline uses @RequiresStableInput");
    LOG.warn(
        "Enabling stable input for transform {}. Will only process elements at most every {} milliseconds.",
        stepName,
        flinkOptions.getCheckpointingInterval()
            + Math.max(0, flinkOptions.getMinPauseBetweenCheckpoints()));
  }

  this.finishBundleBeforeCheckpointing = flinkOptions.getFinishBundleBeforeCheckpointing();
}
 
Example #20
Source File: DoFnOperator.java    From beam with Apache License 2.0
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs state and timers to obtain its DoFn, so we must wait until
  // StateInternals and TimerInternals are ready. This will be called after initializeState().
  this.doFn = getDoFn();
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // put this in front of the root FnRunner before any additional wrappers
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner,
                "stable-input-buffer",
                windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(),
                getOperatorStateBackend(),
                getKeyedStateBackend(),
                options.getNumConcurrentCheckpoints());
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule timer to check timeout of finish bundle.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
}
 
Example #21
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testBundleProcessingExceptionIsFatalDuringCheckpointing() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  DoFnOperator doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn() {
            @FinishBundle
            public void finishBundle() {
              throw new RuntimeException("something went wrong here");
            }
          },
          "stepName",
          windowedValueCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  @SuppressWarnings("unchecked")
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // Make sure we throw Error, not a regular Exception.
  // A regular exception would just cause the checkpoint to fail.
  assertThrows(Error.class, () -> testHarness.snapshot(0, 0));
}
 
Example #22
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test(expected = IllegalStateException.class)
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(kvCoder);

  DoFn<String, String> doFn =
      new DoFn<String, String>() {
        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // should make the DoFnOperator creation fail
  options.setCheckpointingInterval(-1L);
  new DoFnOperator(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #23
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            doFnOperator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));
      });
}
 
Example #24
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");

        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            doFnOperator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));
      });
}
 
Example #25
Source File: FlinkStatefulDoFnFunction.java    From beam with Apache License 2.0
@Override
public void reduce(
    Iterable<WindowedValue<KV<K, V>>> values, Collector<WindowedValue<RawUnionValue>> out)
    throws Exception {
  RuntimeContext runtimeContext = getRuntimeContext();

  DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
  } else {
    // it has some additional Outputs
    outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager(out, outputMap);
  }

  final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();

  // get the first value, we need this for initializing the state internals with the key.
  // we are guaranteed to have a first value, otherwise reduce() would not have been called.
  WindowedValue<KV<K, V>> currentValue = iterator.next();
  final K key = currentValue.getValue().getKey();

  final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);

  // Used with Batch, we know that all the data is available for this key. We can't use the
  // timer manager from the context because it doesn't exist. So we create one and advance
  // time to the end after processing all elements.
  final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
  timerInternals.advanceProcessingTime(Instant.now());
  timerInternals.advanceSynchronizedProcessingTime(Instant.now());

  List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<KV<K, V>, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializedOptions.get(),
          dofn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext() {
            @Override
            public StateInternals stateInternals() {
              return stateInternals;
            }

            @Override
            public TimerInternals timerInternals() {
              return timerInternals;
            }
          },
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  FlinkPipelineOptions pipelineOptions = serializedOptions.get().as(FlinkPipelineOptions.class);
  if (!pipelineOptions.getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  doFnRunner.startBundle();

  doFnRunner.processElement(currentValue);
  while (iterator.hasNext()) {
    currentValue = iterator.next();
    doFnRunner.processElement(currentValue);
  }

  // Finish any pending windows by advancing the input watermark to infinity.
  timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Finally, advance the processing time to infinity to fire any timers.
  timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
  timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);

  fireEligibleTimers(key, timerInternals, doFnRunner);

  doFnRunner.finishBundle();
}
 
Example #26
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testLateDroppingForStatefulFn() throws Exception {

  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(new Duration(10)));

  DoFn<Integer, String> fn =
      new DoFn<Integer, String>() {

        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(context.element().toString());
        }
      };

  VarIntCoder keyCoder = VarIntCoder.of();
  Coder<WindowedValue<Integer>> inputCoder =
      WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
  Coder<WindowedValue<String>> outputCoder =
      WindowedValue.getFullCoder(
          StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());

  KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
      e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<Integer, String> doFnOperator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          keyCoder, /* key coder */
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          doFnOperator,
          keySelector,
          new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));

  testHarness.open();

  testHarness.processWatermark(0);

  IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));

  // this should not be late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));

  testHarness.getOutput().clear();

  testHarness.processWatermark(9);

  // this should still not be considered late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));

  testHarness.getOutput().clear();

  testHarness.processWatermark(10);

  // this should now be considered late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());

  testHarness.close();
}
 
Example #27
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testMultiOutputOutput() throws Exception {

  WindowedValue.ValueOnlyWindowedValueCoder<String> coder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
  TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(additionalOutput1, new OutputTag<String>(additionalOutput1.getId()) {})
          .put(additionalOutput2, new OutputTag<String>(additionalOutput2.getId()) {})
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput1, coder)
          .put(additionalOutput2, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput1, 1)
          .put(additionalOutput2, 2)
          .build();

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new MultiOutputDoFn(additionalOutput1, additionalOutput2),
          "stepName",
          coder,
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput1, additionalOutput2),
          new DoFnOperator.MultiOutputOutputManagerFactory(
              mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: one"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: two"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  testHarness.close();
}
 
Example #28
Source File: FlinkDoFnFunction.java    From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  // Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
  // deserialization method. However, this is a hack, and we want to properly initialize the
  // options where they are needed.
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn);
  metricContainer = new FlinkMetricContainer(getRuntimeContext());

  // setup DoFnRunner
  final RuntimeContext runtimeContext = getRuntimeContext();
  final DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new DoFnOutputManager();
  } else {
    // it has some additional outputs
    outputManager = new MultiDoFnOutputManager(outputMap);
  }

  final List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializedOptions.get(),
          doFn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext(),
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (!serializedOptions.get().as(FlinkPipelineOptions.class).getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  this.collectorAware = (CollectorAware) outputManager;
  this.doFnRunner = doFnRunner;
}