org.apache.beam.sdk.transforms.DoFn Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.DoFn. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HllCount.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Maps every serialized sketch in {@code input} to a {@code Long}.
 *
 * <p>A {@code null} sketch (reported with a warning) and a zero-length sketch both produce
 * {@code 0L}; any other sketch is decoded via {@code HyperLogLogPlusPlus.forProto} and its
 * {@code result()} is emitted.
 */
@Override
public PCollection<Long> expand(PCollection<byte[]> input) {
  DoFn<byte[], Long> extractFn =
      new DoFn<byte[], Long>() {
        @ProcessElement
        public void processElement(@Element byte[] sketch, OutputReceiver<Long> receiver) {
          // Common case first: a non-empty sketch is decoded and its result emitted.
          if (sketch != null && sketch.length > 0) {
            receiver.output(HyperLogLogPlusPlus.forProto(sketch).result());
            return;
          }
          // Only the null case is worth a warning; an empty array is the recommended encoding.
          if (sketch == null) {
            LOG.warn(
                "Received a null and treated it as an empty sketch. "
                    + "Consider replacing nulls with empty byte arrays (byte[0]) "
                    + "in upstream transforms for better space-efficiency and safety.");
          }
          receiver.output(0L);
        }
      };
  return input.apply(ParDo.of(extractFn));
}
 
Example #2
Source File: SelectEvent.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Projects each input element onto the nested row found at the index returned by
 * {@code getNestedIndex}, and tags the result with that nested field's row schema.
 *
 * @throws RuntimeException if {@code input} has no schema attached
 */
@Override
public PCollection<Row> expand(PCollection<Event> input) {
  if (!input.hasSchema()) {
    throw new RuntimeException("Input PCollection must have a schema!");
  }
  final int nestedFieldIndex = getNestedIndex(input.getSchema());
  PCollection<Row> selected =
      input.apply(
          ParDo.of(
              new DoFn<Event, Row>() {
                @ProcessElement
                public void processElement(@Element Row row, OutputReceiver<Row> o) {
                  o.output(row.getRow(nestedFieldIndex));
                }
              }));
  return selected.setRowSchema(
      input.getSchema().getField(nestedFieldIndex).getType().getRowSchema());
}
 
Example #3
Source File: ByteBuddyOnTimerInvokerFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an {@code OnTimerInvoker} for {@code fn} and {@code timerId} by looking up a
 * constructor in {@code constructorCache} and instantiating it with {@code fn}.
 *
 * @throws RuntimeException wrapping any reflective or cache-loading failure
 */
@Override
public <InputT, OutputT> OnTimerInvoker<InputT, OutputT> forTimer(
    DoFn<InputT, OutputT> fn, String timerId) {

  @SuppressWarnings("unchecked")
  Class<? extends DoFn<?, ?>> doFnType = (Class<? extends DoFn<?, ?>>) fn.getClass();
  try {
    // The cache is keyed by the (DoFn class, timer id) pair.
    Constructor<?> invokerConstructor =
        constructorCache.get(OnTimerMethodSpecifier.forClassAndTimerId(doFnType, timerId));

    return (OnTimerInvoker<InputT, OutputT>) invokerConstructor.newInstance(fn);
  } catch (InstantiationException
      | IllegalAccessException
      | IllegalArgumentException
      | InvocationTargetException
      | SecurityException
      | ExecutionException e) {
    String message =
        String.format(
            "Unable to construct @%s invoker for %s",
            OnTimer.class.getSimpleName(), fn.getClass().getName());
    throw new RuntimeException(message, e);
  }
}
 
Example #4
Source File: StatefulDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code SimpleDoFnRunner} around {@code fn}.
 *
 * @param fn the function under test
 * @param outputManager where outputs go; when {@code null}, the manager returned by
 *     {@code discardingOutputManager()} is used instead
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn, @Nullable OutputManager outputManager) {
  OutputManager effectiveOutputManager =
      MoreObjects.firstNonNull(outputManager, discardingOutputManager());
  return new SimpleDoFnRunner<>(
      null,
      fn,
      NullSideInputReader.empty(),
      effectiveOutputManager,
      outputTag,
      Collections.emptyList(),
      mockStepContext,
      null,
      Collections.emptyMap(),
      WINDOWING_STRATEGY,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #5
Source File: Group.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Groups the input and emits one {@code Row} per group.
 *
 * <p>The output schema has two fields: a row field named by {@code getKeyField()} holding the
 * key, and an iterable field named by {@code getValueField()} holding rows of the input schema.
 */
@Override
public PCollection<Row> expand(PCollection<InputT> input) {
  final Schema inputSchema = input.getSchema();
  final Schema keySchema = getKeySchema(inputSchema);
  final Schema outputSchema =
      Schema.builder()
          .addRowField(getKeyField(), keySchema)
          .addIterableField(getValueField(), FieldType.row(inputSchema))
          .build();

  // Wraps each (key, values) pair into a single two-field row.
  DoFn<KV<Row, Iterable<Row>>, Row> toRowFn =
      new DoFn<KV<Row, Iterable<Row>>, Row>() {
        @ProcessElement
        public void process(@Element KV<Row, Iterable<Row>> kv, OutputReceiver<Row> out) {
          out.output(
              Row.withSchema(outputSchema)
                  .attachValues(Lists.newArrayList(kv.getKey(), kv.getValue())));
        }
      };

  return input
      .apply("ToKvs", getToKvs())
      .apply("ToRow", ParDo.of(toRowFn))
      .setRowSchema(outputSchema);
}
 
Example #6
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expands the original stateful ParDo for batch execution.
 *
 * <p>The original {@code DoFn} is first validated. When {@code isFnApi} is set, the input is
 * reshuffled and the original ParDo is applied unchanged. Otherwise the input goes through
 * {@code GbkBeforeStatefulParDo} and a {@code BatchStatefulDoFn} wrapper that carries over the
 * original side inputs and output tags.
 */
@Override
public PCollectionTuple expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> fn = originalParDo.getFn();
  // Validation happens before any transform is applied to the input.
  verifyFnIsStateful(fn);
  DataflowRunner.verifyDoFnSupportedBatch(fn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    // Fn API path: only a reshuffle is inserted in front of the original ParDo.
    return input.apply(Reshuffle.of()).apply(originalParDo);
  }

  // Wrap the original fn and keep the original side inputs and output tags.
  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollectionTuple>
      statefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(fn))
              .withSideInputs(originalParDo.getSideInputs())
              .withOutputTags(
                  originalParDo.getMainOutputTag(), originalParDo.getAdditionalOutputTags());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(statefulParDo);
}
 
Example #7
Source File: CsvImport.java    From cloud-bigtable-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Converts one comma-separated input line into a {@code Put} and emits it.
 *
 * <p>The first value becomes the row key. Every following value is written to the column whose
 * qualifier is the header at the same position, in family {@code FAMILY}, all with the same
 * timestamp taken from the system clock.
 *
 * @param c the process context supplying the line and the configured header list
 * @throws IllegalArgumentException if the line does not have as many fields as there are headers
 * @throws Exception any failure is logged together with the offending input and rethrown
 */
@ProcessElement
public void processElement(DoFn<String, Mutation>.ProcessContext c) throws Exception {
  try {
    String[] headers = c.getPipelineOptions().as(BigtableCsvOptions.class).getHeaders()
        .split(",");
    String[] values = c.element().split(",");
    // Report both counts so a malformed line can be diagnosed from the exception alone.
    Preconditions.checkArgument(
        headers.length == values.length,
        "CSV line has %s fields but %s headers are configured",
        values.length,
        headers.length);

    Put row = new Put(Bytes.toBytes(values[0]));
    long timestamp = System.currentTimeMillis();
    // Column 0 is the row key, so only headers from index 1 onwards are used as qualifiers.
    for (int i = 1; i < values.length; i++) {
      row.addColumn(FAMILY, Bytes.toBytes(headers[i]), timestamp, Bytes.toBytes(values[i]));
    }
    c.output(row);
  } catch (Exception e) {
    LOG.error("Failed to process input {}", c.element(), e);
    throw e;
  }

}
 
Example #8
Source File: ByteBuddyDoFnInvokerFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns an appender that emits a short instruction sequence: load {@code this}, load the first
 * argument, invoke the constructor declared on {@code clazz} that takes a {@code DoFn}, and
 * return void.
 *
 * <p>The reported size combines the maximal stack size of that sequence with the stack size of
 * the instrumented method.
 */
@Override
public ByteCodeAppender appender(final Target implementationTarget) {
  return (methodVisitor, implementationContext, instrumentedMethod) -> {
    StackManipulation.Size size =
        new StackManipulation.Compound(
                // Load the this reference
                MethodVariableAccess.REFERENCE.loadFrom(0),
                // Load the delegate argument
                MethodVariableAccess.REFERENCE.loadFrom(1),
                // Invoke the constructor declared on clazz that takes a DoFn argument.
                // NOTE(review): clazz is presumably the superclass of the generated type - confirm.
                MethodInvocation.invoke(
                    new TypeDescription.ForLoadedType(clazz)
                        .getDeclaredMethods()
                        .filter(
                            ElementMatchers.isConstructor()
                                .and(ElementMatchers.takesArguments(DoFn.class)))
                        .getOnly()),
                // Return void.
                MethodReturn.VOID)
            .apply(methodVisitor, implementationContext);
    return new ByteCodeAppender.Size(size.getMaximalSize(), instrumentedMethod.getStackSize());
  };
}
 
Example #9
Source File: GroupByKeyAndWindowDoFnTransform.java    From incubator-nemo with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the DoFn that performs the group-by-key-and-window operation.
 *
 * <p>A fresh per-key map is created and shared by the in-memory state and timer factories, which
 * are stored on this instance before the DoFn is created.
 *
 * @param doFn original doFn; not read by this implementation.
 * @return GroupAlsoByWindowViaWindowSetNewDoFn
 */
@Override
protected DoFn wrapDoFn(final DoFn doFn) {
  final Map<K, StateAndTimerForKey> stateAndTimerByKey = new HashMap<>();
  this.inMemoryStateInternalsFactory = new InMemoryStateInternalsFactory(stateAndTimerByKey);
  this.inMemoryTimerInternalsFactory = new InMemoryTimerInternalsFactory(stateAndTimerByKey);

  // The fourth argument is null because GBK has no side input.
  return GroupAlsoByWindowViaWindowSetNewDoFn.create(
    getWindowingStrategy(),
    inMemoryStateInternalsFactory,
    inMemoryTimerInternalsFactory,
    null,
    reduceFn,
    getOutputManager(),
    getMainOutputTag());
}
 
Example #10
Source File: TestBoundedTable.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Attaches a writer to {@code input} that adds every row to {@code CONTENT}.
 *
 * <p>The writer's {@code @Teardown} hook clears {@code CONTENT}.
 *
 * @return a {@code PDone} bound to the input's pipeline
 */
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  DoFn<Row, Void> collectFn =
      new DoFn<Row, Void>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          CONTENT.add(context.element());
        }

        @Teardown
        public void close() {
          CONTENT.clear();
        }
      };
  input.apply(ParDo.of(collectFn));
  return PDone.in(input.getPipeline());
}
 
Example #11
Source File: DoFnSignaturesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects signature analysis to fail with an {@code IllegalArgumentException} when a
 * {@code @ProcessElement} method declares two {@code Timer} parameters with the same
 * {@code @TimerId}.
 *
 * <p>The message must contain "duplicate", the timer id, the method name and "index 2", and must
 * satisfy {@code not(mentionsState())}.
 */
@Test
public void testTimerParameterDuplicate() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("duplicate");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myProcessElement");
  // The second Timer parameter sits at zero-based position 2 of myProcessElement.
  thrown.expectMessage("index 2");
  thrown.expectMessage(not(mentionsState()));
  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        @TimerId("my-id")
        private final TimerSpec myfield = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);

        // Both Timer parameters deliberately reuse the same id.
        @ProcessElement
        public void myProcessElement(
            ProcessContext context, @TimerId("my-id") Timer one, @TimerId("my-id") Timer two) {}

        @OnTimer("my-id")
        public void onWhatever() {}
      }.getClass());
}
 
Example #12
Source File: LocalSpannerIO.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Routes a mutation group to either the main output or {@code unbatchableMutationsTag}.
 *
 * <p>A group goes to the unbatchable output when its primary mutation is a non-point delete, or
 * when its size, cell count or row count reaches the corresponding configured limit. Each route
 * increments its own counter.
 */
@DoFn.ProcessElement
public void processElement(ProcessContext c) {
  MutationGroup group = c.element();
  boolean rangedDelete =
      group.primary().getOperation() == Op.DELETE && !isPointDelete(group.primary());
  if (rangedDelete) {
    // Ranged deletes are not batchable.
    c.output(unbatchableMutationsTag, Arrays.asList(group));
    unBatchableMutationGroupsCounter.inc();
    return;
  }

  // The schema side input is only read for groups that might be batched.
  SpannerSchema schema = c.sideInput(schemaView);
  long sizeBytes = MutationSizeEstimator.sizeOf(group);
  long cellCount = MutationCellCounter.countOf(schema, group);
  long rowCount = Iterables.size(group);

  boolean withinLimits =
      sizeBytes < batchSizeBytes && cellCount < maxNumMutations && rowCount < maxNumRows;
  if (withinLimits) {
    c.output(group);
    batchableMutationGroupsCounter.inc();
  } else {
    c.output(unbatchableMutationsTag, Arrays.asList(group));
    unBatchableMutationGroupsCounter.inc();
  }
}
 
Example #13
Source File: DoFnSignaturesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects signature analysis to fail with an {@code IllegalArgumentException} when a
 * {@code @TimerId} field is declared but the DoFn has no {@code @OnTimer} method.
 *
 * <p>The message must contain "No callback registered" and the timer id, and must satisfy both
 * {@code not(mentionsState())} and {@code mentionsTimers()}.
 */
@Test
public void testTimerIdNoCallback() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("No callback registered");
  thrown.expectMessage("my-id");
  thrown.expectMessage(not(mentionsState()));
  thrown.expectMessage(mentionsTimers());
  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        // Timer is declared here, but no method in this class is annotated with @OnTimer.
        @TimerId("my-id")
        private final TimerSpec myfield1 = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass());
}
 
Example #14
Source File: DoFnSignaturesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects signature analysis to fail with an {@code IllegalArgumentException} when
 * {@code @TimerId} is placed on a field that is a {@code String} rather than a {@code TimerSpec}.
 *
 * <p>The message must contain "TimerId", "TimerSpec" and the field name "bizzle", and must
 * satisfy {@code not(mentionsState())}.
 */
@Test
public void testTimerIdWithWrongType() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("TimerId");
  thrown.expectMessage("TimerSpec");
  thrown.expectMessage("bizzle");
  thrown.expectMessage(not(mentionsState()));
  DoFnSignatures.getSignature(
      new DoFn<String, String>() {
        // Deliberately the wrong field type for a @TimerId declaration.
        @TimerId("foo")
        private final String bizzle = "bazzle";

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass());
}
 
Example #15
Source File: DoFnSignaturesSplittableDoFnTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects signature analysis of {@code BadFn} to fail with a message stating that
 * {@code getWatermarkEstimatorStateCoder()} returns {@code KvCoder}, which is not a subtype of
 * {@code Coder<Void>}.
 *
 * <p>Only the message is asserted here; no exception class is registered with {@code thrown}.
 */
@Test
public void testGetWatermarkEstimatorStateCoderReturnsWrongType() throws Exception {
  class BadFn extends DoFn<Integer, String> {
    @ProcessElement
    public void process(
        ProcessContext context, RestrictionTracker<SomeRestriction, Void> tracker) {}

    @GetInitialRestriction
    public SomeRestriction getInitialRestriction(@Element Integer element) {
      return null;
    }

    // The declared return type is the part under test; the returned value is irrelevant.
    @GetWatermarkEstimatorStateCoder
    public KvCoder getWatermarkEstimatorStateCoder() {
      return null;
    }
  }

  thrown.expectMessage(
      "getWatermarkEstimatorStateCoder() returns KvCoder which is not a subtype of Coder<Void>");
  DoFnSignatures.getSignature(BadFn.class);
}
 
Example #16
Source File: DoFnSignaturesSplittableDoFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the signature of a DoFn whose {@code @ProcessElement} method takes only a
 * {@code ProcessContext} reports {@code BOUNDED} from {@code isBoundedPerElement()}.
 */
@Test
public void testUnsplittableIsBounded() throws Exception {
  // No RestrictionTracker parameter and no restriction-related methods are declared.
  class UnsplittableFn extends DoFn<Integer, String> {
    @ProcessElement
    public void process(ProcessContext context) {}
  }

  assertEquals(
      PCollection.IsBounded.BOUNDED,
      DoFnSignatures.getSignature(UnsplittableFn.class).isBoundedPerElement());
}
 
Example #17
Source File: CombineShardsFn.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the result of {@code combineShards} for the current element, passing it the pipeline
 * options and the {@code shards} and {@code eofContents} side inputs.
 */
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
  Options options = c.getPipelineOptions().as(Options.class);
  c.output(combineShards(options, c.element(), c.sideInput(shards), c.sideInput(eofContents)));
}
 
Example #18
Source File: ParDoTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes the payload of {@code fnSpec} into a {@code DoFnWithExecutionInformation}.
 *
 * @param fnSpec a spec whose URN must equal {@code CUSTOM_JAVA_DO_FN_URN}
 * @throws IllegalArgumentException if the URN does not match
 */
public static DoFnWithExecutionInformation doFnWithExecutionInformationFromProto(
    FunctionSpec fnSpec) {
  checkArgument(
      fnSpec.getUrn().equals(CUSTOM_JAVA_DO_FN_URN),
      "Expected %s to be %s with URN %s, but URN was %s",
      DoFn.class.getSimpleName(),
      FunctionSpec.class.getSimpleName(),
      CUSTOM_JAVA_DO_FN_URN,
      fnSpec.getUrn());
  // The payload holds the serialized form; the result is cast to the expected type.
  Object deserialized =
      SerializableUtils.deserializeFromByteArray(
          fnSpec.getPayload().toByteArray(), "Custom DoFn With Execution Info");
  return (DoFnWithExecutionInformation) deserialized;
}
 
Example #19
Source File: BeamAggregationRel.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a DoFn that flattens a two-part row into a single row of {@code outputSchema}.
 *
 * <p>The values of nested row 0 always come first. The values of nested row 1 follow unless
 * {@code ignoreValues} is set. When {@code windowStartFieldIndex} is not {@code -1}, the start of
 * the element's window is inserted at that position.
 *
 * @param verifyRowValues when true the row is built with {@code addValues(...).build()},
 *     otherwise with {@code attachValues(...)}
 */
static DoFn<Row, Row> mergeRecord(
    Schema outputSchema,
    int windowStartFieldIndex,
    boolean ignoreValues,
    boolean verifyRowValues) {
  return new DoFn<Row, Row>() {
    @ProcessElement
    public void processElement(
        @Element Row kvRow, BoundedWindow window, OutputReceiver<Row> o) {
      Row keyRow = kvRow.getRow(0);
      // Nested row 1 is only read when its values are wanted.
      Row valueRow = ignoreValues ? null : kvRow.getRow(1);

      int capacity = keyRow.getFieldCount();
      if (valueRow != null) {
        capacity += valueRow.getFieldCount();
      }
      List<Object> merged = Lists.newArrayListWithCapacity(capacity);

      merged.addAll(keyRow.getBaseValues());
      if (valueRow != null) {
        merged.addAll(valueRow.getBaseValues());
      }

      if (windowStartFieldIndex != -1) {
        merged.add(windowStartFieldIndex, ((IntervalWindow) window).start());
      }

      if (verifyRowValues) {
        o.output(Row.withSchema(outputSchema).addValues(merged).build());
      } else {
        o.output(Row.withSchema(outputSchema).attachValues(merged));
      }
    }
  };
}
 
Example #20
Source File: PTransformMatchers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link PTransformMatcher} that accepts a {@link ParDo.MultiOutput} whose {@link DoFn}
 * uses state or timers, according to {@link DoFnSignature#usesState()} and {@link
 * DoFnSignature#usesTimers()}. Any other transform is rejected.
 */
public static PTransformMatcher stateOrTimerParDoMulti() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> candidate = application.getTransform();
      if (!(candidate instanceof ParDo.MultiOutput)) {
        return false;
      }
      DoFn<?, ?> fn = ((ParDo.MultiOutput<?, ?>) candidate).getFn();
      DoFnSignature fnSignature = DoFnSignatures.signatureForDoFn(fn);
      return fnSignature.usesState() || fnSignature.usesTimers();
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("StateOrTimerParDoMultiMatcher").toString();
    }
  };
}
 
Example #21
Source File: GetReferencesFromHeaderFn.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the name of every sequence in the header's sequence dictionary, then logs how many
 * references the dictionary contains.
 */
@ProcessElement
public void processElement(DoFn<HeaderInfo, String>.ProcessContext c) throws Exception {
  final SAMFileHeader header = c.element().header;
  header
      .getSequenceDictionary()
      .getSequences()
      .forEach(sequence -> c.output(sequence.getSequenceName()));
  LOG.info("Processed " + header.getSequenceDictionary().size() + " references");
}
 
Example #22
Source File: DoFnSignaturesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code PipelineOptions} parameter on a {@code @ProcessElement} method shows up
 * among the extra parameters of the analyzed signature as a
 * {@code Parameter.PipelineOptionsParameter}.
 */
@Test
public void testPipelineOptionsParameter() throws Exception {
  DoFnSignature sig =
      DoFnSignatures.getSignature(
          new DoFn<String, String>() {
            @ProcessElement
            public void process(ProcessContext c, PipelineOptions options) {}
          }.getClass());

  // hasItem only requires that at least one extra parameter has the expected type.
  assertThat(
      sig.processElement().extraParameters(),
      Matchers.hasItem(instanceOf(Parameter.PipelineOptionsParameter.class)));
}
 
Example #23
Source File: BigQueryInputRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the current {@code TableRow} to Avro and emits it; {@code null} rows are skipped.
 *
 * <p>The converter is created on first use and given the schema parsed from {@code schemaStr}.
 */
@DoFn.ProcessElement
public void processElement(ProcessContext c) throws IOException {
    final TableRow tableRow = c.element();
    if (tableRow != null) {
        // Lazily set up the converter the first time a non-null row arrives.
        if (converter == null) {
            converter = new BigQueryTableRowIndexedRecordConverter();
            converter.setSchema(new Schema.Parser().parse(schemaStr));
        }
        c.output(converter.convertToAvro(tableRow));
    }
}
 
Example #24
Source File: DIBatchSimulationTest.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every incoming record to {@code RECORDS} and terminates the branch with a {@code PDone}.
 */
@Override
public PDone expand(final PCollection<org.talend.sdk.component.api.record.Record> input) {
    final DoFn<org.talend.sdk.component.api.record.Record, Void> collector =
            new DoFn<org.talend.sdk.component.api.record.Record, Void>() {

                @ProcessElement
                public void onElement(final ProcessContext ctx) {
                    RECORDS.add(ctx.element());
                }
            };
    input.apply(ParDo.of(collector));
    return PDone.in(input.getPipeline());
}
 
Example #25
Source File: DoFnSignaturesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expects signature analysis to fail with an {@code IllegalArgumentException} when a
 * {@code @Timestamp} parameter is declared as {@code String}; the message must state that the
 * argument must have type {@code org.joda.time.Instant}.
 */
@Test
public void testWrongTimestampType() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("@Timestamp argument must have type org.joda.time.Instant");
  // The call is expected to throw, so its result is intentionally not captured.
  DoFnSignatures.getSignature(
      new DoFn<String, String>() {
        @ProcessElement
        public void process(@Timestamp String timestamp) {}
      }.getClass());
}
 
Example #26
Source File: SnsIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the completion callback for a publish call.
 *
 * <p>On success the callback wraps the current element and the response in an
 * {@code SnsResponse} and outputs it through {@code context}. On failure it logs the error and
 * throws an {@code SnsWriteException} carrying the cause.
 */
private BiConsumer<? super PublishResponse, ? super Throwable> getPublishResponse(
    DoFn<T, SnsResponse<T>>.ProcessContext context) {
  return (response, ex) -> {
    if (ex != null) {
      LOG.error("Error while publishing request to SNS", ex);
      throw new SnsWriteException("Error while publishing request to SNS", ex);
    }
    context.output(SnsResponse.of(context.element(), response));
  };
}
 
Example #27
Source File: CacheTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Test checks how the cache candidates map is populated by the runner when evaluating the
 * pipeline.
 *
 * <p>The pipeline uses {@code pCollection} twice (a global count and a list view) and then reads
 * the view as a side input from a second, independent source. After traversal with a
 * {@code SparkRunner.CacheVisitor}, {@code pCollection} must have a count of 2 and must be the
 * only entry with a count above 1.
 */
@Test
public void cacheCandidatesUpdaterTest() {
  SparkPipelineOptions options = createOptions();
  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> pCollection = pipeline.apply(Create.of("foo", "bar"));

  // First use of pCollection.
  pCollection.apply(Count.globally());
  // Second use of pCollection.
  PCollectionView<List<String>> view = pCollection.apply(View.asList());

  // Internally View.asList() creates a PCollection that underlies the PCollectionView, that
  // PCollection should not be cached as the SparkRunner does not access that PCollection to
  // access the PCollectionView.
  pipeline
      .apply(Create.of("foo", "baz"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    // Passes through only the elements that are also present in the side input.
                    @ProcessElement
                    public void processElement(ProcessContext processContext) {
                      if (processContext.sideInput(view).contains(processContext.element())) {
                        processContext.output(processContext.element());
                      }
                    }
                  })
              .withSideInputs(view));

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options);
  SparkRunner.CacheVisitor cacheVisitor =
      new SparkRunner.CacheVisitor(new TransformTranslator.Translator(), ctxt);
  pipeline.traverseTopologically(cacheVisitor);
  // pCollection was applied twice above.
  assertEquals(2L, (long) ctxt.getCacheCandidates().get(pCollection));
  // No other collection may be recorded with more than one use.
  assertEquals(1L, ctxt.getCacheCandidates().values().stream().filter(l -> l > 1).count());
}
 
Example #28
Source File: SplittableParDoNaiveBounded.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a process context bound to {@code fn} and remembers everything needed to serve a single
 * element: the outer context, the element itself, its window, the restriction tracker and the
 * watermark estimator.
 */
private NestedProcessContext(
    DoFn<InputT, OutputT> fn,
    DoFn<KV<InputT, RestrictionT>, OutputT>.ProcessContext outerContext,
    InputT element,
    BoundedWindow window,
    TrackerT tracker,
    WatermarkEstimatorT watermarkEstimator) {
  // Qualified superclass constructor call: the context is an inner class of the given DoFn.
  fn.super();
  this.outerContext = outerContext;
  this.element = element;
  this.window = window;
  this.tracker = tracker;
  this.watermarkEstimator = watermarkEstimator;
}
 
Example #29
Source File: BeamEnumerableConverter.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the pipeline for {@code node} without blocking and stops it once enough rows arrived.
 *
 * <p>The pipeline state is polled once per second. The loop ends when the pipeline reaches a
 * terminal state, or when {@code values} holds at least {@code limitCount} rows, in which case
 * the pipeline is cancelled.
 *
 * @param options pipeline options; {@code blockOnRun} is switched off on them
 * @param node the relational node turned into the source collection
 * @param doFn applied to every result row
 * @param values queue whose size is compared against the limit
 * @param limitCount number of rows after which the pipeline is cancelled
 * @return the result handle of the pipeline
 * @throws RuntimeException if the pipeline ends in the {@code FAILED} state
 */
private static PipelineResult limitRun(
    PipelineOptions options,
    BeamRelNode node,
    DoFn<Row, Void> doFn,
    Queue<Row> values,
    int limitCount) {
  options.as(DirectOptions.class).setBlockOnRun(false);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
  resultCollection.apply(ParDo.of(doFn));

  PipelineResult result = pipeline.run();

  while (true) {
    // Check pipeline state in every second
    State state = result.waitUntilFinish(Duration.standardSeconds(1));
    if (state != null && state.isTerminal()) {
      if (PipelineResult.State.FAILED.equals(state)) {
        throw new RuntimeException("Pipeline failed for unknown reason");
      }
      break;
    }

    if (values.size() >= limitCount) {
      try {
        result.cancel();
      } catch (IOException e) {
        // Best effort: keep the stack trace in the log and still return the result handle.
        LOG.warn("Failed to cancel the pipeline after reaching the row limit", e);
      }
      break;
    }
  }

  return result;
}
 
Example #30
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a keyed test harness around a {@code DoFnOperator} for {@code fn}.
 *
 * <p>The operator is created with the fixed step name "stepName", a single main output
 * ({@code outputTag} encoded with {@code outputCoder}), no additional outputs and no side inputs.
 * The same {@code keySelector} is handed to both the operator and the harness.
 */
private <K, InT, OutT>
    OneInputStreamOperatorTestHarness<WindowedValue<InT>, WindowedValue<OutT>> createTestHarness(
        WindowingStrategy<Object, ?> windowingStrategy,
        DoFn<InT, OutT> fn,
        FullWindowedValueCoder<InT> inputCoder,
        FullWindowedValueCoder<OutT> outputCoder,
        Coder<?> keyCoder,
        TupleTag<OutT> outputTag,
        TypeInformation<K> keyCoderInfo,
        KeySelector<WindowedValue<InT>, K> keySelector)
        throws Exception {
  DoFnOperator<InT, OutT> doFnOperator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          keyCoder /* key coder */,
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  return new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, keyCoderInfo);
}