org.apache.beam.sdk.transforms.DoFn Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.DoFn.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ByteBuddyOnTimerInvokerFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link OnTimerInvoker} for the timer callback identified by {@code timerId} on the
 * given {@link DoFn} instance.
 *
 * <p>The invoker's constructor is looked up in {@code constructorCache}, keyed by the DoFn class
 * and timer id, and is then invoked reflectively with {@code fn} as its only argument.
 *
 * @param fn the DoFn instance the invoker is constructed around
 * @param timerId the id of the timer whose callback should be invoked
 * @return the invoker instance created for {@code fn}
 * @throws RuntimeException wrapping any reflective or cache-loading failure
 */
@Override
public <InputT, OutputT> OnTimerInvoker<InputT, OutputT> forTimer(
    DoFn<InputT, OutputT> fn, String timerId) {

  @SuppressWarnings("unchecked")
  Class<? extends DoFn<?, ?>> doFnType = (Class<? extends DoFn<?, ?>>) fn.getClass();

  try {
    OnTimerMethodSpecifier cacheKey =
        OnTimerMethodSpecifier.forClassAndTimerId(doFnType, timerId);
    Constructor<?> invokerConstructor = constructorCache.get(cacheKey);
    Object invoker = invokerConstructor.newInstance(fn);
    return (OnTimerInvoker<InputT, OutputT>) invoker;
  } catch (InstantiationException
      | IllegalAccessException
      | IllegalArgumentException
      | InvocationTargetException
      | SecurityException
      | ExecutionException e) {
    String message =
        String.format(
            "Unable to construct @%s invoker for %s",
            OnTimer.class.getSimpleName(), fn.getClass().getName());
    throw new RuntimeException(message, e);
  }
}
Example #2
Source File: BatchStatefulParDoOverrides.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expands the original stateful ParDo for batch execution.
 *
 * <p>The DoFn and the input's windowing strategy are validated first. When {@code isFnApi} is
 * set, the input is reshuffled and the original ParDo is applied unchanged. Otherwise the input
 * goes through {@code GbkBeforeStatefulParDo} and a ParDo wrapping the original DoFn in a
 * {@code BatchStatefulDoFn}, keeping the original side inputs and output tags.
 *
 * @param input the keyed input collection
 * @return the outputs of the (possibly rewritten) stateful ParDo
 */
@Override
public PCollectionTuple expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> statefulFn = originalParDo.getFn();

  // Validation happens before any transform is applied to the input.
  verifyFnIsStateful(statefulFn);
  DataflowRunner.verifyDoFnSupportedBatch(statefulFn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    return input.apply(Reshuffle.of()).apply(originalParDo);
  }

  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollectionTuple>
      batchStatefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(statefulFn))
              .withSideInputs(originalParDo.getSideInputs())
              .withOutputTags(
                  originalParDo.getMainOutputTag(), originalParDo.getAdditionalOutputTags());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(batchStatefulParDo);
}
Example #3
Source File: HllCount.java From beam with Apache License 2.0 | 6 votes |
/**
 * Maps each serialized sketch to a {@code Long} result.
 *
 * <p>A {@code null} sketch is logged and emitted as {@code 0}; a zero-length sketch is emitted as
 * {@code 0}; any other sketch is parsed with {@code HyperLogLogPlusPlus.forProto} and its
 * {@code result()} is emitted.
 *
 * @param input collection of serialized sketches
 * @return collection with one {@code Long} per input element
 */
@Override
public PCollection<Long> expand(PCollection<byte[]> input) {
  DoFn<byte[], Long> extractFn =
      new DoFn<byte[], Long>() {
        @ProcessElement
        public void processElement(@Element byte[] sketch, OutputReceiver<Long> receiver) {
          if (sketch == null) {
            LOG.warn(
                "Received a null and treated it as an empty sketch. "
                    + "Consider replacing nulls with empty byte arrays (byte[0]) "
                    + "in upstream transforms for better space-efficiency and safety.");
            receiver.output(0L);
            return;
          }
          if (sketch.length == 0) {
            receiver.output(0L);
            return;
          }
          receiver.output(HyperLogLogPlusPlus.forProto(sketch).result());
        }
      };
  return input.apply(ParDo.of(extractFn));
}
Example #4
Source File: ByteBuddyDoFnInvokerFactory.java From beam with Apache License 2.0 | 6 votes |
@Override public ByteCodeAppender appender(final Target implementationTarget) { return (methodVisitor, implementationContext, instrumentedMethod) -> { StackManipulation.Size size = new StackManipulation.Compound( // Load the this reference MethodVariableAccess.REFERENCE.loadFrom(0), // Load the delegate argument MethodVariableAccess.REFERENCE.loadFrom(1), // Invoke the super constructor (default constructor of Object) MethodInvocation.invoke( new TypeDescription.ForLoadedType(clazz) .getDeclaredMethods() .filter( ElementMatchers.isConstructor() .and(ElementMatchers.takesArguments(DoFn.class))) .getOnly()), // Return void. MethodReturn.VOID) .apply(methodVisitor, implementationContext); return new ByteCodeAppender.Size(size.getMaximalSize(), instrumentedMethod.getStackSize()); }; }
Example #5
Source File: GroupByKeyAndWindowDoFnTransform.java From incubator-nemo with Apache License 2.0 | 6 votes |
/** * This creates a new DoFn that groups elements by key and window. * * @param doFn original doFn. * @return GroupAlsoByWindowViaWindowSetNewDoFn */ @Override protected DoFn wrapDoFn(final DoFn doFn) { final Map<K, StateAndTimerForKey> map = new HashMap<>(); this.inMemoryStateInternalsFactory = new InMemoryStateInternalsFactory(map); this.inMemoryTimerInternalsFactory = new InMemoryTimerInternalsFactory(map); // This function performs group by key and window operation return GroupAlsoByWindowViaWindowSetNewDoFn.create( getWindowingStrategy(), inMemoryStateInternalsFactory, inMemoryTimerInternalsFactory, null, // GBK has no sideinput. reduceFn, getOutputManager(), getMainOutputTag()); }
Example #6
Source File: DoFnSignaturesTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects signature analysis to reject a {@code @ProcessElement} method that declares two
 * {@code Timer} parameters carrying the same {@code @TimerId("my-id")}.
 *
 * <p>The failure must be an {@link IllegalArgumentException} whose message mentions
 * "duplicate", the timer id, the offending method name and "index 2", and must not match
 * {@code mentionsState()}.
 */
@Test
public void testTimerParameterDuplicate() throws Exception {
  // Expectations are registered before the call that is expected to throw.
  // NOTE(review): `thrown` is presumably a JUnit ExpectedException rule - confirm.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("duplicate");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myProcessElement");
  thrown.expectMessage("index 2");
  thrown.expectMessage(not(mentionsState()));
  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        @TimerId("my-id")
        private final TimerSpec myfield = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);

        // Both Timer parameters reuse the id "my-id"; this is the duplicate under test.
        @ProcessElement
        public void myProcessElement(
            ProcessContext context,
            @TimerId("my-id") Timer one,
            @TimerId("my-id") Timer two) {}

        @OnTimer("my-id")
        public void onWhatever() {}
      }.getClass());
}
Example #7
Source File: DoFnSignaturesSplittableDoFnTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects signature analysis to fail when a {@code @GetWatermarkEstimatorStateCoder} method
 * declares a raw {@code KvCoder} return type, with a message stating that it is not a subtype
 * of {@code Coder<Void>}.
 */
@Test
public void testGetWatermarkEstimatorStateCoderReturnsWrongType() throws Exception {
  // Fixture: only the declared signatures matter, so every method body returns null or is empty.
  class BadFn extends DoFn<Integer, String> {
    @ProcessElement
    public void process(
        ProcessContext context, RestrictionTracker<SomeRestriction, Void> tracker) {}

    @GetInitialRestriction
    public SomeRestriction getInitialRestriction(@Element Integer element) {
      return null;
    }

    // The raw KvCoder return type is the error under test.
    @GetWatermarkEstimatorStateCoder
    public KvCoder getWatermarkEstimatorStateCoder() {
      return null;
    }
  }

  thrown.expectMessage(
      "getWatermarkEstimatorStateCoder() returns KvCoder which is not a subtype of Coder<Void>");
  DoFnSignatures.getSignature(BadFn.class);
}
Example #8
Source File: DoFnSignaturesTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects signature analysis to reject a field annotated with {@code @TimerId} whose declared
 * type is {@code String} rather than a {@code TimerSpec}.
 *
 * <p>The failure must be an {@link IllegalArgumentException} whose message mentions "TimerId",
 * "TimerSpec" and the field name "bizzle", and must not match {@code mentionsState()}.
 */
@Test
public void testTimerIdWithWrongType() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("TimerId");
  thrown.expectMessage("TimerSpec");
  thrown.expectMessage("bizzle");
  thrown.expectMessage(not(mentionsState()));
  DoFnSignatures.getSignature(
      new DoFn<String, String>() {
        // A String field carrying @TimerId is the error under test.
        @TimerId("foo")
        private final String bizzle = "bazzle";

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass());
}
Example #9
Source File: DoFnSignaturesTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects signature analysis to reject a DoFn that declares a {@code @TimerId("my-id")} field
 * but no {@code @OnTimer} method.
 *
 * <p>The failure must be an {@link IllegalArgumentException} whose message contains
 * "No callback registered" and the timer id, matches {@code mentionsTimers()} and does not
 * match {@code mentionsState()}.
 */
@Test
public void testTimerIdNoCallback() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("No callback registered");
  thrown.expectMessage("my-id");
  thrown.expectMessage(not(mentionsState()));
  thrown.expectMessage(mentionsTimers());
  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        // Timer declared here, but the class has no @OnTimer method.
        @TimerId("my-id")
        private final TimerSpec myfield1 = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass());
}
Example #10
Source File: LocalSpannerIO.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@DoFn.ProcessElement public void processElement(ProcessContext c) { MutationGroup mg = c.element(); if (mg.primary().getOperation() == Op.DELETE && !isPointDelete(mg.primary())) { // Ranged deletes are not batchable. c.output(unbatchableMutationsTag, Arrays.asList(mg)); unBatchableMutationGroupsCounter.inc(); return; } SpannerSchema spannerSchema = c.sideInput(schemaView); long groupSize = MutationSizeEstimator.sizeOf(mg); long groupCells = MutationCellCounter.countOf(spannerSchema, mg); long groupRows = Iterables.size(mg); if (groupSize >= batchSizeBytes || groupCells >= maxNumMutations || groupRows >= maxNumRows) { c.output(unbatchableMutationsTag, Arrays.asList(mg)); unBatchableMutationGroupsCounter.inc(); } else { c.output(mg); batchableMutationGroupsCounter.inc(); } }
Example #11
Source File: TestBoundedTable.java From beam with Apache License 2.0 | 6 votes |
/**
 * Attaches a writer that appends every incoming row to {@code CONTENT}.
 *
 * <p>The writer's {@code @Teardown} method clears {@code CONTENT}.
 *
 * @param input rows to record
 * @return a {@link PDone} in the input's pipeline
 */
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  DoFn<Row, Void> recordingFn =
      new DoFn<Row, Void>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          CONTENT.add(c.element());
        }

        @Teardown
        public void close() {
          CONTENT.clear();
        }
      };

  input.apply(ParDo.of(recordingFn));
  return PDone.in(input.getPipeline());
}
Example #12
Source File: CsvImport.java From cloud-bigtable-examples with Apache License 2.0 | 6 votes |
/**
 * Converts one comma-separated line into a {@code Put} and emits it.
 *
 * <p>The column names come from the comma-separated headers option. The first value becomes the
 * row key; each remaining value is written to the column named by the header at the same
 * position, under {@code FAMILY}, with the current system time as timestamp.
 *
 * @param c the process context supplying the input line, the pipeline options and the output
 * @throws Exception any failure is logged together with the input and then rethrown
 */
@ProcessElement
public void processElement(DoFn<String, Mutation>.ProcessContext c) throws Exception {
  try {
    String headerOption = c.getPipelineOptions().as(BigtableCsvOptions.class).getHeaders();
    String[] columnNames = headerOption.split(",");
    String[] cells = c.element().split(",");
    // Every line must supply exactly one value per configured header.
    Preconditions.checkArgument(columnNames.length == cells.length);

    Put put = new Put(Bytes.toBytes(cells[0]));
    long writeTime = System.currentTimeMillis();
    // Column 0 is the row key, so data columns start at index 1.
    for (int col = 1; col < cells.length; col++) {
      put.addColumn(
          FAMILY, Bytes.toBytes(columnNames[col]), writeTime, Bytes.toBytes(cells[col]));
    }
    c.output(put);
  } catch (Exception e) {
    LOG.error("Failed to process input {}", c.element(), e);
    throw e;
  }
}
Example #13
Source File: Group.java From beam with Apache License 2.0 | 6 votes |
/**
 * Groups the input and returns one row per key.
 *
 * <p>Each output row has two fields: a row field named by {@code getKeyField()} holding the key,
 * and an iterable field named by {@code getValueField()} holding rows of the input schema.
 *
 * @param input schema-aware input collection
 * @return rows pairing each key with its grouped values, tagged with the output schema
 */
@Override
public PCollection<Row> expand(PCollection<InputT> input) {
  Schema inputSchema = input.getSchema();
  Schema keySchema = getKeySchema(inputSchema);
  Schema groupedSchema =
      Schema.builder()
          .addRowField(getKeyField(), keySchema)
          .addIterableField(getValueField(), FieldType.row(inputSchema))
          .build();

  DoFn<KV<Row, Iterable<Row>>, Row> toRowFn =
      new DoFn<KV<Row, Iterable<Row>>, Row>() {
        @ProcessElement
        public void process(@Element KV<Row, Iterable<Row>> grouped, OutputReceiver<Row> out) {
          Row result =
              Row.withSchema(groupedSchema)
                  .attachValues(Lists.newArrayList(grouped.getKey(), grouped.getValue()));
          out.output(result);
        }
      };

  return input
      .apply("ToKvs", getToKvs())
      .apply("ToRow", ParDo.of(toRowFn))
      .setRowSchema(groupedSchema);
}
Example #14
Source File: SelectEvent.java From beam with Apache License 2.0 | 6 votes |
/**
 * Extracts the nested row found at the index returned by {@code getNestedIndex} from every
 * element.
 *
 * @param input events; the collection must already carry a schema
 * @return the nested rows, tagged with the row schema of the selected field
 * @throws RuntimeException if {@code input} has no schema
 */
@Override
public PCollection<Row> expand(PCollection<Event> input) {
  if (!input.hasSchema()) {
    throw new RuntimeException("Input PCollection must have a schema!");
  }

  int nestedIndex = getNestedIndex(input.getSchema());
  DoFn<Event, Row> selectFn =
      new DoFn<Event, Row>() {
        @ProcessElement
        public void processElement(@Element Row row, OutputReceiver<Row> out) {
          out.output(row.getRow(nestedIndex));
        }
      };

  PCollection<Row> selected = input.apply(ParDo.of(selectFn));
  return selected.setRowSchema(
      input.getSchema().getField(nestedIndex).getType().getRowSchema());
}
Example #15
Source File: StatefulDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link SimpleDoFnRunner} around {@code fn} for use in these tests.
 *
 * @param fn the DoFn to run
 * @param outputManager where outputs go; when {@code null}, the result of
 *     {@code discardingOutputManager()} is used instead
 * @return the configured runner
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn, @Nullable OutputManager outputManager) {
  OutputManager effectiveOutputManager =
      MoreObjects.firstNonNull(outputManager, discardingOutputManager());

  return new SimpleDoFnRunner<>(
      null,
      fn,
      NullSideInputReader.empty(),
      effectiveOutputManager,
      outputTag,
      Collections.emptyList(),
      mockStepContext,
      null,
      Collections.emptyMap(),
      WINDOWING_STRATEGY,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
Example #16
Source File: PTransformMatchers.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a {@link PTransformMatcher} that accepts a {@link ParDo.MultiOutput} whose {@link DoFn}
 * uses state or timers, as reported by {@link DoFnSignature#usesState()} and {@link
 * DoFnSignature#usesTimers()}. Every other transform is rejected.
 */
public static PTransformMatcher stateOrTimerParDoMulti() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> candidate = application.getTransform();
      if (!(candidate instanceof ParDo.MultiOutput)) {
        return false;
      }
      DoFn<?, ?> doFn = ((ParDo.MultiOutput<?, ?>) candidate).getFn();
      DoFnSignature doFnSignature = DoFnSignatures.signatureForDoFn(doFn);
      return doFnSignature.usesState() || doFnSignature.usesTimers();
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("StateOrTimerParDoMultiMatcher").toString();
    }
  };
}
Example #17
Source File: CoGroupByKeyTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Turns {@code list} into a PCollection and passes it through an identity ParDo.
 *
 * <p>When {@code timestamps} is non-empty the elements are created with those timestamps;
 * otherwise they are created without explicit timestamps.
 *
 * @param name suffix appended to the "Create" and "Identity" step names
 * @param p the pipeline to add the steps to
 * @param list the elements to create
 * @param timestamps timestamps for the elements, or an empty list for none
 * @return the output of the identity ParDo
 */
private PCollection<KV<Integer, String>> createInput(
    String name, Pipeline p, List<KV<Integer, String>> list, List<Long> timestamps) {
  String createStepName = "Create" + name;
  KvCoder<Integer, String> kvCoder =
      KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of());

  PCollection<KV<Integer, String>> created;
  if (!timestamps.isEmpty()) {
    created = p.apply(createStepName, Create.timestamped(list, timestamps).withCoder(kvCoder));
  } else {
    created = p.apply(createStepName, Create.of(list).withCoder(kvCoder));
  }

  DoFn<KV<Integer, String>, KV<Integer, String>> identityFn =
      new DoFn<KV<Integer, String>, KV<Integer, String>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element());
        }
      };
  return created.apply("Identity" + name, ParDo.of(identityFn));
}
Example #18
Source File: DoFnSignaturesTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a parameterless {@code @OnWindowExpiration} method is analyzed as having zero
 * extra parameters.
 */
@Test
public void testOnWindowExpirationNoParam() {
  DoFnSignature signature =
      DoFnSignatures.getSignature(
          new DoFn<String, String>() {
            @ProcessElement
            public void process(ProcessContext c) {}

            @OnWindowExpiration
            public void bar() {}
          }.getClass());

  int extraParameterCount = signature.onWindowExpiration().extraParameters().size();
  assertThat(extraParameterCount, equalTo(0));
}
Example #19
Source File: Task.java From beam with Apache License 2.0 | 5 votes |
/**
 * Splits each input string on commas, parses every piece as an integer, and multiplies each
 * parsed number by ten.
 *
 * @param input comma-separated integer strings
 * @return the parsed numbers, each multiplied by 10
 */
@Override
public PCollection<Integer> expand(PCollection<String> input) {
  DoFn<String, Integer> parseNumbersFn =
      new DoFn<String, Integer>() {
        @ProcessElement
        public void processElement(@Element String numbers, OutputReceiver<Integer> out) {
          for (String token : numbers.split(",")) {
            out.output(Integer.parseInt(token));
          }
        }
      };

  return input
      .apply(ParDo.of(parseNumbersFn))
      .apply(MapElements.into(integers()).via(number -> number * 10));
}
Example #20
Source File: DoFnRunners.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns an implementation of {@link DoFnRunner} for the given {@link DoFn}.
 *
 * <p>If the {@link DoFn} observes the window, this runner will explode the windows of a
 * compressed {@link WindowedValue}. It is the responsibility of the runner to perform any key
 * partitioning needed, etc.
 *
 * <p>Every argument is passed through, unchanged and in the same order, to the
 * {@code SimpleDoFnRunner} constructor.
 *
 * @return a new {@code SimpleDoFnRunner} built from the given arguments
 */
public static <InputT, OutputT> DoFnRunner<InputT, OutputT> simpleRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    StepContext stepContext,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  return new SimpleDoFnRunner<>(
      options,
      fn,
      sideInputReader,
      outputManager,
      mainOutputTag,
      additionalOutputTags,
      stepContext,
      inputCoder,
      outputCoders,
      windowingStrategy,
      doFnSchemaInformation,
      sideInputMapping);
}
Example #21
Source File: Broadcast.java From incubator-nemo with Apache License 2.0 | 5 votes |
/**
 * Entry point of the broadcast example.
 *
 * <p>Reads lines from the input path, exposes the whole collection as an iterable side input,
 * and for every line writes that line followed by all lines joined with newlines. If the side
 * input is empty, the string "error" is written instead.
 *
 * @param args {@code args[0]} is the input file path, {@code args[1]} the output file path.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];

  final Pipeline p = Pipeline.create(NemoPipelineOptionsFactory.create());
  final PCollection<String> elemCollection = GenericSourceSink.read(p, inputFilePath);
  final PCollectionView<Iterable<String>> allCollection =
      elemCollection.apply(View.<String>asIterable());

  final DoFn<String, String> appendAllFn =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(final ProcessContext c) {
          final String line = c.element();
          final Iterable<String> all = c.sideInput(allCollection);
          // An empty side input yields an empty Optional, which maps to "error".
          final String message =
              StreamSupport.stream(all.spliterator(), false)
                  .reduce((l, r) -> l + '\n' + r)
                  .map(joined -> "line: " + line + "\n" + joined)
                  .orElse("error");
          c.output(message);
        }
      };

  final PCollection<String> result =
      elemCollection.apply(ParDo.of(appendAllFn).withSideInputs(allCollection));

  GenericSourceSink.write(result, outputFilePath);
  p.run().waitUntilFinish();
}
Example #22
Source File: HCatalogIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * End-to-end check: writes test records through {@code HCatalogIO.write()}, then reads them
 * back with {@code HCatalogIO.read()} in a second pipeline and compares the first field of each
 * record against the expected values.
 */
@Test
@NeedsEmptyTestTables
public void testWriteThenReadSuccess() {
  // Write phase.
  defaultPipeline
      .apply(Create.of(buildHCatRecords(TEST_RECORDS_COUNT)))
      .apply(
          HCatalogIO.write()
              .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
              .withDatabase(TEST_DATABASE)
              .withTable(TEST_TABLE)
              .withPartition(new java.util.HashMap<>())
              .withBatchSize(512L));
  defaultPipeline.run();

  // Read phase, run in a separate pipeline after the write has completed.
  PCollection<HCatRecord> readBack =
      readAfterWritePipeline.apply(
          HCatalogIO.read()
              .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
              .withDatabase(TEST_DATABASE)
              .withTable(TEST_TABLE)
              .withFilter(TEST_FILTER));

  DoFn<HCatRecord, String> firstFieldAsString =
      new DoFn<HCatRecord, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element().get(0).toString());
        }
      };
  PCollection<String> output = readBack.apply(ParDo.of(firstFieldAsString));

  PAssert.that(output).containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
Example #23
Source File: DoFnSignatures.java From beam with Apache License 2.0 | 5 votes |
/**
 * Validates a restriction-coder method and wraps it in a {@link
 * DoFnSignature.GetRestrictionCoderMethod}.
 *
 * <p>The method must take no arguments, and its return type, resolved against {@code fnT}, must
 * be a subtype of {@code Coder}. Violations are reported through {@code errors}.
 *
 * @param errors reporter used for the argument checks
 * @param fnT type descriptor of the DoFn declaring the method
 * @param m the method to analyze
 * @return the analyzed method together with its resolved return type
 */
@VisibleForTesting
static DoFnSignature.GetRestrictionCoderMethod analyzeGetRestrictionCoderMethod(
    ErrorReporter errors, TypeDescriptor<? extends DoFn> fnT, Method m) {
  boolean takesNoArguments = m.getParameterCount() == 0;
  errors.checkArgument(takesNoArguments, "Must have zero arguments");

  TypeDescriptor<?> returnType = fnT.resolveType(m.getGenericReturnType());
  boolean returnsCoder = returnType.isSubtypeOf(TypeDescriptor.of(Coder.class));
  errors.checkArgument(returnsCoder, "Must return a Coder, but returns %s", format(returnType));

  return DoFnSignature.GetRestrictionCoderMethod.create(m, returnType);
}
Example #24
Source File: DoFnSignaturesProcessElementTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects signature analysis to reject a DoFn whose {@code @ProcessElement} method is private.
 *
 * <p>The failure must be an {@link IllegalArgumentException} whose message contains
 * "process()", "Must be public", and this test class's name followed by "$".
 */
@Test
public void testPrivateProcessElement() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("process()");
  thrown.expectMessage("Must be public");
  // NOTE(review): the "$" suffix presumably matches the anonymous class's binary name - confirm.
  thrown.expectMessage(getClass().getName() + "$");
  DoFnSignatures.getSignature(
      new DoFn<String, String>() {
        // The private modifier is the error under test.
        @ProcessElement
        private void process() {}
      }.getClass());
}
Example #25
Source File: ParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a supplier by forwarding every argument, unchanged and in the same order, to the
 * superclass constructor.
 *
 * <p>NOTE(review): the meaning of each parameter is defined by the superclass, which is not
 * visible in this excerpt - consult its constructor documentation.
 */
public Supplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  super(
      stepId,
      ownerId,
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      pipelineOptions,
      mainOutputTag,
      allOutputTags,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      sideInputs);
}
Example #26
Source File: HelloWorldWrite.java From java-docs-samples with Apache License 2.0 | 5 votes |
public static void main(String[] args) { // [START bigtable_beam_helloworld_create_pipeline] BigtableOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableOptions.class); Pipeline p = Pipeline.create(options); // [END bigtable_beam_helloworld_create_pipeline] // [START bigtable_beam_helloworld_write_config] CloudBigtableTableConfiguration bigtableTableConfig = new CloudBigtableTableConfiguration.Builder() .withProjectId(options.getBigtableProjectId()) .withInstanceId(options.getBigtableInstanceId()) .withTableId(options.getBigtableTableId()) .build(); // [END bigtable_beam_helloworld_write_config] // [START bigtable_beam_helloworld_write_transforms] p.apply(Create.of("phone#4c410523#20190501", "phone#4c410523#20190502")) .apply( ParDo.of( new DoFn<String, Mutation>() { @ProcessElement public void processElement(@Element String rowkey, OutputReceiver<Mutation> out) { long timestamp = System.currentTimeMillis(); Put row = new Put(Bytes.toBytes(rowkey)); row.addColumn( Bytes.toBytes("stats_summary"), Bytes.toBytes("os_build"), timestamp, Bytes.toBytes("android")); out.output(row); } })) .apply(CloudBigtableIO.writeToTable(bigtableTableConfig)); // [END bigtable_beam_helloworld_write_transforms] p.run().waitUntilFinish(); }
Example #27
Source File: DoFnSignaturesProcessElementTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects signature analysis to reject a DoFn that declares no {@code @ProcessElement} method.
 *
 * <p>The failure must be an {@link IllegalArgumentException} whose message contains
 * "No method annotated with @ProcessElement found" and this test class's name followed by "$".
 */
@Test
public void testNoProcessElement() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("No method annotated with @ProcessElement found");
  thrown.expectMessage(getClass().getName() + "$");
  // The anonymous DoFn is deliberately empty.
  DoFnSignatures.getSignature(new DoFn<String, String>() {}.getClass());
}
Example #28
Source File: DoFnSignaturesSplittableDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects signature analysis to fail with "Non-splittable, but annotated as @Unbounded" for a
 * DoFn that carries {@code @UnboundedPerElement} while its {@code @ProcessElement} method takes
 * only a {@code ProcessContext}.
 */
@Test
public void testUnsplittableButDeclaresUnbounded() throws Exception {
  // Fixture: annotated as unbounded-per-element, but process() declares no restriction tracker.
  @UnboundedPerElement
  class SomeFn extends DoFn<Integer, String> {
    @ProcessElement
    public void process(ProcessContext context) {}
  }

  thrown.expectMessage("Non-splittable, but annotated as @Unbounded");
  DoFnSignatures.getSignature(SomeFn.class);
}
Example #29
Source File: SplittableParDoNaiveBounded.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a process context that is an inner-class instance of {@code fn} and stores the
 * supplied collaborators on this instance.
 *
 * @param fn the DoFn that serves as the enclosing instance of this context
 * @param outerContext the process context of the wrapping DoFn
 * @param element the element being processed
 * @param window the window of the element
 * @param tracker the restriction tracker
 * @param watermarkEstimator the watermark estimator
 */
private NestedProcessContext(
    DoFn<InputT, OutputT> fn,
    DoFn<KV<InputT, RestrictionT>, OutputT>.ProcessContext outerContext,
    InputT element,
    BoundedWindow window,
    TrackerT tracker,
    WatermarkEstimatorT watermarkEstimator) {
  // The superclass is an inner class of DoFn, so it needs `fn` as its enclosing instance.
  fn.super();
  this.outerContext = outerContext;
  this.element = element;
  this.window = window;
  this.tracker = tracker;
  this.watermarkEstimator = watermarkEstimator;
}
Example #30
Source File: SnsIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the completion callback for a publish request.
 *
 * <p>When the callback receives a non-null throwable, it logs it and throws an
 * {@code SnsWriteException} wrapping it. Otherwise it pairs the context's current element with
 * the response and emits the result.
 *
 * @param context the process context supplying the element and receiving the output
 * @return a callback taking the publish response and an optional failure
 */
private BiConsumer<? super PublishResponse, ? super Throwable> getPublishResponse(
    DoFn<T, SnsResponse<T>>.ProcessContext context) {
  return (response, ex) -> {
    if (ex != null) {
      LOG.error("Error while publishing request to SNS", ex);
      throw new SnsWriteException("Error while publishing request to SNS", ex);
    }
    SnsResponse<T> result = SnsResponse.of(context.element(), response);
    context.output(result);
  };
}