org.apache.beam.sdk.transforms.PTransform Java Examples
The following examples show how to use org.apache.beam.sdk.transforms.PTransform.
The originating project, source file, and license are noted above each example.
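Before the project examples, here is a minimal, self-contained sketch of the basic pattern they all build on: a composite transform subclasses PTransform and overrides expand(). All names here (TrimNonEmpty, the sample data) are illustrative and not taken from the examples below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Filter;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

// A minimal composite PTransform: trims strings, then drops the empty ones.
public class TrimNonEmpty extends PTransform<PCollection<String>, PCollection<String>> {
  @Override
  public PCollection<String> expand(PCollection<String> input) {
    return input
        .apply("Trim", MapElements.into(TypeDescriptors.strings()).via((String s) -> s.trim()))
        .apply("DropEmpty", Filter.by((String s) -> !s.isEmpty()));
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    p.apply(Create.of(" a ", "", "b")).apply("Clean", new TrimNonEmpty());
    p.run().waitUntilFinish();
  }
}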
Example #1
Source File: PTransformTranslation.java From beam with Apache License 2.0
private static Map<Class<? extends PTransform>, TransformPayloadTranslator>
    loadTransformPayloadTranslators() {
  HashMap<Class<? extends PTransform>, TransformPayloadTranslator> translators = new HashMap<>();
  for (TransformPayloadTranslatorRegistrar registrar :
      ServiceLoader.load(TransformPayloadTranslatorRegistrar.class)) {
    Map<Class<? extends PTransform>, TransformPayloadTranslator> newTranslators =
        (Map) registrar.getTransformPayloadTranslators();
    Set<Class<? extends PTransform>> alreadyRegistered =
        Sets.intersection(translators.keySet(), newTranslators.keySet());
    if (!alreadyRegistered.isEmpty()) {
      throw new IllegalArgumentException(
          String.format(
              "Classes already registered: %s", Joiner.on(", ").join(alreadyRegistered)));
    }
    translators.putAll(newTranslators);
  }
  return ImmutableMap.copyOf(translators);
}
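Runners discover these payload translators through Java's ServiceLoader, so each module contributes its own by shipping a TransformPayloadTranslatorRegistrar implementation on the classpath. A hypothetical registrar might look like the sketch below; the @AutoService annotation (which generates the META-INF/services entry that ServiceLoader.load reads) and all My* names are assumptions for illustration.

// Hypothetical registrar sketch; MyTransform and MyTranslator are illustrative placeholders.
@AutoService(TransformPayloadTranslatorRegistrar.class)
public class MyTranslatorRegistrar implements TransformPayloadTranslatorRegistrar {
  @Override
  public Map<? extends Class<? extends PTransform>, ? extends TransformPayloadTranslator>
      getTransformPayloadTranslators() {
    return ImmutableMap.of(MyTransform.class, new MyTranslator());
  }
}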
Example #2
Source File: ReadTranslator.java From beam with Apache License 2.0
public static <T> void translateReadHelper(
    Source<T> source,
    PTransform<?, ? extends PCollection<?>> transform,
    TranslationContext context) {
  try {
    StepTranslationContext stepContext = context.addStep(transform, "ParallelRead");
    stepContext.addInput(PropertyNames.FORMAT, PropertyNames.CUSTOM_SOURCE_FORMAT);
    stepContext.addInput(
        PropertyNames.SOURCE_STEP_INPUT,
        cloudSourceToDictionary(
            CustomSources.serializeToCloudSource(source, context.getPipelineOptions())));
    stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example #3
Source File: BeamSideInputLookupJoinRel.java From beam with Apache License 2.0
@Override
public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
  // If one side is Seekable and the other is non-Seekable, do a sideInputLookup join.
  // When doing a sideInputLookup join, the windowFn does not need to match.
  // Only INNER JOIN, LEFT OUTER JOIN (where the left side must be non-Seekable), and
  // RIGHT OUTER JOIN (where the right side must be non-Seekable) are supported.
  if (joinType == JoinRelType.FULL) {
    throw new UnsupportedOperationException(
        "FULL OUTER JOIN is not supported when joining "
            + "a Seekable table with a non Seekable table.");
  }
  if ((joinType == JoinRelType.LEFT && seekableInputIndex().get() == 0)
      || (joinType == JoinRelType.RIGHT && seekableInputIndex().get() == 1)) {
    throw new UnsupportedOperationException(
        String.format("%s side of an OUTER JOIN must be a non Seekable table.", joinType.name()));
  }
  return new SideInputLookupJoin();
}
Example #4
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0
@SuppressWarnings("unchecked") private static <K, InputT, OutputT> SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn( PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform, Pipeline pipeline, KvCoder<K, InputT> kvInputCoder) { if (transform instanceof GroupByKey) { return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>) SystemReduceFn.buffering(kvInputCoder.getValueCoder()); } else if (transform instanceof Combine.PerKey) { final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> combineFn = ((Combine.PerKey) transform).getFn(); return SystemReduceFn.combining( kvInputCoder.getKeyCoder(), AppliedCombineFn.withInputCoder(combineFn, pipeline.getCoderRegistry(), kvInputCoder)); } else { throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey."); } }
Example #5
Source File: EvaluationContext.java From beam with Apache License 2.0
/**
 * Adds the output of a transform to the context map and possibly caches it, if it conforms to
 * {@link #shouldCache(PTransform, PValue)}.
 *
 * @param transform transform from which the Dataset was created
 * @param pvalue output of the transform
 * @param dataset Dataset created from the transform
 */
private void putDataset(
    @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) {
  try {
    dataset.setName(pvalue.getName());
  } catch (IllegalStateException e) {
    // name not set, ignore
  }
  if (shouldCache(transform, pvalue)) {
    // we cache only PCollections
    Coder<?> coder = ((PCollection<?>) pvalue).getCoder();
    Coder<? extends BoundedWindow> wCoder =
        ((PCollection<?>) pvalue).getWindowingStrategy().getWindowFn().windowCoder();
    dataset.cache(storageLevel(), WindowedValue.getFullCoder(coder, wCoder));
  }
  datasets.put(pvalue, dataset);
  leaves.add(dataset);
}
Example #6
Source File: NativeWrappedIOTest.java From component-runtime with Apache License 2.0
@Test
public void source() {
  final String plugin = COMPONENTS.getTestPlugins().iterator().next();
  final PTransform<PBegin, PCollection<JsonObject>> jdbc =
      PTransform.class.cast(
          COMPONENTS
              .asManager()
              .createComponent("beamtest", "source", ComponentManager.ComponentType.MAPPER, 1, emptyMap())
              .orElseThrow(() -> new IllegalArgumentException("no beamtest#source component")));
  PAssert
      .that(pipeline.apply(jdbc).setCoder(JsonpJsonObjectCoder.of(plugin)))
      .satisfies((SerializableFunction<Iterable<JsonObject>, Void>) input -> {
        assertEquals("test", input.iterator().next().getString("id"));
        return null;
      });
  pipeline.run().waitUntilFinish();
}
Example #7
Source File: FlinkStreamingPipelineTranslator.java From beam with Apache License 2.0
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());
  // Get the transformation corresponding to the node we are
  // currently visiting and translate it into its Flink alternative.
  PTransform<?, ?> transform = node.getTransform();
  StreamTransformTranslator<?> translator =
      FlinkStreamingTransformTranslators.getTranslator(transform);
  if (translator == null || !applyCanTranslate(transform, node, translator)) {
    String transformUrn = PTransformTranslation.urnForTransform(transform);
    LOG.info(transformUrn);
    throw new UnsupportedOperationException(
        "The transform " + transformUrn + " is currently not supported.");
  }
  applyStreamingTransform(transform, node, translator);
}
Example #8
Source File: DirectRunnerTest.java From beam with Apache License 2.0
private PTransform<PBegin, PDone> outputStartTo(StaticQueue<Integer> queue) {
  return new PTransform<PBegin, PDone>() {
    @Override
    public PDone expand(PBegin input) {
      input
          .apply(Create.of(1))
          .apply(
              MapElements.into(TypeDescriptors.voids())
                  .via(
                      in -> {
                        queue.add(in);
                        return null;
                      }));
      return PDone.in(input.getPipeline());
    }
  };
}
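A hypothetical use of this helper would apply it at the pipeline root and inspect the queue after the run; StaticQueue is a private utility of DirectRunnerTest, so the factory call below is an assumed stand-in for its real API.

// Usage sketch; StaticQueue.of() is a hypothetical stand-in for the test utility's factory.
StaticQueue<Integer> queue = StaticQueue.of();
p.apply("outputToQueue", outputStartTo(queue));
p.run().waitUntilFinish();
// After the run, the queue should hold the single element produced by Create.of(1).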
Example #9
Source File: PTransformMatchers.java From beam with Apache License 2.0
/**
 * A {@link PTransformMatcher} that matches a {@link ParDo.SingleOutput} containing a {@link DoFn}
 * that is splittable, as signified by {@link ProcessElementMethod#isSplittable()}.
 */
public static PTransformMatcher splittableProcessKeyedBounded() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> transform = application.getTransform();
      if (transform instanceof SplittableParDo.ProcessKeyedElements) {
        DoFn<?, ?> fn = ((SplittableParDo.ProcessKeyedElements) transform).getFn();
        DoFnSignature signature = DoFnSignatures.signatureForDoFn(fn);
        return signature.processElement().isSplittable()
            && signature.isBoundedPerElement() == IsBounded.BOUNDED;
      }
      return false;
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("SplittableProcessKeyedBoundedMatcher").toString();
    }
  };
}
Example #10
Source File: WriteWithShardingFactory.java From beam with Apache License 2.0
@Override
public PTransformReplacement<PCollection<InputT>, WriteFilesResult<DestinationT>>
    getReplacementTransform(
        AppliedPTransform<
                PCollection<InputT>,
                WriteFilesResult<DestinationT>,
                PTransform<PCollection<InputT>, WriteFilesResult<DestinationT>>>
            transform) {
  try {
    WriteFiles<InputT, DestinationT, ?> replacement =
        WriteFiles.to(WriteFilesTranslation.getSink(transform))
            .withSideInputs(WriteFilesTranslation.getDynamicDestinationSideInputs(transform))
            .withSharding(new LogElementShardsWithDrift<>());
    if (WriteFilesTranslation.isWindowedWrites(transform)) {
      replacement = replacement.withWindowedWrites();
    }
    return PTransformReplacement.of(
        PTransformReplacements.getSingletonMainInput(transform), replacement);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #11
Source File: PTransformMatchersTest.java From beam with Apache License 2.0
/**
 * Gets the {@link AppliedPTransform} that has a created {@code PCollection<KV<String, Integer>>}
 * as input.
 */
private AppliedPTransform<?, ?, ?> getAppliedTransform(PTransform pardo) {
  PCollection<KV<String, Integer>> input =
      PCollection.createPrimitiveOutputInternal(
          p,
          WindowingStrategy.globalDefault(),
          IsBounded.BOUNDED,
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
  input.setName("dummy input");
  PCollection<Integer> output =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());
  output.setName("dummy output");
  return AppliedPTransform.of("pardo", input.expand(), output.expand(), pardo, p);
}
Example #12
Source File: PipelineRunnerTest.java From beam with Apache License 2.0
@Test
@Category({NeedsRunner.class, UsesCommittedMetrics.class, UsesCounterMetrics.class})
public void testRunPTransform() {
  final String namespace = PipelineRunnerTest.class.getName();
  final Counter counter = Metrics.counter(namespace, "count");
  final PipelineResult result =
      PipelineRunner.fromOptions(p.getOptions())
          .run(
              new PTransform<PBegin, POutput>() {
                @Override
                public POutput expand(PBegin input) {
                  PCollection<Double> output =
                      input
                          .apply(Create.of(1, 2, 3, 4))
                          .apply("ScaleByTwo", MapElements.via(new ScaleFn<>(2.0, counter)));
                  PAssert.that(output).containsInAnyOrder(2.0, 4.0, 6.0, 8.0);
                  return output;
                }
              });

  // Checking counters to verify the pipeline actually ran.
  assertThat(
      result
          .metrics()
          .queryMetrics(
              MetricsFilter.builder()
                  .addNameFilter(MetricNameFilter.inNamespace(namespace))
                  .build())
          .getCounters(),
      hasItem(metricsResult(namespace, "count", "ScaleByTwo", 4L, true)));
}
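Note the API being exercised here: PipelineRunner.fromOptions(options).run(transform) wraps the given PTransform<PBegin, ?> in a freshly created pipeline and executes it, so a one-shot transform can be run without constructing a Pipeline object explicitly; the committed counter is then the evidence that execution actually happened.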
Example #13
Source File: PTransformTranslation.java From beam with Apache License 2.0
/** Returns the URN for the transform if it is known, otherwise {@code null}. */
@Nullable
public static String urnForTransformOrNull(PTransform<?, ?> transform) {
  TransformTranslator<?> transformTranslator =
      Iterables.find(
          KNOWN_TRANSLATORS,
          translator -> translator.canTranslate(transform),
          DefaultUnknownTransformTranslator.INSTANCE);
  return ((TransformTranslator) transformTranslator).getUrn(transform);
}
Example #14
Source File: BigQueryDeadletterSink.java From feast with Apache License 2.0
@Override
public PTransform<PCollection<FailedElement>, PDone> write() {
  return WriteFailedElement.newBuilder()
      .setJsonSchema(jsonSchema)
      .setTableSpec(tableSpec)
      .build();
}
Example #15
Source File: FileBasedDeadLetterQueueReconsumer.java From DataflowTemplates with Apache License 2.0
/** Builds a {@link PTransform} that consumes matched DLQ files. */
static PTransform<PCollection<Metadata>, PCollection<String>> moveAndConsumeMatches() {
  return new PTransform<PCollection<Metadata>, PCollection<String>>() {
    @Override
    public PCollection<String> expand(PCollection<Metadata> input) {
      // TODO(pabloem, dhercher): Use a Beam standard transform once possible
      // TODO(pabloem, dhercher): Add a _metadata attribute to track whether a row comes from DLQ.
      return input
          .apply(Reshuffle.viaRandomKey())
          .apply(ParDo.of(new MoveAndConsumeFn()));
    }
  };
}
Example #16
Source File: AppliedPTransform.java From beam with Apache License 2.0
public static <
        InputT extends PInput,
        OutputT extends POutput,
        TransformT extends PTransform<? super InputT, OutputT>>
    AppliedPTransform<InputT, OutputT, TransformT> of(
        String fullName,
        Map<TupleTag<?>, PValue> input,
        Map<TupleTag<?>, PValue> output,
        TransformT transform,
        Pipeline p) {
  return new AutoValue_AppliedPTransform<>(fullName, input, output, transform, p);
}
Example #17
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0
@Override
public void translateNode(
    PTransform<PCollection<T>, PCollection<T>> transform,
    FlinkStreamingTranslationContext context) {
  @SuppressWarnings("unchecked")
  WindowingStrategy<T, BoundedWindow> windowingStrategy =
      (WindowingStrategy<T, BoundedWindow>)
          context.getOutput(transform).getWindowingStrategy();

  TypeInformation<WindowedValue<T>> typeInfo = context.getTypeInfo(context.getOutput(transform));

  DataStream<WindowedValue<T>> inputDataStream =
      context.getInputDataStream(context.getInput(transform));

  WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();

  FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
      new FlinkAssignWindows<>(windowFn);

  String fullName = context.getOutput(transform).getName();
  SingleOutputStreamOperator<WindowedValue<T>> outputDataStream =
      inputDataStream
          .flatMap(assignWindowsFunction)
          .name(fullName)
          .uid(fullName)
          .returns(typeInfo);

  context.setOutputDataStream(context.getOutput(transform), outputDataStream);
}
Example #18
Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0
@Override
public void translateNode(
    PTransform<PBegin, PCollection<byte[]>> transform, FlinkBatchTranslationContext context) {
  String name = transform.getName();
  PCollection<byte[]> output = context.getOutput(transform);

  TypeInformation<WindowedValue<byte[]>> typeInformation = context.getTypeInfo(output);
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
          context.getExecutionEnvironment(), new ImpulseInputFormat(), typeInformation, name);

  context.setOutputDataSet(output, dataSource);
}
Example #19
Source File: Twister2BatchPipelineTranslator.java From beam with Apache License 2.0
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  LOG.fine(String.format("visiting transform %s", node.getTransform()));
  PTransform transform = node.getTransform();
  BatchTransformTranslator translator = getTransformTranslator(transform);
  if (null == translator) {
    throw new IllegalStateException("no translator registered for " + transform);
  }
  translationContext.setCurrentTransform(node.toAppliedPTransform(getPipeline()));
  translator.translateNode(transform, translationContext);
}
Example #20
Source File: Twister2BatchPipelineTranslator.java From twister2 with Apache License 2.0
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  LOG.fine(String.format("visiting transform %s", node.getTransform()));
  PTransform transform = node.getTransform();
  BatchTransformTranslator translator = getTransformTranslator(transform.getClass());
  if (null == translator) {
    throw new IllegalStateException("no translator registered for " + transform);
  }
  translationContext.setCurrentTransform(node.toAppliedPTransform(getPipeline()));
  translator.translateNode(transform, translationContext);
}
Example #21
Source File: Transforms.java From nomulus with Apache License 2.0
/**
 * Returns a {@link PTransform} from file {@link Metadata} to {@link VersionedEntity} using a
 * caller-provided {@code transformer}.
 */
static PTransform<PCollection<Metadata>, PCollection<VersionedEntity>> processFiles(
    DoFn<ReadableFile, VersionedEntity> transformer) {
  return new PTransform<PCollection<Metadata>, PCollection<VersionedEntity>>() {
    @Override
    public PCollection<VersionedEntity> expand(PCollection<Metadata> input) {
      return input
          .apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED))
          .apply(transformer.getClass().getSimpleName(), ParDo.of(transformer));
      // TODO(weiminyu): reshuffle to enable dynamic work rebalance per beam dev guide
    }
  };
}
Example #22
Source File: ReadTranslation.java From beam with Apache License 2.0
private static <T> ReadPayload getReadPayload(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform)
    throws IOException {
  SdkComponents components = SdkComponents.create(transform.getPipeline().getOptions());
  return ReadPayload.parseFrom(
      PTransformTranslation.toProto(transform, Collections.emptyList(), components)
          .getSpec()
          .getPayload());
}
Example #23
Source File: QueueMapper.java From component-runtime with Apache License 2.0
public QueueMapper(final String plugin, final String family, final String name,
    final PTransform<PBegin, PCollection<Record>> transform) {
  this.plugin = plugin;
  this.family = family;
  this.name = name;
  this.transform = transform;
  this.state = LoopState.newTracker(plugin);
  log.debug("Associating state {} to {}#{}", this.state.getId(), family, name);
}
Example #24
Source File: RecordBranchMapper.java From component-runtime with Apache License 2.0
public static PTransform<PCollection<Record>, PCollection<Record>> of(final String plugin,
    final String fromBranch, final String toBranch) {
  final RecordBuilderFactory lookup =
      ServiceLookup.lookup(ComponentManager.instance(), plugin, RecordBuilderFactory.class);
  return new RecordParDoTransformCoderProvider<>(SchemaRegistryCoder.of(),
      new RecordBranchMapper(lookup, fromBranch, toBranch));
}
Example #25
Source File: GroupByKeyTranslatorBatch.java From beam with Apache License 2.0
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform,
    TranslationContext context) {

  @SuppressWarnings("unchecked")
  final PCollection<KV<K, V>> inputPCollection = (PCollection<KV<K, V>>) context.getInput();
  Dataset<WindowedValue<KV<K, V>>> input = context.getDataset(inputPCollection);
  WindowingStrategy<?, ?> windowingStrategy = inputPCollection.getWindowingStrategy();
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputPCollection.getCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();

  // group by key only
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupByKeyOnly =
      input.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // group also by windows
  WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> outputCoder =
      WindowedValue.FullWindowedValueCoder.of(
          KvCoder.of(keyCoder, IterableCoder.of(valueCoder)),
          windowingStrategy.getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, Iterable<V>>>> output =
      groupByKeyOnly.flatMapGroups(
          new GroupAlsoByWindowViaOutputBufferFn<>(
              windowingStrategy,
              new InMemoryStateInternalsFactory<>(),
              SystemReduceFn.buffering(valueCoder),
              context.getSerializableOptions()),
          EncoderHelpers.fromBeamCoder(outputCoder));

  context.putDataset(context.getOutput(), output);
}
Example #26
Source File: AutoKVWrapper.java From component-runtime with Apache License 2.0
public static PTransform<PCollection<Record>, PCollection<KV<String, Record>>> of(final String plugin,
    final Function<GroupKeyProvider.GroupContext, String> idGenerator, final String component,
    final String branch) {
  return new RecordParDoTransformCoderProvider<>(
      KvCoder.of(StringUtf8Coder.of(), SchemaRegistryCoder.of()),
      new AutoKVWrapper(idGenerator, component, branch));
}
Example #27
Source File: Transforms.java From nomulus with Apache License 2.0
/**
 * Returns a {@link PTransform} from file name patterns to file {@link Metadata Metadata records}.
 */
public static PTransform<PCollection<String>, PCollection<Metadata>> getFilesByPatterns() {
  return new PTransform<PCollection<String>, PCollection<Metadata>>() {
    @Override
    public PCollection<Metadata> expand(PCollection<String> input) {
      return input.apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));
    }
  };
}
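Examples #21 and #27 come from the same nomulus Transforms class and compose naturally. A hypothetical pipeline wiring them together might look like the sketch below; the bucket pattern and MyParserFn (a DoFn<ReadableFile, VersionedEntity>) are illustrative placeholders.

// Assumed composition of getFilesByPatterns() and processFiles(...) shown above.
PCollection<VersionedEntity> entities =
    pipeline
        .apply("FilePatterns", Create.of("gs://my-bucket/exports/*")) // illustrative pattern
        .apply("MatchFiles", Transforms.getFilesByPatterns())
        .apply("ParseFiles", Transforms.processFiles(new MyParserFn()));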
Example #28
Source File: ExpansionService.java From beam with Apache License 2.0
private static PTransform buildTransform(
    Class<? extends ExternalTransformBuilder> builderClass, Object configObject)
    throws Exception {
  Constructor<? extends ExternalTransformBuilder> constructor =
      builderClass.getDeclaredConstructor();
  constructor.setAccessible(true);
  ExternalTransformBuilder<?, ?, ?> externalTransformBuilder = constructor.newInstance();
  Method buildMethod = builderClass.getMethod("buildExternal", configObject.getClass());
  buildMethod.setAccessible(true);
  return (PTransform) buildMethod.invoke(externalTransformBuilder, configObject);
}
Example #29
Source File: ParDoMultiOverrideFactory.java From beam with Apache License 2.0
@SuppressWarnings("unchecked") private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication( AppliedPTransform< PCollection<? extends InputT>, PCollectionTuple, PTransform<PCollection<? extends InputT>, PCollectionTuple>> application) throws IOException { DoFn<InputT, OutputT> fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application); DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass()); if (signature.processElement().isSplittable()) { return SplittableParDo.forAppliedParDo((AppliedPTransform) application); } else if (signature.stateDeclarations().size() > 0 || signature.timerDeclarations().size() > 0 || signature.timerFamilyDeclarations().size() > 0) { return new GbkThenStatefulParDo( fn, ParDoTranslation.getMainOutputTag(application), ParDoTranslation.getAdditionalOutputTags(application), ParDoTranslation.getSideInputs(application), ParDoTranslation.getSchemaInformation(application), ParDoTranslation.getSideInputMapping(application)); } else { return application.getTransform(); } }
Example #30
Source File: Pipeline.java From beam with Apache License 2.0
@VisibleForTesting
void validate(PipelineOptions options) {
  this.traverseTopologically(new ValidateVisitor(options));
  final Collection<Map.Entry<String, Collection<PTransform<?, ?>>>> errors =
      Collections2.filter(instancePerName.asMap().entrySet(), Predicates.not(new IsUnique<>()));
  if (!errors.isEmpty()) {
    switch (options.getStableUniqueNames()) {
      case OFF:
        break;
      case WARNING:
        LOG.warn(
            "The following transforms do not have stable unique names: {}",
            Joiner.on(", ").join(transform(errors, new KeysExtractor())));
        break;
      case ERROR:
        // be very verbose here since it will just fail the execution
        throw new IllegalStateException(
            String.format(
                    "Pipeline update will not be possible"
                        + " because the following transforms do not have stable unique names: %s.",
                    Joiner.on(", ").join(transform(errors, new KeysExtractor())))
                + "\n\n"
                + "Conflicting instances:\n"
                + Joiner.on("\n")
                    .join(transform(errors, new UnstableNameToMessage(instancePerName)))
                + "\n\nYou can fix it by adding a name when you call apply(): "
                + "pipeline.apply(<name>, <transform>).");
      default:
        throw new IllegalArgumentException(
            "Unrecognized value for stable unique names: " + options.getStableUniqueNames());
    }
  }
}