org.apache.beam.sdk.values.PValue Java Examples
The following examples show how to use
org.apache.beam.sdk.values.PValue.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlattenTranslatorBatch.java From twister2 with Apache License 2.0 | 6 votes |
@Override public void translateNode( Flatten.PCollections<T> transform, Twister2BatchTranslationContext context) { Collection<PValue> pcs = context.getInputs().values(); List<BatchTSetImpl<WindowedValue<T>>> tSets = new ArrayList<>(); BatchTSetImpl<WindowedValue<T>> unionTSet = null; if (pcs.isEmpty()) { // TODO: create empty TSet throw new UnsupportedOperationException("Operation not implemented yet"); } else { for (PValue pc : pcs) { BatchTSetImpl<WindowedValue<T>> curr = context.getInputDataSet(pc); tSets.add(curr); } BatchTSetImpl<WindowedValue<T>> first = tSets.remove(0); Collection<TSet<WindowedValue<T>>> others = new ArrayList<>(); others.addAll(tSets); if (tSets.size() > 0) { unionTSet = first.union(others); } else { unionTSet = first; } } context.setOutputDataSet(context.getOutput(transform), unionTSet); }
Example #2
Source File: EvaluationContext.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates an evaluation context from the supplied collaborators.
 *
 * <p>{@code bundleFactory} and {@code graph} are passed through {@code checkNotNull}; the other
 * arguments are stored as given. The constructor also creates the watermark manager (from the
 * clock and graph), the side input container (from this context and the graph's views), an empty
 * concurrent map for application state internals, the metrics object (from the executor service)
 * and a watermark callback executor backed by {@code MoreExecutors.directExecutor()}.
 *
 * <p>Note that {@code this} is handed to {@code SideInputContainer.create} before the remaining
 * fields are assigned, so the order of the statements matters.
 *
 * @param clock clock stored on the context and given to the watermark manager
 * @param bundleFactory factory stored on the context; must not be null
 * @param graph graph stored on the context; must not be null
 * @param keyedPValues set of values stored on the context as-is
 * @param executorService executor stored on the context and given to the metrics object
 */
private EvaluationContext( Clock clock, BundleFactory bundleFactory, DirectGraph graph, Set<PValue> keyedPValues, ExecutorService executorService) { this.clock = clock; this.bundleFactory = checkNotNull(bundleFactory); this.graph = checkNotNull(graph); this.keyedPValues = keyedPValues; this.executorService = executorService; this.watermarkManager = WatermarkManager.create(clock, graph, AppliedPTransform::getFullName); this.sideInputContainer = SideInputContainer.create(this, graph.getViews()); this.applicationStateInternals = new ConcurrentHashMap<>(); this.metrics = new DirectMetrics(executorService); this.callbackExecutor = WatermarkCallbackExecutor.create(MoreExecutors.directExecutor()); }
Example #3
Source File: UnconsumedReadsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Traverses the pipeline and asserts that every value produced by a {@code Read.Bounded} or
 * {@code Read.Unbounded} transform is among the inputs of some primitive transform.
 */
private void validateConsumed() {
  final Set<PValue> consumed = new HashSet<>();
  final Set<PValue> readOutputs = new HashSet<>();

  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // Every input of a primitive transform counts as consumed.
          consumed.addAll(node.getInputs().values());
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          boolean producedByRead =
              producer.getTransform() instanceof Read.Bounded
                  || producer.getTransform() instanceof Read.Unbounded;
          if (producedByRead) {
            readOutputs.add(value);
          }
        }
      });

  PValue[] expectedConsumed = readOutputs.toArray(new PValue[0]);
  assertThat(consumed, Matchers.hasItems(expectedConsumed));
}
Example #4
Source File: SparkRunner.java From beam with Apache License 2.0 | 6 votes |
@Override public void doVisitTransform(TransformHierarchy.Node node) { // we populate cache candidates by updating the map with inputs of each node. // The goal is to detect the PCollections accessed more than one time, and so enable cache // on the underlying RDDs or DStreams. Map<TupleTag<?>, PValue> inputs = new HashMap<>(node.getInputs()); for (TupleTag<?> tupleTag : node.getTransform().getAdditionalInputs().keySet()) { inputs.remove(tupleTag); } for (PValue value : inputs.values()) { if (value instanceof PCollection) { long count = 1L; if (ctxt.getCacheCandidates().get(value) != null) { count = ctxt.getCacheCandidates().get(value) + 1; } ctxt.getCacheCandidates().put((PCollection) value, count); } } }
Example #5
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies the streaming sharded-write override factory to a {@code WriteFiles} transform and
 * checks the replacement transform and its output mapping.
 *
 * @param options pipeline options used to build the test pipeline and the factory
 * @param expectedNumShards shard count the replacement's shard provider is expected to return
 */
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
  TestPipeline pipeline = TestPipeline.fromOptions(options);

  StreamingShardedWriteFactory<Object, Void, Object> overrideFactory =
      new StreamingShardedWriteFactory<>(pipeline.getOptions());
  WriteFiles<Object, Void, Object> originalWrite =
      WriteFiles.to(new TestSink(tmpFolder.toString()));
  PCollection<Object> input = (PCollection) pipeline.apply(Create.empty(VoidCoder.of()));
  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of(
              "writefiles", input.expand(), Collections.emptyMap(), originalWrite, pipeline);

  // The replacement must be a different transform carrying the expected shard count.
  WriteFiles<Object, Void, Object> replacementWrite =
      (WriteFiles<Object, Void, Object>)
          overrideFactory.getReplacementTransform(originalApplication).getTransform();
  assertThat(replacementWrite, not(equalTo((Object) originalWrite)));
  assertThat(replacementWrite.getNumShardsProvider().get(), equalTo(expectedNumShards));

  // The single replacement output must map back to the original transform's output.
  WriteFilesResult<Void> originalResult = input.apply(originalWrite);
  WriteFilesResult<Void> replacementResult = input.apply(replacementWrite);
  Map<PValue, ReplacementOutput> outputMapping =
      overrideFactory.mapOutputs(originalResult.expand(), replacementResult);
  assertEquals(1, outputMapping.size());
  assertEquals(
      originalResult.getPerDestinationOutputFilenames(),
      outputMapping
          .get(replacementResult.getPerDestinationOutputFilenames())
          .getOriginal()
          .getValue());
}
Example #6
Source File: PTransformReplacements.java From beam with Apache License 2.0 | 6 votes |
/**
 * Returns the single input that is not listed in {@code ignoredTags}.
 *
 * @param inputs all tagged inputs of a transform
 * @param ignoredTags tags of inputs that must not be considered as the main input
 * @return the only non-ignored input, cast to a {@code PCollection}
 * @throws IllegalArgumentException (via {@code checkArgument}) if there is no non-ignored input,
 *     more than one, or the input is not a {@code PCollection}
 */
private static <T> PCollection<T> getSingletonMainInput(
    Map<TupleTag<?>, PValue> inputs, Set<TupleTag<?>> ignoredTags) {
  PCollection<T> found = null;
  for (Map.Entry<TupleTag<?>, PValue> entry : inputs.entrySet()) {
    if (ignoredTags.contains(entry.getKey())) {
      continue;
    }
    PValue candidate = entry.getValue();
    checkArgument(
        found == null,
        "Got multiple inputs that are not additional inputs for a "
            + "singleton main input: %s and %s",
        found,
        candidate);
    checkArgument(
        candidate instanceof PCollection, "Unexpected input type %s", candidate.getClass());
    found = (PCollection<T>) candidate;
  }
  checkArgument(
      found != null,
      "No main input found in inputs: Inputs %s, Side Input tags %s",
      inputs,
      ignoredTags);
  return found;
}
Example #7
Source File: FlinkStreamingTransformTranslatorsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs the read-source translator on {@code transform} with a single string output collection and
 * returns the transformation registered for that output.
 *
 * @param transform the transform to translate
 * @param isBounded boundedness given to the output collection
 * @param env execution environment used to build the translation context
 * @return the transformation of the data stream the context holds for the output collection
 */
private Object applyReadSourceTransform(
    PTransform<?, ?> transform, PCollection.IsBounded isBounded, StreamExecutionEnvironment env) {
  FlinkStreamingPipelineTranslator.StreamTransformTranslator<PTransform<?, ?>> readTranslator =
      getReadSourceTranslator();
  FlinkStreamingTranslationContext translationContext =
      new FlinkStreamingTranslationContext(env, PipelineOptionsFactory.create());

  Pipeline pipeline = Pipeline.create();
  PCollection<String> output =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), isBounded, StringUtf8Coder.of());
  output.setName("output");

  Map<TupleTag<?>, PValue> taggedOutputs = new HashMap<>();
  taggedOutputs.put(new TupleTag<>(), output);

  AppliedPTransform<?, ?, ?> application =
      AppliedPTransform.of(
          "test-transform", Collections.emptyMap(), taggedOutputs, transform, Pipeline.create());
  translationContext.setCurrentTransform(application);
  readTranslator.translateNode(transform, translationContext);

  return translationContext.getInputDataStream(output).getTransformation();
}
Example #8
Source File: BeamEnumerableConverter.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reports whether the pipeline contains an unbounded {@code PCollection}.
 *
 * <p>Starting from {@code BOUNDED}, the boundedness of every visited {@code PCollection} is
 * combined with {@code IsBounded.and}; the result is compared against {@code UNBOUNDED}.
 *
 * @param p the pipeline to traverse
 * @return {@code true} if the combined boundedness is {@code UNBOUNDED}
 */
private static boolean containsUnboundedPCollection(Pipeline p) {
  class BoundednessVisitor extends PipelineVisitor.Defaults {
    IsBounded boundedness = IsBounded.BOUNDED;

    @Override
    public void visitValue(PValue value, Node producer) {
      if (!(value instanceof PCollection)) {
        return;
      }
      IsBounded valueBoundedness = ((PCollection) value).isBounded();
      boundedness = boundedness.and(valueBoundedness);
    }
  }

  BoundednessVisitor boundednessVisitor = new BoundednessVisitor();
  p.traverseTopologically(boundednessVisitor);
  return boundednessVisitor.boundedness == IsBounded.UNBOUNDED;
}
Example #9
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the config for a pipeline that uses the LOCAL Samza execution environment and checks
 * that no entry of {@code ConfigBuilder.localRunConfig()} is missing from it.
 */
@Test
public void testSamzaLocalExecutionEnvironmentConfig() {
  SamzaPipelineOptions samzaOptions =
      PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  samzaOptions.setJobName("TestEnvConfig");
  samzaOptions.setRunner(SamzaRunner.class);
  samzaOptions.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL);

  Pipeline testPipeline = Pipeline.create(samzaOptions);
  testPipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());
  testPipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> pvalueIds = PViewToIdMapper.buildIdMap(testPipeline);
  final ConfigBuilder builder = new ConfigBuilder(samzaOptions);
  SamzaPipelineTranslator.createConfig(testPipeline, samzaOptions, pvalueIds, builder);
  final Config generated = builder.build();

  // Entries present only on the right side are the local-run defaults absent from the config.
  boolean noLocalRunEntryMissing =
      Maps.difference(generated, ConfigBuilder.localRunConfig()).entriesOnlyOnRight().isEmpty();
  assertTrue(noLocalRunEntryMissing);
}
Example #10
Source File: EvaluationContext.java From beam with Apache License 2.0 | 6 votes |
/** * Add output of transform to context map and possibly cache if it conforms {@link * #shouldCache(PTransform, PValue)}. * * @param transform from which Dataset was created * @param pvalue output of transform * @param dataset created Dataset from transform */ private void putDataset( @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) { try { dataset.setName(pvalue.getName()); } catch (IllegalStateException e) { // name not set, ignore } if (shouldCache(transform, pvalue)) { // we cache only PCollection Coder<?> coder = ((PCollection<?>) pvalue).getCoder(); Coder<? extends BoundedWindow> wCoder = ((PCollection<?>) pvalue).getWindowingStrategy().getWindowFn().windowCoder(); dataset.cache(storageLevel(), WindowedValue.getFullCoder(coder, wCoder)); } datasets.put(pvalue, dataset); leaves.add(dataset); }
Example #11
Source File: TranslationContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Looks up the message stream registered for a value.
 *
 * @param pvalue the value whose stream is requested
 * @param <OutT> element type the caller expects inside the stream's messages
 * @return the registered stream, cast (unchecked) to the requested type
 * @throws IllegalArgumentException if no stream is registered for {@code pvalue}
 */
public <OutT> MessageStream<OpMessage<OutT>> getMessageStream(PValue pvalue) {
  @SuppressWarnings("unchecked")
  final MessageStream<OpMessage<OutT>> registered =
      (MessageStream<OpMessage<OutT>>) messsageStreams.get(pvalue);
  if (registered != null) {
    return registered;
  }
  throw new IllegalArgumentException("No stream registered for pvalue: " + pvalue);
}
Example #12
Source File: WriteWithShardingFactory.java From beam with Apache License 2.0 | 5 votes |
@Override public Map<PValue, ReplacementOutput> mapOutputs( Map<TupleTag<?>, PValue> outputs, WriteFilesResult<DestinationT> newOutput) { // We must connect the new output from WriteFilesResult to the outputs provided by the original // transform. return ReplacementOutputs.tagged(outputs, newOutput); }
Example #13
Source File: TranslationContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns the id mapped to the given value.
 *
 * @param pvalue the value to look up in the id map
 * @return the id mapped to {@code pvalue}
 * @throws IllegalArgumentException if the id map has no entry for {@code pvalue}
 */
public String getIdForPValue(PValue pvalue) {
  final String mappedId = idMap.get(pvalue);
  if (mappedId != null) {
    return mappedId;
  }
  throw new IllegalArgumentException("No id mapping for value: " + pvalue);
}
Example #14
Source File: TranslationContext.java From beam with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") public <T> Dataset<WindowedValue<T>> getDataset(PValue value) { Dataset<?> dataset = datasets.get(value); // assume that the Dataset is used as an input if retrieved here. So it is not a leaf anymore leaves.remove(dataset); return (Dataset<WindowedValue<T>>) dataset; }
Example #15
Source File: JetTransformTranslators.java From beam with Apache License 2.0 | 5 votes |
/**
 * Adds a vertex backed by a {@code FlattenP.Supplier} to the DAG and registers its edges: one
 * end point per main input and a start point for the single output.
 *
 * @param pipeline the pipeline being translated
 * @param appliedTransform the applied transform to translate
 * @param node the hierarchy node of the transform
 * @param context translation context providing the DAG builder
 * @return the vertex added for this transform
 */
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  Collection<PValue> flattenedInputs = Utils.getMainInputs(pipeline, node);
  Map<String, Coder> inputCoders =
      Utils.getCoders(
          Utils.getInputs(appliedTransform), e -> Utils.getTupleTagId(e.getValue()));
  Map.Entry<TupleTag<?>, PValue> outputEntry = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) outputEntry.getValue());

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(appliedTransform.getFullName());
  FlattenP.Supplier flattenSupplier =
      new FlattenP.Supplier(inputCoders, outputCoder, vertexId);
  Vertex flattenVertex = dagBuilder.addVertex(vertexId, flattenSupplier);
  dagBuilder.registerConstructionListeners(flattenSupplier);

  // Incoming edges: one per main input.
  for (PValue mainInput : flattenedInputs) {
    PCollection<T> inputCollection = (PCollection<T>) mainInput;
    dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(inputCollection), flattenVertex);
  }

  // Outgoing edge for the single output.
  String outputEdgeId = Utils.getTupleTagId(outputEntry.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, outputEntry.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, flattenVertex, outputCoder);
  return flattenVertex;
}
Example #16
Source File: WithFailures.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expands this result into its tagged values.
 *
 * <p>The failures collection is always included. The output is included only when an output tag
 * is present and the output is itself a {@code PValue}.
 *
 * @return a new mutable map from tags to values
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  Map<TupleTag<?>, PValue> expanded = new HashMap<>();
  expanded.put(failuresTag(), failures());

  boolean hasTaggedPValueOutput = outputTag() != null && output() instanceof PValue;
  if (hasTaggedPValueOutput) {
    expanded.put(outputTag(), (PValue) output());
  }
  return expanded;
}
Example #17
Source File: PipelineTranslator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Switches the translation mode from BATCH to STREAMING when an unbounded {@code PCollection} is
 * visited. Does nothing when the mode is not BATCH.
 *
 * @param value the visited value
 * @param producer the node that produced the value
 */
@Override
public void visitValue(PValue value, TransformHierarchy.Node producer) {
  if (!translationMode.equals(TranslationMode.BATCH)) {
    return;
  }
  boolean isUnboundedCollection =
      value instanceof PCollection
          && ((PCollection) value).isBounded() == PCollection.IsBounded.UNBOUNDED;
  if (isUnboundedCollection) {
    LOG.info(
        "Found unbounded PCollection {}. Switching to streaming execution.", value.getName());
    translationMode = TranslationMode.STREAMING;
  }
}
Example #18
Source File: TransformHierarchy.java From beam with Apache License 2.0 | 5 votes |
/** * Finish specifying all of the input {@link PValue PValues} of the current {@link Node}. Ensures * that all of the inputs to the current node have been fully specified, and have been produced by * a node in this graph. */ public void finishSpecifyingInput() { // Inputs must be completely specified before they are consumed by a transform. for (PValue inputValue : current.getInputs().values()) { PInput input = producerInput.remove(inputValue); Node producerNode = maybeGetProducer(inputValue); if (producerNode != null) { inputValue.finishSpecifying(input, producerNode.getTransform()); } } }
Example #19
Source File: ReplacementOutputs.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the replacement mapping for a transform with exactly one original output and a
 * replacement value that expands to exactly one tagged value.
 *
 * @param original the original outputs; must contain exactly one entry
 * @param replacement the replacement value; its expansion must contain exactly one entry
 * @return a singleton map keyed by {@code replacement}
 */
public static Map<PValue, ReplacementOutput> singleton(
    Map<TupleTag<?>, PValue> original, PValue replacement) {
  Entry<TupleTag<?>, PValue> onlyOriginal = Iterables.getOnlyElement(original.entrySet());
  TupleTag<?> replacementTag =
      Iterables.getOnlyElement(replacement.expand().entrySet()).getKey();

  ReplacementOutput mapping =
      ReplacementOutput.of(
          TaggedPValue.of(onlyOriginal.getKey(), onlyOriginal.getValue()),
          TaggedPValue.of(replacementTag, replacement));
  return Collections.singletonMap(replacement, mapping);
}
Example #20
Source File: TransformHierarchy.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a new Node with the given parent and transform.
 *
 * <p>The node's inputs are the expansion of {@code input} followed by the transform's additional
 * inputs, stored in an immutable map.
 *
 * @param enclosingNode the composite node containing this node
 * @param transform the PTransform tracked by this node
 * @param fullName the fully qualified name of the transform
 * @param input the unexpanded input to the transform
 */
private Node(Node enclosingNode, PTransform<?, ?> transform, String fullName, PInput input) {
  this.enclosingNode = enclosingNode;
  this.transform = transform;
  this.fullName = fullName;
  this.inputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .putAll(input.expand())
          .putAll(transform.getAdditionalInputs())
          .build();
}
Example #21
Source File: DataflowRunner.java From beam with Apache License 2.0 | 5 votes |
/**
 * Replaces a {@code Combine.GloballyAsSingletonView} application with a
 * {@code BatchViewOverrides.BatchViewAsSingleton} that reuses its combine function and fanout.
 *
 * @param transform the applied transform to replace
 * @return the replacement, applied to the singleton main input of {@code transform}
 */
@Override
public PTransformReplacement<PCollection<InputT>, PValue> getReplacementTransform(
    AppliedPTransform<PCollection<InputT>, PValue, PTransform<PCollection<InputT>, PValue>>
        transform) {
  Combine.GloballyAsSingletonView<?, ?> singletonViewCombine =
      (Combine.GloballyAsSingletonView) transform.getTransform();
  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(transform),
      new BatchViewOverrides.BatchViewAsSingleton(
          runner,
          findCreatePCollectionView(transform),
          (CombineFn) singletonViewCombine.getCombineFn(),
          singletonViewCombine.getFanout()));
}
Example #22
Source File: EvaluationContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Retrieve an object of Type T associated with the PValue passed in.
 *
 * <p>A value already present in the object map is returned directly. Otherwise, if a dataset is
 * registered for the value, the only element of its collected RDD is stored in the object map
 * and returned.
 *
 * @param value PValue to retrieve associated data for.
 * @param <T> Type of object to return.
 * @return Native object.
 * @throws IllegalStateException if the value is known neither as an object nor as a collection
 */
@SuppressWarnings("TypeParameterUnusedInFormals")
public <T> T get(PValue value) {
  if (pobjects.containsKey(value)) {
    return (T) pobjects.get(value);
  }
  if (!pcollections.containsKey(value)) {
    throw new IllegalStateException("Cannot resolve un-known PObject: " + value);
  }

  JavaRDD<?> rdd = ((BoundedDataset) pcollections.get(value)).getRDD();
  T onlyElement = (T) Iterables.getOnlyElement(rdd.collect());
  pobjects.put(value, onlyElement);
  return onlyElement;
}
Example #23
Source File: PipelineTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Records each visited {@code PCollection} together with its coder, its windowing strategy and
 * the window coder of that strategy. Values that are not a {@code PCollection} are ignored.
 *
 * @param value the visited value
 * @param producer the node that produced the value
 */
@Override
public void visitValue(PValue value, Node producer) {
  if (!(value instanceof PCollection)) {
    return;
  }
  PCollection collection = (PCollection) value;
  pcollections.add(collection);
  addCoders(collection.getCoder());
  windowingStrategies.add(collection.getWindowingStrategy());
  addCoders(collection.getWindowingStrategy().getWindowFn().windowCoder());
}
Example #24
Source File: AppliedPTransform.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates an {@code AppliedPTransform} from its parts.
 *
 * @param fullName name given to the application
 * @param input tagged inputs of the application
 * @param output tagged outputs of the application
 * @param transform the transform that was applied
 * @param p the pipeline the transform was applied in
 * @return a new {@code AutoValue_AppliedPTransform} holding the given values
 */
public static <
        InputT extends PInput,
        OutputT extends POutput,
        TransformT extends PTransform<? super InputT, OutputT>>
    AppliedPTransform<InputT, OutputT, TransformT> of(
        String fullName,
        Map<TupleTag<?>, PValue> input,
        Map<TupleTag<?>, PValue> output,
        TransformT transform,
        Pipeline p) {
  AppliedPTransform<InputT, OutputT, TransformT> applied =
      new AutoValue_AppliedPTransform<>(fullName, input, output, transform, p);
  return applied;
}
Example #25
Source File: Twister2PipelineExecutionEnvironment.java From beam with Apache License 2.0 | 5 votes |
/**
 * Sets the streaming flag when an unbounded {@code PCollection} is visited. Does nothing once
 * the flag is already set.
 *
 * @param value the visited value
 * @param producer the node that produced the value
 */
@Override
public void visitValue(PValue value, TransformHierarchy.Node producer) {
  if (isStreaming) {
    return;
  }
  boolean isUnboundedCollection =
      value instanceof PCollection
          && ((PCollection) value).isBounded() == PCollection.IsBounded.UNBOUNDED;
  if (isUnboundedCollection) {
    LOG.info(
        "Found unbounded PCollection {}. Switching to streaming execution.", value.getName());
    isStreaming = true;
  }
}
Example #26
Source File: JetTransformTranslators.java From beam with Apache License 2.0 | 5 votes |
/**
 * Adds a vertex backed by {@code WindowGroupP.supplier} to the DAG and registers its edges: an
 * end point for the input and a start point for the single output.
 *
 * @param pipeline the pipeline being translated
 * @param appliedTransform the applied transform to translate
 * @param node the hierarchy node of the transform
 * @param context translation context providing the options and the DAG builder
 * @return the vertex added for this transform
 */
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  String transformName = appliedTransform.getFullName();

  PCollection<KV<K, InputT>> keyedInput =
      (PCollection<KV<K, InputT>>) Utils.getInput(appliedTransform);
  WindowedValue.WindowedValueCoder<KV<K, InputT>> inputCoder =
      Utils.getWindowedValueCoder(keyedInput);
  Map.Entry<TupleTag<?>, PValue> outputEntry = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) outputEntry.getValue());
  WindowingStrategy<?, ?> windowingStrategy = keyedInput.getWindowingStrategy();

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(transformName);
  Vertex groupVertex =
      dagBuilder.addVertex(
          vertexId,
          WindowGroupP.supplier(
              context.getOptions(), inputCoder, outputCoder, windowingStrategy, vertexId));

  // Incoming edge for the input.
  dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(keyedInput), groupVertex);

  // Outgoing edge for the single output.
  String outputEdgeId = Utils.getTupleTagId(outputEntry.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, outputEntry.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, groupVertex, outputCoder);
  return groupVertex;
}
Example #27
Source File: ConfigContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns the id mapped to the given value.
 *
 * @param pvalue the value to look up in the id map
 * @return the id mapped to {@code pvalue}
 * @throws IllegalArgumentException if the id map has no entry for {@code pvalue}
 */
private String getIdForPValue(PValue pvalue) {
  final String mappedId = idMap.get(pvalue);
  if (mappedId != null) {
    return mappedId;
  }
  throw new IllegalArgumentException("No id mapping for value: " + pvalue);
}
Example #28
Source File: DirectGraph.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns the applied transform that produced the given value.
 *
 * <p>A {@code PCollection} is looked up in the producers map; a {@code PCollectionView} is
 * resolved through {@code getWriter}.
 *
 * @param produced the value whose producer is requested
 * @return the producing transform as resolved above
 * @throws IllegalArgumentException if {@code produced} is neither of the two known types
 */
@Override
public AppliedPTransform<?, ?, ?> getProducer(PValue produced) {
  if (produced instanceof PCollection) {
    return producers.get(produced);
  }
  if (produced instanceof PCollectionView) {
    return getWriter((PCollectionView<?>) produced);
  }
  throw new IllegalArgumentException(
      String.format(
          "Unknown %s type %s. Known types: %s and %s",
          PValue.class.getSimpleName(),
          produced.getClass().getName(),
          PCollection.class.getSimpleName(),
          PCollectionView.class.getSimpleName()));
}
Example #29
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Adds an input by name, referring to it through the output reference of its producer.
 *
 * @param name the name of the input
 * @param value the input; must be a {@code PValue}
 * @throws IllegalStateException if {@code value} is not a {@code PValue}
 */
@Override
public void addInput(String name, PInput value) {
  if (!(value instanceof PValue)) {
    throw new IllegalStateException("Input must be a PValue");
  }
  PValue pvalue = (PValue) value;
  addInput(name, translator.asOutputReference(pvalue, translator.getProducer(pvalue)));
}
Example #30
Source File: WriteResult.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expands this result into a single tagged value.
 *
 * @return the failed-inserts entry when its tag is set, otherwise the failed-inserts-with-error
 *     entry
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  if (failedInsertsTag == null) {
    return ImmutableMap.of(failedInsertsWithErrTag, failedInsertsWithErr);
  }
  return ImmutableMap.of(failedInsertsTag, failedInserts);
}