org.apache.beam.sdk.values.PValue Java Examples

The following examples show how to use org.apache.beam.sdk.values.PValue. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlattenTranslatorBatch.java    From twister2 with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@code Flatten.PCollections} node by collecting the TSet of every input, unioning
 * them, and registering the result as the data set of the transform's output.
 *
 * @param transform the flatten transform being translated
 * @param context translation context that supplies the input TSets and receives the output
 * @throws UnsupportedOperationException if the transform has no inputs
 */
@Override
public void translateNode(
    Flatten.PCollections<T> transform, Twister2BatchTranslationContext context) {
  Collection<PValue> inputValues = context.getInputs().values();
  if (inputValues.isEmpty()) {
    // TODO: create empty TSet
    throw new UnsupportedOperationException("Operation not implemented yet");
  }

  List<BatchTSetImpl<WindowedValue<T>>> inputTSets = new ArrayList<>();
  for (PValue inputValue : inputValues) {
    BatchTSetImpl<WindowedValue<T>> inputTSet = context.getInputDataSet(inputValue);
    inputTSets.add(inputTSet);
  }

  // The head of the list is the receiver of the union; whatever remains is its argument.
  BatchTSetImpl<WindowedValue<T>> head = inputTSets.remove(0);
  Collection<TSet<WindowedValue<T>>> rest = new ArrayList<>(inputTSets);

  BatchTSetImpl<WindowedValue<T>> result;
  if (rest.isEmpty()) {
    // A single input needs no union.
    result = head;
  } else {
    result = head.union(rest);
  }
  context.setOutputDataSet(context.getOutput(transform), result);
}
 
Example #2
Source File: EvaluationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an evaluation context: stores the supplied collaborators and builds the watermark
 * manager, side input container, application state map, metrics and callback executor.
 *
 * @param clock stored as-is and handed to the watermark manager
 * @param bundleFactory passed through {@code checkNotNull} before being stored
 * @param graph passed through {@code checkNotNull} before being stored; also supplies the views
 *     for the side input container
 * @param keyedPValues stored as-is
 * @param executorService stored as-is and handed to {@link DirectMetrics}
 */
private EvaluationContext(
    Clock clock,
    BundleFactory bundleFactory,
    DirectGraph graph,
    Set<PValue> keyedPValues,
    ExecutorService executorService) {
  this.clock = clock;
  this.bundleFactory = checkNotNull(bundleFactory);
  this.graph = checkNotNull(graph);
  this.keyedPValues = keyedPValues;
  this.executorService = executorService;

  this.watermarkManager = WatermarkManager.create(clock, graph, AppliedPTransform::getFullName);
  // NOTE: `this` is handed out here while the fields assigned below are still unset, so the
  // order of these statements matters.
  this.sideInputContainer = SideInputContainer.create(this, graph.getViews());

  this.applicationStateInternals = new ConcurrentHashMap<>();
  this.metrics = new DirectMetrics(executorService);

  // The callback executor is built on a direct executor, not on executorService.
  this.callbackExecutor = WatermarkCallbackExecutor.create(MoreExecutors.directExecutor());
}
 
Example #3
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the pipeline and asserts that every value produced by a {@code Read.Bounded} or {@code
 * Read.Unbounded} transform is among the inputs of some primitive transform.
 */
private void validateConsumed() {
  final Set<PValue> readOutputs = new HashSet<>();
  final Set<PValue> consumed = new HashSet<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitValue(PValue value, Node producer) {
          boolean producedByRead =
              producer.getTransform() instanceof Read.Bounded
                  || producer.getTransform() instanceof Read.Unbounded;
          if (producedByRead) {
            readOutputs.add(value);
          }
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
          // Every input of a primitive transform counts as consumed.
          consumed.addAll(node.getInputs().values());
        }
      });
  PValue[] expected = readOutputs.toArray(new PValue[0]);
  assertThat(consumed, Matchers.hasItems(expected));
}
 
Example #4
Source File: SparkRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Updates the cache-candidate counts with the main inputs of the visited node.
 *
 * @param node the transform node being visited
 */
@Override
public void doVisitTransform(TransformHierarchy.Node node) {
  // we populate cache candidates by updating the map with inputs of each node.
  // The goal is to detect the PCollections accessed more than one time, and so enable cache
  // on the underlying RDDs or DStreams.
  Map<TupleTag<?>, PValue> mainInputs = new HashMap<>(node.getInputs());
  // Additional inputs are not counted, so drop their tags from the copy.
  mainInputs.keySet().removeAll(node.getTransform().getAdditionalInputs().keySet());

  for (PValue input : mainInputs.values()) {
    if (!(input instanceof PCollection)) {
      continue;
    }
    // First sighting stores 1; every later sighting adds 1 to the stored count.
    ctxt.getCacheCandidates().merge((PCollection) input, 1L, Long::sum);
  }
}
 
Example #5
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link StreamingShardedWriteFactory} replaces a {@link WriteFiles} transform with a
 * different transform whose shard-count provider yields {@code expectedNumShards}, and that the
 * factory's output mapping links the replacement's per-destination filenames back to the
 * original's.
 *
 * @param options pipeline options used to build the test pipeline and the factory
 * @param expectedNumShards shard count the replacement transform is expected to report
 */
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
  TestPipeline p = TestPipeline.fromOptions(options);

  StreamingShardedWriteFactory<Object, Void, Object> factory =
      new StreamingShardedWriteFactory<>(p.getOptions());
  WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
  // Build the applied transform by hand, with the collection as its only input and no outputs.
  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of("writefiles", objs.expand(), Collections.emptyMap(), original, p);

  WriteFiles<Object, Void, Object> replacement =
      (WriteFiles<Object, Void, Object>)
          factory.getReplacementTransform(originalApplication).getTransform();
  // The factory must hand back a different transform carrying the expected shard count.
  assertThat(replacement, not(equalTo((Object) original)));
  assertThat(replacement.getNumShardsProvider().get(), equalTo(expectedNumShards));

  // Apply both transforms so that their outputs can be mapped onto each other.
  WriteFilesResult<Void> originalResult = objs.apply(original);
  WriteFilesResult<Void> replacementResult = objs.apply(replacement);
  Map<PValue, ReplacementOutput> res =
      factory.mapOutputs(originalResult.expand(), replacementResult);
  assertEquals(1, res.size());
  // The single mapping is keyed by the replacement's output and points at the original's.
  assertEquals(
      originalResult.getPerDestinationOutputFilenames(),
      res.get(replacementResult.getPerDestinationOutputFilenames()).getOriginal().getValue());
}
 
Example #6
Source File: PTransformReplacements.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the single input whose tag is not in {@code ignoredTags}.
 *
 * @param inputs all tagged inputs of a transform
 * @param ignoredTags tags to skip when looking for the main input
 * @param <T> element type the main input is cast to
 * @return the only non-ignored input, as a {@link PCollection}
 * @throws IllegalArgumentException if there is no such input, more than one, or it is not a
 *     {@link PCollection} (assuming {@code checkArgument} is Guava's — confirm against imports)
 */
private static <T> PCollection<T> getSingletonMainInput(
    Map<TupleTag<?>, PValue> inputs, Set<TupleTag<?>> ignoredTags) {
  PCollection<T> found = null;
  for (Map.Entry<TupleTag<?>, PValue> entry : inputs.entrySet()) {
    if (ignoredTags.contains(entry.getKey())) {
      continue;
    }
    PValue candidate = entry.getValue();
    // A second non-ignored input means the main input is not a singleton.
    checkArgument(
        found == null,
        "Got multiple inputs that are not additional inputs for a "
            + "singleton main input: %s and %s",
        found,
        candidate);
    checkArgument(
        candidate instanceof PCollection, "Unexpected input type %s", candidate.getClass());
    found = (PCollection<T>) candidate;
  }
  checkArgument(
      found != null,
      "No main input found in inputs: Inputs %s, Side Input tags %s",
      inputs,
      ignoredTags);
  return found;
}
 
Example #7
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the read-source translator on {@code transform} with a hand-built output collection and
 * returns the transformation of the data stream registered for that collection.
 *
 * @param transform the read transform to translate
 * @param isBounded boundedness given to the synthetic output collection
 * @param env Flink execution environment used to build the translation context
 * @return the result of {@code getTransformation()} on the data stream looked up for the
 *     output collection
 */
private Object applyReadSourceTransform(
    PTransform<?, ?> transform, PCollection.IsBounded isBounded, StreamExecutionEnvironment env) {

  FlinkStreamingPipelineTranslator.StreamTransformTranslator<PTransform<?, ?>> translator =
      getReadSourceTranslator();
  FlinkStreamingTranslationContext ctx =
      new FlinkStreamingTranslationContext(env, PipelineOptionsFactory.create());

  // Synthetic output collection named "output", in the global default windowing strategy.
  Pipeline pipeline = Pipeline.create();
  PCollection<String> pc =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), isBounded, StringUtf8Coder.of());
  pc.setName("output");

  Map<TupleTag<?>, PValue> outputs = new HashMap<>();
  outputs.put(new TupleTag<>(), pc);
  // NOTE(review): a fresh Pipeline is created here rather than reusing `pipeline` — confirm
  // this is intentional.
  AppliedPTransform<?, ?, ?> appliedTransform =
      AppliedPTransform.of(
          "test-transform", Collections.emptyMap(), outputs, transform, Pipeline.create());

  // The translator reads the current transform from the context, so set it first.
  ctx.setCurrentTransform(appliedTransform);
  translator.translateNode(transform, ctx);

  return ctx.getInputDataStream(pc).getTransformation();
}
 
Example #8
Source File: BeamEnumerableConverter.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Traverses the pipeline and reports whether folding the boundedness of every visited {@link
 * PCollection} with {@code IsBounded.and}, starting from {@code BOUNDED}, yields {@code
 * UNBOUNDED}.
 *
 * @param p the pipeline to inspect
 * @return {@code true} if the folded boundedness is {@code UNBOUNDED}
 */
private static boolean containsUnboundedPCollection(Pipeline p) {
  class UnboundedDetector extends PipelineVisitor.Defaults {
    IsBounded combined = IsBounded.BOUNDED;

    @Override
    public void visitValue(PValue value, Node producer) {
      if (!(value instanceof PCollection)) {
        return;
      }
      PCollection<?> collection = (PCollection<?>) value;
      combined = combined.and(collection.isBounded());
    }
  }

  UnboundedDetector detector = new UnboundedDetector();
  p.traverseTopologically(detector);
  return IsBounded.UNBOUNDED == detector.combined;
}
 
Example #9
Source File: ConfigGeneratorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small pipeline with the LOCAL Samza execution environment and checks that the
 * generated config contains every entry of {@code ConfigBuilder.localRunConfig()}.
 */
@Test
public void testSamzaLocalExecutionEnvironmentConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestEnvConfig");
  options.setRunner(SamzaRunner.class);
  options.setSamzaExecutionEnvironment(SamzaExecutionEnvironment.LOCAL);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());

  // Apply the Samza overrides before building the id map and translating.
  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config = configBuilder.build();

  // No local-run entry may be missing from the generated config (nothing "only on the right").
  assertTrue(
      Maps.difference(config, ConfigBuilder.localRunConfig()).entriesOnlyOnRight().isEmpty());
}
 
Example #10
Source File: EvaluationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers {@code dataset} as the data for {@code pvalue}, records it as a leaf, and caches it
 * first when {@link #shouldCache(PTransform, PValue)} says so.
 *
 * @param transform transform that produced the dataset; may be null
 * @param pvalue output value the dataset belongs to
 * @param dataset dataset created for {@code pvalue}
 */
private void putDataset(
    @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) {
  try {
    dataset.setName(pvalue.getName());
  } catch (IllegalStateException ignored) {
    // name not set, ignore
  }

  if (shouldCache(transform, pvalue)) {
    // we cache only PCollection
    PCollection<?> collection = (PCollection<?>) pvalue;
    Coder<?> elementCoder = collection.getCoder();
    Coder<? extends BoundedWindow> windowCoder =
        collection.getWindowingStrategy().getWindowFn().windowCoder();
    dataset.cache(storageLevel(), WindowedValue.getFullCoder(elementCoder, windowCoder));
  }

  datasets.put(pvalue, dataset);
  leaves.add(dataset);
}
 
Example #11
Source File: TranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the message stream registered for {@code pvalue}.
 *
 * @param pvalue value whose stream is requested
 * @param <OutT> element type the stream is cast to
 * @return the registered stream
 * @throws IllegalArgumentException if nothing is registered for {@code pvalue}
 */
public <OutT> MessageStream<OpMessage<OutT>> getMessageStream(PValue pvalue) {
  final Object registered = messsageStreams.get(pvalue);
  if (registered == null) {
    throw new IllegalArgumentException("No stream registered for pvalue: " + pvalue);
  }
  @SuppressWarnings("unchecked")
  final MessageStream<OpMessage<OutT>> typed = (MessageStream<OpMessage<OutT>>) registered;
  return typed;
}
 
Example #12
Source File: WriteWithShardingFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps the outputs of the original transform to those of the replacement by delegating to
 * {@code ReplacementOutputs.tagged}.
 *
 * @param outputs tagged outputs of the original transform
 * @param newOutput result produced by the replacement transform
 * @return whatever {@code ReplacementOutputs.tagged(outputs, newOutput)} returns
 */
@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, WriteFilesResult<DestinationT> newOutput) {
  // We must connect the new output from WriteFilesResult to the outputs provided by the original
  // transform.
  return ReplacementOutputs.tagged(outputs, newOutput);
}
 
Example #13
Source File: TranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the id that {@code idMap} holds for {@code pvalue}.
 *
 * @param pvalue value to look up
 * @return the mapped id, never null
 * @throws IllegalArgumentException if {@code idMap} has no id for {@code pvalue}
 */
public String getIdForPValue(PValue pvalue) {
  final String mappedId = idMap.get(pvalue);
  if (mappedId != null) {
    return mappedId;
  }
  throw new IllegalArgumentException("No id mapping for value: " + pvalue);
}
 
Example #14
Source File: TranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the dataset stored for {@code value} and removes it from the set of leaves.
 *
 * @param value value whose dataset is requested
 * @param <T> element type the dataset is cast to
 * @return whatever {@code datasets.get(value)} yields, cast to the requested element type
 */
public <T> Dataset<WindowedValue<T>> getDataset(PValue value) {
  Dataset<?> registered = datasets.get(value);
  // assume that the Dataset is used as an input if retrieved here. So it is not a leaf anymore
  leaves.remove(registered);
  @SuppressWarnings("unchecked")
  Dataset<WindowedValue<T>> typed = (Dataset<WindowedValue<T>>) registered;
  return typed;
}
 
Example #15
Source File: JetTransformTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the DAG vertex for a flatten transform: creates a {@code FlattenP} processor supplier,
 * adds it as a vertex, and registers the vertex as the end point of each main input edge and
 * the start point of the output edge.
 *
 * @param pipeline pipeline being translated; used to find the node's main inputs
 * @param appliedTransform the applied flatten transform
 * @param node hierarchy node of the transform
 * @param context translation context that supplies the DAG builder
 * @return the vertex added to the DAG
 */
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  Collection<PValue> mainInputs = Utils.getMainInputs(pipeline, node);
  // Input coders are keyed by the tuple tag id of each input value.
  Map<String, Coder> inputCoders =
      Utils.getCoders(
          Utils.getInputs(appliedTransform), e -> Utils.getTupleTagId(e.getValue()));
  Map.Entry<TupleTag<?>, PValue> output = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) output.getValue());

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(appliedTransform.getFullName());
  FlattenP.Supplier processorSupplier =
      new FlattenP.Supplier(inputCoders, outputCoder, vertexId);
  Vertex vertex = dagBuilder.addVertex(vertexId, processorSupplier);
  dagBuilder.registerConstructionListeners(processorSupplier);

  // Every main input edge ends at this vertex.
  for (PValue value : mainInputs) {
    PCollection<T> input = (PCollection<T>) value;
    dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(input), vertex);
  }

  // The single output edge starts at this vertex.
  String outputEdgeId = Utils.getTupleTagId(output.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, output.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, vertex, outputCoder);
  return vertex;
}
 
Example #16
Source File: WithFailures.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expands this result into its tagged values: always the failures collection, plus the output
 * when it has a tag and is itself a {@link PValue}.
 *
 * @return a new mutable map from tag to value
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  Map<TupleTag<?>, PValue> expanded = new HashMap<>();
  expanded.put(failuresTag(), failures());

  boolean hasTaggedPValueOutput = outputTag() != null && output() instanceof PValue;
  if (hasTaggedPValueOutput) {
    expanded.put(outputTag(), (PValue) output());
  }
  return expanded;
}
 
Example #17
Source File: PipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Switches the translation mode from BATCH to STREAMING on the first unbounded {@link
 * PCollection} that is visited.
 *
 * @param value the visited value
 * @param producer node that produced the value (unused)
 */
@Override
public void visitValue(PValue value, TransformHierarchy.Node producer) {
  if (!translationMode.equals(TranslationMode.BATCH)) {
    return;
  }
  boolean unboundedCollection =
      value instanceof PCollection
          && ((PCollection) value).isBounded() == PCollection.IsBounded.UNBOUNDED;
  if (!unboundedCollection) {
    return;
  }
  LOG.info(
      "Found unbounded PCollection {}. Switching to streaming execution.", value.getName());
  translationMode = TranslationMode.STREAMING;
}
 
Example #18
Source File: TransformHierarchy.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Finishes specifying every input {@link PValue} of the current {@link Node}. Each input is
 * removed from {@code producerInput}, and those with a known producer node are told to finish
 * specifying against that producer's transform.
 */
public void finishSpecifyingInput() {
  // Inputs must be completely specified before they are consumed by a transform.
  for (PValue consumed : current.getInputs().values()) {
    PInput originalInput = producerInput.remove(consumed);
    Node producer = maybeGetProducer(consumed);
    if (producer == null) {
      continue;
    }
    consumed.finishSpecifying(originalInput, producer.getTransform());
  }
}
 
Example #19
Source File: ReplacementOutputs.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output mapping for a transform with exactly one original output and a replacement
 * value that expands to exactly one tagged value.
 *
 * @param original tagged outputs of the original transform; must hold exactly one entry
 * @param replacement replacement output; its expansion must hold exactly one entry
 * @return a single-entry map from {@code replacement} to its {@link ReplacementOutput}
 */
public static Map<PValue, ReplacementOutput> singleton(
    Map<TupleTag<?>, PValue> original, PValue replacement) {
  Entry<TupleTag<?>, PValue> onlyOriginal = Iterables.getOnlyElement(original.entrySet());
  TupleTag<?> replacementTag = Iterables.getOnlyElement(replacement.expand().entrySet()).getKey();

  TaggedPValue originalTagged =
      TaggedPValue.of(onlyOriginal.getKey(), onlyOriginal.getValue());
  TaggedPValue replacementTagged = TaggedPValue.of(replacementTag, replacement);
  return Collections.singletonMap(
      replacement, ReplacementOutput.of(originalTagged, replacementTagged));
}
 
Example #20
Source File: TransformHierarchy.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new Node with the given parent and transform. The node's inputs are the expansion
 * of {@code input} followed by the transform's additional inputs.
 *
 * @param enclosingNode the composite node containing this node
 * @param transform the PTransform tracked by this node
 * @param fullName the fully qualified name of the transform
 * @param input the unexpanded input to the transform
 */
private Node(Node enclosingNode, PTransform<?, ?> transform, String fullName, PInput input) {
  this.enclosingNode = enclosingNode;
  this.transform = transform;
  this.fullName = fullName;
  this.inputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .putAll(input.expand())
          .putAll(transform.getAdditionalInputs())
          .build();
}
 
Example #21
Source File: DataflowRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces a {@code Combine.GloballyAsSingletonView} application with a {@code
 * BatchViewOverrides.BatchViewAsSingleton} that reuses the original combine function and
 * fanout.
 *
 * @param transform the applied transform to replace; its transform is cast to {@code
 *     Combine.GloballyAsSingletonView}
 * @return the replacement, paired with the singleton main input of {@code transform}
 */
@Override
public PTransformReplacement<PCollection<InputT>, PValue> getReplacementTransform(
    AppliedPTransform<PCollection<InputT>, PValue, PTransform<PCollection<InputT>, PValue>>
        transform) {
  // Raw cast: the declared transform type is only PTransform<PCollection<InputT>, PValue>.
  Combine.GloballyAsSingletonView<?, ?> combineTransform =
      (Combine.GloballyAsSingletonView) transform.getTransform();
  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(transform),
      new BatchViewOverrides.BatchViewAsSingleton(
          runner,
          findCreatePCollectionView(transform),
          (CombineFn) combineTransform.getCombineFn(),
          combineTransform.getFanout()));
}
 
Example #22
Source File: EvaluationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the native object associated with {@code value}. An object already held in {@code
 * pobjects} is returned directly; otherwise the RDD behind the matching entry in {@code
 * pcollections} is collected, its only element is stored in {@code pobjects}, and that element
 * is returned.
 *
 * @param value PValue to retrieve associated data for.
 * @param <T> Type of object to return.
 * @return Native object.
 * @throws IllegalStateException if {@code value} is in neither {@code pobjects} nor {@code
 *     pcollections}
 */
@SuppressWarnings("TypeParameterUnusedInFormals")
public <T> T get(PValue value) {
  if (pobjects.containsKey(value)) {
    return (T) pobjects.get(value);
  }
  if (!pcollections.containsKey(value)) {
    throw new IllegalStateException("Cannot resolve un-known PObject: " + value);
  }
  JavaRDD<?> rdd = ((BoundedDataset) pcollections.get(value)).getRDD();
  T materialized = (T) Iterables.getOnlyElement(rdd.collect());
  // Remember the collected element so that later lookups take the first branch.
  pobjects.put(value, materialized);
  return materialized;
}
 
Example #23
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Records each visited {@link PCollection} together with its coder, its windowing strategy and
 * the window coder of that strategy. Other values are ignored.
 *
 * @param value the visited value
 * @param producer node that produced the value (unused)
 */
@Override
public void visitValue(PValue value, Node producer) {
  if (!(value instanceof PCollection)) {
    return;
  }
  PCollection collection = (PCollection) value;
  pcollections.add(collection);
  addCoders(collection.getCoder());
  windowingStrategies.add(collection.getWindowingStrategy());
  addCoders(collection.getWindowingStrategy().getWindowFn().windowCoder());
}
 
Example #24
Source File: AppliedPTransform.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link AppliedPTransform} from its parts.
 *
 * @param fullName name passed through as the first constructor argument
 * @param input tagged inputs of the application
 * @param output tagged outputs of the application
 * @param transform the transform that was applied
 * @param p the pipeline the application belongs to
 * @param <InputT> input type of the application
 * @param <OutputT> output type of the application
 * @param <TransformT> type of the applied transform
 * @return a new {@code AutoValue_AppliedPTransform} holding the given values
 */
public static <
        InputT extends PInput,
        OutputT extends POutput,
        TransformT extends PTransform<? super InputT, OutputT>>
    AppliedPTransform<InputT, OutputT, TransformT> of(
        String fullName,
        Map<TupleTag<?>, PValue> input,
        Map<TupleTag<?>, PValue> output,
        TransformT transform,
        Pipeline p) {
  return new AutoValue_AppliedPTransform<>(fullName, input, output, transform, p);
}
 
Example #25
Source File: Twister2PipelineExecutionEnvironment.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sets {@code isStreaming} on the first unbounded {@link PCollection} that is visited. Does
 * nothing once {@code isStreaming} is already true.
 *
 * @param value the visited value
 * @param producer node that produced the value (unused)
 */
@Override
public void visitValue(PValue value, TransformHierarchy.Node producer) {
  if (isStreaming) {
    return;
  }
  boolean unboundedCollection =
      value instanceof PCollection
          && ((PCollection) value).isBounded() == PCollection.IsBounded.UNBOUNDED;
  if (!unboundedCollection) {
    return;
  }
  LOG.info(
      "Found unbounded PCollection {}. Switching to streaming execution.", value.getName());
  isStreaming = true;
}
 
Example #26
Source File: JetTransformTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the DAG vertex for a windowed grouping transform: adds a {@code WindowGroupP} vertex
 * configured with the input's coder and windowing strategy, and registers it as the end point
 * of the input edge and the start point of the output edge.
 *
 * @param pipeline pipeline being translated (unused here)
 * @param appliedTransform the applied transform; supplies the name, input and output
 * @param node hierarchy node of the transform (unused here)
 * @param context translation context that supplies the options and the DAG builder
 * @return the vertex added to the DAG
 */
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  String transformName = appliedTransform.getFullName();

  PCollection<KV<K, InputT>> input =
      (PCollection<KV<K, InputT>>) Utils.getInput(appliedTransform);
  WindowedValue.WindowedValueCoder<KV<K, InputT>> inputCoder =
      Utils.getWindowedValueCoder(input);
  Map.Entry<TupleTag<?>, PValue> output = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) output.getValue());

  // The windowing strategy is taken from the input collection.
  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(transformName);
  Vertex vertex =
      dagBuilder.addVertex(
          vertexId,
          WindowGroupP.supplier(
              context.getOptions(), inputCoder, outputCoder, windowingStrategy, vertexId));

  // The input edge ends at this vertex.
  dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(input), vertex);

  // The output edge starts at this vertex.
  String outputEdgeId = Utils.getTupleTagId(output.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, output.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, vertex, outputCoder);
  return vertex;
}
 
Example #27
Source File: ConfigContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the id that {@code idMap} holds for {@code pvalue}.
 *
 * @param pvalue value to look up
 * @return the mapped id, never null
 * @throws IllegalArgumentException if {@code idMap} has no id for {@code pvalue}
 */
private String getIdForPValue(PValue pvalue) {
  final String mappedId = idMap.get(pvalue);
  if (mappedId != null) {
    return mappedId;
  }
  throw new IllegalArgumentException("No id mapping for value: " + pvalue);
}
 
Example #28
Source File: DirectGraph.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the transform application that produced {@code produced}: the entry in {@code
 * producers} for a {@link PCollection}, or the result of {@code getWriter} for a {@link
 * PCollectionView}.
 *
 * @param produced the value whose producer is requested
 * @return the producing application, as looked up for the value's kind
 * @throws IllegalArgumentException if {@code produced} is neither a {@link PCollection} nor a
 *     {@link PCollectionView}
 */
@Override
public AppliedPTransform<?, ?, ?> getProducer(PValue produced) {
  if (produced instanceof PCollection) {
    return producers.get(produced);
  }
  if (produced instanceof PCollectionView) {
    return getWriter((PCollectionView<?>) produced);
  }
  String message =
      String.format(
          "Unknown %s type %s. Known types: %s and %s",
          PValue.class.getSimpleName(),
          produced.getClass().getName(),
          PCollection.class.getSimpleName(),
          PCollectionView.class.getSimpleName());
  throw new IllegalArgumentException(message);
}
 
Example #29
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Adds an input named {@code name} that refers to the output reference of {@code value}.
 *
 * @param name name of the input
 * @param value the input; must be a {@link PValue}
 * @throws IllegalStateException if {@code value} is not a {@link PValue}
 */
@Override
public void addInput(String name, PInput value) {
  if (!(value instanceof PValue)) {
    throw new IllegalStateException("Input must be a PValue");
  }
  PValue pvalue = (PValue) value;
  addInput(name, translator.asOutputReference(pvalue, translator.getProducer(pvalue)));
}
 
Example #30
Source File: WriteResult.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expands this result into a single tagged value: the failed-inserts collection when {@code
 * failedInsertsTag} is set, otherwise the failed-inserts-with-error collection.
 *
 * @return an immutable single-entry map from tag to collection
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  if (failedInsertsTag == null) {
    return ImmutableMap.of(failedInsertsWithErrTag, failedInsertsWithErr);
  }
  return ImmutableMap.of(failedInsertsTag, failedInserts);
}