Java Code Examples for org.apache.beam.sdk.io.Read#Bounded

The following examples show how to use org.apache.beam.sdk.io.Read#Bounded. You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a bounded read in an environment where only the parallelism is set, and asserts
 * that the resulting wrapped source reports {@code parallelism} split sources.
 */
@Test
public void readSourceTranslatorBoundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Bounded boundedRead = Read.from(new TestBoundedSource(parallelism));
  StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  executionEnv.setParallelism(parallelism);

  SourceTransformation<?> translated =
      (SourceTransformation)
          applyReadSourceTransform(boundedRead, PCollection.IsBounded.BOUNDED, executionEnv);

  // The operator's user function is expected to be the record-id-less unbounded wrapper.
  Object userFunction = translated.getOperator().getUserFunction();
  UnboundedSourceWrapperNoValueWithRecordId wrapper =
      (UnboundedSourceWrapperNoValueWithRecordId) userFunction;

  int splitCount = wrapper.getUnderlyingSource().getSplitSources().size();
  assertEquals(parallelism, splitCount);
}
 
Example 2
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a bounded read in an environment where both parallelism and max parallelism are
 * set, and asserts that the resulting wrapped source reports {@code maxParallelism} split
 * sources.
 */
@Test
public void readSourceTranslatorBoundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Bounded boundedRead = Read.from(new TestBoundedSource(maxParallelism));
  StreamExecutionEnvironment executionEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  executionEnv.setParallelism(parallelism);
  executionEnv.setMaxParallelism(maxParallelism);

  SourceTransformation<?> translated =
      (SourceTransformation)
          applyReadSourceTransform(boundedRead, PCollection.IsBounded.BOUNDED, executionEnv);

  // The operator's user function is expected to be the record-id-less unbounded wrapper.
  Object userFunction = translated.getOperator().getUserFunction();
  UnboundedSourceWrapperNoValueWithRecordId wrapper =
      (UnboundedSourceWrapperNoValueWithRecordId) userFunction;

  int splitCount = wrapper.getUnderlyingSource().getSplitSources().size();
  assertEquals(maxParallelism, splitCount);
}
 
Example 3
Source File: TransformTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the evaluator for {@link Read.Bounded}: it wraps the transform's source in a
 * {@code SourceRDD.Bounded}, converts that to a {@code JavaRDD}, and registers the result with the
 * evaluation context as a {@code BoundedDataset}.
 *
 * @param <T> element type produced by the bounded source
 * @return a new evaluator instance
 */
private static <T> TransformEvaluator<Read.Bounded<T>> readBounded() {
  return new TransformEvaluator<Read.Bounded<T>>() {
    @Override
    public void evaluate(Read.Bounded<T> transform, EvaluationContext context) {
      String fullStepName = context.getCurrentTransform().getFullName();
      final JavaSparkContext sparkContext = context.getSparkContext();

      // Back the RDD with the BoundedSource carried by the transform.
      SourceRDD.Bounded<T> sourceRdd =
          new SourceRDD.Bounded<>(
              sparkContext.sc(),
              transform.getSource(),
              context.getSerializableOptions(),
              fullStepName);
      JavaRDD<WindowedValue<T>> elements = sourceRdd.toJavaRDD();

      context.putDataset(transform, new BoundedDataset<>(elements));
    }

    @Override
    public String toNativeString() {
      return "sparkContext.<readFrom(<source>)>()";
    }
  };
}
 
Example 4
Source File: ReadSourceTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a bounded read into a Twister2 source TSet and registers it as the data set backing
 * the transform's output collection.
 *
 * @param transform the bounded read being translated
 * @param context translation context supplying options, environment and output lookup
 */
@Override
public void translateNode(Read.Bounded<T> transform, Twister2BatchTranslationContext context) {
  Twister2BoundedSource<T> wrappedSource =
      new Twister2BoundedSource<T>(transform.getSource(), context, context.getOptions());
  final TSetEnvironment tsetEnv = context.getEnvironment();
  BatchTSetEnvironment batchEnv = (BatchTSetEnvironment) tsetEnv;

  // TODO: need to set the parallelism value; it is currently fixed at 1.
  SourceTSet<WindowedValue<T>> sourceTSet = batchEnv.createSource(wrappedSource, 1);

  PCollection<T> output = context.getOutput(transform);
  context.setOutputDataSet(output, sourceTSet);
}
 
Example 5
Source File: SparkNativePipelineVisitor.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Renders this transform as a Spark-native-looking string.
 *
 * <p>The base string comes from {@code transformEvaluator.toNativeString()}; at most one of the
 * placeholders {@code <fn>}, {@code <windowFn>} or {@code <source>} is then substituted.
 *
 * @return the rendered string, or {@code "<FailedTranslation>"} if one of the reflective
 *     exceptions caught below is raised while building it
 */
@Override
public String toString() {
  try {
    Class<? extends PTransform> transformClass = transform.getClass();
    // Special case: a node named exactly "KafkaIO.Read" gets a fixed rendering.
    if ("KafkaIO.Read".equals(node.getFullName())) {
      return "KafkaUtils.createDirectStream(...)";
    }
    // Composite transforms are rendered by class name only.
    if (composite) {
      return "_.<" + transformClass.getName() + ">";
    }
    String transformString = transformEvaluator.toNativeString();
    // The placeholder checks are an else-if chain: only the first one present is replaced.
    if (transformString.contains("<fn>")) {
      transformString = replaceFnString(transformClass, transformString, "fn");
    } else if (transformString.contains("<windowFn>")) {
      transformString = replaceFnString(transformClass, transformString, "windowFn");
    } else if (transformString.contains("<source>")) {
      // Falls back to "..." when the transform is neither a bounded nor an unbounded Read.
      String sourceName = "...";
      if (transform instanceof Read.Bounded) {
        sourceName = ((Read.Bounded<?>) transform).getSource().getClass().getName();
      } else if (transform instanceof Read.Unbounded) {
        sourceName = ((Read.Unbounded<?>) transform).getSource().getClass().getName();
      }
      transformString = transformString.replace("<source>", sourceName);
    }
    // Strings already rooted at a context are returned as-is; everything else gets a "_." prefix.
    if (transformString.startsWith("sparkContext")
        || transformString.startsWith("streamingContext")) {
      return transformString;
    }
    return "_." + transformString;
  } catch (NoSuchMethodException
      | InvocationTargetException
      | IllegalAccessException
      | NoSuchFieldException e) {
    // NOTE(review): these are presumably raised by replaceFnString's reflection — confirm.
    return "<FailedTranslation>";
  }
}
 
Example 6
Source File: ReadTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a bounded source through {@code ReadTranslation}: converts a {@code Read.Bounded}
 * to its proto payload, checks the payload is flagged as bounded, and checks that the source
 * deserialized from the payload equals the original. Skipped (via assumption) when the fixture's
 * source is not a {@code BoundedSource}.
 */
@Test
public void testToFromProtoBounded() throws Exception {
  // TODO: Split into two tests.
  assumeThat(source, instanceOf(BoundedSource.class));

  BoundedSource<?> original = (BoundedSource<?>) this.source;
  Read.Bounded<?> read = Read.from(original);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  ReadPayload readPayload = ReadTranslation.toProto(read, sdkComponents);
  assertThat(readPayload.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.BOUNDED));

  BoundedSource<?> roundTripped = ReadTranslation.boundedSourceFromProto(readPayload);
  assertThat(roundTripped, equalTo(source));
}
 
Example 7
Source File: ReadTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@code FunctionSpec} for an applied bounded read: the URN comes from
 * {@code getUrn} and the payload is the serialized {@code ReadPayload} of the transform.
 *
 * @param transform the applied bounded read
 * @param components SDK components used while building the payload
 * @return the populated function spec
 */
@Override
public FunctionSpec translate(
    AppliedPTransform<?, ?, Read.Bounded<?>> transform, SdkComponents components) {
  Read.Bounded<?> read = transform.getTransform();
  ReadPayload readPayload = toProto(read, components);

  RunnerApi.FunctionSpec.Builder specBuilder = RunnerApi.FunctionSpec.newBuilder();
  specBuilder.setUrn(getUrn(read));
  specBuilder.setPayload(readPayload.toByteString());
  return specBuilder.build();
}
 
Example 8
Source File: PTransformTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates an applied transform into its {@code RunnerApi.PTransform} proto, attaching the
 * spec produced by the registered payload translator and, where applicable, an environment id.
 *
 * @param appliedPTransform the transform application to translate
 * @param subtransforms the sub-transform applications passed on to the base translation
 * @param components SDK components used for translation and environment lookup
 * @return the built proto
 * @throws IOException declared by the signature; see the callee translation steps
 */
@Override
public RunnerApi.PTransform translate(
    AppliedPTransform<?, ?, ?> appliedPTransform,
    List<AppliedPTransform<?, ?, ?>> subtransforms,
    SdkComponents components)
    throws IOException {
  RunnerApi.PTransform.Builder transformBuilder =
      translateAppliedPTransform(appliedPTransform, subtransforms, components);

  FunctionSpec spec =
      KNOWN_PAYLOAD_TRANSLATORS
          .get(appliedPTransform.getTransform().getClass())
          .translate(appliedPTransform, components);
  if (spec == null) {
    // No spec was produced: nothing further to attach.
    return transformBuilder.build();
  }
  transformBuilder.setSpec(spec);

  // Required runner implemented transforms should not have an environment id.
  if (!RUNNER_IMPLEMENTED_TRANSFORMS.contains(spec.getUrn())) {
    // TODO(BEAM-9309): Remove existing hacks around deprecated READ transform.
    boolean isReadUrn = spec.getUrn().equals(READ_TRANSFORM_URN);
    // Among reads, only Bounded reads get an environment. Unbounded reads are a Runner
    // translated transform, unless, in the future, we have an adapter available for
    // splittable DoFn. Every non-read transform gets an environment.
    if (!isReadUrn || appliedPTransform.getTransform().getClass() == Read.Bounded.class) {
      transformBuilder.setEnvironmentId(components.getOnlyEnvironmentId());
    }
  }
  return transformBuilder.build();
}
 
Example 9
Source File: ReadSourceTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a bounded read into a Twister2 source TSet, using the parallelism configured in the
 * context's options, and registers it as the data set backing the transform's output collection.
 *
 * @param transform the bounded read being translated
 * @param context translation context supplying options, environment and output lookup
 */
@Override
public void translateNode(Read.Bounded<T> transform, Twister2BatchTranslationContext context) {
  Twister2BoundedSource<T> wrappedSource =
      new Twister2BoundedSource<T>(transform.getSource(), context, context.getOptions());
  final TSetEnvironment tsetEnv = context.getEnvironment();
  BatchTSetEnvironment batchEnv = (BatchTSetEnvironment) tsetEnv;

  SourceTSet<WindowedValue<T>> sourceTSet =
      batchEnv.createSource(wrappedSource, context.getOptions().getParallelism());

  PCollection<T> output = context.getOutput(transform);
  context.setOutputDataSet(output, sourceTSet);
}
 
Example 10
Source File: PipelineTranslator.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@link Read.Bounded} node: creates a bounded-source vertex from the transform's
 * source and display data, adds it to the context, connects every input of the node to it, and
 * registers every output of the node as a main output of it.
 *
 * @param ctx       provides translation context
 * @param beamNode  the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Read.Bounded.class)
private static void boundedReadTranslator(final PipelineTranslationContext ctx,
                                          final TransformHierarchy.Node beamNode,
                                          final Read.Bounded<?> transform) {
  final IRVertex sourceVertex =
    new BeamBoundedSourceVertex<>(transform.getSource(), DisplayData.from(transform));
  ctx.addVertex(sourceVertex);

  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(sourceVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, sourceVertex, out);
  });
}
 
Example 11
Source File: ReadTranslation.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the URN for a bounded read.
 *
 * @param transform the bounded read (not inspected; the URN is the same for every instance)
 * @return {@code PTransformTranslation.READ_TRANSFORM_URN}
 */
@Override
public String getUrn(Read.Bounded<?> transform) {
  return PTransformTranslation.READ_TRANSFORM_URN;
}
 
Example 12
Source File: ReadTranslation.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a bounded read into a {@code ReadPayload} flagged as bounded and carrying the proto
 * form of the read's source.
 *
 * @param read the bounded read to convert
 * @param components SDK components used when converting the source
 * @return the built payload
 */
public static ReadPayload toProto(Read.Bounded<?> read, SdkComponents components) {
  ReadPayload.Builder payloadBuilder = ReadPayload.newBuilder();
  payloadBuilder.setIsBounded(IsBounded.Enum.BOUNDED);
  payloadBuilder.setSource(toProto(read.getSource(), components));
  return payloadBuilder.build();
}
 
Example 13
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces a {@code Create} node nested inside an enclosing node with a {@code Read.Bounded}
 * node, then asserts that the replacement takes over the original's inputs, enclosing node and
 * (after {@code replaceOutputs}) the original's output value.
 */
@Test
public void replaceSucceeds() {
  // A trivial enclosing transform; it only serves as the parent of the node being replaced.
  PTransform<?, ?> enclosingPT =
      new PTransform<PInput, POutput>() {
        @Override
        public POutput expand(PInput input) {
          return PDone.in(input.getPipeline());
        }
      };

  TransformHierarchy.Node enclosing =
      hierarchy.pushNode("Enclosing", PBegin.in(pipeline), enclosingPT);

  // Push the original node inside the enclosing one, give it an output, and pop it.
  Create.Values<Long> originalTransform = Create.of(1L);
  TransformHierarchy.Node original =
      hierarchy.pushNode("Create", PBegin.in(pipeline), originalTransform);
  assertThat(hierarchy.getCurrent(), equalTo(original));
  PCollection<Long> originalOutput = pipeline.apply(originalTransform);
  hierarchy.setOutput(originalOutput);
  hierarchy.popNode();
  assertThat(original.finishedSpecifying, is(true));
  // Finish and pop the enclosing node as well.
  hierarchy.setOutput(PDone.in(pipeline));
  hierarchy.popNode();

  // After both pops the enclosing node is no longer current.
  assertThat(hierarchy.getCurrent(), not(equalTo(enclosing)));
  Read.Bounded<Long> replacementTransform = Read.from(CountingSource.upTo(1L));
  PCollection<Long> replacementOutput = pipeline.apply(replacementTransform);
  Node replacement = hierarchy.replaceNode(original, PBegin.in(pipeline), replacementTransform);
  // replaceNode makes the replacement the current node.
  assertThat(hierarchy.getCurrent(), equalTo(replacement));
  hierarchy.setOutput(replacementOutput);

  // Map the replacement's output onto the original's output.
  TaggedPValue taggedReplacement = TaggedPValue.ofExpandedValue(replacementOutput);
  Map<PValue, ReplacementOutput> replacementOutputs =
      Collections.singletonMap(
          replacementOutput,
          ReplacementOutput.of(TaggedPValue.ofExpandedValue(originalOutput), taggedReplacement));
  hierarchy.replaceOutputs(replacementOutputs);

  assertThat(replacement.getInputs(), equalTo(original.getInputs()));
  assertThat(replacement.getEnclosingNode(), equalTo(original.getEnclosingNode()));
  assertThat(replacement.getEnclosingNode(), equalTo(enclosing));
  assertThat(replacement.getTransform(), equalTo(replacementTransform));
  // The tags of the replacement transform are matched to the appropriate PValues of the original
  assertThat(replacement.getOutputs().keySet(), Matchers.contains(taggedReplacement.getTag()));
  assertThat(replacement.getOutputs().values(), Matchers.contains(originalOutput));
  hierarchy.popNode();
}
 
Example 14
Source File: ReadTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a bounded read by delegating to {@code translateReadHelper} with the transform's
 * source.
 *
 * @param transform the bounded read to translate
 * @param context the translation context passed through to the helper
 */
@Override
public void translate(Read.Bounded<?> transform, TranslationContext context) {
  translateReadHelper(transform.getSource(), transform, context);
}
 
Example 15
Source File: DataflowRunner.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Supplies the replacement for a bounded read: a {@code StreamingBoundedRead} wrapping the
 * original transform, applied at the beginning of the same pipeline.
 *
 * @param transform the applied bounded read to be replaced
 * @return the replacement input/transform pair
 */
@Override
public PTransformReplacement<PBegin, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PBegin, PCollection<T>, Read.Bounded<T>> transform) {
  PBegin pipelineStart = transform.getPipeline().begin();
  Read.Bounded<T> originalRead = transform.getTransform();
  return PTransformReplacement.of(pipelineStart, new StreamingBoundedRead<>(originalRead));
}
 
Example 16
Source File: DataflowRunner.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a streaming bounded read that reads from the same source as the given transform.
 *
 * @param transform the bounded read whose source is captured
 */
public StreamingBoundedRead(Read.Bounded<T> transform) {
  this.source = transform.getSource();
}
 
Example 17
Source File: ConfigurableHDFSFileSource.java    From components with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a {@code Read} transform that will read from an {@code HDFSFileSource} with the given file name or
 * pattern ("glob") using the given Hadoop {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}, with
 * key-value types specified by the given key class and value class.
 *
 * <p>This is a convenience wrapper: it builds the source via {@code from(...)} with the same arguments and
 * passes it to {@code Read.from}.
 *
 * @param filepattern file name or glob pattern to read
 * @param formatClass the Hadoop input format class
 * @param keyClass class of the keys
 * @param valueClass class of the values
 * @param <K> key type
 * @param <V> value type
 * @param <T> input format type, a {@code FileInputFormat<K, V>}
 * @return a bounded read producing {@code KV<K, V>} elements
 */
public static <K, V, T extends FileInputFormat<K, V>> Read.Bounded<KV<K, V>> readFrom(String filepattern,
        Class<T> formatClass, Class<K> keyClass, Class<V> valueClass) {
    return Read.from(from(filepattern, formatClass, keyClass, valueClass));
}