Java Code Examples for org.apache.beam.sdk.io.Read#from()

The following examples show how to use org.apache.beam.sdk.io.Read#from(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates an unbounded {@code Read} with only the parallelism set on the environment and
 * expects the number of split sources in the resulting wrapper to equal that parallelism.
 */
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Unbounded unboundedRead = Read.from(new TestUnboundedSource());
  StreamExecutionEnvironment flinkEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  flinkEnv.setParallelism(parallelism);

  OneInputTransformation<?, ?> translated =
      (OneInputTransformation)
          applyReadSourceTransform(unboundedRead, PCollection.IsBounded.UNBOUNDED, flinkEnv);

  // The source wrapper is the user function of the operator that feeds the translated transform.
  SourceTransformation upstream = (SourceTransformation) translated.getInput();
  UnboundedSourceWrapper wrapper =
      (UnboundedSourceWrapper) upstream.getOperator().getUserFunction();

  int splitCount = wrapper.getSplitSources().size();
  assertEquals(parallelism, splitCount);
}
 
Example 2
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates an unbounded {@code Read} with both parallelism and max parallelism set on the
 * environment and expects the number of split sources to equal the max parallelism.
 */
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Unbounded unboundedRead = Read.from(new TestUnboundedSource());
  StreamExecutionEnvironment flinkEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  flinkEnv.setParallelism(parallelism);
  flinkEnv.setMaxParallelism(maxParallelism);

  OneInputTransformation<?, ?> translated =
      (OneInputTransformation)
          applyReadSourceTransform(unboundedRead, PCollection.IsBounded.UNBOUNDED, flinkEnv);

  // The source wrapper is the user function of the operator that feeds the translated transform.
  SourceTransformation upstream = (SourceTransformation) translated.getInput();
  UnboundedSourceWrapper wrapper =
      (UnboundedSourceWrapper) upstream.getOperator().getUserFunction();

  int splitCount = wrapper.getSplitSources().size();
  assertEquals(maxParallelism, splitCount);
}
 
Example 3
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a bounded {@code Read} with only the parallelism set on the environment and expects
 * the underlying source of the resulting wrapper to hold that many split sources.
 */
@Test
public void readSourceTranslatorBoundedWithoutMaxParallelism() {
  final int parallelism = 2;

  Read.Bounded boundedRead = Read.from(new TestBoundedSource(parallelism));
  StreamExecutionEnvironment flinkEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  flinkEnv.setParallelism(parallelism);

  SourceTransformation<?> translated =
      (SourceTransformation)
          applyReadSourceTransform(boundedRead, PCollection.IsBounded.BOUNDED, flinkEnv);

  UnboundedSourceWrapperNoValueWithRecordId wrapper =
      (UnboundedSourceWrapperNoValueWithRecordId) translated.getOperator().getUserFunction();

  int splitCount = wrapper.getUnderlyingSource().getSplitSources().size();
  assertEquals(parallelism, splitCount);
}
 
Example 4
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a bounded {@code Read} with both parallelism and max parallelism set on the
 * environment and expects the underlying source of the resulting wrapper to hold as many split
 * sources as the max parallelism.
 */
@Test
public void readSourceTranslatorBoundedWithMaxParallelism() {
  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Bounded boundedRead = Read.from(new TestBoundedSource(maxParallelism));
  StreamExecutionEnvironment flinkEnv = StreamExecutionEnvironment.getExecutionEnvironment();
  flinkEnv.setParallelism(parallelism);
  flinkEnv.setMaxParallelism(maxParallelism);

  SourceTransformation<?> translated =
      (SourceTransformation)
          applyReadSourceTransform(boundedRead, PCollection.IsBounded.BOUNDED, flinkEnv);

  UnboundedSourceWrapperNoValueWithRecordId wrapper =
      (UnboundedSourceWrapperNoValueWithRecordId) translated.getOperator().getUserFunction();

  int splitCount = wrapper.getUnderlyingSource().getSplitSources().size();
  assertEquals(maxParallelism, splitCount);
}
 
Example 5
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies an unbounded read whose output is already consumed by a flatten, runs {@code
 * UnconsumedReads.ensureAllReadsConsumed}, and then checks that every primitive transform taking
 * the read output as input is that same flatten.
 */
@Test
public void doesNotConsumeAlreadyConsumedRead() {
  Unbounded<Long> countingRead = Read.from(CountingSource.unbounded());
  final PCollection<Long> readOutput = pipeline.apply(countingRead);
  final Flatten.PCollections<Long> flatten = Flatten.pCollections();
  PCollectionList.of(readOutput).apply(flatten);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);

  PipelineVisitor.Defaults singleConsumerCheck =
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // Nodes that do not read the output are irrelevant to this check.
          if (!node.getInputs().values().contains(readOutput)) {
            return;
          }
          // The output should only be consumed by a single consumer
          assertThat(node.getTransform(), Matchers.is(flatten));
        }
      };
  pipeline.traverseTopologically(singleConsumerCheck);
}
 
Example 6
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform which yields a finite number of synthesized events generated as a batch.
 *
 * @param configuration supplies the standard generator config and the number of event generators
 * @return a read from a {@code BoundedEventSource} built from {@code configuration}
 */
public static PTransform<PBegin, PCollection<Event>> batchEventsSource(
    NexmarkConfiguration configuration) {
  BoundedEventSource batchSource =
      new BoundedEventSource(
          standardGeneratorConfig(configuration), configuration.numEventGenerators);
  return Read.from(batchSource);
}
 
Example 7
Source File: UnboundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an unbounded-read evaluator for {@code Read.from(source)} and feeds it a single shard
 * that wraps a freshly created reader of {@code source}.
 *
 * <p>{@code TestUnboundedSource.readerClosedCount} is reset to zero immediately before the shard
 * is processed.
 *
 * @param source the test source to read from
 * @throws Exception if creating the reader or processing the element fails
 */
private void processElement(final TestUnboundedSource<String> source) throws Exception {
  // Evaluation context over an empty direct graph, with a mock clock and a cached thread pool.
  final EvaluationContext context =
      EvaluationContext.create(
          MockClock.fromInstant(Instant.now()),
          CloningBundleFactory.create(),
          DirectGraph.create(
              emptyMap(), emptyMap(), LinkedListMultimap.create(), emptySet(), emptyMap()),
          emptySet(),
          Executors.newCachedThreadPool());
  final UnboundedReadEvaluatorFactory factory =
      new UnboundedReadEvaluatorFactory(context, options);

  // Apply the read to a fresh pipeline so there is an output PCollection to register.
  final Read.Unbounded<String> unbounded = Read.from(source);
  final Pipeline pipeline = Pipeline.create(options);
  final PCollection<String> pCollection = pipeline.apply(unbounded);
  // Hand-built application: no inputs, and the read output under a new tag.
  final AppliedPTransform<PBegin, PCollection<String>, Read.Unbounded<String>> application =
      AppliedPTransform.of(
          "test",
          new HashMap<>(),
          singletonMap(new TupleTag(), pCollection),
          unbounded,
          pipeline);
  final TransformEvaluator<UnboundedSourceShard<String, TestCheckpointMark>> evaluator =
      factory.forApplication(application, null);
  // The shard carries a reader created with a null checkpoint mark and a null checkpoint.
  final UnboundedSource.UnboundedReader<String> reader = source.createReader(options, null);
  final UnboundedSourceShard<String, TestCheckpointMark> shard =
      UnboundedSourceShard.of(source, new NeverDeduplicator(), reader, null);
  final WindowedValue<UnboundedSourceShard<String, TestCheckpointMark>> value =
      WindowedValue.of(
          shard, BoundedWindow.TIMESTAMP_MAX_VALUE, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
  // Reset the static close counter right before processing the shard.
  TestUnboundedSource.readerClosedCount = 0;
  evaluator.processElement(value);
}
 
Example 8
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies an unbounded counting read with no downstream consumer, runs {@code
 * UnconsumedReads.ensureAllReadsConsumed}, and delegates verification to {@code
 * validateConsumed()}.
 */
@Test
public void matcherProducesUnconsumedValueUnboundedRead() {
  pipeline.apply(Read.from(CountingSource.unbounded()));

  UnconsumedReads.ensureAllReadsConsumed(pipeline);

  validateConsumed();
}
 
Example 9
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a bounded counting read (up to 20) with no downstream consumer, runs {@code
 * UnconsumedReads.ensureAllReadsConsumed}, and delegates verification to {@code
 * validateConsumed()}.
 */
@Test
public void matcherProducesUnconsumedValueBoundedRead() {
  pipeline.apply(Read.from(CountingSource.upTo(20L)));

  UnconsumedReads.ensureAllReadsConsumed(pipeline);

  validateConsumed();
}
 
Example 10
Source File: ReadTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips an unbounded source through {@code ReadTranslation}: the payload must be marked
 * UNBOUNDED and the source read back from it must equal the original. Skipped via assumption
 * when the source under test is not an {@code UnboundedSource}.
 */
@Test
public void testToFromProtoUnbounded() throws Exception {
  assumeThat(source, instanceOf(UnboundedSource.class));

  UnboundedSource<?, ?> original = (UnboundedSource<?, ?>) this.source;
  Read.Unbounded<?> read = Read.from(original);

  // No environment set for unbounded sources
  ReadPayload proto = ReadTranslation.toProto(read, SdkComponents.create());
  assertThat(proto.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.UNBOUNDED));

  UnboundedSource<?, ?> roundTripped = ReadTranslation.unboundedSourceFromProto(proto);
  assertThat(roundTripped, equalTo(source));
}
 
Example 11
Source File: ReadTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a bounded source through {@code ReadTranslation}: the payload must be marked
 * BOUNDED and the source read back from it must equal the original. Skipped via assumption when
 * the source under test is not a {@code BoundedSource}.
 */
@Test
public void testToFromProtoBounded() throws Exception {
  // TODO: Split into two tests.
  assumeThat(source, instanceOf(BoundedSource.class));

  BoundedSource<?> original = (BoundedSource<?>) this.source;
  Read.Bounded<?> read = Read.from(original);

  // A docker "java" environment is registered on the components before translating.
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  ReadPayload proto = ReadTranslation.toProto(read, sdkComponents);
  assertThat(proto.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.BOUNDED));

  BoundedSource<?> roundTripped = ReadTranslation.boundedSourceFromProto(proto);
  assertThat(roundTripped, equalTo(source));
}
 
Example 12
Source File: LoadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the synthetic read matching the pipeline mode.
 *
 * @param sourceOptions options passed to the synthetic source constructor
 * @return a read from a {@code SyntheticUnboundedSource} when {@code options.isStreaming()} is
 *     true, otherwise a read from a {@code SyntheticBoundedSource}
 */
PTransform<PBegin, PCollection<KV<byte[], byte[]>>> readFromSource(
    SyntheticSourceOptions sourceOptions) {
  if (!options.isStreaming()) {
    return Read.from(new SyntheticBoundedSource(sourceOptions));
  }
  return Read.from(new SyntheticUnboundedSource(sourceOptions));
}
 
Example 13
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform which yields a finite number of synthesized events generated on-the-fly in
 * real time.
 *
 * @param configuration supplies the standard generator config, the number of event generators,
 *     the watermark holdback (seconds) and the rate-limiting flag
 * @return a read from an {@code UnboundedEventSource} built from {@code configuration}
 */
public static PTransform<PBegin, PCollection<Event>> streamEventsSource(
    NexmarkConfiguration configuration) {
  UnboundedEventSource streamSource =
      new UnboundedEventSource(
          NexmarkUtils.standardGeneratorConfig(configuration),
          configuration.numEventGenerators,
          configuration.watermarkHoldbackSec,
          configuration.isRateLimited);
  return Read.from(streamSource);
}
 
Example 14
Source File: PTransformTranslationTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Applies an unbounded counting read to {@code pipeline} and describes that application as an
 * {@code AppliedPTransform} named "ReadTheCount".
 *
 * @param pipeline the pipeline the read is applied to
 * @return the applied transform, with the expanded pipeline begin as inputs and the expanded read
 *     output as outputs
 */
private static AppliedPTransform<?, ?, ?> read(Pipeline pipeline) {
  Read.Unbounded<Long> countingRead = Read.from(CountingSource.unbounded());
  PCollection<Long> counts = pipeline.apply(countingRead);
  return AppliedPTransform.of(
      "ReadTheCount",
      pipeline.begin().expand(),
      counts.expand(),
      countingRead,
      pipeline);
}
 
Example 15
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces a {@code Create} node nested under an enclosing node with a bounded {@code Read} and
 * checks that the replacement takes over the original's inputs and enclosing node, and that its
 * outputs map the replacement's tag to the original output.
 */
@Test
public void replaceSucceeds() {
  // Enclosing composite whose expansion just returns PDone.
  PTransform<?, ?> enclosingPT =
      new PTransform<PInput, POutput>() {
        @Override
        public POutput expand(PInput input) {
          return PDone.in(input.getPipeline());
        }
      };

  TransformHierarchy.Node enclosing =
      hierarchy.pushNode("Enclosing", PBegin.in(pipeline), enclosingPT);

  // Push the original node under the enclosing one, set its output, then pop both nodes.
  Create.Values<Long> originalTransform = Create.of(1L);
  TransformHierarchy.Node original =
      hierarchy.pushNode("Create", PBegin.in(pipeline), originalTransform);
  assertThat(hierarchy.getCurrent(), equalTo(original));
  PCollection<Long> originalOutput = pipeline.apply(originalTransform);
  hierarchy.setOutput(originalOutput);
  hierarchy.popNode();
  assertThat(original.finishedSpecifying, is(true));
  hierarchy.setOutput(PDone.in(pipeline));
  hierarchy.popNode();

  // After both pops the enclosing node is no longer current; replacing makes the replacement
  // node the current one.
  assertThat(hierarchy.getCurrent(), not(equalTo(enclosing)));
  Read.Bounded<Long> replacementTransform = Read.from(CountingSource.upTo(1L));
  PCollection<Long> replacementOutput = pipeline.apply(replacementTransform);
  Node replacement = hierarchy.replaceNode(original, PBegin.in(pipeline), replacementTransform);
  assertThat(hierarchy.getCurrent(), equalTo(replacement));
  hierarchy.setOutput(replacementOutput);

  // Map the replacement output back onto the original output.
  TaggedPValue taggedReplacement = TaggedPValue.ofExpandedValue(replacementOutput);
  Map<PValue, ReplacementOutput> replacementOutputs =
      Collections.singletonMap(
          replacementOutput,
          ReplacementOutput.of(TaggedPValue.ofExpandedValue(originalOutput), taggedReplacement));
  hierarchy.replaceOutputs(replacementOutputs);

  assertThat(replacement.getInputs(), equalTo(original.getInputs()));
  assertThat(replacement.getEnclosingNode(), equalTo(original.getEnclosingNode()));
  assertThat(replacement.getEnclosingNode(), equalTo(enclosing));
  assertThat(replacement.getTransform(), equalTo(replacementTransform));
  // The tags of the replacement transform are matched to the appropriate PValues of the original
  assertThat(replacement.getOutputs().keySet(), Matchers.contains(taggedReplacement.getTag()));
  assertThat(replacement.getOutputs().values(), Matchers.contains(originalOutput));
  hierarchy.popNode();
}
 
Example 16
Source File: InMemoryQueueIO.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a read over the in-memory queue identified by the given loop state.
 *
 * @param state loop state whose {@code id} is passed to the {@code UnboundedQueuedInput}
 * @return a read from a new {@code UnboundedQueuedInput} for {@code state.id}
 */
public static PTransform<PBegin, PCollection<Record>> from(final LoopState state) {
    final UnboundedQueuedInput queuedInput = new UnboundedQueuedInput(state.id);
    return Read.from(queuedInput);
}
 
Example 17
Source File: ConfigurableHDFSFileSource.java    From components with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a {@code Read} transform over the {@code HDFSFileSource} that {@code from(...)} builds
 * for the same arguments.
 *
 * @param filepattern file name or pattern ("glob") to read
 * @param formatClass Hadoop {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} to use
 * @param keyClass class of the keys produced by the input format
 * @param valueClass class of the values produced by the input format
 * @return a bounded read yielding key-value pairs of the given key and value types
 */
public static <K, V, T extends FileInputFormat<K, V>> Read.Bounded<KV<K, V>> readFrom(
        String filepattern,
        Class<T> formatClass,
        Class<K> keyClass,
        Class<V> valueClass) {
    return Read.from(
            from(filepattern, formatClass, keyClass, valueClass));
}