org.apache.beam.sdk.io.Read Java Examples

The following examples show how to use org.apache.beam.sdk.io.Read in real open-source projects. The originating project, source file, and license for each example are noted above the snippet.
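For orientation before the project examples: Read is applied at the root of a pipeline via Read.from(...), which turns a BoundedSource or UnboundedSource into a PTransform that produces a PCollection. Below is a minimal, self-contained sketch using CountingSource; it is illustrative only and not taken from any of the projects listed here.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class ReadQuickStart {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Read.from(...) wraps a Source into a root transform; CountingSource.upTo(10L)
    // is a bounded source that emits the longs 0..9.
    PCollection<Long> numbers =
        pipeline.apply("ReadCounts", Read.from(CountingSource.upTo(10L)));

    pipeline.run().waitUntilFinish();
  }
}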
Example #1
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testBoundedToUnboundedSourceAdapter() throws Exception {
  long numElements = 100;
  BoundedSource<Long> boundedSource = CountingSource.upTo(numElements);
  UnboundedSource<Long, Checkpoint<Long>> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);

  PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements));

  // Count == numElements
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements);
  // Unique count == numElements
  PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally()))
      .isEqualTo(numElements);
  // Min == 0
  PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L);
  // Max == numElements-1
  PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1);
  p.run();
}
 
Example #2
Source File: BeamSumDemo.java    From scotty-window-processor with Apache License 2.0
public static void main(String[] args) {

    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
    Pipeline p = Pipeline.create(options);
    System.out.println("Running Pipeline\n " + p.getOptions());

    PCollection<KV<Integer, Integer>> data =
        p.begin().apply(Read.from(new DataGeneratorSource(0, new TimeStampGenerator())));

    KeyedScottyWindowOperator<Integer, Integer> scottyWindowDoFn =
        new KeyedScottyWindowOperator<Integer, Integer>(0, new Sum());
    scottyWindowDoFn.addWindow(new TumblingWindow(WindowMeasure.Time, 5000));
    //scottyWindowDoFn.addWindow(new SlidingWindow(WindowMeasure.Time, 2000, 1000));
    //scottyWindowDoFn.addWindow(new SessionWindow(WindowMeasure.Time, 2000));

    // Apply Scotty windowing
    PCollection<String> result = data.apply(ParDo.of(scottyWindowDoFn));

    // Print window results
    result.apply(ParDo.of(new printObject()));
    p.run().waitUntilFinish();
}
 
Example #3
Source File: BigQueryIOIT.java    From beam with Apache License 2.0
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply(
          "Write to BQ",
          writeIO
              .to(tableQualifier)
              .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
              .withMethod(method)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          Collections.singletonList(
                              new TableFieldSchema().setName("data").setType("BYTES")))));

  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  extractAndPublishTime(pipelineResult, metricName);
}
 
Example #4
Source File: DirectGraphVisitorTest.java    From beam with Apache License 2.0
@Test
public void getRootTransformsContainsRootTransforms() {
  PCollection<String> created = p.apply(Create.of("foo", "bar"));
  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L)));
  PCollection<Long> unCounted = p.apply(GenerateSequence.from(0));
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  assertThat(graph.getRootTransforms(), hasSize(3));
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(
          new Object[] {
            graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted)
          }));
  for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) {
    // Root transforms will have no inputs
    assertThat(root.getInputs().entrySet(), emptyIterable());
    assertThat(
        Iterables.getOnlyElement(root.getOutputs().values()),
        Matchers.<POutput>isOneOf(created, counted, unCounted));
  }
}
 
Example #5
Source File: MyBeamJob.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(PipelineOptions pipelineOptions) {

    Pipeline pipeline = Pipeline.create(pipelineOptions);

    pipeline
        .apply("unbounded-source",
            Read.from(new MyUnboundedSource("beam-input")))
        .apply("reformat-and-timestamp",
            ParDo.of(new MyEnrichAndReformatFn()))
        .apply("window",
            Window.<String>into(FixedWindows.of(ONE_SECOND))
                .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
                .discardingFiredPanes()
                .withAllowedLateness(ONE_SECOND))
        .apply("sink",
            FileIO.<String>write()
                .via(TextIO.sink())
                .to(".")
                .withPrefix("beam-output")
                .withNumShards(1));

    return pipeline;
}
 
Example #6
Source File: BoundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Test
public void boundedSourceEvaluatorClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of(), 1L, 2L, 3L);
  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), containsInAnyOrder(gw(2L), gw(3L), gw(1L)));
  assertThat(TestSource.readerClosed, is(true));
}
 
Example #7
Source File: TCompBoundedSourceSinkAdapterTest.java    From components with Apache License 2.0
@Test
public void testSource() {
    Pipeline pipeline = TestPipeline.create();

    FixedFlowProperties fixedFlowProperties = new FixedFlowProperties("fixedFlowProperties");
    fixedFlowProperties.init();
    fixedFlowProperties.data.setValue("a;b;c");
    fixedFlowProperties.rowDelimited.setValue(";");


    FixedFlowSource fixedFlowSource = new FixedFlowSource();
    fixedFlowSource.initialize(null, fixedFlowProperties);

    TCompBoundedSourceAdapter source = new TCompBoundedSourceAdapter(fixedFlowSource);

    PCollection<String> result = pipeline.apply(Read.from(source)).apply(ParDo.of(new DoFn<IndexedRecord, String>() {
        @DoFn.ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(c.element().get(0).toString());
        }
    }));

    PAssert.that(result).containsInAnyOrder(Arrays.asList("a", "b", "c"));

    pipeline.run();
}
 
Example #8
Source File: BoundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Test
public void boundedSourceEvaluatorNoElementsClosesReader() throws Exception {
  TestSource<Long> source = new TestSource<>(BigEndianLongCoder.of());

  PCollection<Long> pcollection = p.apply(Read.from(source));
  AppliedPTransform<?, ?, ?> sourceTransform = DirectGraphs.getProducer(pcollection);

  UncommittedBundle<Long> output = bundleFactory.createBundle(pcollection);
  when(context.createBundle(pcollection)).thenReturn(output);

  TransformEvaluator<BoundedSourceShard<Long>> evaluator =
      factory.forApplication(
          sourceTransform, bundleFactory.createRootBundle().commit(Instant.now()));
  evaluator.processElement(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(source)));
  evaluator.finishBundle();
  CommittedBundle<Long> committed = output.commit(Instant.now());
  assertThat(committed.getElements(), emptyIterable());
  assertThat(TestSource.readerClosed, is(true));
}
 
Example #9
Source File: SyntheticDataPublisher.java    From beam with Apache License 2.0
public static void main(String[] args) throws IOException {
  options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  SyntheticSourceOptions sourceOptions =
      SyntheticOptions.fromJsonString(options.getSourceOptions(), SyntheticSourceOptions.class);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<byte[], byte[]>> syntheticData =
      pipeline.apply("Read synthetic data", Read.from(new SyntheticBoundedSource(sourceOptions)));

  if (options.getKafkaBootstrapServerAddress() != null && options.getKafkaTopic() != null) {
    writeToKafka(syntheticData);
  }
  if (options.getPubSubTopic() != null) {
    writeToPubSub(syntheticData);
  }
  if (allKinesisOptionsConfigured()) {
    writeToKinesis(syntheticData);
  }
  pipeline.run().waitUntilFinish();
}
 
Example #10
Source File: WorkerCustomSourcesTest.java    From beam with Apache License 2.0
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(
    BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline p = Pipeline.create(options);
  p.begin().apply(Read.from(io));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment("dummy-image-url");
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  Job workflow =
      translator
          .translate(p, pipelineProto, sdkComponents, runner, new ArrayList<DataflowPackage>())
          .getJob();
  Step step = workflow.getSteps().get(0);

  return stepToCloudSource(step);
}
 
Example #11
Source File: KafkaIOTest.java    From beam with Apache License 2.0
/**
 * Creates a consumer with two topics, each with 10 partitions. The numElements records are
 * assigned round-robin across all 20 partitions.
 */
private static KafkaIO.Read<Integer, Long> mkKafkaReadTransform(
    int numElements,
    int maxNumRecords,
    @Nullable SerializableFunction<KV<Integer, Long>, Instant> timestampFn) {

  List<String> topics = ImmutableList.of("topic_a", "topic_b");

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopics(topics)
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 20 partitions
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(maxNumRecords);

  if (timestampFn != null) {
    return reader.withTimestampFn(timestampFn);
  } else {
    return reader;
  }
}
 
Example #12
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0
@Test
public void readSourceTranslatorUnboundedWithMaxParallelism() {

  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getSplitSources().size());
}
 
Example #13
Source File: KafkaIOTest.java    From beam with Apache License 2.0
@Test
public void testUnboundedSourceWithExplicitPartitions() {
  int numElements = 1000;

  List<String> topics = ImmutableList.of("test");

  KafkaIO.Read<byte[], Long> reader =
      KafkaIO.<byte[], Long>read()
          .withBootstrapServers("none")
          .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 5)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  topics, 10, numElements, OffsetResetStrategy.EARLIEST)) // 10 partitions
          .withKeyDeserializer(ByteArrayDeserializer.class)
          .withValueDeserializer(LongDeserializer.class)
          .withMaxNumRecords(numElements / 10);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  // assert that every element is a multiple of 5.
  PAssert.that(input).satisfies(new AssertMultipleOf(5));

  PAssert.thatSingleton(input.apply(Count.globally())).isEqualTo(numElements / 10L);

  p.run();
}
 
Example #14
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0
@Test
public void readSourceTranslatorBoundedWithoutMaxParallelism() {

  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(parallelism));
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(parallelism, source.getUnderlyingSource().getSplitSources().size());
}
 
Example #15
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0
@Test
public void readSourceTranslatorBoundedWithMaxParallelism() {

  final int maxParallelism = 6;
  final int parallelism = 2;

  Read.Bounded transform = Read.from(new TestBoundedSource(maxParallelism));
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);
  env.setMaxParallelism(maxParallelism);

  SourceTransformation<?> sourceTransform =
      (SourceTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.BOUNDED, env);

  UnboundedSourceWrapperNoValueWithRecordId source =
      (UnboundedSourceWrapperNoValueWithRecordId) sourceTransform.getOperator().getUserFunction();

  assertEquals(maxParallelism, source.getUnderlyingSource().getSplitSources().size());
}
 
Example #16
Source File: StreamingTransformTranslator.java    From beam with Apache License 2.0
private static <T> TransformEvaluator<Read.Unbounded<T>> readUnbounded() {
  return new TransformEvaluator<Read.Unbounded<T>>() {
    @Override
    public void evaluate(Read.Unbounded<T> transform, EvaluationContext context) {
      final String stepName = context.getCurrentTransform().getFullName();
      context.putDataset(
          transform,
          SparkUnboundedSource.read(
              context.getStreamingContext(),
              context.getSerializableOptions(),
              transform.getSource(),
              stepName));
    }

    @Override
    public String toNativeString() {
      return "streamingContext.<readFrom(<source>)>()";
    }
  };
}
 
Example #17
Source File: TransformTranslator.java    From beam with Apache License 2.0
private static <T> TransformEvaluator<Read.Bounded<T>> readBounded() {
  return new TransformEvaluator<Read.Bounded<T>>() {
    @Override
    public void evaluate(Read.Bounded<T> transform, EvaluationContext context) {
      String stepName = context.getCurrentTransform().getFullName();
      final JavaSparkContext jsc = context.getSparkContext();
      // create an RDD from a BoundedSource.
      JavaRDD<WindowedValue<T>> input =
          new SourceRDD.Bounded<>(
                  jsc.sc(), transform.getSource(), context.getSerializableOptions(), stepName)
              .toJavaRDD();

      context.putDataset(transform, new BoundedDataset<>(input));
    }

    @Override
    public String toNativeString() {
      return "sparkContext.<readFrom(<source>)>()";
    }
  };
}
 
Example #18
Source File: KafkaIOTest.java    From beam with Apache License 2.0
@Test
public void testSourceWithExplicitPartitionsDisplayData() {
  KafkaIO.Read<byte[], byte[]> read =
      KafkaIO.readBytes()
          .withBootstrapServers("myServer1:9092,myServer2:9092")
          .withTopicPartitions(
              ImmutableList.of(new TopicPartition("test", 5), new TopicPartition("test", 6)))
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  Lists.newArrayList("test"),
                  10,
                  10,
                  OffsetResetStrategy.EARLIEST)); // 10 partitions

  DisplayData displayData = DisplayData.from(read);

  assertThat(displayData, hasDisplayItem("topicPartitions", "test-5,test-6"));
  assertThat(displayData, hasDisplayItem("enable.auto.commit", false));
  assertThat(displayData, hasDisplayItem("bootstrap.servers", "myServer1:9092,myServer2:9092"));
  assertThat(displayData, hasDisplayItem("auto.offset.reset", "latest"));
  assertThat(displayData, hasDisplayItem("receive.buffer.bytes", 524288));
}
 
Example #19
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void getEnvironmentWithEnvironment() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  longs.apply(WithKeys.of("a")).apply("groupByKey", GroupByKey.create());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  PTransformNode environmentalRead =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PTransformNode nonEnvironmentalTransform =
      PipelineNode.pTransform("groupByKey", components.getTransformsOrThrow("groupByKey"));

  assertThat(qp.getEnvironment(environmentalRead).isPresent(), is(true));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getUrn(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn()));
  assertThat(
      qp.getEnvironment(environmentalRead).get().getPayload(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getPayload()));
  assertThat(qp.getEnvironment(nonEnvironmentalTransform).isPresent(), is(false));
}
 
Example #20
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
/**
 * Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} returns a
 * transform that consumes the node more than once.
 */
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  // This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation
  String readOutput =
      getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
  Set<PTransformNode> consumers =
      qp.getPerElementConsumers(
          PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput)));

  assertThat(consumers.size(), equalTo(1));
  assertThat(
      getOnlyElement(consumers).getTransform().getSpec().getUrn(),
      equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
 
Example #21
Source File: KafkaIOTest.java    From beam with Apache License 2.0
@Test
public void testUnboundedSourceWithSingleTopic() {
  // same as testUnboundedSource, but with single topic

  int numElements = 1000;
  String topic = "my_topic";

  KafkaIO.Read<Integer, Long> reader =
      KafkaIO.<Integer, Long>read()
          .withBootstrapServers("none")
          .withTopic("my_topic")
          .withConsumerFactoryFn(
              new ConsumerFactoryFn(
                  ImmutableList.of(topic), 10, numElements, OffsetResetStrategy.EARLIEST))
          .withMaxNumRecords(numElements)
          .withKeyDeserializer(IntegerDeserializer.class)
          .withValueDeserializer(LongDeserializer.class);

  PCollection<Long> input = p.apply(reader.withoutMetadata()).apply(Values.create());

  addCountingAsserts(input, numElements);
  p.run();
}
 
Example #22
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void rootTransforms() {
  Pipeline p = Pipeline.create();
  p.apply("UnboundedRead", Read.from(CountingSource.unbounded()))
      .apply(Window.into(FixedWindows.of(Duration.millis(5L))))
      .apply(Count.perElement());
  p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  assertThat(qp.getRootTransforms(), hasSize(2));
  for (PTransformNode rootTransform : qp.getRootTransforms()) {
    assertThat(
        "Root transforms should have no inputs",
        rootTransform.getTransform().getInputsCount(),
        equalTo(0));
    assertThat(
        "Only added source reads to the pipeline",
        rootTransform.getTransform().getSpec().getUrn(),
        equalTo(PTransformTranslation.READ_TRANSFORM_URN));
  }
}
 
Example #23
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0
@Test
public void doesNotConsumeAlreadyConsumedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  final PCollection<Long> output = pipeline.apply(transform);
  final Flatten.PCollections<Long> consumer = Flatten.pCollections();
  PCollectionList.of(output).apply(consumer);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // The output should only be consumed by a single consumer
          if (node.getInputs().values().contains(output)) {
            assertThat(node.getTransform(), Matchers.is(consumer));
          }
        }
      });
}
 
Example #24
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0
@Test
public void readSourceTranslatorUnboundedWithoutMaxParallelism() {

  final int parallelism = 2;

  Read.Unbounded transform = Read.from(new TestUnboundedSource());
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(parallelism);

  OneInputTransformation<?, ?> sourceTransform =
      (OneInputTransformation)
          applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env);

  UnboundedSourceWrapper source =
      (UnboundedSourceWrapper)
          ((SourceTransformation) sourceTransform.getInput()).getOperator().getUserFunction();

  assertEquals(parallelism, source.getSplitSources().size());
}
 
Example #25
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0
@Test
public void matcherProducesUnconsumedValueUnboundedRead() {
  Unbounded<Long> transform = Read.from(CountingSource.unbounded());
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
 
Example #26
Source File: UnconsumedReadsTest.java    From beam with Apache License 2.0
@Test
public void matcherProducesUnconsumedValueBoundedRead() {
  Bounded<Long> transform = Read.from(CountingSource.upTo(20L));
  pipeline.apply(transform);
  UnconsumedReads.ensureAllReadsConsumed(pipeline);
  validateConsumed();
}
 
Example #27
Source File: FixedFlowInputRuntime.java    From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
    return begin.apply(Read.from(new FixedFlowInputBoundedSource() //
            .withSchema(properties.schemaFlow.schema.getValue())//
            .withValues(properties.values.getValue()) //
            .withNbRows(properties.nbRows.getValue())));
}
 
Example #28
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
PTransform<PBegin, PCollection<T>> read() {
  return new PTransform<PBegin, PCollection<T>>() {
    @Override
    public PCollection<T> expand(PBegin input) {
      return input.apply("readFrom:" + name, Read.from(asSource()));
    }
  };
}
 
Example #29
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void getProducer() {
  Pipeline p = Pipeline.create();
  PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(p).getComponents();
  QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

  String longsOutputName =
      getOnlyElement(
          PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode longsProducer =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PCollectionNode longsOutput =
      PipelineNode.pCollection(
          longsOutputName, components.getPcollectionsOrThrow(longsOutputName));
  String flattenOutputName =
      getOnlyElement(
          PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"))
              .getTransform()
              .getOutputsMap()
              .values());
  PTransformNode flattenProducer =
      PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
  PCollectionNode flattenOutput =
      PipelineNode.pCollection(
          flattenOutputName, components.getPcollectionsOrThrow(flattenOutputName));

  assertThat(qp.getProducer(longsOutput), equalTo(longsProducer));
  assertThat(qp.getProducer(flattenOutput), equalTo(flattenProducer));
}
 
Example #30
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
@Test
public void splitsInputs() {
  Pipeline p = getPipeline();
  PCollection<Long> longs = p.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));

  PAssert.that(longs).containsInAnyOrder(0L, 1L, 2L);
  p.run();
}