org.apache.beam.sdk.transforms.Count Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.Count. The originating project and source file are noted above each example.
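
Before the project examples, here is a minimal, self-contained sketch of the three Count entry points. It is not taken from any of the projects below; the class name, step names, and sample values are illustrative.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class CountSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();

    PCollection<String> words = p.apply("Words", Create.of("a", "b", "a", "c", "a"));

    // Count.globally(): a single Long holding the total number of elements.
    PAssert.thatSingleton(words.apply("Total", Count.globally())).isEqualTo(5L);

    // Count.perElement(): one KV<element, count> per distinct element.
    PAssert.that(words.apply("PerElement", Count.perElement()))
        .containsInAnyOrder(KV.of("a", 3L), KV.of("b", 1L), KV.of("c", 1L));

    // Count.perKey(): for KV inputs, the number of values per key.
    PCollection<KV<String, Integer>> pairs =
        p.apply("Pairs", Create.of(KV.of("k1", 1), KV.of("k1", 2), KV.of("k2", 3)));
    PAssert.that(pairs.apply("PerKey", Count.perKey()))
        .containsInAnyOrder(KV.of("k1", 2L), KV.of("k2", 1L));

    p.run().waitUntilFinish();
  }
}

The sketch runs on the direct runner by default; the PAssert checks mirror the assertions used throughout the examples below.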
Example #1
Source File: BigtableReadIT.java    From beam with Apache License 2.0
@Test
public void testE2EBigtableRead() throws Exception {
  PipelineOptionsFactory.register(BigtableTestOptions.class);
  BigtableTestOptions options =
      TestPipeline.testingPipelineOptions().as(BigtableTestOptions.class);

  String project = options.getBigtableProject();
  if (project.equals("")) {
    project = options.as(GcpOptions.class).getProject();
  }

  BigtableOptions.Builder bigtableOptionsBuilder =
      new BigtableOptions.Builder().setProjectId(project).setInstanceId(options.getInstanceId());

  final String tableId = "BigtableReadTest";
  final long numRows = 1000L;

  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(BigtableIO.read().withBigtableOptions(bigtableOptionsBuilder).withTableId(tableId))
          .apply(Count.globally());
  PAssert.thatSingleton(count).isEqualTo(numRows);
  p.run();
}
 
Example #2
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
/**
 * Tests that data is read successfully from the Elasticsearch instance.
 */
@Test
public void testHifIOWithElastic() throws SecurityException {
  // The expected hashcode was computed once at insertion time and is hardcoded here.
  final long expectedRowCount = 1000L;
  String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
  Configuration conf = getConfiguration(options);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PCollection<Long> count = esData.apply(Count.globally());
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #3
Source File: SqsIOTest.java    From beam with Apache License 2.0
@Test
public void testRead() {
  final SqsClient client = EmbeddedSqsServer.getClient();
  final String queueUrl = EmbeddedSqsServer.getQueueUrl();

  final PCollection<SqsMessage> output =
      pipeline.apply(
          SqsIO.read()
              .withSqsClientProvider(SqsClientProviderMock.of(client))
              .withQueueUrl(queueUrl)
              .withMaxNumRecords(100));

  PAssert.thatSingleton(output.apply(Count.globally())).isEqualTo(100L);

  for (int i = 0; i < 100; i++) {
    SendMessageRequest sendMessageRequest =
        SendMessageRequest.builder().queueUrl(queueUrl).messageBody("This is a test").build();
    client.sendMessage(sendMessageRequest);
  }
  pipeline.run();
}
 
Example #4
Source File: RedisIOTest.java    From beam with Apache License 2.0
@Test
public void testReadWithKeyPattern() {
  List<KV<String, String>> data = buildIncrementalData("pattern", 10);
  data.forEach(kv -> client.set(kv.getKey(), kv.getValue()));

  PCollection<KV<String, String>> read =
      p.apply("Read", RedisIO.read().withEndpoint(REDIS_HOST, port).withKeyPattern("pattern*"));
  PAssert.that(read).containsInAnyOrder(data);

  PCollection<KV<String, String>> readNotMatch =
      p.apply(
          "ReadNotMatch",
          RedisIO.read().withEndpoint(REDIS_HOST, port).withKeyPattern("foobar*"));
  PAssert.thatSingleton(readNotMatch.apply(Count.globally())).isEqualTo(0L);

  p.run();
}
 
Example #5
Source File: SnsIOTest.java    From beam with Apache License 2.0
@Test
public void testDataWritesToSNS() {
  ImmutableList<String> input = ImmutableList.of("message1", "message2");

  final PCollection<PublishResponse> results =
      p.apply(Create.of(input))
          .apply(
              SnsIO.<String>write()
                  .withPublishRequestFn(SnsIOTest::createSampleMessage)
                  .withTopicArn(topicArn)
                  .withRetryConfiguration(
                      SnsIO.RetryConfiguration.create(
                          5, org.joda.time.Duration.standardMinutes(1)))
                  .withSnsClientProvider(SnsClientMockSuccess::new));

  final PCollection<Long> publishedResultsSize = results.apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(2L));
  p.run().waitUntilFinish();
}
 
Example #6
Source File: PubsubToPubsubTest.java    From DataflowTemplates with Apache License 2.0
/** Tests whether all messages flow through when no filter is provided. */
@Test
@Category(NeedsRunner.class)
public void testNoInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);
  PCollection<Long> pc =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(ParDo.of(ExtractAndFilterEventsFn.newBuilder().build()))
          .apply(Count.globally());

  PAssert.thatSingleton(pc).isEqualTo(Long.valueOf(allTestMessages.size()));

  pipeline.run(options);
}
 
Example #7
Source File: DatastoreConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<String> expand(PBegin begin) {
  return begin.apply("ReadFromDatastore",
      DatastoreIO.v1().read()
          .withProjectId(projectId())
          .withLiteralGqlQuery(gqlQuery())
          .withNamespace(namespace()))
      .apply("ParseEntitySchema", ParDo.of(new EntityToSchemaJson()))
      .apply("CountUniqueSchemas", Count.<String>perElement())
      .apply("Jsonify", ParDo.of(new DoFn<KV<String, Long>, String>(){
        @ProcessElement
        public void processElement(ProcessContext c) {
          JsonObject out = new JsonObject();
          out.addProperty("schema", c.element().getKey());
          out.addProperty("count", c.element().getValue());
          c.output(out.toString());
        }
      }));
}
 
Example #8
Source File: AmqpIOTest.java    From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  PCollection<Message> output =
      pipeline.apply(
          AmqpIO.read()
              .withMaxNumRecords(100)
              .withAddresses(Collections.singletonList(broker.getQueueUri("testRead"))));
  PAssert.thatSingleton(output.apply(Count.globally())).isEqualTo(100L);

  Messenger sender = Messenger.Factory.create();
  sender.start();
  for (int i = 0; i < 100; i++) {
    Message message = Message.Factory.create();
    message.setAddress(broker.getQueueUri("testRead"));
    message.setBody(new AmqpValue("Test " + i));
    sender.put(message);
    sender.send();
  }
  sender.stop();

  pipeline.run();
}
 
Example #9
Source File: ElasticsearchIOTestCommon.java    From beam with Apache License 2.0
void testRead() throws Exception {
  if (!useAsITests) {
    ElasticsearchIOTestUtils.insertTestDocuments(connectionConfiguration, numDocs, restClient);
  }

  PCollection<String> output =
      pipeline.apply(
          ElasticsearchIO.read()
              .withConnectionConfiguration(connectionConfiguration)
              // set to default value, useful just to test parameter passing.
              .withScrollKeepalive("5m")
              // set to default value, useful just to test parameter passing.
              .withBatchSize(100L));
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numDocs);
  pipeline.run();
}
 
Example #10
Source File: MongoDbIOTest.java    From beam with Apache License 2.0
@Test
public void testFullRead() {
  PCollection<Document> output =
      pipeline.apply(
          MongoDbIO.read()
              .withUri("mongodb://localhost:" + port)
              .withDatabase(DATABASE)
              .withCollection(COLLECTION));

  PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo(1000L);

  PAssert.that(
          output
              .apply("Map Scientist", MapElements.via(new DocumentToKVFn()))
              .apply("Count Scientist", Count.perKey()))
      .satisfies(
          input -> {
            for (KV<String, Long> element : input) {
              assertEquals(100L, element.getValue().longValue());
            }
            return null;
          });

  pipeline.run();
}
 
Example #11
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
/**
 * Tests that data is read successfully from the embedded Elasticsearch instance.
 */
@Test
public void testHifIOWithElastic() {
  // The expected hashcode was computed once at insertion time and is hardcoded here.
  String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
  Configuration conf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #12
Source File: PubsubToPubsubTest.java    From DataflowTemplates with Apache License 2.0
/** Tests whether only the valid messages flow through when a filter is provided. */
@Test
@Category(NeedsRunner.class)
public void testInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);
  PCollection<Long> pc =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(
              ParDo.of(
                  ExtractAndFilterEventsFn.newBuilder()
                      .withFilterKey(options.getFilterKey())
                      .withFilterValue(options.getFilterValue())
                      .build()))
          .apply(Count.globally());

  PAssert.thatSingleton(pc).isEqualTo(Long.valueOf(goodTestMessages.size()));

  options.setFilterKey(ValueProvider.StaticValueProvider.of(FILTER_KEY));
  options.setFilterValue(ValueProvider.StaticValueProvider.of(FILTER_VALUE));

  pipeline.run(options);
}
 
Example #13
Source File: MongoDBGridFSIOTest.java    From beam with Apache License 2.0
@Test
public void testFullRead() {
  PCollection<String> output =
      pipeline.apply(
          MongoDbGridFSIO.read().withUri("mongodb://localhost:" + port).withDatabase(DATABASE));

  PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo(5000L);

  PAssert.that(output.apply("Count PerElement", Count.perElement()))
      .satisfies(
          input -> {
            for (KV<String, Long> element : input) {
              assertEquals(500L, element.getValue().longValue());
            }
            return null;
          });

  pipeline.run();
}
 
Example #14
Source File: JdbcIOTest.java    From beam with Apache License 2.0
@Test
public void testRead() {
  PCollection<TestRow> rows =
      pipeline.apply(
          JdbcIO.<TestRow>read()
              .withFetchSize(12)
              .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
              .withQuery("select name,id from " + readTableName)
              .withRowMapper(new JdbcTestHelper.CreateTestRowOfNameAndId())
              .withCoder(SerializableCoder.of(TestRow.class)));

  PAssert.thatSingleton(rows.apply("Count All", Count.globally()))
      .isEqualTo((long) EXPECTED_ROW_COUNT);

  Iterable<TestRow> expectedValues = TestRow.getExpectedValues(0, EXPECTED_ROW_COUNT);
  PAssert.that(rows).containsInAnyOrder(expectedValues);

  pipeline.run();
}
 
Example #15
Source File: DynamoDBIOTest.java    From beam with Apache License 2.0
@Test
public void testWriteDataToDynamo() {
  final List<WriteRequest> writeRequests = DynamoDBIOTestHelper.generateWriteRequests(numOfItems);

  final PCollection<Void> output =
      pipeline
          .apply(Create.of(writeRequests))
          .apply(
              DynamoDBIO.<WriteRequest>write()
                  .withWriteRequestMapperFn(
                      (SerializableFunction<WriteRequest, KV<String, WriteRequest>>)
                          writeRequest -> KV.of(tableName, writeRequest))
                  .withRetryConfiguration(
                      DynamoDBIO.RetryConfiguration.create(5, Duration.standardMinutes(1)))
                  .withAwsClientsProvider(
                      AwsClientsProviderMock.of(DynamoDBIOTestHelper.getDynamoDBClient())));

  final PCollection<Long> publishedResultsSize = output.apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(0L);

  pipeline.run().waitUntilFinish();
}
 
Example #16
Source File: SnsIOTest.java    From beam with Apache License 2.0
@Test
public void testCustomCoder() throws Exception {
  final PublishRequest request1 = createSampleMessage("my_first_message");

  final TupleTag<PublishResult> results = new TupleTag<>();
  final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess();
  final MockCoder mockCoder = new MockCoder();

  final PCollectionTuple snsWrites =
      p.apply(Create.of(request1))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withAWSClientsProvider(new Provider(amazonSnsSuccess))
                  .withResultOutputTag(results)
                  .withCoder(mockCoder));

  final PCollection<Long> publishedResultsSize =
      snsWrites
          .get(results)
          .apply(MapElements.into(TypeDescriptors.strings()).via(result -> result.getMessageId()))
          .apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(1L));
  p.run().waitUntilFinish();
  assertThat(mockCoder.captured).isNotNull();
}
 
Example #17
Source File: SnsIOTest.java    From beam with Apache License 2.0
@Test
public void testDataWritesToSNS() {
  final PublishRequest request1 = createSampleMessage("my_first_message");
  final PublishRequest request2 = createSampleMessage("my_second_message");

  final TupleTag<PublishResult> results = new TupleTag<>();
  final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess();

  final PCollectionTuple snsWrites =
      p.apply(Create.of(request1, request2))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withRetryConfiguration(
                      SnsIO.RetryConfiguration.create(
                          5, org.joda.time.Duration.standardMinutes(1)))
                  .withAWSClientsProvider(new Provider(amazonSnsSuccess))
                  .withResultOutputTag(results));

  final PCollection<Long> publishedResultsSize = snsWrites.get(results).apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(2L));
  p.run().waitUntilFinish();
}
 
Example #18
Source File: ImmutabilityEnforcementFactoryTest.java    From beam with Apache License 2.0
@Before
public void setup() {
  factory = new ImmutabilityEnforcementFactory();
  bundleFactory = ImmutableListBundleFactory.create();
  pcollection =
      p.apply(Create.of("foo".getBytes(UTF_8), "spamhameggs".getBytes(UTF_8)))
          .apply(
              ParDo.of(
                  new DoFn<byte[], byte[]>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.element()[0] = 'b';
                    }
                  }));
  PCollection<Long> consumer = pcollection.apply(Count.globally());
  DirectGraphs.performDirectOverrides(p);
  this.consumer = DirectGraphs.getProducer(consumer);
}
 
Example #19
Source File: CombineTest.java    From beam with Apache License 2.0
@Test
public void testCountPerElementWithSlidingWindows() {
  PCollection<String> input =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(1)),
                  TimestampedValue.of("a", new Instant(2)),
                  TimestampedValue.of("b", new Instant(3)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply(Window.into(SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1))));
  PCollection<KV<String, Long>> output = input.apply(Count.perElement());
  PAssert.that(output)
      .containsInAnyOrder(
          KV.of("a", 1L),
          KV.of("a", 2L),
          KV.of("a", 1L),
          KV.of("b", 1L),
          KV.of("b", 2L),
          KV.of("b", 1L));
  pipeline.run();
}
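
A quick sanity check of the assertion above: with 2 ms windows starting every 1 ms, each element falls into two windows. The two "a" elements at t=1 and t=2 share only the window [1, 3), so "a" is counted once in [0, 2), twice in [1, 3), and once in [2, 4); the two "b" elements at t=3 and t=4 behave the same way around [3, 5).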
 
Example #20
Source File: HadoopFormatIOCassandraTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that data is read successfully from the embedded Cassandra instance.
 */
@Test
public void testHIFReadForCassandra() {
  // The expected hashcode was computed once at insertion time and is hardcoded here.
  String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
  Configuration conf = getConfiguration();
  PCollection<KV<Long, String>> cassandraData =
      p.apply(
          HadoopFormatIO.<Long, String>read()
              .withConfiguration(conf)
              .withValueTranslation(myValueTranslate));
  // Verify the count of data retrieved from Cassandra matches expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.globally()))
      .isEqualTo(TEST_DATA_ROW_COUNT);
  PCollection<String> textValues = cassandraData.apply(Values.create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  p.run().waitUntilFinish();
}
 
Example #21
Source File: SparkSimpleFileIOInputRuntimeTestIT.java    From components with Apache License 2.0
/**
 * Demonstration using the {@link SparkIntegrationTestResource}.
 */
@Category(ValidatesRunner.class)
@Test
public void testSparkIntegrationTestResource() throws IOException {
    // Use the resource to create the pipeline.
    final Pipeline p = spark.createPipeline();

    // The pipeline transformations to test.
    PCollection<String> input = p.apply("create", Create.of("a a", "b c", "a a c"));
    input = input.apply("tokenize", ParDo.of(new ExtractWord()));
    PCollection<KV<String, Long>> counts = input.apply("count", Count.<String>perElement());

    // Check the expected results in the pipeline itself.
    PAssert.that(counts).containsInAnyOrder(KV.of("a", 4L), KV.of("b", 1L), KV.of("c", 2L));

    // Go!
    p.run().waitUntilFinish();
}
 
Example #22
Source File: HL7v2IOReadIT.java    From beam with Apache License 2.0
@Test
public void testHL7v2IO_ListHL7v2Messages_filtered() throws Exception {
  final String adtFilter = "messageType = \"ADT\"";
  // Should read only the messages matching the ADT filter.
  Pipeline pipeline = Pipeline.create();
  PCollection<HL7v2Message> result =
      pipeline.apply(
          HL7v2IO.readWithFilter(
              healthcareDataset + "/hl7V2Stores/" + HL7V2_STORE_NAME, adtFilter));
  PCollection<Long> numReadMessages =
      result.setCoder(HL7v2MessageCoder.of()).apply(Count.globally());
  PAssert.thatSingleton(numReadMessages).isEqualTo(NUM_ADT);

  PAssert.that(result)
      .satisfies(
          input -> {
            for (HL7v2Message elem : input) {
              assertEquals("ADT", elem.getMessageType());
            }
            return null;
          });

  pipeline.run();
}
 
Example #23
Source File: CombineLoadTest.java    From beam with Apache License 2.0
public PTransform<PCollection<KV<byte[], Long>>, ? extends PCollection> getPerKeyCombiner(
    CombinerType combinerType) {
  switch (combinerType) {
    case MEAN:
      return Mean.perKey();
    case TOP_LARGEST:
      Preconditions.checkArgument(
          options.getTopCount() != null,
          "You should set \"--topCount\" option to use TOP combiners.");
      return Top.largestPerKey(options.getTopCount());
    case SUM:
      return Sum.longsPerKey();
    case COUNT:
      return Count.perKey();
    default:
      throw new IllegalArgumentException("No such combiner!");
  }
}
 
Example #24
Source File: FhirIOTest.java    From beam with Apache License 2.0
@Test
public void test_FhirIO_failedWrites() {
  String badBundle = "bad";
  List<String> emptyMessages = Collections.singletonList(badBundle);

  PCollection<String> fhirBundles = pipeline.apply(Create.of(emptyMessages));

  FhirIO.Write.Result writeResult =
      fhirBundles.apply(
          FhirIO.Write.executeBundles(
              "projects/foo/locations/us-central1/datasets/bar/hl7V2Stores/baz"));

  PCollection<HealthcareIOError<String>> failedInserts = writeResult.getFailedBodies();

  PAssert.thatSingleton(failedInserts)
      .satisfies(
          (HealthcareIOError<String> err) -> {
            Assert.assertEquals("bad", err.getDataResource());
            return null;
          });
  PCollection<Long> numFailedInserts = failedInserts.apply(Count.globally());

  PAssert.thatSingleton(numFailedInserts).isEqualTo(1L);

  pipeline.run();
}
 
Example #25
Source File: HL7v2IOTest.java    From beam with Apache License 2.0
@Test
public void test_HL7v2IO_failedWrites() {
  Message msg = new Message().setData("");
  List<HL7v2Message> emptyMessages = Collections.singletonList(HL7v2Message.fromModel(msg));

  PCollection<HL7v2Message> messages =
      pipeline.apply(Create.of(emptyMessages).withCoder(HL7v2MessageCoder.of()));

  HL7v2IO.Write.Result writeResult =
      messages.apply(
          HL7v2IO.ingestMessages(
              "projects/foo/locations/us-central1/datasets/bar/hl7V2Stores/baz"));

  PCollection<HealthcareIOError<HL7v2Message>> failedInserts =
      writeResult.getFailedInsertsWithErr();

  PCollection<Long> failedMsgs = failedInserts.apply(Count.globally());

  PAssert.thatSingleton(failedMsgs).isEqualTo(1L);

  pipeline.run();
}
 
Example #26
Source File: WindowTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindowsKeyedCollection() {
  Instant startInstant = new Instant(0L);
  PCollection<KV<Integer, String>> inputCollection =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(
                  KV.of(0, "big"), startInstant.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(
                  KV.of(1, "small1"), startInstant.plus(Duration.standardSeconds(20))),
              // This element is not contained within the bigWindow and is not merged.
              TimestampedValue.of(
                  KV.of(2, "small2"), startInstant.plus(Duration.standardSeconds(39)))));
  PCollection<KV<Integer, String>> windowedCollection =
      inputCollection.apply(Window.into(new CustomWindowFn<>()));
  PCollection<Long> count =
      windowedCollection.apply(
          Combine.globally(Count.<KV<Integer, String>>combineFn()).withoutDefaults());
  // "small1" and "big" elements merged into bigWindow "small2" not merged
  // because it is not contained in bigWindow
  PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
  pipeline.run();
}
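
A note on this example: Count.combineFn() is the CombineFn that backs Count.globally(). It is wrapped in Combine.globally(...).withoutDefaults() because a global combine normally emits a default value (a count of 0) for empty input, and that default can only be produced in the single global window; combines over non-global or custom merging windows, as here, must drop it with withoutDefaults().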
 
Example #27
Source File: V1ReadIT.java    From beam with Apache License 2.0
/**
 * An end-to-end test for {@link DatastoreV1.Read#withQuery(Query)}
 *
 * <p>Write some test entities to datastore and then run a pipeline that reads and counts the
 * total number of entities. Verify that the count matches the number of entities written.
 */
@Test
public void testE2EV1Read() throws Exception {
  // Read from datastore
  Query query =
      V1TestUtil.makeAncestorKindQuery(options.getKind(), options.getNamespace(), ancestor);

  DatastoreV1.Read read =
      DatastoreIO.v1()
          .read()
          .withProjectId(project)
          .withQuery(query)
          .withNamespace(options.getNamespace());

  // Count the total number of entities
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count = p.apply(read).apply(Count.globally());

  PAssert.thatSingleton(count).isEqualTo(numEntities);
  p.run();
}
 
Example #28
Source File: GroupTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testGlobalAggregation() {
  Collection<Basic> elements =
      ImmutableList.of(
          Basic.of("key1", 1, "value1"),
          Basic.of("key1", 1, "value2"),
          Basic.of("key2", 2, "value3"),
          Basic.of("key2", 2, "value4"));
  PCollection<Long> count =
      pipeline
          .apply(Create.of(elements))
          .apply(Group.<Basic>globally().aggregate(Count.combineFn()));
  PAssert.that(count).containsInAnyOrder(4L);

  pipeline.run();
}
 
Example #29
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testBoundedToUnboundedSourceAdapter() throws Exception {
  long numElements = 100;
  BoundedSource<Long> boundedSource = CountingSource.upTo(numElements);
  UnboundedSource<Long, Checkpoint<Long>> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);

  PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements));

  // Count == numElements
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements);
  // Unique count == numElements
  PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally()))
      .isEqualTo(numElements);
  // Min == 0
  PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L);
  // Max == numElements-1
  PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1);
  p.run();
}
 
Example #30
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
/**
 * Tests that data is read successfully from the embedded Elasticsearch instance when a query is
 * supplied.
 */
@Test
public void testHifIOWithElasticQuery() {
  long expectedRowCount = 1L;
  String expectedHashCode = "cfbf3e5c993d44e57535a114e25f782d";
  Configuration conf = getConfiguration();
  String fieldValue = ELASTIC_TYPE_ID_PREFIX + "2";
  String query =
      "{"
          + "  \"query\": {"
          + "  \"match\" : {"
          + "    \"id\" : {"
          + "      \"query\" : \""
          + fieldValue
          + "\","
          + "      \"type\" : \"boolean\""
          + "    }"
          + "  }"
          + "  }"
          + "}";
  conf.set(ConfigurationOptions.ES_QUERY, query);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}