org.apache.beam.sdk.transforms.Count Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.Count.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BigtableReadIT.java From beam with Apache License 2.0 | 6 votes |
/**
 * End-to-end read test: counts every row read from the {@code BigtableReadTest} table and asserts
 * that the total equals the expected number of rows.
 */
@Test
public void testE2EBigtableRead() throws Exception {
  PipelineOptionsFactory.register(BigtableTestOptions.class);
  BigtableTestOptions testOptions =
      TestPipeline.testingPipelineOptions().as(BigtableTestOptions.class);

  // Use the general GCP project when no Bigtable-specific project was configured.
  String projectId = testOptions.getBigtableProject();
  if (projectId.equals("")) {
    projectId = testOptions.as(GcpOptions.class).getProject();
  }

  BigtableOptions.Builder optionsBuilder =
      new BigtableOptions.Builder()
          .setProjectId(projectId)
          .setInstanceId(testOptions.getInstanceId());

  final String sourceTable = "BigtableReadTest";
  final long expectedRows = 1000L;

  Pipeline readPipeline = Pipeline.create(testOptions);
  PCollection<Long> rowCount =
      readPipeline
          .apply(BigtableIO.read().withBigtableOptions(optionsBuilder).withTableId(sourceTable))
          .apply(Count.globally());
  PAssert.thatSingleton(rowCount).isEqualTo(expectedRows);

  readPipeline.run();
}
Example #2
Source File: HadoopFormatIOElasticIT.java From beam with Apache License 2.0 | 6 votes |
/** * This test reads data from the Elasticsearch instance and verifies whether data is read * successfully. */ @Test public void testHifIOWithElastic() throws SecurityException { // Expected hashcode is evaluated during insertion time one time and hardcoded here. final long expectedRowCount = 1000L; String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392"; Configuration conf = getConfiguration(options); PCollection<KV<Text, LinkedMapWritable>> esData = pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf)); // Verify that the count of objects fetched using HIFInputFormat IO is correct. PCollection<Long> count = esData.apply(Count.globally()); PAssert.thatSingleton(count).isEqualTo(expectedRowCount); PCollection<LinkedMapWritable> values = esData.apply(Values.create()); PCollection<String> textValues = values.apply(transformFunc); // Verify the output values using checksum comparison. PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults()); PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode); pipeline.run().waitUntilFinish(); }
Example #3
Source File: SqsIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a pipeline that reads at most 100 records from the embedded SQS queue, publishes 100
 * messages to that queue, then runs the pipeline and expects a global count of 100.
 */
@Test
public void testRead() {
  final SqsClient sqs = EmbeddedSqsServer.getClient();
  final String url = EmbeddedSqsServer.getQueueUrl();

  final PCollection<SqsMessage> received =
      pipeline.apply(
          SqsIO.read()
              .withSqsClientProvider(SqsClientProviderMock.of(sqs))
              .withQueueUrl(url)
              .withMaxNumRecords(100));
  PAssert.thatSingleton(received.apply(Count.globally())).isEqualTo(100L);

  // The queue is filled after the pipeline is constructed but before it is run.
  for (int sent = 0; sent < 100; sent++) {
    SendMessageRequest request =
        SendMessageRequest.builder().queueUrl(url).messageBody("This is a test").build();
    sqs.sendMessage(request);
  }

  pipeline.run();
}
Example #4
Source File: RedisIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Stores ten generated key/value pairs, then checks that reading with the key pattern
 * {@code pattern*} returns exactly those pairs while the pattern {@code foobar*} returns nothing.
 */
@Test
public void testReadWithKeyPattern() {
  List<KV<String, String>> stored = buildIncrementalData("pattern", 10);
  for (KV<String, String> entry : stored) {
    client.set(entry.getKey(), entry.getValue());
  }

  PCollection<KV<String, String>> matching =
      p.apply("Read", RedisIO.read().withEndpoint(REDIS_HOST, port).withKeyPattern("pattern*"));
  PAssert.that(matching).containsInAnyOrder(stored);

  PCollection<KV<String, String>> nonMatching =
      p.apply(
          "ReadNotMatch",
          RedisIO.read().withEndpoint(REDIS_HOST, port).withKeyPattern("foobar*"));
  PAssert.thatSingleton(nonMatching.apply(Count.globally())).isEqualTo(0L);

  p.run();
}
Example #5
Source File: SnsIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes two messages through {@code SnsIO.write()} backed by a success-mocking client and
 * asserts that two publish responses are produced.
 */
@Test
public void testDataWritesToSNS() {
  ImmutableList<String> messages = ImmutableList.of("message1", "message2");

  final PCollection<PublishResponse> responses =
      p.apply(Create.of(messages))
          .apply(
              SnsIO.<String>write()
                  .withPublishRequestFn(SnsIOTest::createSampleMessage)
                  .withTopicArn(topicArn)
                  .withRetryConfiguration(
                      SnsIO.RetryConfiguration.create(
                          5, org.joda.time.Duration.standardMinutes(1)))
                  .withSnsClientProvider(SnsClientMockSuccess::new));

  final PCollection<Long> responseCount = responses.apply(Count.globally());
  PAssert.that(responseCount).containsInAnyOrder(ImmutableList.of(2L));

  p.run().waitUntilFinish();
}
Example #6
Source File: PubsubToPubsubTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Verifies that, when the filter function is built without any filter settings, the number of
 * messages that flow through equals the size of {@code allTestMessages}.
 */
@Test
@Category(NeedsRunner.class)
public void testNoInputFilterProvided() {
  PubsubToPubsub.Options templateOptions =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);

  PCollection<Long> passedThrough =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(ParDo.of(ExtractAndFilterEventsFn.newBuilder().build()))
          .apply(Count.globally());

  PAssert.thatSingleton(passedThrough).isEqualTo(Long.valueOf(allTestMessages.size()));

  pipeline.run(templateOptions);
}
Example #7
Source File: DatastoreConverters.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Reads entities from Datastore using the configured project, GQL query and namespace, converts
 * each entity to a schema JSON string, counts how often each distinct schema occurs, and emits
 * one JSON object per schema with the properties {@code schema} and {@code count}.
 *
 * @param begin the pipeline start to attach the read to
 * @return a collection of JSON strings, one per distinct schema
 */
@Override
public PCollection<String> expand(PBegin begin) {
  // Turns each (schema, occurrences) pair into its JSON string form.
  DoFn<KV<String, Long>, String> toJson =
      new DoFn<KV<String, Long>, String>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          JsonObject json = new JsonObject();
          json.addProperty("schema", context.element().getKey());
          json.addProperty("count", context.element().getValue());
          context.output(json.toString());
        }
      };

  return begin
      .apply(
          "ReadFromDatastore",
          DatastoreIO.v1()
              .read()
              .withProjectId(projectId())
              .withLiteralGqlQuery(gqlQuery())
              .withNamespace(namespace()))
      .apply("ParseEntitySchema", ParDo.of(new EntityToSchemaJson()))
      .apply("CountUniqueSchemas", Count.<String>perElement())
      .apply("Jsonify", ParDo.of(toJson));
}
Example #8
Source File: AmqpIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a pipeline reading at most 100 records from the {@code testRead} queue of the broker,
 * sends 100 messages to that queue, then runs the pipeline and expects a global count of 100.
 */
@Test
public void testRead() throws Exception {
  PCollection<Message> received =
      pipeline.apply(
          AmqpIO.read()
              .withMaxNumRecords(100)
              .withAddresses(Collections.singletonList(broker.getQueueUri("testRead"))));
  PAssert.thatSingleton(received.apply(Count.globally())).isEqualTo(100L);

  // Publish the test messages before the pipeline is run.
  Messenger messenger = Messenger.Factory.create();
  messenger.start();
  for (int index = 0; index < 100; index++) {
    Message outgoing = Message.Factory.create();
    outgoing.setAddress(broker.getQueueUri("testRead"));
    outgoing.setBody(new AmqpValue("Test " + index));
    messenger.put(outgoing);
    messenger.send();
  }
  messenger.stop();

  pipeline.run();
}
Example #9
Source File: ElasticsearchIOTestCommon.java From beam with Apache License 2.0 | 6 votes |
void testRead() throws Exception { if (!useAsITests) { ElasticsearchIOTestUtils.insertTestDocuments(connectionConfiguration, numDocs, restClient); } PCollection<String> output = pipeline.apply( ElasticsearchIO.read() .withConnectionConfiguration(connectionConfiguration) // set to default value, useful just to test parameter passing. .withScrollKeepalive("5m") // set to default value, useful just to test parameter passing. .withBatchSize(100L)); PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numDocs); pipeline.run(); }
Example #10
Source File: MongoDbIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads the whole collection and asserts two things: the total document count is 1000, and
 * every per-key count produced after mapping with {@code DocumentToKVFn} is 100.
 */
@Test
public void testFullRead() {
  PCollection<Document> documents =
      pipeline.apply(
          MongoDbIO.read()
              .withUri("mongodb://localhost:" + port)
              .withDatabase(DATABASE)
              .withCollection(COLLECTION));

  PAssert.thatSingleton(documents.apply("Count All", Count.globally())).isEqualTo(1000L);

  PAssert.that(
          documents
              .apply("Map Scientist", MapElements.via(new DocumentToKVFn()))
              .apply("Count Scientist", Count.perKey()))
      .satisfies(
          perKeyCounts -> {
            perKeyCounts.forEach(
                keyCount -> assertEquals(100L, keyCount.getValue().longValue()));
            return null;
          });

  pipeline.run();
}
Example #11
Source File: HadoopFormatIOElasticTest.java From beam with Apache License 2.0 | 6 votes |
/** * Test to read data from embedded Elasticsearch instance and verify whether data is read * successfully. */ @Test public void testHifIOWithElastic() { // Expected hashcode is evaluated during insertion time one time and hardcoded here. String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc"; Configuration conf = getConfiguration(); PCollection<KV<Text, LinkedMapWritable>> esData = pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf)); PCollection<Long> count = esData.apply(Count.globally()); // Verify that the count of objects fetched using HIFInputFormat IO is correct. PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT); PCollection<LinkedMapWritable> values = esData.apply(Values.create()); PCollection<String> textValues = values.apply(transformFunc); // Verify the output values using checksum comparison. PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults()); PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode); pipeline.run().waitUntilFinish(); }
Example #12
Source File: PubsubToPubsubTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Verifies that, when a filter key and value are provided, the number of messages that flow
 * through equals the size of {@code goodTestMessages}.
 */
@Test
@Category(NeedsRunner.class)
public void testInputFilterProvided() {
  PubsubToPubsub.Options templateOptions =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);

  PCollection<Long> passedThrough =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(
              ParDo.of(
                  ExtractAndFilterEventsFn.newBuilder()
                      .withFilterKey(templateOptions.getFilterKey())
                      .withFilterValue(templateOptions.getFilterValue())
                      .build()))
          .apply(Count.globally());

  PAssert.thatSingleton(passedThrough).isEqualTo(Long.valueOf(goodTestMessages.size()));

  // NOTE(review): the filter options are assigned only after the transform above has been
  // built from the getters; presumably the providers are resolved at run time -- confirm
  // before reordering these statements.
  templateOptions.setFilterKey(ValueProvider.StaticValueProvider.of(FILTER_KEY));
  templateOptions.setFilterValue(ValueProvider.StaticValueProvider.of(FILTER_VALUE));

  pipeline.run(templateOptions);
}
Example #13
Source File: MongoDBGridFSIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads everything from the GridFS database and asserts two things: the total number of
 * elements is 5000, and every per-element count is 500.
 */
@Test
public void testFullRead() {
  PCollection<String> contents =
      pipeline.apply(
          MongoDbGridFSIO.read().withUri("mongodb://localhost:" + port).withDatabase(DATABASE));

  PAssert.thatSingleton(contents.apply("Count All", Count.globally())).isEqualTo(5000L);

  PAssert.that(contents.apply("Count PerElement", Count.perElement()))
      .satisfies(
          occurrences -> {
            occurrences.forEach(
                occurrence -> assertEquals(500L, occurrence.getValue().longValue()));
            return null;
          });

  pipeline.run();
}
Example #14
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads {@code name,id} rows from the read table through {@code JdbcIO.read()} and asserts
 * both the row count and the exact set of rows against the expected values.
 */
@Test
public void testRead() {
  PCollection<TestRow> fetched =
      pipeline.apply(
          JdbcIO.<TestRow>read()
              .withFetchSize(12)
              .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
              .withQuery("select name,id from " + readTableName)
              .withRowMapper(new JdbcTestHelper.CreateTestRowOfNameAndId())
              .withCoder(SerializableCoder.of(TestRow.class)));

  PAssert.thatSingleton(fetched.apply("Count All", Count.globally()))
      .isEqualTo((long) EXPECTED_ROW_COUNT);

  Iterable<TestRow> expectedRows = TestRow.getExpectedValues(0, EXPECTED_ROW_COUNT);
  PAssert.that(fetched).containsInAnyOrder(expectedRows);

  pipeline.run();
}
Example #15
Source File: DynamoDBIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes generated write requests through {@code DynamoDBIO.write()} and asserts that the
 * global count of the write transform's output collection is 0.
 */
@Test
public void testWriteDataToDynamo() {
  final List<WriteRequest> requests = DynamoDBIOTestHelper.generateWriteRequests(numOfItems);

  final PCollection<Void> writeOutput =
      pipeline
          .apply(Create.of(requests))
          .apply(
              DynamoDBIO.<WriteRequest>write()
                  .withWriteRequestMapperFn(
                      (SerializableFunction<WriteRequest, KV<String, WriteRequest>>)
                          request -> KV.of(tableName, request))
                  .withRetryConfiguration(
                      DynamoDBIO.RetryConfiguration.create(5, Duration.standardMinutes(1)))
                  .withAwsClientsProvider(
                      AwsClientsProviderMock.of(DynamoDBIOTestHelper.getDynamoDBClient())));

  final PCollection<Long> outputCount = writeOutput.apply(Count.globally());
  PAssert.that(outputCount).containsInAnyOrder(0L);

  pipeline.run().waitUntilFinish();
}
Example #16
Source File: SnsIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Publishes a single request through {@code SnsIO.write()} configured with a {@code MockCoder},
 * asserts that one result is produced, and then checks that the coder's {@code captured} field
 * is not null after the pipeline has finished.
 */
@Test
public void testCustomCoder() throws Exception {
  final PublishRequest request = createSampleMessage("my_first_message");
  final TupleTag<PublishResult> resultTag = new TupleTag<>();
  final AmazonSNS snsClient = getAmazonSnsMockSuccess();
  final MockCoder coder = new MockCoder();

  final PCollectionTuple writeOutputs =
      p.apply(Create.of(request))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withAWSClientsProvider(new Provider(snsClient))
                  .withResultOutputTag(resultTag)
                  .withCoder(coder));

  final PCollection<Long> resultCount =
      writeOutputs
          .get(resultTag)
          .apply(
              MapElements.into(TypeDescriptors.strings())
                  .via(published -> published.getMessageId()))
          .apply(Count.globally());
  PAssert.that(resultCount).containsInAnyOrder(ImmutableList.of(1L));

  p.run().waitUntilFinish();

  assertThat(coder.captured).isNotNull();
}
Example #17
Source File: SnsIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Publishes two requests through {@code SnsIO.write()} backed by a success-mocking SNS client
 * and asserts that two results arrive on the tagged result output.
 */
@Test
public void testDataWritesToSNS() {
  final PublishRequest firstRequest = createSampleMessage("my_first_message");
  final PublishRequest secondRequest = createSampleMessage("my_second_message");
  final TupleTag<PublishResult> resultTag = new TupleTag<>();
  final AmazonSNS snsClient = getAmazonSnsMockSuccess();

  final PCollectionTuple writeOutputs =
      p.apply(Create.of(firstRequest, secondRequest))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withRetryConfiguration(
                      SnsIO.RetryConfiguration.create(
                          5, org.joda.time.Duration.standardMinutes(1)))
                  .withAWSClientsProvider(new Provider(snsClient))
                  .withResultOutputTag(resultTag));

  final PCollection<Long> resultCount = writeOutputs.get(resultTag).apply(Count.globally());
  PAssert.that(resultCount).containsInAnyOrder(ImmutableList.of(2L));

  p.run().waitUntilFinish();
}
Example #18
Source File: ImmutabilityEnforcementFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Prepares the fixtures: the enforcement factory, the bundle factory, a byte-array collection
 * produced by a DoFn that overwrites the first byte of each input element, and the producer of
 * a downstream {@code Count.globally()} that consumes that collection.
 */
@Before
public void setup() {
  factory = new ImmutabilityEnforcementFactory();
  bundleFactory = ImmutableListBundleFactory.create();

  pcollection =
      p.apply(Create.of("foo".getBytes(UTF_8), "spamhameggs".getBytes(UTF_8)))
          .apply(
              ParDo.of(
                  new DoFn<byte[], byte[]>() {
                    @ProcessElement
                    public void processElement(ProcessContext context) throws Exception {
                      // Mutates the input element in place.
                      context.element()[0] = 'b';
                    }
                  }));

  PCollection<Long> downstreamCount = pcollection.apply(Count.globally());
  DirectGraphs.performDirectOverrides(p);
  this.consumer = DirectGraphs.getProducer(downstreamCount);
}
Example #19
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies {@code Count.perElement()} to four timestamped strings windowed into sliding windows
 * of 2 ms that start every 1 ms, and asserts the expected per-window element counts.
 */
@Test
public void testCountPerElementWithSlidingWindows() {
  PCollection<String> windowed =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(1)),
                  TimestampedValue.of("a", new Instant(2)),
                  TimestampedValue.of("b", new Instant(3)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply(Window.into(SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1))));

  PCollection<KV<String, Long>> counts = windowed.apply(Count.perElement());

  PAssert.that(counts)
      .containsInAnyOrder(
          KV.of("a", 1L),
          KV.of("a", 2L),
          KV.of("a", 1L),
          KV.of("b", 1L),
          KV.of("b", 2L),
          KV.of("b", 1L));

  pipeline.run();
}
Example #20
Source File: HadoopFormatIOCassandraTest.java From beam with Apache License 2.0 | 6 votes |
/** * Test to read data from embedded Cassandra instance and verify whether data is read * successfully. */ @Test public void testHIFReadForCassandra() { // Expected hashcode is evaluated during insertion time one time and hardcoded here. String expectedHashCode = "1b9780833cce000138b9afa25ba63486"; Configuration conf = getConfiguration(); PCollection<KV<Long, String>> cassandraData = p.apply( HadoopFormatIO.<Long, String>read() .withConfiguration(conf) .withValueTranslation(myValueTranslate)); // Verify the count of data retrieved from Cassandra matches expected count. PAssert.thatSingleton(cassandraData.apply("Count", Count.globally())) .isEqualTo(TEST_DATA_ROW_COUNT); PCollection<String> textValues = cassandraData.apply(Values.create()); // Verify the output values using checksum comparison. PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults()); PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode); p.run().waitUntilFinish(); }
Example #21
Source File: SparkSimpleFileIOInputRuntimeTestIT.java From components with Apache License 2.0 | 6 votes |
/** * Demonstration using the {@link SparkIntegrationTestResource}. */ @Category(ValidatesRunner.class) @Test public void testSparkIntegrationTestResource() throws IOException { // Use the resource to create the pipeline. final Pipeline p = spark.createPipeline(); // The pipeline transformations to test. PCollection<String> input = p.apply("create", Create.of("a a", "b c", "a a c")); input = input.apply("tokenize", ParDo.of(new ExtractWord())); PCollection<KV<String, Long>> counts = input.apply("count", Count.<String> perElement()); // Check the expected results in the pipeline itself. PAssert.that(counts).containsInAnyOrder(KV.of("a", 4L), KV.of("b", 1L), KV.of("c", 2L)); // Go! p.run().waitUntilFinish(); }
Example #22
Source File: HL7v2IOReadIT.java From beam with Apache License 2.0 | 6 votes |
@Test public void testHL7v2IO_ListHL7v2Messages_filtered() throws Exception { final String adtFilter = "messageType = \"ADT\""; // Should read all messages. Pipeline pipeline = Pipeline.create(); PCollection<HL7v2Message> result = pipeline.apply( HL7v2IO.readWithFilter( healthcareDataset + "/hl7V2Stores/" + HL7V2_STORE_NAME, adtFilter)); PCollection<Long> numReadMessages = result.setCoder(HL7v2MessageCoder.of()).apply(Count.globally()); PAssert.thatSingleton(numReadMessages).isEqualTo(NUM_ADT); PAssert.that(result) .satisfies( input -> { for (HL7v2Message elem : input) { assertEquals("ADT", elem.getMessageType()); } return null; }); pipeline.run(); }
Example #23
Source File: CombineLoadTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Selects the per-key combining transform that corresponds to the requested combiner type.
 *
 * @param combinerType which combiner to build
 * @return the per-key transform for {@code combinerType}
 * @throws IllegalArgumentException if the type is not one of the handled cases, or if
 *     {@code TOP_LARGEST} is requested while the {@code topCount} option is unset
 */
public PTransform<PCollection<KV<byte[], Long>>, ? extends PCollection> getPerKeyCombiner(
    CombinerType combinerType) {
  switch (combinerType) {
    case MEAN:
      return Mean.perKey();

    case TOP_LARGEST:
      // TOP combiners need to know how many elements to keep per key.
      Preconditions.checkArgument(
          options.getTopCount() != null,
          "You should set \"--topCount\" option to use TOP combiners.");
      return Top.largestPerKey(options.getTopCount());

    case SUM:
      return Sum.longsPerKey();

    case COUNT:
      return Count.perKey();

    default:
      throw new IllegalArgumentException("No such combiner!");
  }
}
Example #24
Source File: FhirIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Submits a single bundle with the body {@code "bad"} through
 * {@code FhirIO.Write.executeBundles} and asserts that exactly one failed body is reported and
 * that its data resource is {@code "bad"}.
 */
@Test
public void test_FhirIO_failedWrites() {
  String invalidBundle = "bad";
  List<String> bundles = Collections.singletonList(invalidBundle);

  PCollection<String> bundleInput = pipeline.apply(Create.of(bundles));

  FhirIO.Write.Result result =
      bundleInput.apply(
          FhirIO.Write.executeBundles(
              "projects/foo/locations/us-central1/datasets/bar/hl7V2Stores/baz"));

  PCollection<HealthcareIOError<String>> failures = result.getFailedBodies();

  PAssert.thatSingleton(failures)
      .satisfies(
          (HealthcareIOError<String> failure) -> {
            Assert.assertEquals("bad", failure.getDataResource());
            return null;
          });

  PCollection<Long> failureCount = failures.apply(Count.globally());
  PAssert.thatSingleton(failureCount).isEqualTo(1L);

  pipeline.run();
}
Example #25
Source File: HL7v2IOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Ingests a single message whose data is the empty string through
 * {@code HL7v2IO.ingestMessages} and asserts that exactly one failed insert is reported.
 */
@Test
public void test_HL7v2IO_failedWrites() {
  Message emptyModel = new Message().setData("");
  List<HL7v2Message> toIngest = Collections.singletonList(HL7v2Message.fromModel(emptyModel));

  PCollection<HL7v2Message> input =
      pipeline.apply(Create.of(toIngest).withCoder(new HL7v2MessageCoder()));

  HL7v2IO.Write.Result result =
      input.apply(
          HL7v2IO.ingestMessages(
              "projects/foo/locations/us-central1/datasets/bar/hl7V2Stores/baz"));

  PCollection<HealthcareIOError<HL7v2Message>> failures = result.getFailedInsertsWithErr();

  PCollection<Long> failureCount = failures.apply(Count.globally());
  PAssert.thatSingleton(failureCount).isEqualTo(1L);

  pipeline.run();
}
Example #26
Source File: WindowTest.java From beam with Apache License 2.0 | 6 votes |
@Test @Category({ValidatesRunner.class, UsesCustomWindowMerging.class}) public void testMergingCustomWindowsKeyedCollection() { Instant startInstant = new Instant(0L); PCollection<KV<Integer, String>> inputCollection = pipeline.apply( Create.timestamped( TimestampedValue.of( KV.of(0, "big"), startInstant.plus(Duration.standardSeconds(10))), TimestampedValue.of( KV.of(1, "small1"), startInstant.plus(Duration.standardSeconds(20))), // This element is not contained within the bigWindow and not merged TimestampedValue.of( KV.of(2, "small2"), startInstant.plus(Duration.standardSeconds(39))))); PCollection<KV<Integer, String>> windowedCollection = inputCollection.apply(Window.into(new CustomWindowFn<>())); PCollection<Long> count = windowedCollection.apply( Combine.globally(Count.<KV<Integer, String>>combineFn()).withoutDefaults()); // "small1" and "big" elements merged into bigWindow "small2" not merged // because it is not contained in bigWindow PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L); pipeline.run(); }
Example #27
Source File: V1ReadIT.java From beam with Apache License 2.0 | 6 votes |
/** * An end-to-end test for {@link DatastoreV1.Read#withQuery(Query)} * * <p>Write some test entities to datastore and then run a pipeline that reads and counts the * total number of entities. Verify that the count matches the number of entities written. */ @Test public void testE2EV1Read() throws Exception { // Read from datastore Query query = V1TestUtil.makeAncestorKindQuery(options.getKind(), options.getNamespace(), ancestor); DatastoreV1.Read read = DatastoreIO.v1() .read() .withProjectId(project) .withQuery(query) .withNamespace(options.getNamespace()); // Count the total number of entities Pipeline p = Pipeline.create(options); PCollection<Long> count = p.apply(read).apply(Count.globally()); PAssert.thatSingleton(count).isEqualTo(numEntities); p.run(); }
Example #28
Source File: GroupTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Aggregates four {@code Basic} elements globally with {@code Count.combineFn()} and asserts
 * that the resulting count is 4.
 */
@Test
@Category(NeedsRunner.class)
public void testGlobalAggregation() {
  Collection<Basic> rows =
      ImmutableList.of(
          Basic.of("key1", 1, "value1"),
          Basic.of("key1", 1, "value2"),
          Basic.of("key2", 2, "value3"),
          Basic.of("key2", 2, "value4"));

  PCollection<Long> total =
      pipeline
          .apply(Create.of(rows))
          .apply(Group.<Basic>globally().aggregate(Count.combineFn()));

  PAssert.that(total).containsInAnyOrder(4L);

  pipeline.run();
}
Example #29
Source File: UnboundedReadFromBoundedSourceTest.java From beam with Apache License 2.0 | 6 votes |
@Test @Category(NeedsRunner.class) public void testBoundedToUnboundedSourceAdapter() throws Exception { long numElements = 100; BoundedSource<Long> boundedSource = CountingSource.upTo(numElements); UnboundedSource<Long, Checkpoint<Long>> unboundedSource = new BoundedToUnboundedSourceAdapter<>(boundedSource); PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements)); // Count == numElements PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements); // Unique count == numElements PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally())) .isEqualTo(numElements); // Min == 0 PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L); // Max == numElements-1 PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1); p.run(); }
Example #30
Source File: HadoopFormatIOElasticTest.java From beam with Apache License 2.0 | 5 votes |
/** * Test to read data from embedded Elasticsearch instance based on query and verify whether data * is read successfully. */ @Test public void testHifIOWithElasticQuery() { long expectedRowCount = 1L; String expectedHashCode = "cfbf3e5c993d44e57535a114e25f782d"; Configuration conf = getConfiguration(); String fieldValue = ELASTIC_TYPE_ID_PREFIX + "2"; String query = "{" + " \"query\": {" + " \"match\" : {" + " \"id\" : {" + " \"query\" : \"" + fieldValue + "\"," + " \"type\" : \"boolean\"" + " }" + " }" + " }" + "}"; conf.set(ConfigurationOptions.ES_QUERY, query); PCollection<KV<Text, LinkedMapWritable>> esData = pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf)); PCollection<Long> count = esData.apply(Count.globally()); // Verify that the count of objects fetched using HIFInputFormat IO is correct. PAssert.thatSingleton(count).isEqualTo(expectedRowCount); PCollection<LinkedMapWritable> values = esData.apply(Values.create()); PCollection<String> textValues = values.apply(transformFunc); // Verify the output values using checksum comparison. PCollection<String> consolidatedHashcode = textValues.apply(Combine.globally(new HashingFn()).withoutDefaults()); PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode); pipeline.run().waitUntilFinish(); }