org.apache.beam.sdk.testing.PAssert Java Examples
The following examples show how to use org.apache.beam.sdk.testing.PAssert.
The originating project, source file, and license are noted above each example.
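Before the examples, here is a minimal, self-contained sketch of the pattern they all follow: build a pipeline with TestPipeline, apply transforms, declare expectations with PAssert, then run. This sketch is illustrative only and is not drawn from any of the projects below.

import java.util.Arrays;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;

public class PAssertSketchTest {

  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

  @Test
  public void testCountPerElement() {
    PCollection<KV<String, Long>> counts =
        pipeline
            .apply(Create.of(Arrays.asList("a", "b", "a")))
            .apply(Count.perElement());

    // Expectations are declared up front and verified when the pipeline runs.
    PAssert.that(counts).containsInAnyOrder(KV.of("a", 2L), KV.of("b", 1L));

    pipeline.run().waitUntilFinish();
  }
}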
Example #1
Source File: JavascriptTextTransformerTest.java (from DataflowTemplates, Apache License 2.0)
/**
 * Tests that {@link TransformTextViaJavascript} passes data through unchanged when given null
 * ValueProviders as args, i.e. when hasInvocable returns false.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnPassthroughNullValueProvider() {
  List<String> inJson = Arrays.asList("{\"answerToLife\": 42}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFunctionName(null)
                  .setFileSystemPath(null)
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(inJson);

  pipeline.run();
}
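Like most snippets on this page, the test above references a pipeline field that the listing does not show. The conventional declaration, assumed here since the surrounding class is omitted, is a JUnit rule (some classes below name it p instead):

  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

The NeedsRunner and ValidatesRunner values seen in @Category annotations come from the same org.apache.beam.sdk.testing package; they mark tests that require an actual pipeline runner to execute.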
Example #2
Source File: PubsubToAvroTest.java (from DataflowTemplates, Apache License 2.0)
/** Test {@link AvroPubsubMessageRecord} correctly maps the message. */
@Test
@Category(NeedsRunner.class)
public void testPubsubMessageToArchive() throws Exception {
  // Create the test input.
  byte[] payload = "Laces out Dan!".getBytes();
  Map<String, String> attributes = ImmutableMap.of("id", "Ace");
  PubsubMessage message = new PubsubMessage(payload, attributes);
  Instant timestamp = Instant.now();

  // Apply the ParDo.
  PCollection<AvroPubsubMessageRecord> results =
      pipeline
          .apply(Create.timestamped(TimestampedValue.of(message, timestamp)))
          .apply(ParDo.of(new PubsubMessageToArchiveDoFn()));

  // Assert on the results.
  PAssert.that(results)
      .containsInAnyOrder(
          new AvroPubsubMessageRecord(payload, attributes, timestamp.getMillis()));

  // Run the pipeline.
  pipeline.run();
}
Example #3
Source File: FileBasedDeadLetterQueueReconsumerTest.java (from DataflowTemplates, Apache License 2.0)
@Test
public void testAllFilesAreConsumed() throws IOException {
  TestStream<String> inputFiles =
      TestStream.create(StringUtf8Coder.of())
          .addElements(
              createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1),
              createJsonFile("dlqFile2.json", JSON_FILE_CONTENTS_1))
          .addElements(createJsonFile("dlqFile3.json", JSON_FILE_CONTENTS_1))
          .advanceWatermarkToInfinity();

  PCollection<String> jsonData =
      p.apply(inputFiles)
          .apply(FileIO.matchAll())
          .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());

  PAssert.that(jsonData)
      .containsInAnyOrder(
          Stream.of(JSON_FILE_CONTENTS_1)
              .flatMap(line -> Stream.of(line, line, line))
              .collect(Collectors.toList()));

  p.run().waitUntilFinish();
}
Example #4
Source File: DecryptPioneerPayloadsTest.java (from gcp-ingestion, Mozilla Public License 2.0)
@Test
public void testOutput() throws Exception {
  // minimal test for throughput of a single document
  ValueProvider<String> metadataLocation =
      pipeline.newProvider(Resources.getResource("pioneer/metadata-local.json").getPath());
  ValueProvider<Boolean> kmsEnabled = pipeline.newProvider(false);
  ValueProvider<Boolean> decompressPayload = pipeline.newProvider(true);

  final List<String> input = readTestFiles(Arrays.asList("pioneer/study-foo.ciphertext.json"));
  PCollection<String> output =
      pipeline.apply(Create.of(input))
          .apply(InputFileFormat.text.decode())
          .apply("AddAttributes",
              MapElements.into(TypeDescriptor.of(PubsubMessage.class))
                  .via(element -> new PubsubMessage(element.getPayload(),
                      ImmutableMap.of(Attribute.DOCUMENT_NAMESPACE, "telemetry",
                          Attribute.DOCUMENT_TYPE, "pioneer-study",
                          Attribute.DOCUMENT_VERSION, "4"))))
          .apply(DecryptPioneerPayloads.of(metadataLocation, kmsEnabled, decompressPayload))
          .output()
          .apply(OutputFileFormat.text.encode())
          .apply(ReformatJson.of());

  final List<String> expectedMain = readTestFiles(Arrays.asList("pioneer/sample.plaintext.json"));
  PAssert.that(output).containsInAnyOrder(expectedMain);

  pipeline.run();
}
Example #5
Source File: UserScoreTest.java (from deployment-examples, MIT License)
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

  PAssert.that(extract).empty();

  p.run().waitUntilFinish();
}
Example #6
Source File: UserScoreTest.java (from deployment-examples, MIT License)
/** Tests ExtractAndSumScore("team"). */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS));

  PCollection<KV<String, Integer>> output =
      input
          .apply(ParDo.of(new ParseEventFn()))
          // Extract and sum teamname/score pairs from the event data.
          .apply("ExtractTeamScore", new ExtractAndSumScore("team"));

  // Check the team score sums.
  PAssert.that(output).containsInAnyOrder(TEAM_SUMS);

  p.run().waitUntilFinish();
}
Example #7
Source File: ParsePayloadTest.java (from gcp-ingestion, Mozilla Public License 2.0)
@Test
public void testVersionInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  // printf '{"version":4}' | base64 -> eyJ2ZXJzaW9uIjo0fQ==
  String input = "{\"attributeMap\":" //
      + "{\"document_namespace\":\"telemetry\"" //
      + ",\"app_name\":\"Firefox\"" //
      + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\"" //
      + ",\"document_type\":\"main\"" //
      + "},\"payload\":\"eyJ2ZXJzaW9uIjo0fQ==\"}";

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.json.decode()).apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();
  // If we get a ValidationException here, it means we successfully extracted version from
  // the payload and found a valid schema; we expect the payload to not validate.
  PAssert.that(exceptions).containsInAnyOrder("org.everit.json.schema.ValidationException");

  pipeline.run();
}
Example #8
Source File: CSVStreamingPipelineTest.java (from dlp-dataflow-deidentification, Apache License 2.0)
@Test
public void testCSVContentProcessorDoFn() {
  List<String> sampleStringList = new ArrayList<String>();
  sampleStringList.add("A,a");
  sampleStringList.add("B,b");
  sampleStringList.add("C,c");
  sampleStringList.add("D,c");

  PCollection<KV<String, List<String>>> input =
      pipeline.apply(Create.of(KV.of("test", sampleStringList)));

  PCollection<KV<String, Table>> outputTables =
      input.apply(
          "ContentHandler",
          ParDo.of(new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(1))));

  PCollection<String> outputKeys = outputTables.apply(Keys.create());
  PAssert.that(outputKeys).containsInAnyOrder("test_1", "test_2", "test_3");

  pipeline.run();
}
Example #9
Source File: BigQuerySinkTest.java (from feast, Apache License 2.0)
@Test
public void featureRowCompressShouldPackAndUnpackSuccessfully() {
  Stream<FeatureRow> stream1 = IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs"));
  Stream<FeatureRow> stream2 =
      IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs_2"));

  List<FeatureRow> input = Stream.concat(stream1, stream2).collect(Collectors.toList());

  PCollection<FeatureRow> result =
      p.apply(Create.of(input))
          .apply("KV", ParDo.of(new ExtractKV()))
          .apply(new CompactFeatureRows(1000))
          .apply("Flat", ParDo.of(new FlatMap()));

  PAssert.that(result).containsInAnyOrder(input);
  p.run();
}
Example #10
Source File: HashClientInfoTest.java (from gcp-ingestion, Mozilla Public License 2.0)
@Test
public void testOutputIsHashed() {
  String clientId = "client_id";
  String clientIp = "client_ip";
  Map<String, String> attributes = ImmutableMap.<String, String>builder()
      .put(Attribute.CLIENT_ID, clientId).put(Attribute.CLIENT_IP, clientIp).build();
  PubsubMessage input = new PubsubMessage("{}".getBytes(StandardCharsets.UTF_8), attributes);

  PCollection<PubsubMessage> output = pipeline.apply(Create.of(input)).apply(HashClientInfo
      .of(pipeline.newProvider(ID_HASH_KEY_PATH), pipeline.newProvider(IP_HASH_KEY_PATH)));

  PAssert.that(output).satisfies((SerializableFunction<Iterable<PubsubMessage>, Void>) input1 -> {
    for (PubsubMessage message : input1) {
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_ID), clientId);
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_IP), clientIp);
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_ID)));
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_IP)));
    }
    return null;
  });

  pipeline.run();
}
Example #11
Source File: BigQueryMergerTest.java (from DataflowTemplates, Apache License 2.0)
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);

  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(Duration.standardMinutes(WINDOW_SIZE_MINUTES)));

  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
Example #12
Source File: JavascriptTextTransformerTest.java (from DataflowTemplates, Apache License 2.0)
/**
 * Tests that {@link TransformTextViaJavascript} returns transformed data when a good JavaScript
 * transform is given.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnGood() {
  List<String> inJson = Arrays.asList("{\"answerToLife\": 42}");
  List<String> expectedJson = Arrays.asList("{\"answerToLife\":42,\"someProp\":\"someValue\"}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFileSystemPath(TRANSFORM_FILE_PATH)
                  .setFunctionName("transform")
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(expectedJson);

  pipeline.run();
}
Example #13
Source File: MergeStatementBuildingFnTest.java (from DataflowTemplates, Apache License 2.0)
@Test
public void testTablesBuiltInPipeline() {
  Pipeline p = Pipeline.create();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaS =
      p.apply(Create.of(
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA)),
          KV.of(TABLE_2_NAME, KV.of(TABLE_2_PK_SCHEMA, TABLE_2_SCHEMA)),
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA))));

  PCollection<KV<String, BigQueryAction>> statementsIssued =
      tableSchemaS.apply(ParDo.of(
          new MergeStatementBuildingFn(CHANGELOG_DATASET_ID, REPLICA_DATASET_ID, PROJECT_ID)));

  PCollection<KV<String, Long>> tablesCreatedCount =
      statementsIssued
          .apply("GetCreateActions",
              Filter.by(input -> input.getValue().action.equals(BigQueryAction.CREATE_TABLE)))
          .apply("CountCreateActions", Count.perKey());

  PCollection<KV<String, Long>> tablesMerged =
      statementsIssued
          .apply("GetMergeActions",
              Filter.by(input -> input.getValue().action.equals(BigQueryAction.STATEMENT)))
          .apply("CountMergeActions", Count.perKey());

  PAssert.that(tablesCreatedCount)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 1L),
          KV.of(TABLE_2_NAME, 1L));

  PAssert.that(tablesMerged)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 2L),
          KV.of(TABLE_2_NAME, 1L));

  p.run().waitUntilFinish();
}
Example #14
Source File: ParsePayloadTest.java (from gcp-ingestion, Mozilla Public License 2.0)
@Test
public void testErrors() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  final List<String> input = Arrays.asList(
      // non-json payload
      "{\"attributeMap\":" + "{\"document_namespace\":\"eng-workflow\""
          + ",\"document_version\":\"1\""
          + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\""
          + ",\"document_type\":\"hgpush\"" + "},\"payload\":\"\"}",
      // incomplete attributes
      "{\"attributeMap\":{\"app_name\":\"Firefox\"" + ",\"app_version\":\"61.0a1\""
          + ",\"document_type\":\"main\"},\"payload\":\"e30K\"}",
      "{\"attributeMap\":{},\"payload\":\"e30K\"}",
      "{\"attributeMap\":null,\"payload\":\"e30K\"}");

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.json.decode()) //
      .apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();
  PAssert.that(exceptions).containsInAnyOrder("java.io.IOException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException");

  pipeline.run();
}
Example #15
Source File: TextImportTransformTest.java (from DataflowTemplates, Apache License 2.0)
@Test
public void readImportManifestUtfWithBOM() throws Exception {
  Path f11 = Files.createTempFile("table1-file", "1");
  String tempDir = f11.getParent().toString();

  Path manifestFile = Files.createTempFile("import-manifest", ".json");
  Charset charset = Charset.forName("UTF-8");
  try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
    String jsonString = String.format(
        "\uFEFF{\"tables\": ["
            + "{\"table_name\": \"table1\","
            + "\"file_patterns\":[\"%s\"]}"
            + "]}",
        f11.toString());
    writer.write(jsonString, 0, jsonString.length());
  } catch (IOException e) {
    e.printStackTrace();
  }

  ValueProvider<String> importManifest =
      ValueProvider.StaticValueProvider.of(manifestFile.toString());
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());

  PCollection<KV<String, String>> tableAndFiles =
      pipeline
          .apply("Read manifest file", new ReadImportManifest(importManifest))
          .apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));

  PAssert.that(tableAndFiles)
      .containsInAnyOrder(
          KV.of("table1", f11.toString()));

  pipeline.run();
}
Example #16
Source File: ErrorConvertersTest.java (from DataflowTemplates, Apache License 2.0)
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessageWithTruncatedMessage()
    throws IOException {
  GenericRecord expectedRecord = BigQueryConvertersTest.generateNestedAvroRecord();
  String errorMessage = Strings.repeat("a", 1000);
  BigQueryInsertError bigQueryInsertError = getBigQueryInsertError(expectedRecord, errorMessage);
  ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter =
      getConverter(expectedRecord.getSchema(), AvroCoder.of(expectedRecord.getSchema()));

  PCollection<PubsubMessage> output =
      pipeline
          .apply(Create.of(bigQueryInsertError)
              .withCoder(BigQueryInsertErrorCoder.of()))
          .apply(converter);

  // Expecting a truncated message with a truncation indicator suffix.
  String expectedErrorMessage =
      Ascii.truncate(
          bigQueryInsertError.getError().toString(),
          /* maxLength= */ 512,
          /* truncationIndicator= */ "...");
  PubsubMessage expectedMessage = getPubsubMessage(expectedRecord, expectedErrorMessage);
  byte[] expectedPayload = expectedMessage.getPayload();
  Map<String, String> expectedAttributes = expectedMessage.getAttributeMap();

  PAssert.thatSingleton(output)
      .satisfies(input -> {
        assertThat(input.getPayload()).isEqualTo(expectedPayload);
        assertThat(input.getAttributeMap()).isEqualTo(expectedAttributes);
        return null;
      });

  pipeline.run();
}
Example #17
Source File: CsvConvertersTest.java (from DataflowTemplates, Apache License 2.0)
/**
 * Tests {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a JSON schema.
 */
@Test
public void testLineToFailsafeJsonNoHeadersJsonSchema() {
  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  PCollection<String> lines =
      pipeline.apply(Create.of(RECORD_STRING).withCoder(StringUtf8Coder.of()));

  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);

  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJson",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(",")
              .setUdfFileSystemPath(null)
              .setUdfFunctionName(null)
              .setJsonSchemaPath(TEST_JSON_SCHEMA__PATH)
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());

  PAssert.that(failsafe.get(PROCESSING_OUT))
      .satisfies(
          collection -> {
            FailsafeElement<String, String> result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(JSON_STRING_RECORD)));
            return null;
          });

  pipeline.run();
}
Example #18
Source File: LeaderBoardTest.java (from deployment-examples, MIT License)
/**
 * A test where elements arrive behind the watermark (late data), but before the end of the
 * window. These elements are emitted on time.
 */
@Test
public void testTeamScoresUnobservablyLate() {
  BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);

  TestStream<GameActionInfo> createEvents =
      TestStream.create(AvroCoder.of(GameActionInfo.class))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
              event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)),
              event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
              event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)))
          .advanceWatermarkTo(
              baseTime.plus(TEAM_WINDOW_DURATION).minus(Duration.standardMinutes(1)))
          // These events are late, but the window hasn't closed yet, so the elements are in the
          // on-time pane.
          .addElements(
              event(TestUser.RED_TWO, 2, Duration.ZERO),
              event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
              event(TestUser.BLUE_TWO, 2, Duration.standardSeconds(90)),
              event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)))
          .advanceWatermarkTo(
              baseTime.plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents)
          .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

  String blueTeam = TestUser.BLUE_ONE.getTeam();
  String redTeam = TestUser.RED_ONE.getTeam();

  // The on-time pane contains the late elements that arrived before the end of the window.
  PAssert.that(teamScores)
      .inOnTimePane(window)
      .containsInAnyOrder(KV.of(redTeam, 14), KV.of(blueTeam, 13));

  p.run().waitUntilFinish();
}
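The inOnTimePane call above is one of several window- and pane-scoped assertion variants on PAssert. As a compact sketch of the family, assuming a hypothetical PCollection<Long> named sums produced under five-minute fixed windows:

  IntervalWindow window = new IntervalWindow(new Instant(0), Duration.standardMinutes(5));

  PAssert.that(sums).inWindow(window).containsInAnyOrder(1L);      // all panes for the window
  PAssert.that(sums).inOnTimePane(window).containsInAnyOrder(1L);  // only the on-time pane
  PAssert.that(sums).inFinalPane(window).containsInAnyOrder(1L);   // only the last pane fired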
Example #19
Source File: SplunkConvertersTest.java (from DataflowTemplates, Apache License 2.0)
/** Tests successful conversion of JSON messages with a user-provided _metadata source. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidSource() {
  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"source\": \"test-log-name\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent("{\"name\":\"Jim\"}")
              .withSource("test-log-name")
              .build());

  pipeline.run();
}
Example #20
Source File: StreamingDataGeneratorTest.java (from DataflowTemplates, Apache License 2.0)
/** Tests the {@link MessageGeneratorFn} generates fake data. */
@Test
public void testMessageGenerator() throws IOException {
  // Arrange //
  String schema =
      "{"
          + "\"id\": \"{{uuid()}}\", "
          + "\"eventTime\": \"{{timestamp()}}\", "
          + "\"username\": \"{{username()}}\", "
          + "\"score\": {{integer(0,100)}}"
          + "}";

  File file = tempFolder.newFile();
  writeToFile(file.getAbsolutePath(), schema);

  // Act //
  PCollection<PubsubMessage> results =
      pipeline
          .apply("CreateInput", Create.of(0L))
          .apply("GenerateMessage", ParDo.of(new MessageGeneratorFn(file.getAbsolutePath())));

  // Assert //
  PAssert.that(results)
      .satisfies(
          input -> {
            PubsubMessage message = input.iterator().next();

            assertThat(message, is(notNullValue()));
            assertThat(message.getPayload(), is(notNullValue()));
            assertThat(message.getAttributeMap(), is(notNullValue()));

            return null;
          });

  pipeline.run();
}
Example #21
Source File: UserScoreTest.java (from deployment-examples, MIT License)
/** Test the {@link ParseEventFn} {@link org.apache.beam.sdk.transforms.DoFn}. */
@Test
public void testParseEventFn() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS));
  PCollection<GameActionInfo> output = input.apply(ParDo.of(new ParseEventFn()));

  PAssert.that(output).containsInAnyOrder(GAME_ACTION_INFO_LIST);

  p.run().waitUntilFinish();
}
Example #22
Source File: BigQuerySinkTest.java (from feast, Apache License 2.0)
@Test
public void expectingJobResult() {
  FeatureRow featureRow = generateRow("myproject/fs");
  TestStream<FeatureRow> featureRowTestStream =
      TestStream.create(ProtoCoder.of(FeatureRow.class))
          .advanceWatermarkTo(Instant.now())
          .addElements(featureRow)
          .advanceWatermarkToInfinity();

  jobService.setNumFailuresExpected(3);

  FeatureSink sink =
      makeSink(
          ValueProvider.StaticValueProvider.of(bigQuery),
          p.apply(
              "StaticSpecs",
              Create.of(
                  ImmutableMap.of(
                      FeatureSetReference.of(spec.getProject(), spec.getName(), 1), spec))));

  PTransform<PCollection<FeatureRow>, WriteResult> writer =
      ((BigQueryWrite) sink.writer()).withExpectingResultTime(Duration.standardSeconds(5));

  PCollection<FeatureRow> inserts =
      p.apply(featureRowTestStream).apply(writer).getSuccessfulInserts();

  PAssert.that(inserts).containsInAnyOrder(featureRow);
  p.run();
}
Example #23
Source File: AvroTableFileAsMutationsTest.java (from DataflowTemplates, Apache License 2.0)
@Test
public void testFileShardingNoSharding() throws Exception {
  Path path = tmpFolder.newFile("testfile").toPath();
  int splitSize = 10000;
  Files.write(path, new byte[splitSize]);

  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));

            FileShard shard = shards.getFirst();
            assertThat(
                shard.getFile().getMetadata().resourceId().getFilename(), equalTo("testfile"));
            assertThat(shard.getTableName(), equalTo("testtable"));
            assertThat(shard.getRange().getFrom(), equalTo(0L));
            assertThat(shard.getRange().getTo(), equalTo(splitSize * 1L));
            return null;
          });

  p.run();
}
Example #24
Source File: WordCountTest.java (from deployment-examples, MIT License)
/** Example test that tests a specific {@link DoFn}. */
@Test
public void testExtractWordsFn() throws Exception {
  List<String> words = Arrays.asList(" some input words ", " ", " cool ", " foo", " bar");

  PCollection<String> output =
      p.apply(Create.of(words).withCoder(StringUtf8Coder.of()))
          .apply(ParDo.of(new ExtractWordsFn()));

  PAssert.that(output).containsInAnyOrder("some", "input", "words", "cool", "foo", "bar");

  p.run().waitUntilFinish();
}
Example #25
Source File: WordCountTest.java (from deployment-examples, MIT License)
/** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
@Test
@Category(ValidatesRunner.class)
public void testCountWords() throws Exception {
  PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));

  PCollection<String> output =
      input.apply(new CountWords()).apply(MapElements.via(new FormatAsTextFn()));

  PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);

  p.run().waitUntilFinish();
}
Example #26
Source File: GameStatsTest.java (from deployment-examples, MIT License)
/** Test the calculation of 'spammy users'. */
@Test
@Category(ValidatesRunner.class)
public void testCalculateSpammyUsers() throws Exception {
  PCollection<KV<String, Integer>> input = p.apply(Create.of(USER_SCORES));
  PCollection<KV<String, Integer>> output = input.apply(new CalculateSpammyUsers());

  // Check the set of spammers.
  PAssert.that(output).containsInAnyOrder(SPAMMERS);

  p.run().waitUntilFinish();
}
Example #27
Source File: SplunkConvertersTest.java (from DataflowTemplates, Apache License 2.0)
/** Test successful conversion of JSON messages with a user-provided host. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidHost() {
  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"host\": \"test-host\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder().withEvent("{\"name\":\"Jim\"}").withHost("test-host").build());

  pipeline.run();
}
Example #28
Source File: StatefulTeamScoreTest.java (from deployment-examples, MIT License)
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for multiple teams.
 */
@Test
public void testScoreUpdatesPerTeam() {
  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_ONE, 50, Duration.standardSeconds(10)),
              event(TestUser.RED_TWO, 50, Duration.standardSeconds(20)),
              event(TestUser.BLUE_ONE, 70, Duration.standardSeconds(30)),
              event(TestUser.BLUE_TWO, 80, Duration.standardSeconds(40)),
              event(TestUser.BLUE_TWO, 50, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();
  String blueTeam = TestUser.BLUE_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(blueTeam, 150), KV.of(blueTeam, 200));

  p.run().waitUntilFinish();
}
Example #29
Source File: SplunkConvertersTest.java (from DataflowTemplates, Apache License 2.0)
/** Test successful conversion of JSON messages with an invalid timestamp. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventInValidTimestamp() {
  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"logName\": \"test-log-name\",\n"
              + "\t\"timestamp\": \"2019-1011:32:26.553Z\"\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent(
                  "{\n"
                      + "\t\"name\": \"Jim\",\n"
                      + "\t\"logName\": \"test-log-name\",\n"
                      + "\t\"timestamp\": \"2019-1011:32:26.553Z\"\n"
                      + "}")
              .build());

  pipeline.run();
}
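A final note on the two run styles seen throughout these examples: PAssert failures only surface when the pipeline actually executes, so every test ends with a run call. On the DirectRunner that TestPipeline uses by default, run() typically blocks until the pipeline finishes; chaining waitUntilFinish() makes that wait explicit and portable across runners:

  // Explicit, runner-agnostic way to block until all PAssert checks have run.
  pipeline.run().waitUntilFinish();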