org.apache.beam.sdk.testing.PAssert Java Examples

The following examples show how to use org.apache.beam.sdk.testing.PAssert, Beam's assertion utility for checking the contents of a PCollection in tests. Each example is taken from an open-source project; the source file and license are noted above it.
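
A minimal PAssert test has the following shape: create input with Create.of, apply the transform under test, register assertions with PAssert, and only then run the pipeline, since the assertions execute as part of the pipeline itself. The sketch below uses hypothetical values and assumes a JUnit 4 test class with Beam's TestPipeline rule.

import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;

public class PAssertBasicsTest {

  // TestPipeline is a JUnit rule that constructs and validates the test pipeline.
  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

  @Test
  public void testContainsInAnyOrder() {
    PCollection<String> words = pipeline.apply(Create.of("a", "b", "c"));

    // PCollections are unordered, so assertions compare contents, not order.
    PAssert.that(words).containsInAnyOrder("c", "b", "a");

    // Assertions are checked only when the pipeline actually runs.
    pipeline.run();
  }
}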
Example #1
Source File: JavascriptTextTransformerTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Test that {@link TransformTextViaJavascript} passes data through unchanged when given null
 * ValueProviders as args, i.e. when hasInvocable returns false.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnPassthroughNullValueProvider() {
  List<String> inJson = Arrays.asList("{\"answerToLife\":    42}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFunctionName(null)
                  .setFileSystemPath(null)
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(inJson);

  pipeline.run();
}
 
Example #2
Source File: PubsubToAvroTest.java    From DataflowTemplates with Apache License 2.0
/** Test that {@link AvroPubsubMessageRecord} correctly maps the message. */
@Test
@Category(NeedsRunner.class)
public void testPubsubMessageToArchive() throws Exception {
  // Create the test input.
  byte[] payload = "Laces out Dan!".getBytes();
  Map<String, String> attributes = ImmutableMap.of("id", "Ace");

  PubsubMessage message = new PubsubMessage(payload, attributes);
  Instant timestamp = Instant.now();

  // Apply the ParDo.
  PCollection<AvroPubsubMessageRecord> results =
      pipeline
          .apply(Create.timestamped(TimestampedValue.of(message, timestamp)))
          .apply(ParDo.of(new PubsubMessageToArchiveDoFn()));

  // Assert on the results.
  PAssert.that(results)
      .containsInAnyOrder(
          new AvroPubsubMessageRecord(payload, attributes, timestamp.getMillis()));

  // Run the pipeline.
  pipeline.run();
}
 
Example #3
Source File: FileBasedDeadLetterQueueReconsumerTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testAllFilesAreConsumed() throws IOException {
  TestStream<String> inputFiles = TestStream.create(StringUtf8Coder.of())
      .addElements(
          createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1),
          createJsonFile("dlqFile2.json", JSON_FILE_CONTENTS_1))
      .addElements(createJsonFile("dlqFile3.json", JSON_FILE_CONTENTS_1))
      .advanceWatermarkToInfinity();

  PCollection<String> jsonData = p.apply(inputFiles)
      .apply(FileIO.matchAll())
      .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());

  PAssert.that(jsonData)
      .containsInAnyOrder(
          Stream.of(JSON_FILE_CONTENTS_1)
              .flatMap(line -> Stream.of(line, line, line))
              .collect(Collectors.toList()));

  p.run().waitUntilFinish();
}
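
TestStream, used above, lets a test feed elements into a streaming pipeline and control the watermark between batches. A minimal standalone sketch, with hypothetical values and the same TestPipeline rule p as in this example:

  TestStream<String> stream = TestStream.create(StringUtf8Coder.of())
      .addElements("first", "second")  // delivered before the watermark moves
      .advanceWatermarkToInfinity();   // closes all windows and fires final panes

  PCollection<String> events = p.apply(stream);
  PAssert.that(events).containsInAnyOrder("first", "second");

  // TestStream needs a runner that supports it (e.g. the DirectRunner);
  // waitUntilFinish() blocks until the asserted panes have fired.
  p.run().waitUntilFinish();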
 
Example #4
Source File: DecryptPioneerPayloadsTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testOutput() throws Exception {
  // minimal test for throughput of a single document
  ValueProvider<String> metadataLocation = pipeline
      .newProvider(Resources.getResource("pioneer/metadata-local.json").getPath());
  ValueProvider<Boolean> kmsEnabled = pipeline.newProvider(false);
  ValueProvider<Boolean> decompressPayload = pipeline.newProvider(true);

  final List<String> input = readTestFiles(Arrays.asList("pioneer/study-foo.ciphertext.json"));
  PCollection<String> output = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.text.decode())
      .apply("AddAttributes", MapElements.into(TypeDescriptor.of(PubsubMessage.class))
          .via(element -> new PubsubMessage(element.getPayload(),
              ImmutableMap.of(Attribute.DOCUMENT_NAMESPACE, "telemetry", Attribute.DOCUMENT_TYPE,
                  "pioneer-study", Attribute.DOCUMENT_VERSION, "4"))))
      .apply(DecryptPioneerPayloads.of(metadataLocation, kmsEnabled, decompressPayload)).output()
      .apply(OutputFileFormat.text.encode()).apply(ReformatJson.of());

  final List<String> expectedMain = readTestFiles(Arrays.asList("pioneer/sample.plaintext.json"));
  PAssert.that(output).containsInAnyOrder(expectedMain);

  pipeline.run();
}
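
The pipeline.newProvider(...) calls above come from TestPipeline: they wrap a value in a ValueProvider that is only readable at pipeline run time, mimicking how template parameters arrive in production. A short sketch with hypothetical values:

  // The wrapped value is inaccessible at construction time, like a runtime option.
  ValueProvider<String> path = pipeline.newProvider("gs://bucket/config.json");
  ValueProvider<Boolean> enabled = pipeline.newProvider(false);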
 
Example #5
Source File: UserScoreTest.java    From deployment-examples with MIT License
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

  PAssert.that(extract).empty();

  p.run().waitUntilFinish();
}
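
The empty() assertion above is the idiomatic way to verify that a branch produced no output, e.g. that bad records were dropped or a dead-letter collection stayed empty. A two-line sketch with a hypothetical name:

  // Passes only if the collection contains no elements at all.
  PAssert.that(deadLetters).empty();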
 
Example #6
Source File: UserScoreTest.java    From deployment-examples with MIT License
/** Tests ExtractAndSumScore("team"). */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS));

  PCollection<KV<String, Integer>> output =
      input
          .apply(ParDo.of(new ParseEventFn()))
          // Extract and sum teamname/score pairs from the event data.
          .apply("ExtractTeamScore", new ExtractAndSumScore("team"));

  // Check the team score sums.
  PAssert.that(output).containsInAnyOrder(TEAM_SUMS);

  p.run().waitUntilFinish();
}
 
Example #7
Source File: ParsePayloadTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testVersionInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  // printf '{"version":4}' | base64 -> eyJ2ZXJzaW9uIjo0fQ==
  String input = "{\"attributeMap\":" //
      + "{\"document_namespace\":\"telemetry\"" //
      + ",\"app_name\":\"Firefox\"" //
      + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\"" //
      + ",\"document_type\":\"main\"" //
      + "},\"payload\":\"eyJ2ZXJzaW9uIjo0fQ==\"}";

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.json.decode()).apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();

  // If we get a ValidationException here, it means we successfully extracted version from
  // the payload and found a valid schema; we expect the payload to not validate.
  PAssert.that(exceptions).containsInAnyOrder("org.everit.json.schema.ValidationException");

  pipeline.run();
}
 
Example #8
Source File: CSVStreamingPipelineTest.java    From dlp-dataflow-deidentification with Apache License 2.0
@Test
public void testCSVContentProcessorDoFn() {
  List<String> sampleStringList = new ArrayList<String>();
  sampleStringList.add("A,a");
  sampleStringList.add("B,b");
  sampleStringList.add("C,c");
  sampleStringList.add("D,c");

  PCollection<KV<String, List<String>>> input =
      pipeline.apply(Create.of(KV.of("test", sampleStringList)));

  PCollection<KV<String, Table>> outputTables =
      input.apply(
          "ContentHandler",
          ParDo.of(new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(1))));

  PCollection<String> outputKeys = outputTables.apply(Keys.create());
  PAssert.that(outputKeys).containsInAnyOrder("test_1", "test_2", "test_3");
  pipeline.run();
}
 
Example #9
Source File: BigQuerySinkTest.java    From feast with Apache License 2.0
@Test
public void featureRowCompressShouldPackAndUnpackSuccessfully() {
  Stream<FeatureRow> stream1 = IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs"));
  Stream<FeatureRow> stream2 =
      IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs_2"));

  List<FeatureRow> input = Stream.concat(stream1, stream2).collect(Collectors.toList());

  PCollection<FeatureRow> result =
      p.apply(Create.of(input))
          .apply("KV", ParDo.of(new ExtractKV()))
          .apply(new CompactFeatureRows(1000))
          .apply("Flat", ParDo.of(new FlatMap()));

  PAssert.that(result).containsInAnyOrder(input);
  p.run();
}
 
Example #10
Source File: HashClientInfoTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testOutputIsHashed() {
  String clientId = "client_id";
  String clientIp = "client_ip";

  Map<String, String> attributes = ImmutableMap.<String, String>builder()
      .put(Attribute.CLIENT_ID, clientId).put(Attribute.CLIENT_IP, clientIp).build();
  PubsubMessage input = new PubsubMessage("{}".getBytes(StandardCharsets.UTF_8), attributes);

  PCollection<PubsubMessage> output = pipeline.apply(Create.of(input)).apply(HashClientInfo
      .of(pipeline.newProvider(ID_HASH_KEY_PATH), pipeline.newProvider(IP_HASH_KEY_PATH)));

  PAssert.that(output).satisfies((SerializableFunction<Iterable<PubsubMessage>, Void>) input1 -> {
    for (PubsubMessage message : input1) {
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_ID), clientId);
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_IP), clientIp);
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_ID)));
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_IP)));
    }
    return null;
  });

  pipeline.run();
}
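
When element-wise equality is not expressive enough, satisfies(...) takes a SerializableFunction over the whole Iterable, as above; the function runs its own assertions and must return null to satisfy the Void result type. A distilled sketch with hypothetical names:

  PAssert.that(output).satisfies(elements -> {
    for (String value : elements) {
      Assert.assertTrue(value.startsWith("prefix-"));
    }
    return null;  // SerializableFunction<Iterable<T>, Void> must return null
  });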
 
Example #11
Source File: BigQueryMergerTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);

  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(Duration.standardMinutes(WINDOW_SIZE_MINUTES)));

  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
 
Example #12
Source File: JavascriptTextTransformerTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Test that {@link TransformTextViaJavascript} returns transformed data when a valid JavaScript
 * transform is given.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnGood() {
  List<String> inJson = Arrays.asList("{\"answerToLife\": 42}");
  List<String> expectedJson = Arrays.asList("{\"answerToLife\":42,\"someProp\":\"someValue\"}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFileSystemPath(TRANSFORM_FILE_PATH)
                  .setFunctionName("transform")
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(expectedJson);

  pipeline.run();
}
 
Example #13
Source File: MergeStatementBuildingFnTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testTablesBuiltInPipeline() {
  Pipeline p = Pipeline.create();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaS =
      p.apply(Create.of(
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA)),
          KV.of(TABLE_2_NAME, KV.of(TABLE_2_PK_SCHEMA, TABLE_2_SCHEMA)),
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA))));

  PCollection<KV<String, BigQueryAction>> statementsIssued =
      tableSchemaS
          .apply(ParDo.of(
              new MergeStatementBuildingFn(CHANGELOG_DATASET_ID, REPLICA_DATASET_ID, PROJECT_ID)));

  PCollection<KV<String, Long>> tablesCreatedCount = statementsIssued
      .apply("GetCreateActions",
          Filter.by(input -> input.getValue().action.equals(BigQueryAction.CREATE_TABLE)))
      .apply("CountCreateActions", Count.perKey());

  PCollection<KV<String, Long>> tablesMerged = statementsIssued
      .apply("GetMergeActions",
          Filter.by(input -> input.getValue().action.equals(BigQueryAction.STATEMENT)))
      .apply("CountMergeActions", Count.perKey());

  PAssert.that(tablesCreatedCount)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 1L),
          KV.of(TABLE_2_NAME, 1L));

  PAssert.that(tablesMerged)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 2L),
          KV.of(TABLE_2_NAME, 1L));

  p.run().waitUntilFinish();
}
 
Example #14
Source File: ParsePayloadTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testErrors() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");
  final List<String> input = Arrays.asList(
      // non-json payload
      "{\"attributeMap\":" + "{\"document_namespace\":\"eng-workflow\""
          + ",\"document_version\":\"1\""
          + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\""
          + ",\"document_type\":\"hgpush\"" + "},\"payload\":\"\"}",
      // incomplete attributes
      "{\"attributeMap\":{\"app_name\":\"Firefox\"" + ",\"app_version\":\"61.0a1\""
          + ",\"document_type\":\"main\"},\"payload\":\"e30K\"}",
      "{\"attributeMap\":{},\"payload\":\"e30K\"}",
      "{\"attributeMap\":null,\"payload\":\"e30K\"}");

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.json.decode()) //
      .apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();
  PAssert.that(exceptions).containsInAnyOrder("java.io.IOException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException");

  pipeline.run();
}
 
Example #15
Source File: TextImportTransformTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void readImportManifestUtfWithBOM() throws Exception {
  Path f11 = Files.createTempFile("table1-file", "1");
  String tempDir = f11.getParent().toString();

  Path manifestFile = Files.createTempFile("import-manifest", ".json");
  Charset charset = Charset.forName("UTF-8");
  try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
    String jsonString =
        String.format(
            "\uFEFF{\"tables\": ["
                + "{\"table_name\": \"table1\","
                + "\"file_patterns\":[\"%s\"]}"
                + "]}",
            f11.toString());
    writer.write(jsonString, 0, jsonString.length());
  } catch (IOException e) {
    e.printStackTrace();
  }

  ValueProvider<String> importManifest =
      ValueProvider.StaticValueProvider.of(manifestFile.toString());
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());

  PCollection<KV<String, String>> tableAndFiles =
      pipeline
          .apply("Read manifest file", new ReadImportManifest(importManifest))
          .apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));

  PAssert.that(tableAndFiles)
      .containsInAnyOrder(
          KV.of("table1", f11.toString()));

  pipeline.run();
}
 
Example #16
Source File: ErrorConvertersTest.java    From DataflowTemplates with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessageWithTruncatedMessage()
    throws IOException {

  GenericRecord expectedRecord = BigQueryConvertersTest.generateNestedAvroRecord();
  String errorMessage = Strings.repeat("a", 1000);
  BigQueryInsertError bigQueryInsertError =
      getBigQueryInsertError(expectedRecord, errorMessage);
  ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter =
      getConverter(expectedRecord.getSchema(), AvroCoder.of(expectedRecord.getSchema()));

  PCollection<PubsubMessage> output =
      pipeline
          .apply(Create.of(bigQueryInsertError)
              .withCoder(BigQueryInsertErrorCoder.of()))
          .apply(converter);

  // Expecting a truncated message with a truncation indicator suffix.
  String expectedErrorMessage =
      Ascii.truncate(
          bigQueryInsertError.getError().toString(),
          /* maxLength= */ 512,
          /* truncationIndicator= */ "...");
  PubsubMessage expectedMessage =
      getPubsubMessage(expectedRecord, expectedErrorMessage);
  byte[] expectedPayload = expectedMessage.getPayload();
  Map<String, String> expectedAttributes = expectedMessage.getAttributeMap();
  PAssert.thatSingleton(output)
      .satisfies(input -> {
        assertThat(input.getPayload()).isEqualTo(expectedPayload);
        assertThat(input.getAttributeMap()).isEqualTo(expectedAttributes);
        return null;
      });
  pipeline.run();
}
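
PAssert.thatSingleton, used above, asserts that the collection holds exactly one element and hands that element (rather than an Iterable) to the check. Presumably PubsubMessage has no value-based equals, which is why this test compares payload and attributes inside satisfies instead of using a direct comparison. A distilled sketch with hypothetical names:

  // The single element is passed to the function directly.
  PAssert.thatSingleton(output)
      .satisfies(message -> {
        assertThat(message.getPayload()).isEqualTo(expectedPayload);
        return null;
      });

  // For types with value equality, a direct comparison also works:
  PAssert.thatSingleton(wordCount).isEqualTo(42L);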
 
Example #17
Source File: CsvConvertersTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a JSON schema.
 */
@Test
public void testLineToFailsafeJsonNoHeadersJsonSchema() {

  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  PCollection<String> lines =
      pipeline.apply(Create.of(RECORD_STRING).withCoder(StringUtf8Coder.of()));

  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);

  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJson",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(",")
              .setUdfFileSystemPath(null)
              .setUdfFunctionName(null)
              .setJsonSchemaPath(TEST_JSON_SCHEMA__PATH)
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());

  PAssert.that(failsafe.get(PROCESSING_OUT))
      .satisfies(
          collection -> {
            FailsafeElement<String, String> result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(JSON_STRING_RECORD)));
            return null;
          });

  pipeline.run();
}
 
Example #18
Source File: LeaderBoardTest.java    From deployment-examples with MIT License
/**
 * A test where elements arrive behind the watermark (late data), but before the end of the
 * window. These elements are emitted on time.
 */
@Test
public void testTeamScoresUnobservablyLate() {

  BoundedWindow window = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);
  TestStream<GameActionInfo> createEvents =
      TestStream.create(AvroCoder.of(GameActionInfo.class))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
              event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)),
              event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
              event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)))
          .advanceWatermarkTo(
              baseTime.plus(TEAM_WINDOW_DURATION).minus(Duration.standardMinutes(1)))
          // These events are late, but the window hasn't closed yet, so the elements are in the
          // on-time pane
          .addElements(
              event(TestUser.RED_TWO, 2, Duration.ZERO),
              event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
              event(TestUser.BLUE_TWO, 2, Duration.standardSeconds(90)),
              event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)))
          .advanceWatermarkTo(
              baseTime.plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1)))
          .advanceWatermarkToInfinity();
  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents)
          .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

  String blueTeam = TestUser.BLUE_ONE.getTeam();
  String redTeam = TestUser.RED_ONE.getTeam();
  // The On Time pane contains the late elements that arrived before the end of the window
  PAssert.that(teamScores)
      .inOnTimePane(window)
      .containsInAnyOrder(KV.of(redTeam, 14), KV.of(blueTeam, 13));

  p.run().waitUntilFinish();
}
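
PAssert can also scope an assertion to a particular window and pane, as above: inOnTimePane(window) restricts the check to the pane emitted when the watermark passes the end of the window, while inWindow(...) covers the window's contents as a whole and inFinalPane(...) only the last pane fired. A distilled sketch against the teamScores and window of this example, with hypothetical team names:

  // Only the on-time pane of this window is checked.
  PAssert.that(teamScores)
      .inOnTimePane(window)
      .containsInAnyOrder(KV.of("red", 14), KV.of("blue", 13));

  // Here the on-time pane carries the window's full contents, so the
  // window-wide assertion sees the same elements.
  PAssert.that(teamScores)
      .inWindow(window)
      .containsInAnyOrder(KV.of("red", 14), KV.of("blue", 13));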
 
Example #19
Source File: SplunkConvertersTest.java    From DataflowTemplates with Apache License 2.0
/** Test successful conversion of JSON messages with a user-provided _metadata source. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidSource() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"source\": \"test-log-name\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent("{\"name\":\"Jim\"}")
              .withSource("test-log-name")
              .build());

  pipeline.run();
}
 
Example #20
Source File: StreamingDataGeneratorTest.java    From DataflowTemplates with Apache License 2.0
/** Tests that the {@link MessageGeneratorFn} generates fake data. */
@Test
public void testMessageGenerator() throws IOException {
  // Arrange
  //
  String schema =
      "{"
          + "\"id\": \"{{uuid()}}\", "
          + "\"eventTime\": \"{{timestamp()}}\", "
          + "\"username\": \"{{username()}}\", "
          + "\"score\": {{integer(0,100)}}"
          + "}";

  File file = tempFolder.newFile();
  writeToFile(file.getAbsolutePath(), schema);

  // Act
  //
  PCollection<PubsubMessage> results =
      pipeline
          .apply("CreateInput", Create.of(0L))
          .apply("GenerateMessage", ParDo.of(new MessageGeneratorFn(file.getAbsolutePath())));

  // Assert
  //
  PAssert.that(results)
      .satisfies(
          input -> {
            PubsubMessage message = input.iterator().next();

            assertThat(message, is(notNullValue()));
            assertThat(message.getPayload(), is(notNullValue()));
            assertThat(message.getAttributeMap(), is(notNullValue()));

            return null;
          });

  pipeline.run();
}
 
Example #21
Source File: UserScoreTest.java    From deployment-examples with MIT License
/** Test the {@link ParseEventFn} {@link org.apache.beam.sdk.transforms.DoFn}. */
@Test
public void testParseEventFn() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS));
  PCollection<GameActionInfo> output = input.apply(ParDo.of(new ParseEventFn()));

  PAssert.that(output).containsInAnyOrder(GAME_ACTION_INFO_LIST);

  p.run().waitUntilFinish();
}
 
Example #22
Source File: BigQuerySinkTest.java    From feast with Apache License 2.0
@Test
public void expectingJobResult() {
  FeatureRow featureRow = generateRow("myproject/fs");
  TestStream<FeatureRow> featureRowTestStream =
      TestStream.create(ProtoCoder.of(FeatureRow.class))
          .advanceWatermarkTo(Instant.now())
          .addElements(featureRow)
          .advanceWatermarkToInfinity();

  jobService.setNumFailuresExpected(3);

  FeatureSink sink =
      makeSink(
          ValueProvider.StaticValueProvider.of(bigQuery),
          p.apply(
              "StaticSpecs",
              Create.of(
                  ImmutableMap.of(
                      FeatureSetReference.of(spec.getProject(), spec.getName(), 1), spec))));

  PTransform<PCollection<FeatureRow>, WriteResult> writer =
      ((BigQueryWrite) sink.writer()).withExpectingResultTime(Duration.standardSeconds(5));
  PCollection<FeatureRow> inserts =
      p.apply(featureRowTestStream).apply(writer).getSuccessfulInserts();

  PAssert.that(inserts).containsInAnyOrder(featureRow);

  p.run();
}
 
Example #23
Source File: AvroTableFileAsMutationsTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void testFileShardingNoSharding() throws Exception {
  Path path = tmpFolder.newFile("testfile").toPath();
  int splitSize = 10000;
  Files.write(path, new byte[splitSize]);
  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));
            FileShard shard = shards.getFirst();
            assertThat(
                shard.getFile().getMetadata().resourceId().getFilename(), equalTo("testfile"));
            assertThat(shard.getTableName(), equalTo("testtable"));
            assertThat(shard.getRange().getFrom(), equalTo(0L));
            assertThat(shard.getRange().getTo(), equalTo(splitSize * 1L));
            return null;
          });
  p.run();
}
 
Example #24
Source File: WordCountTest.java    From deployment-examples with MIT License
/** Example test that tests a specific {@link DoFn}. */
@Test
public void testExtractWordsFn() throws Exception {
  List<String> words = Arrays.asList(" some  input  words ", " ", " cool ", " foo", " bar");
  PCollection<String> output =
      p.apply(Create.of(words).withCoder(StringUtf8Coder.of()))
          .apply(ParDo.of(new ExtractWordsFn()));
  PAssert.that(output).containsInAnyOrder("some", "input", "words", "cool", "foo", "bar");
  p.run().waitUntilFinish();
}
 
Example #25
Source File: WordCountTest.java    From deployment-examples with MIT License
/** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
@Test
@Category(ValidatesRunner.class)
public void testCountWords() throws Exception {
  PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));

  PCollection<String> output =
      input.apply(new CountWords()).apply(MapElements.via(new FormatAsTextFn()));

  PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
  p.run().waitUntilFinish();
}
 
Example #26
Source File: GameStatsTest.java    From deployment-examples with MIT License
/** Test the calculation of 'spammy users'. */
@Test
@Category(ValidatesRunner.class)
public void testCalculateSpammyUsers() throws Exception {
  PCollection<KV<String, Integer>> input = p.apply(Create.of(USER_SCORES));
  PCollection<KV<String, Integer>> output = input.apply(new CalculateSpammyUsers());

  // Check the set of spammers.
  PAssert.that(output).containsInAnyOrder(SPAMMERS);

  p.run().waitUntilFinish();
}
 
Example #27
Source File: SplunkConvertersTest.java    From DataflowTemplates with Apache License 2.0
/** Test successful conversion of JSON messages with a user-provided host. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidHost() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"host\": \"test-host\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder().withEvent("{\"name\":\"Jim\"}").withHost("test-host").build());

  pipeline.run();
}
 
Example #28
Source File: StatefulTeamScoreTest.java    From deployment-examples with MIT License
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for multiple teams.
 */
@Test
public void testScoreUpdatesPerTeam() {

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_ONE, 50, Duration.standardSeconds(10)),
              event(TestUser.RED_TWO, 50, Duration.standardSeconds(20)),
              event(TestUser.BLUE_ONE, 70, Duration.standardSeconds(30)),
              event(TestUser.BLUE_TWO, 80, Duration.standardSeconds(40)),
              event(TestUser.BLUE_TWO, 50, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();
  String blueTeam = TestUser.BLUE_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(blueTeam, 150), KV.of(blueTeam, 200));

  p.run().waitUntilFinish();
}
 
Example #29
Source File: SplunkConvertersTest.java    From DataflowTemplates with Apache License 2.0
/** Test successful conversion of JSON messages with an invalid timestamp. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventInValidTimestamp() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"logName\": \"test-log-name\",\n"
              + "\t\"timestamp\": \"2019-1011:32:26.553Z\"\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent(
                  "{\n"
                      + "\t\"name\": \"Jim\",\n"
                      + "\t\"logName\": \"test-log-name\",\n"
                      + "\t\"timestamp\": \"2019-1011:32:26.553Z\"\n"
                      + "}")
              .build());

  pipeline.run();
}