Java Code Examples for org.apache.beam.sdk.testing.PAssert

The following examples show how to use org.apache.beam.sdk.testing.PAssert. They are extracted from open source projects; where available, the source project, source file, and license are noted above each example.
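All of these examples run inside JUnit test classes that expose a Beam test pipeline as a rule field (named pipeline or p, depending on the project). For reference, here is a minimal self-contained sketch of that setup; the class name and sample values are illustrative, not taken from any of the projects below.

import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;

public class PAssertExampleTest {

  // TestPipeline is a JUnit rule that creates and validates a fresh pipeline per test.
  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

  @Test
  public void testContainsInAnyOrder() {
    // Build a small in-memory input; the element values are arbitrary.
    PCollection<String> words = pipeline.apply(Create.of("a", "b", "c"));

    // Register an assertion; containsInAnyOrder ignores element order.
    PAssert.that(words).containsInAnyOrder("c", "b", "a");

    // Assertions are only checked when the pipeline actually runs.
    pipeline.run().waitUntilFinish();
  }
}

Note that PAssert registers assertions rather than executing them immediately: a test that omits the final pipeline.run() passes vacuously.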
Example 1
@Test
public void testCSVContentProcessorDoFn() {
  List<String> sampleStringList = new ArrayList<String>();
  sampleStringList.add("A,a");
  sampleStringList.add("B,b");
  sampleStringList.add("C,c");
  sampleStringList.add("D,c");

  PCollection<KV<String, List<String>>> input =
      pipeline.apply(Create.of(KV.of("test", sampleStringList)));

  PCollection<KV<String, Table>> outputTables =
      input.apply(
          "ContentHandler",
          ParDo.of(new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(1))));

  PCollection<String> outputKeys = outputTables.apply(Keys.create());
  PAssert.that(outputKeys).containsInAnyOrder("test_1", "test_2", "test_3");
  pipeline.run();
}
 
Example 2
/**
 * Test that {@link TransformTextViaJavascript} passes data through unchanged when it is given
 * null ValueProviders as args, i.e. when hasInvocable returns false.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnPassthroughNullValueProvider() {
  List<String> inJson = Arrays.asList("{\"answerToLife\":    42}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFunctionName(null)
                  .setFileSystemPath(null)
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(inJson);

  pipeline.run();
}
 
Example 3
Source Project: DataflowTemplates   Source File: PubsubToAvroTest.java    License: Apache License 2.0
/** Test {@link AvroPubsubMessageRecord} correctly maps the message. */
@Test
@Category(NeedsRunner.class)
public void testPubsubMessageToArchive() throws Exception {
  // Create the test input.
  byte[] payload = "Laces out Dan!".getBytes();
  Map<String, String> attributes = ImmutableMap.of("id", "Ace");

  PubsubMessage message = new PubsubMessage(payload, attributes);
  Instant timestamp = Instant.now();

  // Apply the ParDo.
  PCollection<AvroPubsubMessageRecord> results =
      pipeline
          .apply(Create.timestamped(TimestampedValue.of(message, timestamp)))
          .apply(ParDo.of(new PubsubMessageToArchiveDoFn()));

  // Assert on the results.
  PAssert.that(results)
      .containsInAnyOrder(
          new AvroPubsubMessageRecord(payload, attributes, timestamp.getMillis()));

  // Run the pipeline.
  pipeline.run();
}
 
Example 4
@Test
public void testOutput() throws Exception {
  // minimal test for throughput of a single document
  ValueProvider<String> metadataLocation = pipeline
      .newProvider(Resources.getResource("pioneer/metadata-local.json").getPath());
  ValueProvider<Boolean> kmsEnabled = pipeline.newProvider(false);
  ValueProvider<Boolean> decompressPayload = pipeline.newProvider(true);

  final List<String> input = readTestFiles(Arrays.asList("pioneer/study-foo.ciphertext.json"));
  PCollection<String> output = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.text.decode())
      .apply("AddAttributes", MapElements.into(TypeDescriptor.of(PubsubMessage.class))
          .via(element -> new PubsubMessage(element.getPayload(),
              ImmutableMap.of(Attribute.DOCUMENT_NAMESPACE, "telemetry", Attribute.DOCUMENT_TYPE,
                  "pioneer-study", Attribute.DOCUMENT_VERSION, "4"))))
      .apply(DecryptPioneerPayloads.of(metadataLocation, kmsEnabled, decompressPayload)).output()
      .apply(OutputFileFormat.text.encode()).apply(ReformatJson.of());

  final List<String> expectedMain = readTestFiles(Arrays.asList("pioneer/sample.plaintext.json"));
  PAssert.that(output).containsInAnyOrder(expectedMain);

  pipeline.run();
}
 
Example 5
Source Project: gcp-ingestion   Source File: HashClientInfoTest.java    License: Mozilla Public License 2.0
@Test
public void testOutputIsHashed() {
  String clientId = "client_id";
  String clientIp = "client_ip";

  Map<String, String> attributes = ImmutableMap.<String, String>builder()
      .put(Attribute.CLIENT_ID, clientId).put(Attribute.CLIENT_IP, clientIp).build();
  PubsubMessage input = new PubsubMessage("{}".getBytes(StandardCharsets.UTF_8), attributes);

  PCollection<PubsubMessage> output = pipeline.apply(Create.of(input)).apply(HashClientInfo
      .of(pipeline.newProvider(ID_HASH_KEY_PATH), pipeline.newProvider(IP_HASH_KEY_PATH)));

  PAssert.that(output).satisfies((SerializableFunction<Iterable<PubsubMessage>, Void>) input1 -> {
    for (PubsubMessage message : input1) {
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_ID), clientId);
      Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_IP), clientIp);
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_ID)));
      Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_IP)));
    }
    return null;
  });

  pipeline.run();
}
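
A note on Example 5: PAssert.that(...).satisfies(...) receives the entire PCollection contents as an Iterable and must return null (its argument type is SerializableFunction<Iterable<T>, Void>). On distributed runners the function is serialized and may execute on workers, so anything it captures must itself be serializable.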
 
Example 6
/**
 * Test that {@link TransformTextViaJavascript} returns transformed data when a valid JavaScript
 * transform is given.
 */
@Test
@Category(NeedsRunner.class)
public void testDoFnGood() {
  List<String> inJson = Arrays.asList("{\"answerToLife\": 42}");
  List<String> expectedJson = Arrays.asList("{\"answerToLife\":42,\"someProp\":\"someValue\"}");

  PCollection<String> transformedJson =
      pipeline
          .apply("Create", Create.of(inJson))
          .apply(
              TransformTextViaJavascript.newBuilder()
                  .setFileSystemPath(TRANSFORM_FILE_PATH)
                  .setFunctionName("transform")
                  .build());

  PAssert.that(transformedJson).containsInAnyOrder(expectedJson);

  pipeline.run();
}
 
Example 7
Source Project: DataflowTemplates   Source File: BigQueryMergerTest.java    License: Apache License 2.0
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);

  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(Duration.standardMinutes(WINDOW_SIZE_MINUTES)));

  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
 
Example 8
Source Project: gcp-ingestion   Source File: ParsePayloadTest.java    License: Mozilla Public License 2.0
@Test
public void testVersionInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");

  // printf '{"version":4}' | base64 -> eyJ2ZXJzaW9uIjo0fQ==
  String input = "{\"attributeMap\":" //
      + "{\"document_namespace\":\"telemetry\"" //
      + ",\"app_name\":\"Firefox\"" //
      + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\"" //
      + ",\"document_type\":\"main\"" //
      + "},\"payload\":\"eyJ2ZXJzaW9uIjo0fQ==\"}";

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.json.decode()).apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();

  // If we get a ValidationException here, it means we successfully extracted version from
  // the payload and found a valid schema; we expect the payload to not validate.
  PAssert.that(exceptions).containsInAnyOrder("org.everit.json.schema.ValidationException");

  pipeline.run();
}
 
Example 9
@Test
public void testAllFilesAreConsumed() throws IOException {
  TestStream<String> inputFiles = TestStream.create(StringUtf8Coder.of())
      .addElements(
          createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1),
          createJsonFile("dlqFile2.json", JSON_FILE_CONTENTS_1))
      .addElements(createJsonFile("dlqFile3.json", JSON_FILE_CONTENTS_1))
      .advanceWatermarkToInfinity();

  PCollection<String> jsonData = p.apply(inputFiles)
      .apply(FileIO.matchAll())
      .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());

  PAssert.that(jsonData)
      .containsInAnyOrder(
          Stream.of(JSON_FILE_CONTENTS_1)
              .flatMap(line -> Stream.of(line, line, line))
              .collect(Collectors.toList()));

  p.run().waitUntilFinish();
}
 
Example 10
Source Project: feast   Source File: BigQuerySinkTest.java    License: Apache License 2.0
@Test
public void featureRowCompressShouldPackAndUnpackSuccessfully() {
  Stream<FeatureRow> stream1 = IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs"));
  Stream<FeatureRow> stream2 =
      IntStream.range(0, 1000).mapToObj(i -> generateRow("project/fs_2"));

  List<FeatureRow> input = Stream.concat(stream1, stream2).collect(Collectors.toList());

  PCollection<FeatureRow> result =
      p.apply(Create.of(input))
          .apply("KV", ParDo.of(new ExtractKV()))
          .apply(new CompactFeatureRows(1000))
          .apply("Flat", ParDo.of(new FlatMap()));

  PAssert.that(result).containsInAnyOrder(input);
  p.run();
}
 
Example 11
Source Project: deployment-examples   Source File: UserScoreTest.java    License: MIT License
/** Tests ExtractAndSumScore("team"). */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS));

  PCollection<KV<String, Integer>> output =
      input
          .apply(ParDo.of(new ParseEventFn()))
          // Extract and sum teamname/score pairs from the event data.
          .apply("ExtractTeamScore", new ExtractAndSumScore("team"));

  // Check the team score sums.
  PAssert.that(output).containsInAnyOrder(TEAM_SUMS);

  p.run().waitUntilFinish();
}
 
Example 12
Source Project: deployment-examples   Source File: UserScoreTest.java    License: MIT License
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

  PAssert.that(extract).empty();

  p.run().waitUntilFinish();
}
 
Example 13
Source Project: streamingbook   Source File: BeamModelTest.java    License: Apache License 2.0
private void runTest(PTransform<PBegin, PCollection<KV<String, Integer>>> createInput, BeamModel.ExampleTransform example) {
    PCollection<String> teamScores = p.apply(createInput).apply(example);

    PAssert.that(teamScores)
        .containsInAnyOrder(example.getExpectedResults());

    p.run().waitUntilFinish();
}
 
Example 14
@Test
public void testFilesAreConsumed() throws IOException {
  String fileName = createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1);
  folder.newFolder("tmp");

  String folderPath = Paths.get(folder.getRoot().getAbsolutePath()).resolve("*").toString();
  PCollection<String> jsonData = p
      .apply(FileIO.match()
          .filepattern(folderPath))
      .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());
  PAssert.that(jsonData).containsInAnyOrder(JSON_FILE_CONTENTS_1);
  p.run().waitUntilFinish();

  assertFalse(new File(fileName).exists());
}
 
Example 15
@Test
public void testJson() {
  List<String> inputLines = Lines.resources("testdata/decode-pubsub-messages/input-valid*");
  List<String> validLines = Lines
      .resources("testdata/decode-pubsub-messages/output-normalized-json.ndjson");

  PCollection<String> result = pipeline //
      .apply(Create.of(inputLines)) //
      .apply(InputFileFormat.json.decode()) //
      .apply("EncodeJsonOutput", OutputFileFormat.json.encode());

  PAssert.that(result).containsInAnyOrder(validLines);

  pipeline.run();
}
 
Example 16
@Test
public void testFileShardingNotSeekable() throws Exception {
  Path path = tmpFolder.newFile("testfile").toPath();
  int splitSize = 10000;
  Files.write(path, new byte[splitSize * 2]);
  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(false)
          .setSizeBytes(splitSize * 2)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));
            FileShard shard = shards.getFirst();
            assertThat(
                shard.getFile().getMetadata().resourceId().getFilename(), equalTo("testfile"));
            assertThat(shard.getTableName(), equalTo("testtable"));
            assertThat(shard.getRange().getFrom(), equalTo(0L));
            assertThat(shard.getRange().getTo(), equalTo(splitSize * 2L));
            return null;
          });
  p.run();
}
 
Example 17
@Test
public void testTablesBuiltInPipeline() {
  Pipeline p = Pipeline.create();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaS =
      p.apply(Create.of(
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA)),
          KV.of(TABLE_2_NAME, KV.of(TABLE_2_PK_SCHEMA, TABLE_2_SCHEMA)),
          KV.of(TABLE_1_NAME, KV.of(TABLE_1_PK_SCHEMA, TABLE_1_SCHEMA))));

  PCollection<KV<String, BigQueryAction>> statementsIssued =
      tableSchemaS.apply(ParDo.of(
          new MergeStatementBuildingFn(CHANGELOG_DATASET_ID, REPLICA_DATASET_ID, PROJECT_ID)));

  PCollection<KV<String, Long>> tablesCreatedCount = statementsIssued
      .apply("GetCreateActions",
          Filter.by(input -> input.getValue().action.equals(BigQueryAction.CREATE_TABLE)))
      .apply("CountCreateActions", Count.perKey());

  PCollection<KV<String, Long>> tablesMerged = statementsIssued
      .apply("GetMergeActions",
          Filter.by(input -> input.getValue().action.equals(BigQueryAction.STATEMENT)))
      .apply("CountMergeActions", Count.perKey());

  PAssert.that(tablesCreatedCount)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 1L),
          KV.of(TABLE_2_NAME, 1L));

  PAssert.that(tablesMerged)
      .containsInAnyOrder(
          KV.of(TABLE_1_NAME, 2L),
          KV.of(TABLE_2_NAME, 1L));

  p.run().waitUntilFinish();
}
 
Example 18
Source Project: DataflowTemplates   Source File: TextImportTransformTest.java    License: Apache License 2.0
@Test
public void readImportManifestUtfWithBOM() throws Exception {
  Path f11 = Files.createTempFile("table1-file", "1");
  String tempDir = f11.getParent().toString();

  Path manifestFile = Files.createTempFile("import-manifest", ".json");
  Charset charset = Charset.forName("UTF-8");
  try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
    String jsonString =
        String.format(
            "\uFEFF{\"tables\": ["
                + "{\"table_name\": \"table1\","
                + "\"file_patterns\":[\"%s\"]}"
                + "]}",
            f11.toString());
    writer.write(jsonString, 0, jsonString.length());
  } catch (IOException e) {
    e.printStackTrace();
  }

  ValueProvider<String> importManifest =
      ValueProvider.StaticValueProvider.of(manifestFile.toString());
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());

  PCollection<KV<String, String>> tableAndFiles =
      pipeline
          .apply("Read manifest file", new ReadImportManifest(importManifest))
          .apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));

  PAssert.that(tableAndFiles)
      .containsInAnyOrder(
          KV.of("table1", f11.toString()));

  pipeline.run();
}
 
Example 19
Source Project: DataflowTemplates   Source File: AvroToBigtableTest.java    License: Apache License 2.0
@Test
public void applyAvroToBigtableFn() throws Exception {
  BigtableRow avroRow1 = createAvroRow("row1");
  addAvroCell(avroRow1, "family1", "column1", 1, "value1");
  addAvroCell(avroRow1, "family1", "column1", 2, "value2");
  addAvroCell(avroRow1, "family1", "column2", 1, "value3");
  addAvroCell(avroRow1, "family2", "column1", 1, "value4");
  BigtableRow avroRow2 = createAvroRow("row2");
  addAvroCell(avroRow2, "family2", "column2", 2, "value2");
  List<BigtableRow> avroRows = ImmutableList.of(avroRow1, avroRow2);

  KV<ByteString, Iterable<Mutation>> rowMutations1 = createBigtableRowMutations("row1");
  addBigtableMutation(rowMutations1, "family1", "column1", 1, "value1");
  addBigtableMutation(rowMutations1, "family1", "column1", 2, "value2");
  addBigtableMutation(rowMutations1, "family1", "column2", 1, "value3");
  addBigtableMutation(rowMutations1, "family2", "column1", 1, "value4");
  KV<ByteString, Iterable<Mutation>> rowMutations2 = createBigtableRowMutations("row2");
  addBigtableMutation(rowMutations2, "family2", "column2", 2, "value2");
  List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(rowMutations1, rowMutations2);

  PCollection<KV<ByteString, Iterable<Mutation>>> bigtableRows =
      pipeline
          .apply("Create", Create.of(avroRows))
          .apply("Transform to Bigtable", ParDo.of(AvroToBigtableFn.create()));

  PAssert.that(bigtableRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run();
}
 
Example 20
Source Project: DataflowTemplates   Source File: SplunkConvertersTest.java    License: Apache License 2.0
/** Test successful conversion of JSON messages with provided overrides for time and source. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidTimeOverride() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"timestamp\": \"2019-10-15T11:32:26.553Z\",\n"
              + "\t\"_metadata\": {\"time\": \"2019-11-22T11:32:26.553Z\", "
              + "\"source\": \"test-source-name\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent(
                  "{" + "\"timestamp\":\"2019-10-15T11:32:26.553Z\"" + "}")
              .withSource("test-source-name")
              .withTime(DateTime.parseRfc3339("2019-11-22T11:32:26.553Z").getValue())
              .build());

  pipeline.run();
}
 
Example 21
Source Project: DataflowTemplates   Source File: CsvConvertersTest.java    License: Apache License 2.0
/** Tests {@link CsvConverters.ReadCsv} reads a Csv with no headers correctly. */
@Test
public void testReadNoHeadersCsv() {

  CsvConverters.CsvPipelineOptions options =
      PipelineOptionsFactory.create().as(CsvConverters.CsvPipelineOptions.class);

  options.setContainsHeaders(false);
  options.setDelimiter(",");
  options.setCsvFormat("Default");
  options.setInputFileSpec(NO_HEADER_CSV_FILE_PATH);

  // Build pipeline with no headers.
  PCollectionTuple readCsvOut =
      pipeline.apply(
          "TestReadCsvNoHeaders",
          CsvConverters.ReadCsv.newBuilder()
              .setCsvFormat(options.getCsvFormat())
              .setDelimiter(options.getDelimiter())
              .setHasHeaders(options.getContainsHeaders())
              .setInputFileSpec(options.getInputFileSpec())
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .build());

  PAssert.that(readCsvOut.get(CSV_LINES))
      .satisfies(
          collection -> {
            String result = collection.iterator().next();
            assertThat(result, is(equalTo(RECORD_STRING)));
            return null;
          });

  //  Execute pipeline
  pipeline.run();
}
 
Example 22
Source Project: gcp-ingestion   Source File: ParsePayloadTest.java    License: Mozilla Public License 2.0
@Test
public void testErrors() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");
  final List<String> input = Arrays.asList(
      // non-json payload
      "{\"attributeMap\":" + "{\"document_namespace\":\"eng-workflow\""
          + ",\"document_version\":\"1\""
          + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\""
          + ",\"document_type\":\"hgpush\"" + "},\"payload\":\"\"}",
      // incomplete attributes
      "{\"attributeMap\":{\"app_name\":\"Firefox\"" + ",\"app_version\":\"61.0a1\""
          + ",\"document_type\":\"main\"},\"payload\":\"e30K\"}",
      "{\"attributeMap\":{},\"payload\":\"e30K\"}",
      "{\"attributeMap\":null,\"payload\":\"e30K\"}");

  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.json.decode()) //
      .apply(ParsePayload.of(schemasLocation));

  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));

  PAssert.that(result.output()).empty();
  PAssert.that(exceptions).containsInAnyOrder("java.io.IOException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException",
      "com.mozilla.telemetry.ingestion.core.schema.SchemaNotFoundException");

  pipeline.run();
}
 
Example 23
Source Project: DataflowTemplates   Source File: SplunkConvertersTest.java    License: Apache License 2.0
/** Test successful conversion of JSON messages with user-provided _metadata. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidSource() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"source\": \"test-log-name\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder()
              .withEvent("{\"name\":\"Jim\"}")
              .withSource("test-log-name")
              .build());

  pipeline.run();
}
 
Example 24
Source Project: feast   Source File: BigQuerySinkTest.java    License: Apache License 2.0
@Test
public void expectingJobResult() {
  FeatureRow featureRow = generateRow("myproject/fs");
  TestStream<FeatureRow> featureRowTestStream =
      TestStream.create(ProtoCoder.of(FeatureRow.class))
          .advanceWatermarkTo(Instant.now())
          .addElements(featureRow)
          .advanceWatermarkToInfinity();

  jobService.setNumFailuresExpected(3);

  FeatureSink sink =
      makeSink(
          ValueProvider.StaticValueProvider.of(bigQuery),
          p.apply(
              "StaticSpecs",
              Create.of(
                  ImmutableMap.of(
                      FeatureSetReference.of(spec.getProject(), spec.getName(), 1), spec))));

  PTransform<PCollection<FeatureRow>, WriteResult> writer =
      ((BigQueryWrite) sink.writer()).withExpectingResultTime(Duration.standardSeconds(5));
  PCollection<FeatureRow> inserts =
      p.apply(featureRowTestStream).apply(writer).getSuccessfulInserts();

  PAssert.that(inserts).containsInAnyOrder(featureRow);

  p.run();
}
 
Example 25
@Test
public void testFileShardingNoSharding() throws Exception {
  Path path = tmpFolder.newFile("testfile").toPath();
  int splitSize = 10000;
  Files.write(path, new byte[splitSize]);
  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));
            FileShard shard = shards.getFirst();
            assertThat(
                shard.getFile().getMetadata().resourceId().getFilename(), equalTo("testfile"));
            assertThat(shard.getTableName(), equalTo("testtable"));
            assertThat(shard.getRange().getFrom(), equalTo(0L));
            assertThat(shard.getRange().getTo(), equalTo(splitSize * 1L));
            return null;
          });
  p.run();
}
 
Example 26
Source Project: deployment-examples   Source File: WordCountTest.java    License: MIT License
/** Example test that tests a specific {@link DoFn}. */
@Test
public void testExtractWordsFn() throws Exception {
  List<String> words = Arrays.asList(" some  input  words ", " ", " cool ", " foo", " bar");
  PCollection<String> output =
      p.apply(Create.of(words).withCoder(StringUtf8Coder.of()))
          .apply(ParDo.of(new ExtractWordsFn()));
  PAssert.that(output).containsInAnyOrder("some", "input", "words", "cool", "foo", "bar");
  p.run().waitUntilFinish();
}
 
Example 27
Source Project: deployment-examples   Source File: WordCountTest.java    License: MIT License
/** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
@Test
@Category(ValidatesRunner.class)
public void testCountWords() throws Exception {
  PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));

  PCollection<String> output =
      input.apply(new CountWords()).apply(MapElements.via(new FormatAsTextFn()));

  PAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
  p.run().waitUntilFinish();
}
 
Example 28
Source Project: deployment-examples   Source File: GameStatsTest.java    License: MIT License
/** Test the calculation of 'spammy users'. */
@Test
@Category(ValidatesRunner.class)
public void testCalculateSpammyUsers() throws Exception {
  PCollection<KV<String, Integer>> input = p.apply(Create.of(USER_SCORES));
  PCollection<KV<String, Integer>> output = input.apply(new CalculateSpammyUsers());

  // Check the set of spammers.
  PAssert.that(output).containsInAnyOrder(SPAMMERS);

  p.run().waitUntilFinish();
}
 
Example 29
Source Project: DataflowTemplates   Source File: SplunkConvertersTest.java    License: Apache License 2.0
/** Test successful conversion of JSON messages with a user-provided host. */
@Test
@Category(NeedsRunner.class)
public void testFailsafeStringToSplunkEventValidHost() {

  FailsafeElement<String, String> input =
      FailsafeElement.of(
          "",
          "{\n"
              + "\t\"name\": \"Jim\",\n"
              + "\t\"_metadata\": {\"host\": \"test-host\"}\n"
              + "}");

  pipeline.getCoderRegistry().registerCoderForClass(SplunkEvent.class, SplunkEventCoder.of());

  PCollectionTuple tuple =
      pipeline
          .apply(
              Create.of(input)
                  .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())))
          .apply(
              SplunkConverters.failsafeStringToSplunkEvent(
                  SPLUNK_EVENT_OUT, SPLUNK_EVENT_DEADLETTER_OUT));

  PAssert.that(tuple.get(SPLUNK_EVENT_DEADLETTER_OUT)).empty();
  PAssert.that(tuple.get(SPLUNK_EVENT_OUT))
      .containsInAnyOrder(
          SplunkEvent.newBuilder().withEvent("{\"name\":\"Jim\"}").withHost("test-host").build());

  pipeline.run();
}