org.apache.beam.sdk.coders.StringUtf8Coder Java Examples

The following examples show how to use org.apache.beam.sdk.coders.StringUtf8Coder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@code ParDo} that wraps {@code ProcessContextDoFn} and asserts that the resulting
 * payload reports {@code getRequestsFinalization()} as true.
 */
@Test
public void testProcessContext() throws Exception {
  Pipeline inputPipeline = Pipeline.create();

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  // Main input handed to the translator: bounded strings in the default global windowing.
  PCollection<String> mainInput =
      PCollection.createPrimitiveOutputInternal(
          inputPipeline,
          WindowingStrategy.globalDefault(),
          IsBounded.BOUNDED,
          StringUtf8Coder.of());

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new ProcessContextDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          mainInput,
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  assertTrue(translated.getRequestsFinalization());
}
 
Example #2
Source File: ReaderFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the default reader registry to create a reader for a source whose spec names the class
 * {@code "UnknownSource"} and expects the attempt to fail with an explanatory exception.
 */
@Test
public void testCreateUnknownReader() throws Exception {
  Source source = new Source();
  source.setSpec(CloudObject.forClassName("UnknownSource"));
  source.setCodec(CloudObjects.asCloudObject(StringUtf8Coder.of(), /*sdkComponents=*/ null));

  try {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    ReaderRegistry.defaultRegistry()
        .create(
            source,
            pipelineOptions,
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
            null);
    // Reaching this line means no exception was raised for the unknown source.
    Assert.fail("should have thrown an exception");
  } catch (Exception expected) {
    String description = expected.toString();
    Assert.assertThat(description, CoreMatchers.containsString("Unable to create a Reader"));
  }
}
 
Example #3
Source File: FlinkStreamingTransformTranslatorsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates {@code transform} with the read-source translator and returns the transformation of
 * the data stream that the translation context holds for the transform's output collection.
 *
 * @param transform the transform to translate
 * @param isBounded boundedness assigned to the synthetic output collection
 * @param env the stream execution environment backing the translation context
 */
private Object applyReadSourceTransform(
    PTransform<?, ?> transform, PCollection.IsBounded isBounded, StreamExecutionEnvironment env) {

  FlinkStreamingPipelineTranslator.StreamTransformTranslator<PTransform<?, ?>> readTranslator =
      getReadSourceTranslator();
  FlinkStreamingTranslationContext translationContext =
      new FlinkStreamingTranslationContext(env, PipelineOptionsFactory.create());

  // Synthetic output collection that the applied transform is declared to produce.
  PCollection<String> output =
      PCollection.createPrimitiveOutputInternal(
          Pipeline.create(), WindowingStrategy.globalDefault(), isBounded, StringUtf8Coder.of());
  output.setName("output");

  Map<TupleTag<?>, PValue> outputsByTag = new HashMap<>();
  outputsByTag.put(new TupleTag<>(), output);

  AppliedPTransform<?, ?, ?> application =
      AppliedPTransform.of(
          "test-transform", Collections.emptyMap(), outputsByTag, transform, Pipeline.create());
  translationContext.setCurrentTransform(application);

  readTranslator.translateNode(transform, translationContext);

  return translationContext.getInputDataStream(output).getTransformation();
}
 
Example #4
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code TestStream} of 100 timestamped {@code KV<String, String>} elements, advancing
 * the watermark after every tenth element, and hands it to {@code
 * testEventTimeTimerOrderingWithInputPTransform} for verification.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStatefulParDo.class,
  UsesStrictTimerOrdering.class
})
public void testEventTimeTimerOrdering() throws Exception {
  final Instant now = new Instant(1500000000000L);
  final int numTestElements = 100;

  TestStream.Builder<KV<String, String>> stream =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(new Instant(0));

  // "emitted" counts the elements added so far; element values and offsets are zero-based.
  for (int emitted = 1; emitted <= numTestElements; emitted++) {
    int index = emitted - 1;
    stream =
        stream.addElements(
            TimestampedValue.of(KV.of("dummy", String.valueOf(index)), now.plus(index * 1000)));
    if (emitted % 10 == 0) {
      // After every tenth element, move the watermark beyond the latest element's timestamp.
      stream = stream.advanceWatermarkTo(now.plus(emitted * 1000));
    }
  }

  testEventTimeTimerOrderingWithInputPTransform(
      now, numTestElements, stream.advanceWatermarkToInfinity());
}
 
Example #5
Source File: EvaluationContextTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the execution contexts obtained for two different keys are not equal and that a
 * value added to a bag under one key is not readable under the other.
 */
@Test
public void getExecutionContextDifferentKeysIndependentState() {
  StateTag<BagState<Integer>> bagTag = StateTags.bag("myBag", VarIntCoder.of());

  // Add a single value to the bag through the context keyed by "foo".
  DirectExecutionContext contextForFoo =
      context.getExecutionContext(createdProducer, StructuralKey.of("foo", StringUtf8Coder.of()));
  contextForFoo
      .getStepContext("s1")
      .stateInternals()
      .state(StateNamespaces.global(), bagTag)
      .add(1);

  DirectExecutionContext contextForBar =
      context.getExecutionContext(createdProducer, StructuralKey.of("bar", StringUtf8Coder.of()));
  assertThat(contextForBar, not(equalTo(contextForFoo)));

  // The same step, namespace and tag read through the "bar" context must yield nothing.
  Iterable<Integer> barContents =
      contextForBar
          .getStepContext("s1")
          .stateInternals()
          .state(StateNamespaces.global(), bagTag)
          .read();
  assertThat(barContents, emptyIterable());
}
 
Example #6
Source File: WatchTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips one polling and one non-polling {@code GrowthState} through {@code
 * Watch.GrowthStateCoder} and checks that decoding the encoded form yields an equal value.
 */
@Test
public void testCoder() throws Exception {
  Coder<GrowthState> stateCoder =
      Watch.GrowthStateCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  ImmutableMap<HashCode, Instant> completedHashes =
      ImmutableMap.of(
          HashCode.fromString("0123456789abcdef0123456789abcdef"), Instant.now(),
          HashCode.fromString("01230123012301230123012301230123"), Instant.now());
  GrowthState polling = PollingGrowthState.of(completedHashes, Instant.now(), "STATE");

  GrowthState nonPolling =
      NonPollingGrowthState.of(
          Growth.PollResult.incomplete(Instant.now(), Arrays.asList("A", "B")));

  CoderProperties.coderDecodeEncodeEqual(stateCoder, polling);
  CoderProperties.coderDecodeEncodeEqual(stateCoder, nonPolling);
}
 
Example #7
Source File: ProvidedSparkContextTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the word-count transforms on a pipeline whose options are derived from the supplied Spark
 * context and verifies that the formatted counts match {@code EXPECTED_COUNT_SET}.
 *
 * @param jsc the Spark context passed to {@code getSparkContextOptions}
 */
private void testWithValidProvidedContext(JavaSparkContext jsc) throws Exception {
  Pipeline pipeline = Pipeline.create(getSparkContextOptions(jsc));

  PCollection<String> formattedCounts =
      pipeline
          .apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()))
          .apply(new WordCount.CountWords())
          .apply(MapElements.via(new WordCount.FormatAsTextFn()));
  PAssert.that(formattedCounts).containsInAnyOrder(EXPECTED_COUNT_SET);

  // Execute the pipeline, then confirm the PAssert checks succeeded.
  PipelineResult outcome = pipeline.run();
  TestPipeline.verifyPAssertsSucceeded(pipeline, outcome);
}
 
Example #8
Source File: StatefulTeamScoreTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Verifies the output of {@link UpdateTeamScoreFn} (a {@link org.apache.beam.sdk.transforms.DoFn})
 * when every input event belongs to a single team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  // Five events from the two red-team users, spaced ten seconds apart.
  TestStream<KV<String, GameActionInfo>> redTeamEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> scores =
      p.apply(redTeamEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String team = TestUser.RED_ONE.getTeam();
  PAssert.that(scores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(team, 100), KV.of(team, 200), KV.of(team, 401));

  p.run().waitUntilFinish();
}
 
Example #9
Source File: WriteToGCSAvroTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that building a {@link WriteToGCSAvro} with a null temporary location is rejected with an
 * {@link IllegalArgumentException}.
 */
@Test
public void testWriteWithoutTempLocation() {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("withTempLocation(tempLocation) called with null input. ");

  KvCoder<String, String> messageCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply("CreateInput", Create.of(message).withCoder(messageCoder))
      .apply(
          "WriteTextFile(s)",
          WriteToGCSAvro.newBuilder()
              .withOutputDirectory(FAKE_DIR)
              .withOutputFilenamePrefix(AVRO_FILENAME_PREFIX)
              .setNumShards(NUM_SHARDS)
              // The null value here is what the expected exception is about.
              .withTempLocation(null)
              .build());
  pipeline.run();
}
 
Example #10
Source File: WriteFiles.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code FinalizeFn} to the incoming file results and wraps the resulting per-destination
 * filenames in a {@code WriteFilesResult}.
 *
 * @param input collection of file-result lists to finalize
 * @return the result holding the finalized filenames keyed by destination
 */
@Override
public WriteFilesResult<DestinationT> expand(
    PCollection<List<FileResult<DestinationT>>> input) {

  // Side inputs for finalization: the configured ones plus the shard-count view when present.
  List<PCollectionView<?>> sideInputsForFinalize = Lists.newArrayList(getSideInputs());
  if (numShardsView != null) {
    sideInputsForFinalize.add(numShardsView);
  }

  PCollection<KV<DestinationT, String>> finalizedFilenames =
      input
          .apply("Finalize", ParDo.of(new FinalizeFn()).withSideInputs(sideInputsForFinalize))
          .setCoder(KvCoder.of(destinationCoder, StringUtf8Coder.of()))
          // The reshuffle keeps filenames from being observed downstream before
          // their finalization has completed.
          .apply(Reshuffle.viaRandomKey());

  TupleTag<KV<DestinationT, String>> filenamesTag =
      new TupleTag<>("perDestinationOutputFilenames");
  return WriteFilesResult.in(input.getPipeline(), filenamesTag, finalizedFilenames);
}
 
Example #11
Source File: StreamingDataflowWorkerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a streaming worker whose pipeline is source -> {@code FanoutFn} -> sink, offers it one
 * work item, waits on the fake server's commits, and stops the worker.
 */
@Test
public void testHugeCommits() throws Exception {
  List<ParallelInstruction> pipelineSteps =
      Arrays.asList(
          makeSourceInstruction(StringUtf8Coder.of()),
          makeDoFnInstruction(new FanoutFn(), 0, StringUtf8Coder.of()),
          makeSinkInstruction(StringUtf8Coder.of(), 0));

  FakeWindmillServer windmill = new FakeWindmillServer(errorCollector);
  StreamingDataflowWorkerOptions workerOptions = createTestingPipelineOptions(windmill);

  StreamingDataflowWorker streamingWorker =
      makeWorker(pipelineSteps, workerOptions, true /* publishCounters */);
  streamingWorker.start();

  windmill.addWorkToOffer(makeInput(0, TimeUnit.MILLISECONDS.toMicros(0)));
  windmill.waitForAndGetCommits(0);

  streamingWorker.stop();
}
 
Example #12
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Combines timestamped key/value pairs, both globally and per key, after assigning them to fixed
 * windows of two milliseconds, and checks the per-window results.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testFixedWindowsCombine() {
  KvCoder<String, Integer> pairCoder =
      KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());

  PCollection<KV<String, Integer>> windowedPairs =
      pipeline
          .apply(
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                      TimestampedValue.of(KV.of("a", 1), new Instant(1L)),
                      TimestampedValue.of(KV.of("a", 4), new Instant(6L)),
                      TimestampedValue.of(KV.of("b", 1), new Instant(7L)),
                      TimestampedValue.of(KV.of("b", 13), new Instant(8L)))
                  .withCoder(pairCoder))
          .apply(Window.into(FixedWindows.of(Duration.millis(2))));

  // Global combine over the values only.
  PCollection<Integer> totals =
      windowedPairs
          .apply(Values.create())
          .apply(Combine.globally(new SumInts()).withoutDefaults());

  // Per-key combine using the test combine function.
  PCollection<KV<String, String>> perKey =
      windowedPairs.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(totals).containsInAnyOrder(2, 5, 13);
  PAssert.that(perKey)
      .containsInAnyOrder(
          Arrays.asList(KV.of("a", "11"), KV.of("a", "4"), KV.of("b", "1"), KV.of("b", "13")));

  pipeline.run();
}
 
Example #13
Source File: ToStringTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code ToString.iterables("\t")} joins the elements of each input iterable with a
 * tab character.
 */
@Test
@Category(NeedsRunner.class)
public void testToStringIterableWithDelimiter() {
  ArrayList<Iterable<String>> rows = new ArrayList<>();
  rows.add(Arrays.asList("one", "two", "three"));
  rows.add(Arrays.asList("four", "five", "six"));

  // One tab-joined string per input row.
  ArrayList<String> joinedRows =
      new ArrayList<>(Arrays.asList("one\ttwo\tthree", "four\tfive\tsix"));

  PCollection<Iterable<String>> input =
      p.apply(Create.of(rows).withCoder(IterableCoder.of(StringUtf8Coder.of())));
  PCollection<String> joined = input.apply(ToString.iterables("\t"));

  PAssert.that(joined).containsInAnyOrder(joinedRows);
  p.run();
}
 
Example #14
Source File: CopyOnAccessInMemoryStateInternalsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that values added to a bag remain readable after {@code commit()} when the state
 * internals were created with a null underlying instance.
 */
@Test
public void testCommitWithoutUnderlying() {
  CopyOnAccessInMemoryStateInternals<String> stateInternals =
      CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
  StateNamespace fooNamespace = new StateNamespaceForTest("foo");
  StateTag<BagState<String>> fooBagTag = StateTags.bag("foo", StringUtf8Coder.of());

  // The bag starts out empty.
  BagState<String> bag = stateInternals.state(fooNamespace, fooBagTag);
  assertThat(bag.read(), emptyIterable());

  bag.add("bar");
  bag.add("baz");
  assertThat(bag.read(), containsInAnyOrder("baz", "bar"));

  stateInternals.commit();

  // Looking the bag up again after the commit must still show both values.
  BagState<String> bagAfterCommit = stateInternals.state(fooNamespace, fooBagTag);
  assertThat(bagAfterCommit.read(), containsInAnyOrder("baz", "bar"));
  assertThat(stateInternals.isEmpty(), is(false));
}
 
Example #15
Source File: CountTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code Count.globally()} to a collection windowed into one-day fixed windows and
 * expects an {@link IllegalStateException} carrying the combine function's
 * incompatible-global-window message.
 */
@Test
public void testGlobalWindowErrorMessageShows() {
  String expectedMessage = Count.combineFn().getIncompatibleGlobalWindowErrorMessage();

  PCollection<String> lines = p.apply(Create.of(NO_LINES).withCoder(StringUtf8Coder.of()));
  PCollection<String> dailyWindows =
      lines.apply(Window.into(FixedWindows.of(Duration.standardDays(1))));

  // Arm the rule only now, so that it applies to the apply() call below.
  exceptionRule.expect(IllegalStateException.class);
  exceptionRule.expectMessage(expectedMessage);
  dailyWindows.apply(Count.globally());
}
 
Example #16
Source File: ThriftIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the test structs with {@link ThriftIO#sink(TProtocolFactory)} and reads them back with
 * {@link ThriftIO#readFiles(Class)}, using the {@link TJSONProtocol} factory on both sides.
 */
@Test
public void testReadWriteJsonProtocol() {

  // Write phase: encode the structs and write them into the temporary folder.
  mainPipeline
      .apply(
          Create.of(testThriftStructs)
              .withCoder(ThriftCoder.of(TestThriftStruct.class, tJsonProtocolFactory)))
      .apply(
          FileIO.<TestThriftStruct>write()
              .via(ThriftIO.sink(tJsonProtocolFactory))
              .to(temporaryFolder.getRoot().getAbsolutePath()));
  mainPipeline.run().waitUntilFinish();

  // Read phase: match every file in the temporary folder and parse it back into structs.
  String writtenFilesGlob = temporaryFolder.getRoot().getAbsolutePath() + "/*";
  PCollection<TestThriftStruct> roundTripped =
      readPipeline
          .apply(Create.of(writtenFilesGlob).withCoder(StringUtf8Coder.of()))
          .apply(FileIO.matchAll())
          .apply(FileIO.readMatches())
          .apply(ThriftIO.readFiles(TestThriftStruct.class).withProtocol(tJsonProtocolFactory));

  PAssert.that(roundTripped).containsInAnyOrder(testThriftStructs);

  readPipeline.run().waitUntilFinish();
}
 
Example #17
Source File: CoderPropertiesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes {@code value} with {@code StringUtf8Coder}, unless {@code lostState} is zero, in which
 * case a {@link RuntimeException} is thrown instead.
 */
@Override
public void encode(String value, OutputStream outStream) throws CoderException, IOException {
  if (lostState != 0) {
    StringUtf8Coder.of().encode(value, outStream);
    return;
  }
  throw new RuntimeException("I forgot something...");
}
 
Example #18
Source File: DataflowPTransformMatchersTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a test pipeline, with abandoned-node enforcement disabled, that applies a {@link
 * Combine.PerKey} to a single key/value pair.
 */
private static TestPipeline createCombinePerKeyPipeline() {
  TestPipeline combinePipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false);

  PCollection<KV<String, Integer>> keyedInput =
      combinePipeline
          .apply(Create.of(KV.of("key", 1)))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

  // The combine's output is deliberately left unused.
  keyedInput.apply(Combine.perKey(new SumCombineFn()));

  return combinePipeline;
}
 
Example #19
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts {@code EMPTY_BEANS} to JSON with {@code AsJsons.of(MyEmptyBean.class)} and expects
 * running the pipeline to fail with a {@code Pipeline.PipelineExecutionException}.
 */
@Test(expected = Pipeline.PipelineExecutionException.class)
public void failWritingWithoutCustomMapper() {
  pipeline
      .apply(Create.of(EMPTY_BEANS))
      .apply(AsJsons.of(MyEmptyBean.class))
      .setCoder(StringUtf8Coder.of());

  // The expected exception is declared on the test annotation.
  pipeline.run();
}
 
Example #20
Source File: WindmillStateInternalsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that persisting after merely looking up a value state records no value updates and
 * causes no further interaction with the mock reader.
 */
@Test
public void testValueNoChangePersist() throws Exception {
  StateTag<ValueState<String>> valueTag = StateTags.value("value", StringUtf8Coder.of());

  // Look the state up without reading or writing it.
  underTest.state(NAMESPACE, valueTag);

  Windmill.WorkItemCommitRequest.Builder commit = Windmill.WorkItemCommitRequest.newBuilder();
  underTest.persist(commit);

  assertEquals(0, commit.getValueUpdatesCount());
  Mockito.verifyNoMoreInteractions(mockReader);
}
 
Example #21
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a {@code
 * StreamingSideInputPGBKParDoFn} when streaming is enabled, a combine function is supplied, and
 * the side input reader reports that it is not empty.
 */
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
  StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
  options.setStreaming(true);

  // Parameterized coder types keep KvCoder.of a checked call (the raw types made it unchecked).
  Coder<String> keyCoder = StringUtf8Coder.of();
  Coder<Integer> valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  // Stub the mocks: a non-empty side input reader and state internals that return an empty map.
  when(mockSideInputReader.isEmpty()).thenReturn(false);
  when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
  when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any()))
      .thenReturn(mockState);
  when(mockState.read()).thenReturn(Maps.newHashMap());

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          mockStreamingStepContext);
  assertTrue(pgbk instanceof StreamingSideInputPGBKParDoFn);
}
 
Example #22
Source File: WithKeysTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code WithKeys.of((Void) null)} pairs every input element with a null {@code Void}
 * key.
 */
@Test
@Category(NeedsRunner.class)
public void testConstantVoidKeys() {
  PCollection<String> values =
      p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder(StringUtf8Coder.of()));

  PCollection<KV<Void, String>> keyedByNull = values.apply(WithKeys.of((Void) null));

  PAssert.that(keyedByNull).containsInAnyOrder(WITH_CONST_NULL_KEYS);
  p.run();
}
 
Example #23
Source File: GroupIntoBatchesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests the behavior when the number of input elements is not evenly divisible by the batch
 * size: no batch may exceed {@code BATCH_SIZE}, and the number of batches must equal the element
 * count divided by the batch size, rounded up.
 */
@Test
@Category({NeedsRunner.class, UsesTimersInParDo.class, UsesStatefulParDo.class})
public void testWithUnevenBatches() {
  PCollection<KV<String, Iterable<String>>> batches =
      pipeline
          .apply("Input data", Create.of(createTestData(ODD_NUM_ELEMENTS)))
          .apply(GroupIntoBatches.ofSize(BATCH_SIZE))
          // The output coder is set explicitly.
          .setCoder(KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())));

  PAssert.that("Incorrect batch size in one or more elements", batches)
      .satisfies(
          new SerializableFunction<Iterable<KV<String, Iterable<String>>>, Void>() {

            @Override
            public Void apply(Iterable<KV<String, Iterable<String>>> input) {
              assertTrue(allWithinBatchSize(input));
              return null;
            }

            /** Returns false as soon as a batch with more than BATCH_SIZE values is found. */
            private boolean allWithinBatchSize(Iterable<KV<String, Iterable<String>>> candidates) {
              for (KV<String, Iterable<String>> batch : candidates) {
                boolean oversized = Iterables.size(batch.getValue()) > BATCH_SIZE;
                if (oversized) {
                  return false;
                }
              }
              return true;
            }
          });

  PAssert.thatSingleton("Incorrect collection size", batches.apply("Count", Count.globally()))
      .isEqualTo(
          // Integer division rounded up, valid for positive numbers; see
          // https://math.stackexchange.com/questions/2591316/proof-for-integer-division-algorithm-that-rounds-up.
          (ODD_NUM_ELEMENTS + BATCH_SIZE - 1) / BATCH_SIZE);

  pipeline.run();
}
 
Example #24
Source File: UnboundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Processes a test source configured to advance its watermark to infinity and to throw on close,
 * expecting an {@link IOException} to surface.
 */
@Test(expected = IOException.class)
public void sourceThrowingException() throws Exception {
  TestUnboundedSource<String> failingSource = new TestUnboundedSource<>(StringUtf8Coder.of());
  failingSource.advanceWatermarkToInfinity = true;
  failingSource.throwOnClose = true;

  processElement(failingSource);
}
 
Example #25
Source File: InnerJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code Join.innerJoin} throws a {@link NullPointerException} when the left
 * collection is null.
 */
@Test(expected = NullPointerException.class)
public void testJoinLeftCollectionNull() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, String> rightCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  // Only the right-hand side is a real collection; the left-hand side is null on purpose.
  Join.innerJoin(null, p.apply(Create.of(rightListOfKv).withCoder(rightCoder)));
}
 
Example #26
Source File: FlattenTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Checks that flattening an empty {@code PCollectionList} yields an empty collection. */
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollectionsEmpty() {
  PCollection<String> flattened =
      PCollectionList.<String>empty(p).apply(Flatten.pCollections());

  // The coder is supplied explicitly on the flattened collection.
  PCollection<String> output = flattened.setCoder(StringUtf8Coder.of());

  PAssert.that(output).empty();
  p.run();
}
 
Example #27
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Groups timestamped key/value pairs by key inside fixed windows of length five and checks the
 * grouped output overall as well as within each of the two windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testGroupByKeyAndWindows() {
  List<KV<String, Integer>> pairs =
      Arrays.asList(
          KV.of("k1", 3), // window [0, 5)
          KV.of("k5", Integer.MAX_VALUE), // window [0, 5)
          KV.of("k5", Integer.MIN_VALUE), // window [0, 5)
          KV.of("k2", 66), // window [0, 5)
          KV.of("k1", 4), // window [5, 10)
          KV.of("k2", -33), // window [5, 10)
          KV.of("k3", 0)); // window [5, 10)

  PCollection<KV<String, Integer>> timestampedPairs =
      p.apply(
          Create.timestamped(pairs, Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L))
              .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())));
  PCollection<KV<String, Iterable<Integer>>> grouped =
      timestampedPairs
          .apply(Window.into(FixedWindows.of(new Duration(5))))
          .apply(GroupByKey.create());

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0L), Duration.millis(5L));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(5L), Duration.millis(5L));

  // Across all windows.
  PAssert.that(grouped)
      .satisfies(
          containsKvs(
              kv("k1", 3),
              kv("k1", 4),
              kv("k5", Integer.MAX_VALUE, Integer.MIN_VALUE),
              kv("k2", 66),
              kv("k2", -33),
              kv("k3", 0)));

  // Window [0, 5).
  PAssert.that(grouped)
      .inWindow(firstWindow)
      .satisfies(
          containsKvs(
              kv("k1", 3), kv("k5", Integer.MIN_VALUE, Integer.MAX_VALUE), kv("k2", 66)));

  // Window [5, 10).
  PAssert.that(grouped)
      .inWindow(secondWindow)
      .satisfies(containsKvs(kv("k1", 4), kv("k2", -33), kv("k3", 0)));

  p.run();
}
 
Example #28
Source File: FhirIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expands this write according to the configured write method.
 *
 * <p>For {@code IMPORT} the input is handed to an {@code Import} transform and that transform's
 * result is returned directly. For {@code EXECUTE_BUNDLE}, and for any other method, the input is
 * run through {@code ExecuteBundles.ExecuteBundlesFn} and the failed bundles are wrapped in the
 * returned {@code Result}.
 *
 * @param input the collection of strings to write
 * @return the result of the import, or a result built from the failed bundles
 * @throws IllegalArgumentException if the write method is {@code IMPORT} and either the dead
 *     letter path or the content structure is absent
 */
@Override
public Result expand(PCollection<String> input) {
  PCollection<HealthcareIOError<String>> failedBundles;
  switch (this.getWriteMethod()) {
    case IMPORT:
      LOG.warn(
          "Make sure the Cloud Healthcare Service Agent has permissions when using import:"
              + " https://cloud.google.com/healthcare/docs/how-tos/permissions-healthcare-api-gcp-products#fhir_store_cloud_storage_permissions");
      ValueProvider<String> deadPath =
          getImportGcsDeadLetterPath()
              .orElseThrow(
                  () ->
                      new IllegalArgumentException(
                          "A GCS dead letter path is required for the IMPORT write method."));
      FhirIO.Import.ContentStructure contentStructure =
          getContentStructure()
              .orElseThrow(
                  () ->
                      new IllegalArgumentException(
                          "A content structure is required for the IMPORT write method."));
      // Without an explicit import temp path, fall back to the pipeline's temp location.
      ValueProvider<String> tempPath =
          getImportGcsTempPath()
              .orElse(
                  StaticValueProvider.of(input.getPipeline().getOptions().getTempLocation()));

      return input.apply(new Import(getFhirStore(), tempPath, deadPath, contentStructure));
    case EXECUTE_BUNDLE:
    default:
      failedBundles =
          input
              .apply(
                  "Execute FHIR Bundles",
                  ParDo.of(new ExecuteBundles.ExecuteBundlesFn(this.getFhirStore())))
              .setCoder(HealthcareIOErrorCoder.of(StringUtf8Coder.of()));
  }
  return Result.in(input.getPipeline(), failedBundles);
}
 
Example #29
Source File: ViewEvaluatorFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the view evaluator hands nothing to the view writer while elements are being
 * processed and passes the windowed values on once the bundle is finished.
 */
@Test
public void testInMemoryEvaluator() throws Exception {
  PCollection<String> elements = p.apply(Create.of("foo", "bar"));
  PCollectionView<Iterable<String>> iterableView = elements.apply(View.asIterable());

  // Collapse the elements into a single iterable by grouping under a null key.
  PCollection<Iterable<String>> grouped =
      elements
          .apply(WithKeys.of((Void) null))
          .setCoder(KvCoder.of(VoidCoder.of(), StringUtf8Coder.of()))
          .apply(GroupByKey.create())
          .apply(Values.create());
  PCollection<Iterable<String>> written =
      grouped.apply(new ViewOverrideFactory.WriteView<>(iterableView));

  // The mocked evaluation context hands back a writer whose latest contents can be inspected.
  EvaluationContext evaluationContext = mock(EvaluationContext.class);
  TestViewWriter<String, Iterable<String>> writer = new TestViewWriter<>();
  when(evaluationContext.createPCollectionViewWriter(grouped, iterableView)).thenReturn(writer);

  CommittedBundle<String> bundle = bundleFactory.createBundle(elements).commit(Instant.now());
  AppliedPTransform<?, ?, ?> viewProducer = DirectGraphs.getProducer(written);
  TransformEvaluator<Iterable<String>> viewEvaluator =
      new ViewEvaluatorFactory(evaluationContext).forApplication(viewProducer, bundle);

  viewEvaluator.processElement(
      WindowedValue.valueInGlobalWindow(ImmutableList.of("foo", "bar")));
  assertThat(writer.latest, nullValue());

  viewEvaluator.finishBundle();
  assertThat(
      writer.latest,
      containsInAnyOrder(
          WindowedValue.valueInGlobalWindow("foo"), WindowedValue.valueInGlobalWindow("bar")));
}
 
Example #30
Source File: BigQueryInsertErrorCoder.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the insert error to {@code outStream} as three consecutive parts: the error serialized
 * to a string by {@code MAPPER}, the row encoded with {@code TableRowJsonCoder}, and the table
 * spec string.
 */
@Override
public void encode(BigQueryInsertError value, OutputStream outStream) throws IOException {
  // Part 1: the error, serialized to a string.
  String serializedError = MAPPER.writeValueAsString(value.getError());
  StringUtf8Coder.of().encode(serializedError, outStream);

  // Part 2: the row.
  TableRowJsonCoder.of().encode(value.getRow(), outStream);

  // Part 3: the table spec.
  StringUtf8Coder tableSpecCoder = StringUtf8Coder.of();
  String tableSpec = BigQueryHelpers.toTableSpec(value.getTable());
  tableSpecCoder.encode(tableSpec, outStream);
}