org.apache.beam.sdk.coders.VoidCoder Java Examples

The following examples show how to use org.apache.beam.sdk.coders.VoidCoder. Each example is taken from an open source project; the source file, project, and license are listed above each snippet.
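Before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects; the class name VoidCoderExample is made up for illustration) of the most common pattern that recurs below: creating a single-element PCollection<Void> with VoidCoder so that a DoFn runs once.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class VoidCoderExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // A single null element encoded with VoidCoder; VoidCoder encodes to zero bytes.
    PCollection<Void> trigger =
        pipeline.apply("CreateVoid", Create.of((Void) null).withCoder(VoidCoder.of()));

    // The DoFn runs once for the single (null) element, e.g. to materialize a side input.
    trigger.apply(
        "RunOnce",
        ParDo.of(
            new DoFn<Void, String>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                c.output("ran once");
              }
            }));

    pipeline.run().waitUntilFinish();
  }
}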
Example #1
Source File: ExecutableStageDoFnOperator.java    From beam with Apache License 2.0
@SuppressWarnings("ByteBufferBackingArray")
void cleanupState(StateInternals stateInternals, Consumer<ByteBuffer> keyContextConsumer)
    throws Exception {
  while (!cleanupQueue.isEmpty()) {
    KV<ByteBuffer, BoundedWindow> kv = Preconditions.checkNotNull(cleanupQueue.remove());
    BoundedWindow window = Preconditions.checkNotNull(kv.getValue());
    keyContextConsumer.accept(kv.getKey());
    // Check whether we have pending timers which were set during the bundle.
    if (hasPendingEventTimeTimers.apply(window.maxTimestamp().getMillis())) {
      // Re-add GC timer and let remaining timers fire. Don't cleanup state yet.
      cleanupTimer.setCleanupTimer(window);
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("State cleanup for {} {}", Arrays.toString(kv.getKey().array()), window);
      }
      // No more timers (finally!). Time to clean up.
      for (String userState : userStateNames) {
        StateNamespace namespace = StateNamespaces.window(windowCoder, window);
        StateTag<BagState<Void>> bagStateStateTag = StateTags.bag(userState, VoidCoder.of());
        BagState<?> state = stateInternals.state(namespace, bagStateStateTag);
        state.clear();
      }
    }
  }
}
 
Example #2
Source File: ExcelHdfsFileSource.java    From components with Apache License 2.0
private ExcelHdfsFileSource(UgiDoAs doAs, String filepattern, LazyAvroCoder<IndexedRecord> lac, int limit, String encoding, String sheetName, long header, long footer, String excelFormat, ExtraHadoopConfiguration extraConfig,
        SerializableSplit serializableSplit) {
    super(doAs, filepattern, ExcelFileInputFormat.class, Void.class, IndexedRecord.class, extraConfig, serializableSplit);
    
    this.lac = lac;
    setDefaultCoder(VoidCoder.of(), (LazyAvroCoder) lac);
    
    ExtraHadoopConfiguration hadoop_config = getExtraHadoopConfiguration();
    hadoop_config.set(ExcelFileInputFormat.TALEND_ENCODING, encoding);
    hadoop_config.set(ExcelFileInputFormat.TALEND_EXCEL_SHEET_NAME, sheetName);
    hadoop_config.set(ExcelFileInputFormat.TALEND_HEADER, String.valueOf(header));
    hadoop_config.set(ExcelFileInputFormat.TALEND_FOOTER, String.valueOf(footer));
    hadoop_config.set(ExcelFileInputFormat.TALEND_EXCEL_FORMAT, excelFormat);
    
    // pass the limit to the reader for performance
    hadoop_config.set(ExcelFileInputFormat.TALEND_EXCEL_LIMIT, String.valueOf(limit));
}
 
Example #3
Source File: Combine.java    From beam with Apache License 2.0
private PCollection<OutputT> insertDefaultValueIfEmpty(PCollection<OutputT> maybeEmpty) {
  final PCollectionView<Iterable<OutputT>> maybeEmptyView = maybeEmpty.apply(View.asIterable());

  final OutputT defaultValue = fn.defaultValue();
  PCollection<OutputT> defaultIfEmpty =
      maybeEmpty
          .getPipeline()
          .apply("CreateVoid", Create.of((Void) null).withCoder(VoidCoder.of()))
          .apply(
              "ProduceDefault",
              ParDo.of(
                      new DoFn<Void, OutputT>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          Iterator<OutputT> combined = c.sideInput(maybeEmptyView).iterator();
                          if (!combined.hasNext()) {
                            c.output(defaultValue);
                          }
                        }
                      })
                  .withSideInputs(maybeEmptyView))
          .setCoder(maybeEmpty.getCoder())
          .setWindowingStrategyInternal(maybeEmpty.getWindowingStrategy());

  return PCollectionList.of(maybeEmpty).and(defaultIfEmpty).apply(Flatten.pCollections());
}
 
Example #4
Source File: BatchSideInputHandlerFactoryTest.java    From beam with Apache License 2.0
@Test
public void singleElementForCollection() {
  when(context.getSideInput(COLLECTION_ID))
      .thenReturn(
          Arrays.asList(WindowedValue.valueInGlobalWindow(KV.<Void, Integer>of(null, 3))));

  BatchSideInputHandlerFactory factory =
      BatchSideInputHandlerFactory.forStage(EXECUTABLE_STAGE, context);
  MultimapSideInputHandler<Void, Integer, GlobalWindow> handler =
      factory.forMultimapSideInput(
          TRANSFORM_ID,
          SIDE_INPUT_NAME,
          KvCoder.of(VoidCoder.of(), VarIntCoder.of()),
          GlobalWindow.Coder.INSTANCE);
  Iterable<Void> keys = handler.get(GlobalWindow.INSTANCE);
  assertThat(keys, contains((Void) null));
  Iterable<Integer> values = handler.get(null, GlobalWindow.INSTANCE);
  assertThat(values, contains(3));
}
 
Example #5
Source File: DisplayDataEvaluator.java    From beam with Apache License 2.0
/**
 * Traverse the specified {@link PTransform}, collecting {@link DisplayData} registered on the
 * inner primitive {@link PTransform PTransforms}.
 *
 * @param root The root {@link PTransform} to traverse
 * @param inputCoder The coder to set for the {@link PTransform} input, or null to infer the
 *     default coder.
 * @return the set of {@link DisplayData} for primitive {@link PTransform PTransforms}.
 */
public <InputT> Set<DisplayData> displayDataForPrimitiveTransforms(
    final PTransform<? super PCollection<InputT>, ? extends POutput> root,
    Coder<InputT> inputCoder) {

  Create.Values<InputT> input;
  if (inputCoder != null) {
    input = Create.empty(inputCoder);
  } else {
    // These types don't actually work, but the pipeline will never be run
    input = (Create.Values<InputT>) Create.empty(VoidCoder.of());
  }

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("Input", input).apply("Transform", root);

  return displayDataForPipeline(pipeline, root);
}
 
Example #6
Source File: FlattenTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testEmptyFlattenAsSideInput() {
  final PCollectionView<Iterable<String>> view =
      PCollectionList.<String>empty(p)
          .apply(Flatten.pCollections())
          .setCoder(StringUtf8Coder.of())
          .apply(View.asIterable());

  PCollection<String> output =
      p.apply(Create.of((Void) null).withCoder(VoidCoder.of()))
          .apply(
              ParDo.of(
                      new DoFn<Void, String>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          for (String side : c.sideInput(view)) {
                            c.output(side);
                          }
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output).empty();
  p.run();
}
 
Example #7
Source File: BatchSideInputHandlerFactoryTest.java    From beam with Apache License 2.0
@Test
public void emptyResultForEmptyCollection() {
  BatchSideInputHandlerFactory factory =
      BatchSideInputHandlerFactory.forStage(EXECUTABLE_STAGE, context);
  MultimapSideInputHandler<Void, Integer, GlobalWindow> handler =
      factory.forMultimapSideInput(
          TRANSFORM_ID,
          SIDE_INPUT_NAME,
          KvCoder.of(VoidCoder.of(), VarIntCoder.of()),
          GlobalWindow.Coder.INSTANCE);
  // We never populated the broadcast variable for "side-input", so the mock will return an empty
  // list.
  Iterable<Void> keys = handler.get(GlobalWindow.INSTANCE);
  assertThat(keys, emptyIterable());
  Iterable<Integer> values = handler.get(null, GlobalWindow.INSTANCE);
  assertThat(values, emptyIterable());
}
 
Example #8
Source File: IsmSinkTest.java    From beam with Apache License 2.0
@Test
public void testWriteEmptyKeyWithValueLargerThanBlockSize() throws Throwable {
  IsmSink<byte[]> sink =
      new IsmSink<>(
          FileSystems.matchNewResource(tmpFolder.newFile().getPath(), false),
          IsmRecordCoder.of(
              1, // We hash using only the window
              0, // There are no metadata records
              // We specifically use a coder that encodes to 0 bytes.
              ImmutableList.<Coder<?>>of(VoidCoder.of()),
              ByteArrayCoder.of()),
          BLOOM_FILTER_SIZE_LIMIT);
  SinkWriter<WindowedValue<IsmRecord<byte[]>>> sinkWriter = sink.writer();
  sinkWriter.add(
      new ValueInEmptyWindows<>(
          IsmRecord.of(
              Arrays.asList(new Object[] {null}), new byte[IsmSink.BLOCK_SIZE_BYTES * 2])));
  sinkWriter.close();
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStrictTimerOrdering.class
})
public void testTwoTimersSettingEachOther() {
  Instant now = new Instant(1500000000000L);
  Instant end = now.plus(100);
  TestStream<KV<Void, Void>> input =
      TestStream.create(KvCoder.of(VoidCoder.of(), VoidCoder.of()))
          .addElements(KV.of(null, null))
          .advanceWatermarkToInfinity();
  pipeline.apply(TwoTimerTest.of(now, end, input));
  pipeline.run();
}
 
Example #10
Source File: DoFnInvokersTest.java    From beam with Apache License 2.0
@Test
public void testDefaultWatermarkEstimatorStateAndCoder() throws Exception {
  class MockFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(
        ProcessContext c, RestrictionTracker<RestrictionWithDefaultTracker, Void> tracker) {}

    @GetInitialRestriction
    public RestrictionWithDefaultTracker getInitialRestriction(@Element String element) {
      return null;
    }
  }

  MockFn fn = mock(MockFn.class);
  DoFnInvoker<String, String> invoker = DoFnInvokers.invokerFor(fn);

  CoderRegistry coderRegistry = CoderRegistry.createDefault();
  coderRegistry.registerCoderProvider(
      CoderProviders.fromStaticMethods(
          RestrictionWithDefaultTracker.class, CoderForDefaultTracker.class));
  assertEquals(VoidCoder.of(), invoker.invokeGetWatermarkEstimatorStateCoder(coderRegistry));
  assertNull(invoker.invokeGetInitialWatermarkEstimatorState(new FakeArgumentProvider<>()));
}
 
Example #11
Source File: ReifyAsIterable.java    From beam with Apache License 2.0
@Override
public PCollection<Iterable<T>> expand(PCollection<T> input) {
  final PCollectionView<Iterable<T>> view = input.apply(View.asIterable());
  return input
      .getPipeline()
      .apply(Create.of((Void) null).withCoder(VoidCoder.of()))
      .apply(
          ParDo.of(
                  new DoFn<Void, Iterable<T>>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(c.sideInput(view));
                    }
                  })
              .withSideInputs(view))
      .setCoder(IterableCoder.of(input.getCoder()));
}
 
Example #12
Source File: DataflowSideInputHandlerFactoryTest.java    From beam with Apache License 2.0
@Test
public void invalidSideInputThrowsException() {
  ImmutableMap<String, SideInputReader> sideInputReadersMap =
      ImmutableMap.<String, SideInputReader>builder().build();

  ImmutableMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
      sideInputIdToPCollectionViewMap =
          ImmutableMap.<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>builder()
              .build();

  DataflowSideInputHandlerFactory factory =
      DataflowSideInputHandlerFactory.of(sideInputReadersMap, sideInputIdToPCollectionViewMap);
  thrown.expect(instanceOf(IllegalStateException.class));
  factory.forMultimapSideInput(
      TRANSFORM_ID,
      SIDE_INPUT_NAME,
      KvCoder.of(VoidCoder.of(), VoidCoder.of()),
      GlobalWindow.Coder.INSTANCE);
}
 
Example #13
Source File: BigQueryToTableIT.java    From beam with Apache License 2.0
private void runBigQueryToTablePipeline(BigQueryToTableOptions options) {
  Pipeline p = Pipeline.create(options);
  BigQueryIO.Read bigQueryRead = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    bigQueryRead = bigQueryRead.usingStandardSql();
  }
  PCollection<TableRow> input = p.apply(bigQueryRead);
  if (options.getReshuffle()) {
    input =
        input
            .apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }
  input.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  p.run().waitUntilFinish();
}
 
Example #14
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
  TestPipeline p = TestPipeline.fromOptions(options);

  StreamingShardedWriteFactory<Object, Void, Object> factory =
      new StreamingShardedWriteFactory<>(p.getOptions());
  WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of("writefiles", objs.expand(), Collections.emptyMap(), original, p);

  WriteFiles<Object, Void, Object> replacement =
      (WriteFiles<Object, Void, Object>)
          factory.getReplacementTransform(originalApplication).getTransform();
  assertThat(replacement, not(equalTo((Object) original)));
  assertThat(replacement.getNumShardsProvider().get(), equalTo(expectedNumShards));

  WriteFilesResult<Void> originalResult = objs.apply(original);
  WriteFilesResult<Void> replacementResult = objs.apply(replacement);
  Map<PValue, ReplacementOutput> res =
      factory.mapOutputs(originalResult.expand(), replacementResult);
  assertEquals(1, res.size());
  assertEquals(
      originalResult.getPerDestinationOutputFilenames(),
      res.get(replacementResult.getPerDestinationOutputFilenames()).getOriginal().getValue());
}
 
Example #15
Source File: TransformInputsTest.java    From beam with Apache License 2.0
@Test
public void nonAdditionalInputsWithAdditionalInputsSucceeds() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  additionalInputs.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  additionalInputs.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  PCollection<Integer> mainInts = pipeline.apply("MainInput", Create.of(12, 3));
  allInputs.put(new TupleTag<Integer>() {}, mainInts);
  PCollection<Void> voids = pipeline.apply("VoidInput", Create.empty(VoidCoder.of()));
  allInputs.put(new TupleTag<Void>() {}, voids);
  allInputs.putAll(additionalInputs);

  AppliedPTransform<PInput, POutput, TestTransform> transform =
      AppliedPTransform.of(
          "additional",
          allInputs,
          Collections.emptyMap(),
          new TestTransform(additionalInputs),
          pipeline);

  assertThat(
      TransformInputs.nonAdditionalInputs(transform),
      Matchers.containsInAnyOrder(mainInts, voids));
}
 
Example #16
Source File: SamzaStoreStateInternals.java    From beam with Apache License 2.0
static Factory createStateInternalFactory(
    String id,
    Coder<?> keyCoder,
    TaskContext context,
    SamzaPipelineOptions pipelineOptions,
    DoFnSignature signature) {
  final int batchGetSize = pipelineOptions.getStoreBatchGetSize();
  final Map<String, KeyValueStore<ByteArray, byte[]>> stores = new HashMap<>();
  stores.put(BEAM_STORE, getBeamStore(context));

  final Coder stateKeyCoder;
  if (keyCoder != null) {
    signature
        .stateDeclarations()
        .keySet()
        .forEach(
            stateId ->
                stores.put(
                    stateId, (KeyValueStore<ByteArray, byte[]>) context.getStore(stateId)));
    stateKeyCoder = keyCoder;
  } else {
    stateKeyCoder = VoidCoder.of();
  }
  return new Factory<>(Objects.toString(id), stores, stateKeyCoder, batchGetSize);
}
 
Example #17
Source File: FlinkTransformOverridesTest.java    From beam with Apache License 2.0
@Test
public void testRunnerDeterminedSharding() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);
  options.setFlinkMaster("[auto]");
  options.setParallelism(5);

  TestPipeline p = TestPipeline.fromOptions(options);

  StreamingShardedWriteFactory<Object, Void, Object> factory =
      new StreamingShardedWriteFactory<>(p.getOptions());

  WriteFiles<Object, Void, Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
  AppliedPTransform<PCollection<Object>, WriteFilesResult<Void>, WriteFiles<Object, Void, Object>>
      originalApplication =
          AppliedPTransform.of("writefiles", objs.expand(), Collections.emptyMap(), original, p);

  WriteFiles<Object, Void, Object> replacement =
      (WriteFiles<Object, Void, Object>)
          factory.getReplacementTransform(originalApplication).getTransform();

  assertThat(replacement, not(equalTo((Object) original)));
  assertThat(replacement.getNumShardsProvider().get(), is(10));
}
 
Example #18
Source File: FlinkStateInternals.java    From beam with Apache License 2.0
@Override
public void add(T input) {
  try {
    ListState<T> partitionedState =
        flinkStateBackend.getPartitionedState(
            namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
    if (storesVoidValues) {
      Preconditions.checkState(input == null, "Expected a null value but was: %s", input);
      // Flink does not allow storing null values
      // If we have null values, we use the structural null value
      input = (T) VoidCoder.of().structuralValue((Void) input);
    }
    partitionedState.add(input);
  } catch (Exception e) {
    throw new RuntimeException("Error adding to bag state.", e);
  }
}
 
Example #19
Source File: ViewEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Test
public void testInMemoryEvaluator() throws Exception {
  PCollection<String> input = p.apply(Create.of("foo", "bar"));
  PCollectionView<Iterable<String>> pCollectionView = input.apply(View.asIterable());
  PCollection<Iterable<String>> concat =
      input
          .apply(WithKeys.of((Void) null))
          .setCoder(KvCoder.of(VoidCoder.of(), StringUtf8Coder.of()))
          .apply(GroupByKey.create())
          .apply(Values.create());
  PCollection<Iterable<String>> view =
      concat.apply(new ViewOverrideFactory.WriteView<>(pCollectionView));

  EvaluationContext context = mock(EvaluationContext.class);
  TestViewWriter<String, Iterable<String>> viewWriter = new TestViewWriter<>();
  when(context.createPCollectionViewWriter(concat, pCollectionView)).thenReturn(viewWriter);

  CommittedBundle<String> inputBundle = bundleFactory.createBundle(input).commit(Instant.now());
  AppliedPTransform<?, ?, ?> producer = DirectGraphs.getProducer(view);
  TransformEvaluator<Iterable<String>> evaluator =
      new ViewEvaluatorFactory(context).forApplication(producer, inputBundle);

  evaluator.processElement(WindowedValue.valueInGlobalWindow(ImmutableList.of("foo", "bar")));
  assertThat(viewWriter.latest, nullValue());

  evaluator.finishBundle();
  assertThat(
      viewWriter.latest,
      containsInAnyOrder(
          WindowedValue.valueInGlobalWindow("foo"), WindowedValue.valueInGlobalWindow("bar")));
}
 
Example #20
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<Integer> input) {
  PCollection<Integer> sum = input.apply(Sum.integersGlobally());

  // Fails here when attempting to construct a tuple with an unbound object.
  return PCollectionTuple.of(sumTag, sum)
      .and(
          doneTag,
          PCollection.createPrimitiveOutputInternal(
              input.getPipeline(),
              WindowingStrategy.globalDefault(),
              input.isBounded(),
              VoidCoder.of()));
}
 
Example #21
Source File: DataflowRunner.java    From beam with Apache License 2.0
@Override
public void translate(StreamingPubsubIOWrite transform, TranslationContext context) {
  checkArgument(
      context.getPipelineOptions().isStreaming(),
      "StreamingPubsubIOWrite is only for streaming pipelines.");
  PubsubUnboundedSink overriddenTransform = transform.getOverriddenTransform();
  StepTranslationContext stepContext = context.addStep(transform, "ParallelWrite");
  stepContext.addInput(PropertyNames.FORMAT, "pubsub");
  if (overriddenTransform.getTopicProvider().isAccessible()) {
    stepContext.addInput(
        PropertyNames.PUBSUB_TOPIC, overriddenTransform.getTopic().getV1Beta1Path());
  } else {
    stepContext.addInput(
        PropertyNames.PUBSUB_TOPIC_OVERRIDE,
        ((NestedValueProvider) overriddenTransform.getTopicProvider()).propertyName());
  }
  if (overriddenTransform.getTimestampAttribute() != null) {
    stepContext.addInput(
        PropertyNames.PUBSUB_TIMESTAMP_ATTRIBUTE, overriddenTransform.getTimestampAttribute());
  }
  if (overriddenTransform.getIdAttribute() != null) {
    stepContext.addInput(
        PropertyNames.PUBSUB_ID_ATTRIBUTE, overriddenTransform.getIdAttribute());
  }
  stepContext.addInput(
      PropertyNames.PUBSUB_SERIALIZED_ATTRIBUTES_FN,
      byteArrayToJsonString(serializeToByteArray(new IdentityMessageFn())));
  // No coder is needed in this case since the collection being written is already of
  // PubsubMessage; however, the Dataflow backend requires a coder to be set.
  stepContext.addEncodingInput(WindowedValue.getValueOnlyCoder(VoidCoder.of()));
  stepContext.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
}
 
Example #22
Source File: ShuffleReaderFactoryTest.java    From beam with Apache License 2.0
@Test
public void testCreateRichPartitioningShuffleReader() throws Exception {
  runTestCreatePartitioningShuffleReader(
      new byte[] {(byte) 0xE2},
      "aaa",
      "zzz",
      BigEndianIntegerCoder.of(),
      FullWindowedValueCoder.of(
          KvCoder.of(StringUtf8Coder.of(), VoidCoder.of()), IntervalWindow.getCoder()));
}
 
Example #23
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void testStageBundleClosed() throws Exception {
  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of());
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
      new OneInputStreamOperatorTestHarness<>(operator);

  RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
  when(bundle.getInputReceivers())
      .thenReturn(
          ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder()
              .put("input", Mockito.mock(FnDataReceiver.class))
              .build());
  when(stageBundleFactory.getBundle(any(), any(), any(), any())).thenReturn(bundle);

  testHarness.open();
  testHarness.close();

  verify(stageBundleFactory).close();
  verify(stageContext).close();
  verifyNoMoreInteractions(stageBundleFactory);

  // close() will also call dispose(), but call again to verify no new bundle
  // is created afterwards
  operator.dispose();
  verifyNoMoreInteractions(bundle);
}
 
Example #24
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void expectedInputsAreSent() throws Exception {
  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of());
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);

  @SuppressWarnings("unchecked")
  RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
  when(stageBundleFactory.getBundle(any(), any(), any(), any())).thenReturn(bundle);

  @SuppressWarnings("unchecked")
  FnDataReceiver<WindowedValue<?>> receiver = Mockito.mock(FnDataReceiver.class);
  when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));

  WindowedValue<Integer> one = WindowedValue.valueInGlobalWindow(1);
  WindowedValue<Integer> two = WindowedValue.valueInGlobalWindow(2);
  WindowedValue<Integer> three = WindowedValue.valueInGlobalWindow(3);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
      new OneInputStreamOperatorTestHarness<>(operator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(one));
  testHarness.processElement(new StreamRecord<>(two));
  testHarness.processElement(new StreamRecord<>(three));

  verify(receiver).accept(one);
  verify(receiver).accept(two);
  verify(receiver).accept(three);
  verifyNoMoreInteractions(receiver);

  testHarness.close();
}
 
Example #25
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void sdkErrorsSurfaceOnClose() throws Exception {
  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of());
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainOutput, Collections.emptyList(), outputManagerFactory);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
      new OneInputStreamOperatorTestHarness<>(operator);

  testHarness.open();

  @SuppressWarnings("unchecked")
  RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
  when(stageBundleFactory.getBundle(any(), any(), any(), any())).thenReturn(bundle);

  @SuppressWarnings("unchecked")
  FnDataReceiver<WindowedValue<?>> receiver = Mockito.mock(FnDataReceiver.class);
  when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));

  Exception expected = new RuntimeException(new Exception());
  doThrow(expected).when(bundle).close();
  thrown.expectCause(is(expected));

  operator.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(0)));
  testHarness.close();
}
 
Example #26
Source File: FlinkKeyUtilsTest.java    From beam with Apache License 2.0
@Test
public void testNullKey() {
  Void key = null;
  VoidCoder coder = VoidCoder.of();

  ByteBuffer byteBuffer = FlinkKeyUtils.encodeKey(key, coder);
  assertThat(FlinkKeyUtils.decodeKey(byteBuffer, coder), is(nullValue()));
}
 
Example #27
Source File: ExcelHdfsFileSource.java    From components with Apache License 2.0
private ExcelHdfsFileSource(UgiDoAs doAs, String filepattern, LazyAvroCoder<IndexedRecord> lac, ExtraHadoopConfiguration extraConfig,
        SerializableSplit serializableSplit) {
    super(doAs, filepattern, ExcelFileInputFormat.class, Void.class, IndexedRecord.class, extraConfig, serializableSplit);
    
    this.lac = lac;
    setDefaultCoder(VoidCoder.of(), (LazyAvroCoder) lac);
}
 
Example #28
Source File: ParquetHdfsFileSource.java    From components with Apache License 2.0
private ParquetHdfsFileSource(UgiDoAs doAs, String filepattern, LazyAvroCoder<IndexedRecord> lac,
        ExtraHadoopConfiguration extraConfig, SerializableSplit serializableSplit) {
    super(doAs, filepattern, (Class) AvroParquetInputFormat.class, Void.class, IndexedRecord.class, extraConfig,
            serializableSplit);
    this.lac = lac;
    setDefaultCoder(VoidCoder.of(), (LazyAvroCoder) lac);
}
 
Example #29
Source File: SimpleRecordFormatParquetIO.java    From components with Apache License 2.0
@Override
public PDone write(PCollection<IndexedRecord> in) {
    ParquetHdfsFileSink sink = new ParquetHdfsFileSink(doAs, path, overwrite, mergeOutput);
    sink.getExtraHadoopConfiguration().addFrom(getExtraHadoopConfiguration());

    PCollection<KV<Void, IndexedRecord>> pc1 = in.apply(ParDo.of(new FormatParquet()));
    pc1 = pc1.setCoder(KvCoder.of(VoidCoder.of(), LazyAvroCoder.of()));
    if (in.isBounded() == PCollection.IsBounded.BOUNDED) {
        return pc1.apply(Write.to(sink));
    } else {
        return pc1.apply(UnboundedWrite.of(sink));
    }
}
 
Example #30
Source File: RunnerHarnessCoderCloudObjectTranslatorRegistrar.java    From beam with Apache License 2.0
@Override
public Map<String, CloudObjectTranslator<? extends Coder>> classNamesToTranslators() {
  return ImmutableMap.<String, CloudObjectTranslator<? extends Coder>>builder()
      .put("kind:ism_record", new IsmRecordCoderCloudObjectTranslator())
      .put("kind:fixed_big_endian_int32", atomic(BigEndianIntegerCoder.class))
      .put("kind:fixed_big_endian_int64", atomic(BigEndianLongCoder.class))
      .put("kind:var_int32", atomic(VarIntCoder.class))
      .put("kind:varint", atomic(VarLongCoder.class))
      .put("kind:void", atomic(VoidCoder.class))
      .build();
}