org.apache.beam.sdk.values.PCollectionView Java Examples

The following examples show how to use org.apache.beam.sdk.values.PCollectionView. The snippets are drawn from open source projects; the originating project and source file are noted above each example.
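
As background, a PCollectionView is produced by materializing a PCollection with one of the View transforms (View.asSingleton(), View.asList(), View.asMap(), View.asMultimap()) or with Combine.Globally#asSingletonView(), and it is consumed as a side input from within a DoFn. Below is a minimal self-contained sketch; the class name, step names, and data are illustrative and do not come from any of the projects below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Materialize a PCollection as a singleton side input (here: the sum of its elements).
    final PCollectionView<Integer> totalView =
        p.apply("Numbers", Create.of(1, 2, 3))
            .apply(Sum.integersGlobally().asSingletonView());

    // Read the view from inside a DoFn via ProcessContext.sideInput().
    p.apply("Words", Create.of("a", "b"))
        .apply(
            ParDo.of(
                    new DoFn<String, String>() {
                      @ProcessElement
                      public void processElement(ProcessContext c) {
                        c.output(c.element() + c.sideInput(totalView));
                      }
                    })
                .withSideInputs(totalView));

    p.run().waitUntilFinish();
  }
}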
Example #1
Source File: HadoopFormatIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<KV<KeyT, ValueT>> input) {

  // a streaming pipeline must have a configuration transformation defined
  if (input.isBounded().equals(PCollection.IsBounded.UNBOUNDED)
      || !input.getWindowingStrategy().equals(WindowingStrategy.globalDefault())) {
    checkArgument(
        configTransform != null,
        "Writing of unbounded data can be processed only with configuration transformation provider. See %s.withConfigurationTransform()",
        Write.class);
  }

  verifyInputWindowing(input);

  TypeDescriptor<Configuration> configType = new TypeDescriptor<Configuration>() {};
  input
      .getPipeline()
      .getCoderRegistry()
      .registerCoderForType(configType, new ConfigurationCoder());

  PCollectionView<Configuration> configView = createConfigurationView(input);

  return processJob(input, configView);
}
 
Example #2
Source File: CoGroup.java    From beam with Apache License 2.0
private JoinInformation(
    KeyedPCollectionTuple<Row> keyedPCollectionTuple,
    Map<String, PCollectionView<Map<Row, Iterable<Row>>>> sideInputs,
    Schema keySchema,
    Map<String, Schema> componentSchemas,
    Map<Integer, SerializableFunction<Object, Row>> toRows,
    List<String> sortedTags,
    Map<Integer, String> tagToKeyedTag) {
  this.keyedPCollectionTuple = keyedPCollectionTuple;
  this.sideInputs = sideInputs;
  this.keySchema = keySchema;
  this.componentSchemas = componentSchemas;
  this.toRows = toRows;
  this.sortedTags = sortedTags;
  this.tagToKeyedTag = tagToKeyedTag;
}
 
Example #3
Source File: FlinkDoFnFunction.java    From beam with Apache License 2.0
public FlinkDoFnFunction(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  this.doFn = doFn;
  this.stepName = stepName;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #4
Source File: WriteFiles.java    From beam with Apache License 2.0
@Override
public PCollection<FileResult<DestinationT>> expand(PCollection<UserT> input) {
  List<PCollectionView<?>> shardingSideInputs = Lists.newArrayList(getSideInputs());
  if (numShardsView != null) {
    shardingSideInputs.add(numShardsView);
  }

  ShardingFunction<UserT, DestinationT> shardingFunction =
      getShardingFunction() == null
          ? new RandomShardingFunction(destinationCoder)
          : getShardingFunction();

  return input
      .apply(
          "ApplyShardingKey",
          ParDo.of(new ApplyShardingFunctionFn(shardingFunction, numShardsView))
              .withSideInputs(shardingSideInputs))
      .setCoder(KvCoder.of(ShardedKeyCoder.of(VarIntCoder.of()), input.getCoder()))
      .apply("GroupIntoShards", GroupByKey.create())
      .apply(
          "WriteShardsIntoTempFiles",
          ParDo.of(new WriteShardsIntoTempFilesFn()).withSideInputs(getSideInputs()))
      .setCoder(fileResultCoder);
}
 
Example #5
Source File: BroadcastHashJoinTranslator.java    From beam with Apache License 2.0
/**
 * Creates a new {@link PCollectionView} of the given {@code pCollectionToView} iff there is no
 * {@link PCollectionView} already associated with the ({@code pcollection}, {@code keyExtractor})
 * key.
 *
 * @param pcollection the {@link PCollection} that, together with {@code keyExtractor}, forms the
 *     cache key
 * @param keyExtractor the function that extracts the join key
 * @param pCollectionToView the {@link PCollection} a view will be created from by applying {@link
 *     View#asMultimap()}
 * @param <V> value type
 * @return the current (already existing or newly computed) view associated with the specified key
 */
private <V> PCollectionView<Map<KeyT, Iterable<V>>> computeViewAsMultimapIfAbsent(
    PCollection<V> pcollection,
    UnaryFunction<?, KeyT> keyExtractor,
    final PCollection<KV<KeyT, V>> pCollectionToView) {

  PCollectionView<?> view = pViews.get(pcollection, keyExtractor);
  if (view == null) {
    view = pCollectionToView.apply(View.asMultimap());
    pViews.put(pcollection, keyExtractor, view);
  }

  @SuppressWarnings("unchecked")
  PCollectionView<Map<KeyT, Iterable<V>>> ret = (PCollectionView<Map<KeyT, Iterable<V>>>) view;
  return ret;
}
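
For orientation, a multimap view like the one cached above is consumed inside a join DoFn as a Map from key to all matching values. The following is a sketch of that consuming side, assuming a hypothetical rightView built with View.asMultimap(); none of the names below come from the file above.

// Sketch of the consuming side of a broadcast hash join: the right-hand
// relation is available as a multimap side input, and each left element
// looks up its matches by key.
static PCollection<KV<String, KV<Integer, Integer>>> joinWithView(
    PCollection<KV<String, Integer>> left,
    final PCollectionView<Map<String, Iterable<Integer>>> rightView) {
  return left.apply(
      "HashJoin",
      ParDo.of(
              new DoFn<KV<String, Integer>, KV<String, KV<Integer, Integer>>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  Map<String, Iterable<Integer>> right = c.sideInput(rightView);
                  Iterable<Integer> matches = right.get(c.element().getKey());
                  if (matches == null) {
                    return; // inner join: drop unmatched left elements
                  }
                  for (Integer r : matches) {
                    c.output(KV.of(c.element().getKey(), KV.of(c.element().getValue(), r)));
                  }
                }
              })
          .withSideInputs(rightView));
}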
 
Example #6
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteWithMissingSchemaFromView() throws Exception {
  PCollectionView<Map<String, String>> view =
      p.apply("Create schema view", Create.of(KV.of("foo", "bar"), KV.of("bar", "boo")))
          .apply(View.asMap());
  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(view)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expectMessage("does not contain data for table destination dataset-id.table-id");
  p.run();
}
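
The view passed to withSchemaFromView is expected to map table specifications to JSON-serialized schemas. Since neither "foo" nor "bar" names the destination dataset-id.table-id, the write fails with the message asserted above.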
 
Example #7
Source File: SplittableParDo.java    From beam with Apache License 2.0
/**
 * @param fn the splittable {@link DoFn}.
 * @param elementCoder the coder for elements of the input collection.
 * @param restrictionCoder the coder for the {@link DoFn DoFn's} restrictions.
 * @param watermarkEstimatorStateCoder the coder for the watermark estimator state.
 * @param windowingStrategy the {@link WindowingStrategy} of the input collection.
 * @param sideInputs list of side inputs that should be available to the {@link DoFn}.
 * @param mainOutputTag {@link TupleTag Tag} of the {@link DoFn DoFn's} main output.
 * @param additionalOutputTags {@link TupleTagList Tags} of the {@link DoFn DoFn's} additional
 *     outputs.
 * @param outputTagsToCoders A map from output tag to the coder for that output, which should
 *     provide mappings for the main and all additional tags.
 */
public ProcessKeyedElements(
    DoFn<InputT, OutputT> fn,
    Coder<InputT> elementCoder,
    Coder<RestrictionT> restrictionCoder,
    Coder<WatermarkEstimatorStateT> watermarkEstimatorStateCoder,
    WindowingStrategy<InputT, ?> windowingStrategy,
    List<PCollectionView<?>> sideInputs,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders) {
  this.fn = fn;
  this.elementCoder = elementCoder;
  this.restrictionCoder = restrictionCoder;
  this.watermarkEstimatorStateCoder = watermarkEstimatorStateCoder;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.outputTagsToCoders = outputTagsToCoders;
}
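
ProcessKeyedElements is runner-side plumbing; the fn it receives is a user-written splittable DoFn. For context, here is a minimal splittable DoFn sketch (illustrative, not from the file above) that emits 0..n-1 for each input n:

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;

// The OffsetRange restriction lets the runner split and checkpoint the work.
class EmitRange extends DoFn<Long, Long> {
  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element Long n) {
    return new OffsetRange(0, n);
  }

  @ProcessElement
  public void processElement(
      ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
    // Claim each position before producing output for it.
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      c.output(i);
    }
  }
}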
 
Example #8
Source File: ViewOverrideFactory.java    From beam with Apache License 2.0
@Override
public PTransformReplacement<PCollection<ElemT>, PCollection<ElemT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        transform) {

  PCollectionView<ViewT> view;
  try {
    view = CreatePCollectionViewTranslation.getView(transform);
  } catch (IOException exc) {
    throw new RuntimeException(
        String.format(
            "Could not extract %s from transform %s",
            PCollectionView.class.getSimpleName(), transform),
        exc);
  }

  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(transform), new GroupAndWriteView<>(view));
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0
@Test
public void testSideInputAsList() {
  PCollectionView<List<Integer>> sideInputView =
      pipeline.apply("Create sideInput", Create.of(1, 2, 3)).apply(View.asList());
  PCollection<Integer> input =
      pipeline
          .apply("Create input", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          List<Integer> sideInputValue = c.sideInput(sideInputView);
                          if (!sideInputValue.contains(c.element())) {
                            c.output(c.element());
                          }
                        }
                      })
                  .withSideInputs(sideInputView));
  PAssert.that(input).containsInAnyOrder(4, 5, 6, 7, 8, 9, 10);
  pipeline.run();
}
 
Example #10
Source File: DoFnInfo.java    From beam with Apache License 2.0
/**
 * Creates a {@link DoFnInfo} for the given {@link DoFn}.
 *
 * <p>This method exists for backwards compatibility with the Dataflow runner. Once the Dataflow
 * runner has been updated to use the new constructor, remove this one.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  return new DoFnInfo<>(
      doFn,
      windowingStrategy,
      sideInputViews,
      inputCoder,
      Collections.emptyMap(),
      mainOutput,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example #11
Source File: DoFnTesterTest.java    From beam with Apache License 2.0
@Test
public void fnWithSideInputDefault() throws Exception {
  PCollection<Integer> pCollection = p.apply(Create.empty(VarIntCoder.of()));
  final PCollectionView<Integer> value =
      pCollection.apply(View.<Integer>asSingleton().withDefaultValue(0));

  try (DoFnTester<Integer, Integer> tester = DoFnTester.of(new SideInputDoFn(value))) {
    tester.processElement(1);
    tester.processElement(2);
    tester.processElement(4);
    tester.processElement(8);
    assertThat(tester.peekOutputElements(), containsInAnyOrder(0, 0, 0, 0));
  }
}
 
Example #12
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0
@Test
public void nonKeyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.globalDefault(),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
 
Example #13
Source File: ViewTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToFixedWithDefault() {

  final PCollectionView<Integer> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(2, new Instant(11)),
                  TimestampedValue.of(3, new Instant(13))))
          .apply("WindowSideInput", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(Sum.integersGlobally().asSingletonView());

  PCollection<String> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("A", new Instant(4)),
                  TimestampedValue.of("B", new Instant(15)),
                  TimestampedValue.of("C", new Instant(7))))
          .apply("WindowMainInput", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputMainAndSideInputs",
              ParDo.of(
                      new DoFn<String, String>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          c.output(c.element() + c.sideInput(view));
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output).containsInAnyOrder("A0", "B5", "C0");

  pipeline.run();
}
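
The expected output follows from the window alignment: the side-input values 2 and 3 have timestamps 11 and 13, so they land in the fixed window [10, 20), and "B" (timestamp 15) observes their sum, 5. "A" and "C" fall in [0, 10), where the side input has no data, so the singleton view yields the combine fn's default value, 0.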
 
Example #14
Source File: ApplyDDLTransform.java    From DataflowTemplates with Apache License 2.0
/**
 * Constructor.
 *
 * @param spannerConfig the Spanner configuration for the database.
 * @param pendingDDLStatements the list of pending DDL statements to be applied.
 * @param waitForApply whether to wait until all the DDL statements are committed.
 */
public ApplyDDLTransform(
    SpannerConfig spannerConfig,
    PCollectionView<List<String>> pendingDDLStatements,
    ValueProvider<Boolean> waitForApply) {
  this.spannerConfig = spannerConfig;
  this.pendingDDLStatements = pendingDDLStatements;
  this.waitForApply = waitForApply;
}
 
Example #15
Source File: AppliedCombineFn.java    From beam with Apache License 2.0
private AppliedCombineFn(
    GlobalCombineFn<InputT, AccumT, OutputT> fn,
    Coder<AccumT> accumulatorCoder,
    Iterable<PCollectionView<?>> sideInputViews,
    KvCoder<K, InputT> kvCoder,
    WindowingStrategy<?, ?> windowingStrategy) {
  this.fn = fn;
  this.accumulatorCoder = accumulatorCoder;
  this.sideInputViews = sideInputViews;
  this.kvCoder = kvCoder;
  this.windowingStrategy = windowingStrategy;
}
 
Example #16
Source File: ExportTransform.java    From DataflowTemplates with Apache License 2.0
private SchemaBasedDynamicDestinations(
    PCollectionView<Map<String, SerializableSchemaSupplier>> avroSchemas,
    PCollectionView<String> uniqueIdView,
    ValueProvider<ResourceId> baseDir) {
  this.avroSchemas = avroSchemas;
  this.uniqueIdView = uniqueIdView;
  this.baseDir = baseDir;
}
 
Example #17
Source File: AbstractParDoP.java    From beam with Apache License 2.0
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  this.pipelineOptions = pipelineOptions;
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  e ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) e.getValue())));
  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.ordinalToSideInput = ordinalToSideInput;
  this.ownerId = ownerId;
  this.stepId = stepId;
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
 
Example #18
Source File: HadoopFormatIO.java    From beam with Apache License 2.0
SetupJobFn(
    ExternalSynchronization externalSynchronization,
    PCollectionView<Configuration> configView,
    TypeDescriptor<KV<KeyT, ValueT>> inputTypeDescriptor) {
  this.externalSynchronization = externalSynchronization;
  this.configView = configView;
  this.inputTypeDescriptor = inputTypeDescriptor;
}
 
Example #19
Source File: FlinkMergingNonShuffleReduceFunction.java    From beam with Apache License 2.0
public FlinkMergingNonShuffleReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions) {

  this.combineFn = combineFn;

  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;

  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
}
 
Example #20
Source File: PAssert.java    From beam with Apache License 2.0
private SideInputCheckerDoFn(
    SerializableFunction<ActualT, Void> checkerFn,
    PCollectionView<ActualT> actual,
    PAssertionSite site) {
  this.checkerFn = checkerFn;
  this.actual = actual;
  this.site = site;
}
 
Example #21
Source File: ParDoP.java    From beam with Apache License 2.0
@Override
Processor getEx(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  return new ParDoP<>(
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      outputCollToOrdinals,
      pipelineOptions,
      mainOutputTag,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      ordinalToSideInput,
      ownerId,
      stepId);
}
 
Example #22
Source File: WriteGroupedRecordsToFiles.java    From beam with Apache License 2.0
WriteGroupedRecordsToFiles(
    PCollectionView<String> tempFilePrefix,
    long maxFileSize,
    RowWriterFactory<ElementT, DestinationT> rowWriterFactory) {
  this.tempFilePrefix = tempFilePrefix;
  this.maxFileSize = maxFileSize;
  this.rowWriterFactory = rowWriterFactory;
}
 
Example #23
Source File: ViewTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testEmptyMapSideInput() throws Exception {

  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply(
              "CreateEmptyView", Create.empty(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(View.asMap());

  PCollection<Integer> results =
      pipeline
          .apply("Create1", Create.of(1))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          assertTrue(c.sideInput(view).isEmpty());
                          assertTrue(c.sideInput(view).entrySet().isEmpty());
                          assertFalse(c.sideInput(view).entrySet().iterator().hasNext());
                          c.output(c.element());
                        }
                      })
                  .withSideInputs(view));

  // Pass at least one value through to guarantee that DoFn executes.
  PAssert.that(results).containsInAnyOrder(1);

  pipeline.run();
}
 
Example #24
Source File: IsmSideInputReaderTest.java    From beam with Apache License 2.0
@Test
public void testIsmReaderReferenceCaching() throws Exception {
  Coder<WindowedValue<Long>> valueCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), GLOBAL_WINDOW_CODER);
  final WindowedValue<Long> element = valueInGlobalWindow(42L);
  final PCollectionView<Long> view =
      Pipeline.create().apply(Create.empty(VarLongCoder.of())).apply(View.asSingleton());

  final Source source =
      initInputFile(
          fromValues(Arrays.asList(element)),
          IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(GLOBAL_WINDOW_CODER), valueCoder));
  final Source emptySource =
      initInputFile(
          fromValues(Arrays.asList()),
          IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(GLOBAL_WINDOW_CODER), valueCoder));

  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), source, emptySource);

  assertTrue(reader.tagToIsmReaderMap.containsKey(view.getTagInternal()));
  assertEquals(1, reader.tagToIsmReaderMap.get(view.getTagInternal()).size());
  assertEquals(
      FileSystems.matchSingleFileSpec(getString(source.getSpec(), WorkerPropertyNames.FILENAME))
          .resourceId(),
      reader.tagToIsmReaderMap.get(view.getTagInternal()).get(0).getResourceId());
  assertTrue(reader.tagToEmptyIsmReaderMap.containsKey(view.getTagInternal()));
  assertEquals(1, reader.tagToEmptyIsmReaderMap.get(view.getTagInternal()).size());
  assertEquals(
      FileSystems.matchSingleFileSpec(
              getString(emptySource.getSpec(), WorkerPropertyNames.FILENAME))
          .resourceId(),
      reader.tagToEmptyIsmReaderMap.get(view.getTagInternal()).get(0).getResourceId());
}
 
Example #25
Source File: AppliedCombineFn.java    From beam with Apache License 2.0
private static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT> create(
    GlobalCombineFn<InputT, AccumT, OutputT> fn,
    Coder<AccumT> accumulatorCoder,
    Iterable<PCollectionView<?>> sideInputViews,
    KvCoder<K, InputT> kvCoder,
    WindowingStrategy<?, ?> windowingStrategy) {
  return new AppliedCombineFn<>(fn, accumulatorCoder, sideInputViews, kvCoder, windowingStrategy);
}
 
Example #26
Source File: LocalSpannerIO.java    From DataflowTemplates with Apache License 2.0
BatchFn(
    long maxBatchSizeBytes,
    long maxNumMutations,
    long maxNumRows,
    PCollectionView<SpannerSchema> schemaView) {
  this.maxBatchSizeBytes = maxBatchSizeBytes;
  this.maxNumMutations = maxNumMutations;
  this.maxNumRows = maxNumRows;
  this.schemaView = schemaView;
}
 
Example #27
Source File: DirectGraph.java    From beam with Apache License 2.0
private DirectGraph(
    Map<PCollection<?>, AppliedPTransform<?, ?, ?>> producers,
    Map<PCollectionView<?>, AppliedPTransform<?, ?, ?>> viewWriters,
    ListMultimap<PInput, AppliedPTransform<?, ?, ?>> perElementConsumers,
    Set<AppliedPTransform<?, ?, ?>> rootTransforms,
    Map<AppliedPTransform<?, ?, ?>, String> stepNames) {
  this.producers = producers;
  this.viewWriters = viewWriters;
  this.perElementConsumers = perElementConsumers;
  this.rootTransforms = rootTransforms;
  this.stepNames = stepNames;
}
 
Example #28
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  StepContext stepContext =
      BatchModeExecutionContext.forTesting(options, "testStage")
          .getStepContext(TestOperationContext.create(counterSet));

  when(mockSideInputReader.isEmpty()).thenReturn(false);

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          stepContext);
  assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
 
Example #29
Source File: DoFnFunction.java    From beam with Apache License 2.0
private Map<TupleTag<?>, WindowingStrategy<?, ?>> convertToTuples(
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs) {
  Map<TupleTag<?>, WindowingStrategy<?, ?>> temp = new HashMap<>();
  for (Map.Entry<PCollectionView<?>, WindowingStrategy<?, ?>> entry : sideInputs.entrySet()) {
    temp.put(entry.getKey().getTagInternal(), entry.getValue());
  }
  return temp;
}
 
Example #30
Source File: SideInputContainerTest.java    From beam with Apache License 2.0
private <ValueT> Future<ValueT> getFutureOfView(
    final SideInputReader myReader,
    final PCollectionView<ValueT> view,
    final BoundedWindow window) {
  Callable<ValueT> callable = () -> myReader.get(view, window);
  return Executors.newSingleThreadExecutor().submit(callable);
}