Java Code Examples for org.apache.beam.sdk.values.PCollectionView

The following examples show how to use org.apache.beam.sdk.values.PCollectionView. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads a list-valued side input inside a {@link DoFn} and drops every main-input element that
 * appears in it. With side input {1, 2, 3} and main input 1..10, the expected output is 4..10.
 */
@Test
public void testSideInputAsList() {
  PCollection<Integer> sideValues = pipeline.apply("Create sideInput", Create.of(1, 2, 3));
  PCollectionView<List<Integer>> sideInputView = sideValues.apply(View.asList());

  // Emits an element only when the side-input list does not contain it.
  DoFn<Integer, Integer> dropSideInputMembers =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> sideInputValue = c.sideInput(sideInputView);
          if (sideInputValue.contains(c.element())) {
            return;
          }
          c.output(c.element());
        }
      };

  PCollection<Integer> mainValues =
      pipeline.apply("Create input", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
  PCollection<Integer> input =
      mainValues.apply(ParDo.of(dropSideInputMembers).withSideInputs(sideInputView));

  PAssert.that(input).containsInAnyOrder(4, 5, 6, 7, 8, 9, 10);
  pipeline.run();
}
 
Example 2
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a BigQuery write whose schema is supplied through a map-valued side input and runs the
 * pipeline expecting a failure message about missing data for the destination table.
 *
 * <p>The view only has entries for the keys "foo" and "bar"; the write targets
 * "dataset-id.table-id", which is not one of them.
 */
@Test
public void testWriteWithMissingSchemaFromView() throws Exception {
  // Side input with two string entries, neither keyed by the destination table used below.
  PCollectionView<Map<String, String>> view =
      p.apply("Create schema view", Create.of(KV.of("foo", "bar"), KV.of("bar", "boo")))
          .apply(View.asMap());
  // The main input is empty; only the write configuration is under test.
  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(view)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // NOTE(review): `thrown` is presumably a JUnit ExpectedException rule on the test class -
  // confirm. It is armed after pipeline construction so that it applies to p.run().
  thrown.expectMessage("does not contain data for table destination dataset-id.table-id");
  p.run();
}
 
Example 3
Source Project: beam   Source File: BroadcastHashJoinTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the multimap {@link PCollectionView} registered in {@code pViews} for the given
 * ({@code pcollection}, {@code keyExtractor}) pair. When no view is registered yet, one is
 * created from {@code pCollectionToView} via {@link View#asMultimap()}, registered, and returned.
 *
 * @param pcollection first half of the lookup key in {@code pViews}
 * @param keyExtractor second half of the lookup key in {@code pViews}
 * @param pCollectionToView the {@link PCollection} the view is created from when none is
 *     registered yet
 * @param <V> value type of the keyed elements
 * @return the already registered view, or the newly created one
 */
private <V> PCollectionView<Map<KeyT, Iterable<V>>> computeViewAsMultimapIfAbsent(
    PCollection<V> pcollection,
    UnaryFunction<?, KeyT> keyExtractor,
    final PCollection<KV<KeyT, V>> pCollectionToView) {

  PCollectionView<?> registered = pViews.get(pcollection, keyExtractor);
  if (registered != null) {
    // The registry stores wildcard-typed views, so the concrete type is restored by a cast.
    @SuppressWarnings("unchecked")
    PCollectionView<Map<KeyT, Iterable<V>>> existing =
        (PCollectionView<Map<KeyT, Iterable<V>>>) registered;
    return existing;
  }

  PCollectionView<Map<KeyT, Iterable<V>>> created = pCollectionToView.apply(View.asMultimap());
  pViews.put(pcollection, keyExtractor, created);
  return created;
}
 
Example 4
Source Project: beam   Source File: WriteFiles.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expands the transform in three steps: "ApplyShardingKey" tags every input element using the
 * sharding function, "GroupIntoShards" groups by that key, and "WriteShardsIntoTempFiles" hands
 * each group to {@code WriteShardsIntoTempFilesFn}.
 *
 * @param input the collection to process
 * @return the output of the "WriteShardsIntoTempFiles" step, with {@code fileResultCoder} set
 */
@Override
public PCollection<FileResult<DestinationT>> expand(PCollection<UserT> input) {
  // The sharding step sees the regular side inputs plus the shard-count view when one is set.
  List<PCollectionView<?>> shardingSideInputs = Lists.newArrayList(getSideInputs());
  if (numShardsView != null) {
    shardingSideInputs.add(numShardsView);
  }

  // Use the configured sharding function, defaulting to a RandomShardingFunction.
  ShardingFunction<UserT, DestinationT> shardingFunction = getShardingFunction();
  if (shardingFunction == null) {
    shardingFunction = new RandomShardingFunction(destinationCoder);
  }

  return input
      .apply(
          "ApplyShardingKey",
          ParDo.of(new ApplyShardingFunctionFn(shardingFunction, numShardsView))
              .withSideInputs(shardingSideInputs))
      .setCoder(KvCoder.of(ShardedKeyCoder.of(VarIntCoder.of()), input.getCoder()))
      .apply("GroupIntoShards", GroupByKey.create())
      .apply(
          "WriteShardsIntoTempFiles",
          ParDo.of(new WriteShardsIntoTempFilesFn()).withSideInputs(getSideInputs()))
      .setCoder(fileResultCoder);
}
 
Example 5
Source Project: beam   Source File: ViewOverrideFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Replaces the applied transform with a {@code GroupAndWriteView} built around the {@link
 * PCollectionView} extracted from it.
 *
 * @param transform the applied transform to replace
 * @return a replacement pairing the transform's singleton main input with the new transform
 * @throws RuntimeException wrapping the {@link IOException} raised when the view cannot be
 *     extracted from {@code transform}
 */
@Override
public PTransformReplacement<PCollection<ElemT>, PCollection<ElemT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<ElemT>,
            PCollection<ElemT>,
            PTransform<PCollection<ElemT>, PCollection<ElemT>>>
        transform) {

  final PCollectionView<ViewT> extractedView;
  try {
    extractedView = CreatePCollectionViewTranslation.getView(transform);
  } catch (IOException exc) {
    String message =
        String.format(
            "Could not extract %s from transform %s",
            PCollectionView.class.getSimpleName(), transform);
    throw new RuntimeException(message, exc);
  }

  PCollection<ElemT> mainInput = PTransformReplacements.getSingletonMainInput(transform);
  return PTransformReplacement.of(mainInput, new GroupAndWriteView<>(extractedView));
}
 
Example 6
Source Project: beam   Source File: DoFnInfo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link DoFnInfo} for the given {@link DoFn}, forwarding every argument to the
 * constructor and passing an empty map for the one constructor argument this factory does not
 * expose.
 *
 * <p>Kept for backwards compatibility with the Dataflow runner. Remove this method once the
 * Dataflow runner has been updated to use the new constructor.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  DoFnInfo<InputT, OutputT> info =
      new DoFnInfo<>(
          doFn,
          windowingStrategy,
          sideInputViews,
          inputCoder,
          Collections.emptyMap(), // fifth constructor argument: always empty for this overload
          mainOutput,
          doFnSchemaInformation,
          sideInputMapping);
  return info;
}
 
Example 7
Source Project: beam   Source File: HadoopFormatIO.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expands the write: validates the configuration, registers a coder for {@code Configuration},
 * creates the configuration view and delegates to {@code processJob}.
 *
 * @param input keyed elements to write
 * @return the result of {@code processJob(input, configView)}
 */
@Override
public PDone expand(PCollection<KV<KeyT, ValueT>> input) {

  // streamed pipeline must have defined configuration transformation
  boolean requiresConfigTransform =
      input.isBounded().equals(PCollection.IsBounded.UNBOUNDED)
          || !input.getWindowingStrategy().equals(WindowingStrategy.globalDefault());
  if (requiresConfigTransform) {
    checkArgument(
        configTransform != null,
        "Writing of unbounded data can be processed only with configuration transformation provider. See %s.withConfigurationTransform()",
        Write.class);
  }

  verifyInputWindowing(input);

  // The coder is registered before the configuration view is created.
  TypeDescriptor<Configuration> configurationType = new TypeDescriptor<Configuration>() {};
  input
      .getPipeline()
      .getCoderRegistry()
      .registerCoderForType(configurationType, new ConfigurationCoder());

  return processJob(input, createConfigurationView(input));
}
 
Example 8
Source Project: beam   Source File: SplittableParDo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the transform, storing each argument in the field of the same name.
 *
 * @param fn the splittable {@link DoFn}.
 * @param elementCoder coder stored as {@code elementCoder}; presumably encodes the input
 *     elements.
 * @param restrictionCoder coder stored as {@code restrictionCoder}; presumably encodes
 *     restrictions.
 * @param watermarkEstimatorStateCoder coder stored as {@code watermarkEstimatorStateCoder};
 *     presumably encodes watermark estimator state.
 * @param windowingStrategy the {@link WindowingStrategy} of the input collection.
 * @param sideInputs list of side inputs that should be available to the {@link DoFn}.
 * @param mainOutputTag {@link TupleTag Tag} of the {@link DoFn DoFn's} main output.
 * @param additionalOutputTags {@link TupleTagList Tags} of the {@link DoFn DoFn's} additional
 *     outputs.
 * @param outputTagsToCoders A map from output tag to the coder for that output, which should
 *     provide mappings for the main and all additional tags.
 */
public ProcessKeyedElements(
    DoFn<InputT, OutputT> fn,
    Coder<InputT> elementCoder,
    Coder<RestrictionT> restrictionCoder,
    Coder<WatermarkEstimatorStateT> watermarkEstimatorStateCoder,
    WindowingStrategy<InputT, ?> windowingStrategy,
    List<PCollectionView<?>> sideInputs,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders) {
  // The function and its input-side configuration.
  this.fn = fn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;

  // Coders.
  this.elementCoder = elementCoder;
  this.restrictionCoder = restrictionCoder;
  this.watermarkEstimatorStateCoder = watermarkEstimatorStateCoder;
  this.outputTagsToCoders = outputTagsToCoders;

  // Output tags.
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
}
 
Example 9
Source Project: beam   Source File: CoGroup.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the holder, storing each argument in the field of the same name. No validation or
 * copying is performed.
 */
private JoinInformation(
    KeyedPCollectionTuple<Row> keyedPCollectionTuple,
    Map<String, PCollectionView<Map<Row, Iterable<Row>>>> sideInputs,
    Schema keySchema,
    Map<String, Schema> componentSchemas,
    Map<Integer, SerializableFunction<Object, Row>> toRows,
    List<String> sortedTags,
    Map<Integer, String> tagToKeyedTag) {
  // Inputs.
  this.keyedPCollectionTuple = keyedPCollectionTuple;
  this.sideInputs = sideInputs;

  // Schemas and row conversion.
  this.keySchema = keySchema;
  this.componentSchemas = componentSchemas;
  this.toRows = toRows;

  // Tag bookkeeping.
  this.sortedTags = sortedTags;
  this.tagToKeyedTag = tagToKeyedTag;
}
 
Example 10
Source Project: beam   Source File: FlinkDoFnFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the function, storing each argument in the field of the same name. The only exception
 * is {@code options}, which is wrapped in a {@link SerializablePipelineOptions} and kept as
 * {@code serializedOptions}.
 */
public FlinkDoFnFunction(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  // The function itself and how it is identified.
  this.doFn = doFn;
  this.stepName = stepName;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Input side.
  this.inputCoder = inputCoder;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;

  // Output side.
  this.mainOutputTag = mainOutputTag;
  this.outputMap = outputMap;
  this.outputCoderMap = outputCoderMap;

  // Options are stored in their serializable wrapper rather than directly.
  this.serializedOptions = new SerializablePipelineOptions(options);
}
 
Example 11
Source Project: beam   Source File: AppliedCombineFn.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the applied combine function, storing each argument in the field of the same name.
 * No validation or copying is performed.
 */
private AppliedCombineFn(
    GlobalCombineFn<InputT, AccumT, OutputT> fn,
    Coder<AccumT> accumulatorCoder,
    Iterable<PCollectionView<?>> sideInputViews,
    KvCoder<K, InputT> kvCoder,
    WindowingStrategy<?, ?> windowingStrategy) {
  // Coders.
  this.kvCoder = kvCoder;
  this.accumulatorCoder = accumulatorCoder;

  // Combine function and its execution context.
  this.fn = fn;
  this.sideInputViews = sideInputViews;
  this.windowingStrategy = windowingStrategy;
}
 
Example 12
Source Project: beam   Source File: DoFnTesterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds four elements through a {@code SideInputDoFn} whose singleton side input is built from
 * an empty collection with default value 0, and expects four outputs of 0.
 */
@Test
public void fnWithSideInputDefault() throws Exception {
  // Singleton view over an empty collection; the configured default is 0.
  final PCollectionView<Integer> value =
      p.apply(Create.empty(VarIntCoder.of()))
          .apply(View.<Integer>asSingleton().withDefaultValue(0));

  try (DoFnTester<Integer, Integer> tester = DoFnTester.of(new SideInputDoFn(value))) {
    for (int element : new int[] {1, 2, 4, 8}) {
      tester.processElement(element);
    }
    assertThat(tester.peekOutputElements(), containsInAnyOrder(0, 0, 0, 0));
  }
}
 
Example 13
Source Project: beam   Source File: DoFnOperatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared {@code sideInputCheckpointing} scenario against a test harness wrapping a
 * {@link DoFnOperator} that is configured with the two side inputs {@code view1} and {@code
 * view2}.
 */
@Test
public void nonKeyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        TupleTag<String> mainOutputTag = new TupleTag<>("main-output");
        Coder<WindowedValue<String>> windowedStringCoder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());

        // view1 is registered under key 1 and view2 under key 2.
        ImmutableMap<Integer, PCollectionView<?>> viewsByIndex =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> operator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                windowedStringCoder,
                Collections.emptyMap(),
                mainOutputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(
                    mainOutputTag, windowedStringCoder),
                WindowingStrategy.globalDefault(),
                viewsByIndex, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(operator);
      });
}
 
Example 14
Source Project: beam   Source File: HadoopFormatIO.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the function, storing each argument in the field of the same name.
 *
 * @param externalSynchronization stored as {@code externalSynchronization}
 * @param configView view carrying a {@code Configuration}, stored as {@code configView}
 * @param inputTypeDescriptor type descriptor of the keyed input, stored as {@code
 *     inputTypeDescriptor}
 */
SetupJobFn(
    ExternalSynchronization externalSynchronization,
    PCollectionView<Configuration> configView,
    TypeDescriptor<KV<KeyT, ValueT>> inputTypeDescriptor) {
  this.inputTypeDescriptor = inputTypeDescriptor;
  this.configView = configView;
  this.externalSynchronization = externalSynchronization;
}
 
Example 15
Source Project: beam   Source File: PAssert.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the checker, storing each argument in the field of the same name.
 *
 * @param checkerFn function applied to a value of type {@code ActualT}
 * @param actual view holding a value of type {@code ActualT}
 * @param site stored as {@code site}
 */
private SideInputCheckerDoFn(
    SerializableFunction<ActualT, Void> checkerFn,
    PCollectionView<ActualT> actual,
    PAssertionSite site) {
  this.site = site;
  this.actual = actual;
  this.checkerFn = checkerFn;
}
 
Example 16
Source Project: beam   Source File: ParDoP.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code ParDoP} processor, forwarding every argument unchanged and in the same
 * order to its constructor.
 */
@Override
Processor getEx(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  Processor processor =
      new ParDoP<>(
          doFn,
          windowingStrategy,
          doFnSchemaInformation,
          outputCollToOrdinals,
          pipelineOptions,
          mainOutputTag,
          inputCoder,
          sideInputCoders,
          outputCoders,
          inputValueCoder,
          outputValueCoders,
          ordinalToSideInput,
          ownerId,
          stepId);
  return processor;
}
 
Example 17
Source Project: beam   Source File: ViewTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a map-valued side input from an empty collection and asserts, from inside a {@link
 * DoFn}, that the map, its entry set and the entry set's iterator are all empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testEmptyMapSideInput() throws Exception {

  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply(
              "CreateEmptyView", Create.empty(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(View.asMap());

  // Performs the emptiness checks, then forwards the element so the output is observable.
  DoFn<Integer, Integer> assertViewIsEmpty =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          assertTrue(c.sideInput(view).isEmpty());
          assertTrue(c.sideInput(view).entrySet().isEmpty());
          assertFalse(c.sideInput(view).entrySet().iterator().hasNext());
          c.output(c.element());
        }
      };

  PCollection<Integer> singleElement = pipeline.apply("Create1", Create.of(1));
  PCollection<Integer> results =
      singleElement.apply(
          "OutputSideInputs", ParDo.of(assertViewIsEmpty).withSideInputs(view));

  // Pass at least one value through to guarantee that DoFn executes.
  PAssert.that(results).containsInAnyOrder(1);

  pipeline.run();
}
 
Example 18
Source Project: beam   Source File: IsmSideInputReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@code IsmSideInputReader} over one populated and one empty input file for the same
 * view tag, then checks that {@code tagToIsmReaderMap} and {@code tagToEmptyIsmReaderMap} each
 * hold exactly one reader for that tag and that each reader's resource id matches the file named
 * in the corresponding source spec.
 */
@Test
public void testIsmReaderReferenceCaching() throws Exception {
  Coder<WindowedValue<Long>> valueCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), GLOBAL_WINDOW_CODER);
  final WindowedValue<Long> element = valueInGlobalWindow(42L);
  // The view is only used for its tag; its pipeline is never run.
  final PCollectionView<Long> view =
      Pipeline.create().apply(Create.empty(VarLongCoder.of())).apply(View.asSingleton());

  // One source built from a single element and one built from no elements.
  final Source source =
      initInputFile(
          fromValues(Arrays.asList(element)),
          IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(GLOBAL_WINDOW_CODER), valueCoder));
  final Source emptySource =
      initInputFile(
          fromValues(Arrays.asList()),
          IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(GLOBAL_WINDOW_CODER), valueCoder));

  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), source, emptySource);

  // The first map is expected to hold the reader for `source`.
  assertTrue(reader.tagToIsmReaderMap.containsKey(view.getTagInternal()));
  assertEquals(1, reader.tagToIsmReaderMap.get(view.getTagInternal()).size());
  assertEquals(
      FileSystems.matchSingleFileSpec(getString(source.getSpec(), WorkerPropertyNames.FILENAME))
          .resourceId(),
      reader.tagToIsmReaderMap.get(view.getTagInternal()).get(0).getResourceId());
  // The second map is expected to hold the reader for `emptySource`.
  assertTrue(reader.tagToEmptyIsmReaderMap.containsKey(view.getTagInternal()));
  assertEquals(1, reader.tagToEmptyIsmReaderMap.get(view.getTagInternal()).size());
  assertEquals(
      FileSystems.matchSingleFileSpec(
              getString(emptySource.getSpec(), WorkerPropertyNames.FILENAME))
          .resourceId(),
      reader.tagToEmptyIsmReaderMap.get(view.getTagInternal()).get(0).getResourceId());
}
 
Example 19
Source Project: beam   Source File: DirectGraph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the graph, storing each argument in the field of the same name. The supplied
 * collections are kept by reference; no copies are made.
 */
private DirectGraph(
    Map<PCollection<?>, AppliedPTransform<?, ?, ?>> producers,
    Map<PCollectionView<?>, AppliedPTransform<?, ?, ?>> viewWriters,
    ListMultimap<PInput, AppliedPTransform<?, ?, ?>> perElementConsumers,
    Set<AppliedPTransform<?, ?, ?>> rootTransforms,
    Map<AppliedPTransform<?, ?, ?>, String> stepNames) {
  // Transforms at the root and their names.
  this.rootTransforms = rootTransforms;
  this.stepNames = stepNames;

  // Lookups from values to the transforms associated with them.
  this.producers = producers;
  this.viewWriters = viewWriters;
  this.perElementConsumers = perElementConsumers;
}
 
Example 20
Source Project: beam   Source File: PartialGroupByKeyParDoFnsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a {@code
 * BatchSideInputPGBKParDoFn} when it is given a combine function together with a side-input
 * reader that reports itself as non-empty.
 */
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  // Parameterized rather than raw coder types, so KvCoder.of is fully type-checked.
  Coder<String> keyCoder = StringUtf8Coder.of();
  Coder<Integer> valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  StepContext stepContext =
      BatchModeExecutionContext.forTesting(options, "testStage")
          .getStepContext(TestOperationContext.create(counterSet));

  // A non-empty side-input reader is the condition under test.
  when(mockSideInputReader.isEmpty()).thenReturn(false);

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          stepContext);
  assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
 
Example 21
Source Project: beam   Source File: SideInputContainerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code view} for {@code window} from {@code myReader} on a dedicated single-thread
 * executor and returns the pending result.
 *
 * <p>The executor is shut down immediately after the task is submitted so that its worker thread
 * terminates once the read finishes instead of lingering for every call. An orderly shutdown
 * still runs tasks that were already submitted, so the returned future completes as before.
 *
 * @param myReader the reader to query
 * @param view the view whose contents are requested
 * @param window the window the view is read in
 * @return a future that completes with the value returned by {@code myReader.get(view, window)}
 */
private <ValueT> Future<ValueT> getFutureOfView(
    final SideInputReader myReader,
    final PCollectionView<ValueT> view,
    final BoundedWindow window) {
  // Fully qualified so that the block does not depend on an additional import.
  final java.util.concurrent.ExecutorService executor = Executors.newSingleThreadExecutor();
  try {
    Callable<ValueT> callable = () -> myReader.get(view, window);
    return executor.submit(callable);
  } finally {
    // Stops accepting new work; the task submitted above is still executed.
    executor.shutdown();
  }
}
 
Example 22
Source Project: beam   Source File: DoFnFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Re-keys the given side-input map by each view's internal tag.
 *
 * @param sideInputs windowing strategies keyed by {@link PCollectionView}
 * @return a new {@link HashMap} holding the same windowing strategies, keyed by the value of
 *     {@code getTagInternal()} of the corresponding view
 */
private Map<TupleTag<?>, WindowingStrategy<?, ?>> convertToTuples(
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs) {
  final Map<TupleTag<?>, WindowingStrategy<?, ?>> strategiesByTag = new HashMap<>();
  sideInputs.forEach((view, strategy) -> strategiesByTag.put(view.getTagInternal(), strategy));
  return strategiesByTag;
}
 
Example 23
Source Project: DataflowTemplates   Source File: LocalSpannerIO.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the function, storing each argument in the field of the same name.
 *
 * @param maxBatchSizeBytes stored as {@code maxBatchSizeBytes}; presumably the byte limit of a
 *     batch
 * @param maxNumMutations stored as {@code maxNumMutations}; presumably the mutation limit of a
 *     batch
 * @param maxNumRows stored as {@code maxNumRows}; presumably the row limit of a batch
 * @param schemaView view holding a {@code SpannerSchema}
 */
BatchFn(
    long maxBatchSizeBytes,
    long maxNumMutations,
    long maxNumRows,
    PCollectionView<SpannerSchema> schemaView) {
  this.schemaView = schemaView;

  // Limits.
  this.maxNumRows = maxNumRows;
  this.maxNumMutations = maxNumMutations;
  this.maxBatchSizeBytes = maxBatchSizeBytes;
}
 
Example 24
Source Project: beam   Source File: AppliedCombineFn.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Static factory that forwards all arguments, unchanged and in the same order, to the {@code
 * AppliedCombineFn} constructor.
 *
 * @return the newly constructed instance
 */
private static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT> create(
    GlobalCombineFn<InputT, AccumT, OutputT> fn,
    Coder<AccumT> accumulatorCoder,
    Iterable<PCollectionView<?>> sideInputViews,
    KvCoder<K, InputT> kvCoder,
    WindowingStrategy<?, ?> windowingStrategy) {
  AppliedCombineFn<K, InputT, AccumT, OutputT> applied =
      new AppliedCombineFn<>(fn, accumulatorCoder, sideInputViews, kvCoder, windowingStrategy);
  return applied;
}
 
Example 25
Source Project: beam   Source File: WriteGroupedRecordsToFiles.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the function, storing each argument in the field of the same name.
 *
 * @param tempFilePrefix view holding a string, stored as {@code tempFilePrefix}
 * @param maxFileSize stored as {@code maxFileSize}
 * @param rowWriterFactory stored as {@code rowWriterFactory}
 */
WriteGroupedRecordsToFiles(
    PCollectionView<String> tempFilePrefix,
    long maxFileSize,
    RowWriterFactory<ElementT, DestinationT> rowWriterFactory) {
  this.rowWriterFactory = rowWriterFactory;
  this.maxFileSize = maxFileSize;
  this.tempFilePrefix = tempFilePrefix;
}
 
Example 26
Source Project: DataflowTemplates   Source File: ExportTransform.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the destinations object, storing each argument in the field of the same name.
 *
 * @param avroSchemas view holding a map from string to {@code SerializableSchemaSupplier}
 * @param uniqueIdView view holding a string
 * @param baseDir provider of a {@code ResourceId}, stored as {@code baseDir}
 */
private SchemaBasedDynamicDestinations(
    PCollectionView<Map<String, SerializableSchemaSupplier>> avroSchemas,
    PCollectionView<String> uniqueIdView,
    ValueProvider<ResourceId> baseDir) {
  this.baseDir = baseDir;

  // Side-input views.
  this.uniqueIdView = uniqueIdView;
  this.avroSchemas = avroSchemas;
}
 
Example 27
Source Project: beam   Source File: ViewTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Windows both the side input and the main input into 10 ms fixed windows, exposes the
 * per-window sum of the side input as a singleton view, and appends that view's value to every
 * main-input element. The expected output is "A0", "B5" and "C0".
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToFixedWithDefault() {

  // Side values 2 and 3 carry timestamps 11 and 13.
  PCollection<Integer> windowedSideValues =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(2, new Instant(11)),
                  TimestampedValue.of(3, new Instant(13))))
          .apply("WindowSideInput", Window.into(FixedWindows.of(Duration.millis(10))));
  final PCollectionView<Integer> view =
      windowedSideValues.apply(Sum.integersGlobally().asSingletonView());

  // Concatenates the element with the side-input value read through the view.
  DoFn<String, String> appendSideInput =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element() + c.sideInput(view));
        }
      };

  PCollection<String> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("A", new Instant(4)),
                  TimestampedValue.of("B", new Instant(15)),
                  TimestampedValue.of("C", new Instant(7))))
          .apply("WindowMainInput", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply("OutputMainAndSideInputs", ParDo.of(appendSideInput).withSideInputs(view));

  PAssert.that(output).containsInAnyOrder("A0", "B5", "C0");

  pipeline.run();
}
 
Example 28
Source Project: DataflowTemplates   Source File: ApplyDDLTransform.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the transform, storing each argument in the field of the same name.
 *
 * @param spannerConfig the spanner config for database.
 * @param pendingDDLStatements the list of pending DDL statements to be applied.
 * @param waitForApply wait till all the ddl statements are committed.
 */
public ApplyDDLTransform(
    SpannerConfig spannerConfig,
    PCollectionView<List<String>> pendingDDLStatements,
    ValueProvider<Boolean> waitForApply) {
  this.waitForApply = waitForApply;
  this.pendingDDLStatements = pendingDDLStatements;
  this.spannerConfig = spannerConfig;
}
 
Example 29
Source Project: beam   Source File: AbstractParDoP.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the processor. Most arguments are stored unchanged in the field of the same name; the
 * exceptions are {@code doFn}, which is passed through {@code Utils.serde} first, and {@code
 * sideInputCoders}, whose values are replaced by derived coders. The {@code cooperative} flag is
 * computed last.
 *
 * @throws ClassCastException if a value of {@code sideInputCoders} is not a {@code
 *     WindowedValue.FullWindowedValueCoder}
 */
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  this.pipelineOptions = pipelineOptions;
  // NOTE(review): Utils.serde presumably performs a serialize/deserialize round trip so this
  // instance works on its own copy of the DoFn - confirm in Utils.
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  // Same keys as the argument; each value is replaced by the coder that
  // Utils.deriveIterableValueCoder derives from it. The cast requires every supplied coder to
  // be a WindowedValue.FullWindowedValueCoder.
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  e ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) e.getValue())));
  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.ordinalToSideInput = ordinalToSideInput;
  this.ownerId = ownerId;
  this.stepId = stepId;
  // NOTE(review): hasOutput() presumably reads fields assigned above, so keep this line last -
  // confirm before reordering.
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
 
Example 30
Source Project: beam   Source File: FlinkMergingNonShuffleReduceFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the function, storing {@code combineFn}, {@code windowingStrategy} and {@code
 * sideInputs} in the fields of the same name and keeping {@code pipelineOptions} wrapped in a
 * {@link SerializablePipelineOptions} as {@code serializedOptions}.
 */
public FlinkMergingNonShuffleReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions) {

  this.sideInputs = sideInputs;
  this.windowingStrategy = windowingStrategy;
  this.combineFn = combineFn;

  // Options are stored in their serializable wrapper rather than directly.
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
}