org.apache.beam.sdk.transforms.PTransform Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.PTransform. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BeamSideInputLookupJoinRel.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the join transform used when one input is Seekable and the other is non Seekable.
 *
 * <p>For a side-input lookup join the windowFn of the two inputs does not need to match. Only
 * INNER JOIN, LEFT OUTER JOIN whose left side is non Seekable, and RIGHT OUTER JOIN whose right
 * side is non Seekable are supported.
 *
 * @return a new {@code SideInputLookupJoin}
 * @throws UnsupportedOperationException for a FULL OUTER JOIN, for a LEFT OUTER JOIN when
 *     {@code seekableInputIndex()} holds 0, or for a RIGHT OUTER JOIN when it holds 1
 */
@Override
public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
  if (joinType == JoinRelType.FULL) {
    throw new UnsupportedOperationException(
        "FULL OUTER JOIN is not supported when join "
            + "a Seekable table with a non Seekable table.");
  }

  // The seekable index is consulted only for LEFT and RIGHT joins, exactly once.
  final boolean outerSideIsSeekable;
  if (joinType == JoinRelType.LEFT) {
    outerSideIsSeekable = seekableInputIndex().get() == 0;
  } else if (joinType == JoinRelType.RIGHT) {
    outerSideIsSeekable = seekableInputIndex().get() == 1;
  } else {
    outerSideIsSeekable = false;
  }

  if (outerSideIsSeekable) {
    throw new UnsupportedOperationException(
        String.format("%s side of an OUTER JOIN must be a non Seekable table.", joinType.name()));
  }
  return new SideInputLookupJoin();
}
 
Example #2
Source File: WriteWithShardingFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a replacement for the given applied write transform that uses {@code
 * LogElementShardsWithDrift} as its sharding.
 *
 * <p>The sink, the dynamic-destination side inputs and the windowed-writes setting are all read
 * back from {@code transform} through {@code WriteFilesTranslation}.
 *
 * @param transform the applied write transform to replace
 * @return a replacement pairing the singleton main input of {@code transform} with the rebuilt
 *     {@code WriteFiles}
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the original
 *     transform
 */
@Override
public PTransformReplacement<PCollection<InputT>, WriteFilesResult<DestinationT>>
    getReplacementTransform(
        AppliedPTransform<
                PCollection<InputT>,
                WriteFilesResult<DestinationT>,
                PTransform<PCollection<InputT>, WriteFilesResult<DestinationT>>>
            transform) {
  try {
    // Rebuild the write from the original's sink and side inputs, overriding only the sharding.
    WriteFiles<InputT, DestinationT, ?> replacement =
        WriteFiles.to(WriteFilesTranslation.getSink(transform))
            .withSideInputs(WriteFilesTranslation.getDynamicDestinationSideInputs(transform))
            .withSharding(new LogElementShardsWithDrift<>());
    // Carry over windowed writes when the original transform used them.
    if (WriteFilesTranslation.isWindowedWrites(transform)) {
      replacement = replacement.withWindowedWrites();
    }
    return PTransformReplacement.of(
        PTransformReplacements.getSingletonMainInput(transform), replacement);
  } catch (IOException e) {
    // The overridden signature declares no checked exception, so rethrow unchecked with the cause.
    throw new RuntimeException(e);
  }
}
 
Example #3
Source File: PTransformMatchers.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * A {@link PTransformMatcher} that matches a {@link SplittableParDo.ProcessKeyedElements} whose
 * {@link DoFn} is splittable, as signified by {@link ProcessElementMethod#isSplittable()}, and
 * whose signature reports {@link IsBounded#BOUNDED} per element.
 */
public static PTransformMatcher splittableProcessKeyedBounded() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> candidate = application.getTransform();
      if (!(candidate instanceof SplittableParDo.ProcessKeyedElements)) {
        return false;
      }
      DoFn<?, ?> doFn = ((SplittableParDo.ProcessKeyedElements) candidate).getFn();
      DoFnSignature doFnSignature = DoFnSignatures.signatureForDoFn(doFn);
      if (!doFnSignature.processElement().isSplittable()) {
        return false;
      }
      return doFnSignature.isBoundedPerElement() == IsBounded.BOUNDED;
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("SplittableProcessKeyedBoundedMatcher").toString();
    }
  };
}
 
Example #4
Source File: ReadTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a "ParallelRead" step for {@code transform} on the translation context.
 *
 * <p>The step is given the custom-source format, the serialized form of {@code source} as its
 * source input, and the context's output for {@code transform} as its output.
 *
 * @param source the source to serialize into the step
 * @param transform the transform the step is added for
 * @param context the translation context that receives the step
 * @throws RuntimeException wrapping any exception raised while building the step
 */
public static <T> void translateReadHelper(
    Source<T> source,
    PTransform<?, ? extends PCollection<?>> transform,
    TranslationContext context) {
  try {
    StepTranslationContext readStep = context.addStep(transform, "ParallelRead");
    readStep.addInput(PropertyNames.FORMAT, PropertyNames.CUSTOM_SOURCE_FORMAT);
    // The source is serialized with the pipeline options and attached as a dictionary.
    readStep.addInput(
        PropertyNames.SOURCE_STEP_INPUT,
        cloudSourceToDictionary(
            CustomSources.serializeToCloudSource(source, context.getPipelineOptions())));
    readStep.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #5
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code pardo} in an {@link AppliedPTransform} named "pardo" whose input is a freshly
 * created {@code PCollection<KV<String, Integer>>} and whose output is a freshly created {@code
 * PCollection<Integer>}.
 *
 * <p>Both collections are bounded, use the global default windowing strategy, and belong to the
 * test pipeline {@code p}.
 */
private AppliedPTransform<?, ?, ?> getAppliedTransform(PTransform pardo) {
  PCollection<KV<String, Integer>> dummyInput =
      PCollection.createPrimitiveOutputInternal(
          p,
          WindowingStrategy.globalDefault(),
          IsBounded.BOUNDED,
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
  dummyInput.setName("dummy input");

  PCollection<Integer> dummyOutput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());
  dummyOutput.setName("dummy output");

  return AppliedPTransform.of("pardo", dummyInput.expand(), dummyOutput.expand(), pardo, p);
}
 
Example #6
Source File: PTransformTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the payload translators contributed by every {@code
 * TransformPayloadTranslatorRegistrar} found through {@link ServiceLoader}.
 *
 * @return an immutable copy of the merged translator map
 * @throws IllegalArgumentException if a registrar contributes a class that an earlier registrar
 *     already registered
 */
private static Map<Class<? extends PTransform>, TransformPayloadTranslator>
    loadTransformPayloadTranslators() {
  Map<Class<? extends PTransform>, TransformPayloadTranslator> merged = new HashMap<>();

  ServiceLoader<TransformPayloadTranslatorRegistrar> registrars =
      ServiceLoader.load(TransformPayloadTranslatorRegistrar.class);
  for (TransformPayloadTranslatorRegistrar registrar : registrars) {
    Map<Class<? extends PTransform>, TransformPayloadTranslator> contributed =
        (Map) registrar.getTransformPayloadTranslators();

    // Classes present both in what has been merged so far and in this registrar's contribution.
    Set<Class<? extends PTransform>> duplicates =
        Sets.intersection(merged.keySet(), contributed.keySet());

    if (duplicates.isEmpty()) {
      merged.putAll(contributed);
      continue;
    }
    throw new IllegalArgumentException(
        String.format("Classes already registered: %s", Joiner.on(", ").join(duplicates)));
  }
  return ImmutableMap.copyOf(merged);
}
 
Example #7
Source File: DirectRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a root transform that creates the single element {@code 1} and adds every element it
 * sees to {@code queue}.
 *
 * @param queue the queue that receives each element
 */
private PTransform<PBegin, PDone> outputStartTo(StaticQueue<Integer> queue) {
  PTransform<PBegin, PDone> startEmitter =
      new PTransform<PBegin, PDone>() {
        @Override
        public PDone expand(PBegin input) {
          input
              .apply(Create.of(1))
              .apply(
                  MapElements.into(TypeDescriptors.voids())
                      .via(
                          element -> {
                            // The mapping exists only for its side effect on the queue.
                            queue.add(element);
                            return null;
                          }));
          return PDone.in(input.getPipeline());
        }
      };
  return startEmitter;
}
 
Example #8
Source File: FlinkStreamingPipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the primitive transform of the visited node into its Flink alternative.
 *
 * @param node the hierarchy node currently being visited
 * @throws UnsupportedOperationException if no translator is registered for the node's transform
 *     or {@code applyCanTranslate} rejects it
 */
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());

  // Get the transformation corresponding to the node we are currently visiting
  // and look up the translator that turns it into its Flink alternative.
  PTransform<?, ?> transform = node.getTransform();
  StreamTransformTranslator<?> translator =
      FlinkStreamingTransformTranslators.getTranslator(transform);

  boolean translatable = translator != null && applyCanTranslate(transform, node, translator);
  if (translatable) {
    applyStreamingTransform(transform, node, translator);
    return;
  }

  String transformUrn = PTransformTranslation.urnForTransform(transform);
  LOG.info(transformUrn);
  throw new UnsupportedOperationException(
      "The transform " + transformUrn + " is currently not supported.");
}
 
Example #9
Source File: NativeWrappedIOTest.java    From component-runtime with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@code beamtest#source} mapper component, applies it to the pipeline as a {@link
 * PTransform}, and asserts that the first emitted record has the id {@code "test"}.
 */
@Test
public void source() {
    final String plugin = COMPONENTS.getTestPlugins().iterator().next();

    // The component is looked up as a plain object and then viewed as a Beam transform.
    final Object component = COMPONENTS
            .asManager()
            .createComponent("beamtest", "source", ComponentManager.ComponentType.MAPPER, 1, emptyMap())
            .orElseThrow(() -> new IllegalArgumentException("no beamtest#source component"));
    final PTransform<PBegin, PCollection<JsonObject>> jdbc = PTransform.class.cast(component);

    final SerializableFunction<Iterable<JsonObject>, Void> firstRecordHasTestId = input -> {
        assertEquals("test", input.iterator().next().getString("id"));
        return null;
    };

    PAssert
            .that(pipeline.apply(jdbc).setCoder(JsonpJsonObjectCoder.of(plugin)))
            .satisfies(firstRecordHasTestId);
    pipeline.run().waitUntilFinish();
}
 
Example #10
Source File: EvaluationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the dataset produced by a transform in the context map and as a leaf, caching it
 * first when {@link #shouldCache(PTransform, PValue)} accepts it.
 *
 * @param transform from which Dataset was created
 * @param pvalue output of transform
 * @param dataset created Dataset from transform
 */
private void putDataset(
    @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) {
  try {
    dataset.setName(pvalue.getName());
  } catch (IllegalStateException e) {
    // name not set, ignore
  }

  if (shouldCache(transform, pvalue)) {
    // we cache only PCollection
    PCollection<?> collection = (PCollection<?>) pvalue;
    Coder<?> elementCoder = collection.getCoder();
    Coder<? extends BoundedWindow> windowCoder =
        collection.getWindowingStrategy().getWindowFn().windowCoder();
    dataset.cache(storageLevel(), WindowedValue.getFullCoder(elementCoder, windowCoder));
  }

  datasets.put(pvalue, dataset);
  leaves.add(dataset);
}
 
Example #11
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the {@link SystemReduceFn} matching the kind of grouping transform being translated.
 *
 * <p>A {@link GroupByKey} gets a buffering reduce function over the input value coder; a {@link
 * Combine.PerKey} gets a combining reduce function built from its combine function.
 *
 * @param transform the transform being translated
 * @param pipeline supplies the coder registry for the combining case
 * @param kvInputCoder coder of the keyed input
 * @throws RuntimeException if {@code transform} is neither of the two supported kinds
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
 
Example #12
Source File: Pipeline.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the validation visitor over the pipeline and then reports transform names that are not
 * unique, according to {@code options.getStableUniqueNames()}.
 *
 * @param options the options used for validation and to choose how name clashes are reported
 * @throws IllegalStateException if names clash and the setting is {@code ERROR}
 * @throws IllegalArgumentException if names clash and the setting is not a recognized value
 */
@VisibleForTesting
void validate(PipelineOptions options) {
  this.traverseTopologically(new ValidateVisitor(options));

  final Collection<Map.Entry<String, Collection<PTransform<?, ?>>>> errors =
      Collections2.filter(instancePerName.asMap().entrySet(), Predicates.not(new IsUnique<>()));
  if (errors.isEmpty()) {
    return;
  }

  switch (options.getStableUniqueNames()) {
    case OFF:
      break;
    case WARNING:
      LOG.warn(
          "The following transforms do not have stable unique names: {}",
          Joiner.on(", ").join(transform(errors, new KeysExtractor())));
      break;
    case ERROR:
      {
        // be very verbose here since it will just fail the execution
        String unstableNames = Joiner.on(", ").join(transform(errors, new KeysExtractor()));
        String conflictingInstances =
            Joiner.on("\n").join(transform(errors, new UnstableNameToMessage(instancePerName)));
        throw new IllegalStateException(
            String.format(
                    "Pipeline update will not be possible"
                        + " because the following transforms do not have stable unique names: %s.",
                    unstableNames)
                + "\n\n"
                + "Conflicting instances:\n"
                + conflictingInstances
                + "\n\nYou can fix it adding a name when you call apply(): "
                + "pipeline.apply(<name>, <transform>).");
      }
    default:
      throw new IllegalArgumentException(
          "Unrecognized value for stable unique names: " + options.getStableUniqueNames());
  }
}
 
Example #13
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the transform by registering the rebalanced input stream as its output stream.
 *
 * @param transform the transform being translated
 * @param context the translation context holding the input and output data streams
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, InputT>>> transform,
    FlinkStreamingTranslationContext context) {

  DataStream<WindowedValue<KV<K, InputT>>> upstream =
      context.getInputDataStream(context.getInput(transform));

  // The elements themselves are passed through unchanged; only rebalance() is applied.
  context.setOutputDataStream(context.getOutput(transform), upstream.rebalance());
}
 
Example #14
Source File: AvroTable.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the rows of {@code input} as Avro generic records to {@code filePattern}, without
 * sharding.
 *
 * <p>Rows are first converted with a {@code GenericRecordWriteConverter} built from the table
 * schema. The Avro schema is derived from that same schema and the table name.
 *
 * @param input the rows to write
 * @return the result of applying the Avro write
 */
@Override
public PDone buildIOWriter(PCollection<Row> input) {
  PTransform<PCollection<Row>, PCollection<GenericRecord>> writeConverter =
      GenericRecordWriteConverter.builder().beamSchema(schema).build();

  PCollection<GenericRecord> records = input.apply("GenericRecordToRow", writeConverter);

  return records.apply(
      "AvroIOWrite",
      AvroIO.writeGenericRecords(AvroUtils.toAvroSchema(schema, tableName, null))
          .to(filePattern)
          .withoutSharding());
}
 
Example #15
Source File: PTransformTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates an {@link AppliedPTransform} into a {@code RunnerApi.PTransform} builder by:
 *
 * <ul>
 *   <li>adding an input to the PTransform for each {@link AppliedPTransform#getInputs()}.
 *   <li>adding an output to the PTransform for each {@link AppliedPTransform#getOutputs()} that
 *       is a {@link PCollection}; other outputs are skipped.
 *   <li>registering the PCollection behind each of those inputs and outputs with {@code
 *       components}.
 *   <li>adding a reference to each subtransform.
 *   <li>setting the unique name.
 *   <li>setting the display data.
 * </ul>
 *
 * @param appliedPTransform the applied transform to translate
 * @param subtransforms the subtransforms to reference by their existing ids
 * @param components the components in which PCollections are registered and ids looked up
 * @return the populated builder
 * @throws IOException declared by the signature; presumably raised while registering
 *     components — TODO confirm
 */
static RunnerApi.PTransform.Builder translateAppliedPTransform(
    AppliedPTransform<?, ?, ?> appliedPTransform,
    List<AppliedPTransform<?, ?, ?>> subtransforms,
    SdkComponents components)
    throws IOException {
  RunnerApi.PTransform.Builder builder = RunnerApi.PTransform.newBuilder();

  for (Map.Entry<TupleTag<?>, PValue> inputEntry : appliedPTransform.getInputs().entrySet()) {
    PValue inputValue = inputEntry.getValue();
    checkArgument(
        inputValue instanceof PCollection, "Unexpected input type %s", inputValue.getClass());
    builder.putInputs(
        toProto(inputEntry.getKey()),
        components.registerPCollection((PCollection<?>) inputValue));
  }

  for (Map.Entry<TupleTag<?>, PValue> outputEntry : appliedPTransform.getOutputs().entrySet()) {
    PValue outputValue = outputEntry.getValue();
    // TODO: Remove gating
    if (!(outputValue instanceof PCollection)) {
      continue;
    }
    // NOTE(review): this check cannot fail while the gating above is in place.
    checkArgument(
        outputValue instanceof PCollection, "Unexpected output type %s", outputValue.getClass());
    builder.putOutputs(
        toProto(outputEntry.getKey()),
        components.registerPCollection((PCollection<?>) outputValue));
  }

  for (AppliedPTransform<?, ?, ?> subtransform : subtransforms) {
    builder.addSubtransforms(components.getExistingPTransformId(subtransform));
  }

  builder.setUniqueName(appliedPTransform.getFullName());
  builder.addAllDisplayData(
      DisplayDataTranslation.toProto(DisplayData.from(appliedPTransform.getTransform())));
  return builder;
}
 
Example #16
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform which reads synthesized events from an {@code UnboundedEventSource}.
 *
 * <p>The source is set up from the standard generator config for {@code configuration} together
 * with its {@code numEventGenerators}, {@code watermarkHoldbackSec} and {@code isRateLimited}
 * settings.
 *
 * @param configuration the Nexmark configuration that parameterizes the source
 */
public static PTransform<PBegin, PCollection<Event>> streamEventsSource(
    NexmarkConfiguration configuration) {
  UnboundedEventSource eventSource =
      new UnboundedEventSource(
          NexmarkUtils.standardGeneratorConfig(configuration),
          configuration.numEventGenerators,
          configuration.watermarkHoldbackSec,
          configuration.isRateLimited);
  return Read.from(eventSource);
}
 
Example #17
Source File: BeamSideInputJoinRel.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the join transform used when one input is Bounded and the other is Unbounded.
 *
 * <p>For a side-input join the windowFn of the two inputs does not need to match. Only INNER
 * JOIN, LEFT OUTER JOIN whose left side is the unbounded one, and RIGHT OUTER JOIN whose right
 * side is the unbounded one are supported.
 *
 * @return a new {@code SideInputJoin}
 * @throws UnsupportedOperationException for a FULL OUTER JOIN, when the outer side of a LEFT or
 *     RIGHT OUTER JOIN is bounded, or when both inputs are unbounded
 */
@Override
public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
  if (joinType == JoinRelType.FULL) {
    throw new UnsupportedOperationException(
        "FULL OUTER JOIN is not supported when join "
            + "a bounded table with an unbounded table.");
  }

  BeamRelNode leftInput = BeamSqlRelUtils.getBeamRelInput(left);
  BeamRelNode rightInput = BeamSqlRelUtils.getBeamRelInput(right);

  // Boundedness of the outer side is only looked at for LEFT and RIGHT joins.
  final boolean outerSideIsBounded;
  if (joinType == JoinRelType.LEFT) {
    outerSideIsBounded = leftInput.isBounded() == PCollection.IsBounded.BOUNDED;
  } else if (joinType == JoinRelType.RIGHT) {
    outerSideIsBounded = rightInput.isBounded() == PCollection.IsBounded.BOUNDED;
  } else {
    outerSideIsBounded = false;
  }
  if (outerSideIsBounded) {
    throw new UnsupportedOperationException(
        String.format("%s side of an OUTER JOIN must be Unbounded table.", joinType.name()));
  }

  if (leftInput.isBounded() == IsBounded.UNBOUNDED) {
    if (rightInput.isBounded() == IsBounded.UNBOUNDED) {
      throw new UnsupportedOperationException(
          "Side input join can only be used if one table is bounded.");
    }
  }
  return new SideInputJoin();
}
 
Example #18
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the given keyed grouping transform by delegating to {@code doTranslate} with the
 * same arguments.
 *
 * @param transform the transform to translate
 * @param node the hierarchy node the transform belongs to
 * @param ctx the translation context
 */
@Override
public void translate(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TransformHierarchy.Node node,
    TranslationContext ctx) {
  doTranslate(transform, node, ctx);
}
 
Example #19
Source File: ExpansionService.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reflectively instantiates {@code builderClass} through its no-argument constructor and invokes
 * its {@code buildExternal} method with {@code configObject}.
 *
 * @param builderClass the builder type to instantiate
 * @param configObject the configuration; its runtime class selects the {@code buildExternal}
 *     overload
 * @return the transform returned by {@code buildExternal}
 * @throws Exception if the constructor or method cannot be found, accessed or invoked
 */
private static PTransform buildTransform(
    Class<? extends ExternalTransformBuilder> builderClass, Object configObject)
    throws Exception {
  // The no-arg constructor is made accessible in case it is not public.
  Constructor<? extends ExternalTransformBuilder> noArgConstructor =
      builderClass.getDeclaredConstructor();
  noArgConstructor.setAccessible(true);
  ExternalTransformBuilder<?, ?, ?> builder = noArgConstructor.newInstance();

  Method buildExternal = builderClass.getMethod("buildExternal", configObject.getClass());
  buildExternal.setAccessible(true);
  return (PTransform) buildExternal.invoke(builder, configObject);
}
 
Example #20
Source File: KeyedPValueTrackingVisitor.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the direct runner treats {@code transform} as key-preserving.
 *
 * <p>That is the case only for a {@link ParDo.MultiOutput} whose function is a {@code
 * ParDoMultiOverrideFactory.ToKeyedWorkItem}.
 */
private static boolean isKeyPreserving(PTransform<?, ?> transform) {
  // This check is deliberately hacky. A package-private marker interface would be the obvious
  // alternative, but keeping it visibly hacky makes it less likely to proliferate. The model is
  // meant to gain a way to express key-preserving DoFns explicitly.
  return transform instanceof ParDo.MultiOutput
      && ((ParDo.MultiOutput<?, ?>) transform).getFn()
          instanceof ParDoMultiOverrideFactory.ToKeyedWorkItem;
}
 
Example #21
Source File: BeamAggregationRel.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the aggregation transform for this relational node.
 *
 * <p>The output schema comes from the node's row type, and every named aggregate call is wrapped
 * in a {@code FieldAggregation}.
 *
 * @return a {@code Transform} configured with the window function, the window field index, the
 *     group set, the field aggregations and the output schema
 */
@Override
public PTransform<PCollectionList<Row>, PCollection<Row>> buildPTransform() {
  Schema rowSchema = CalciteUtils.toSchema(getRowType());

  List<FieldAggregation> fieldAggregations =
      getNamedAggCalls().stream()
          .map(namedCall -> new FieldAggregation(namedCall.getKey(), namedCall.getValue()))
          .collect(toList());

  return new Transform(
      windowFn, windowFieldIndex, getGroupSet(), fieldAggregations, rowSchema);
}
 
Example #22
Source File: Transforms.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link PTransform} from file name patterns to file {@link Metadata Metadata records}.
 *
 * <p>Matching uses {@code FileIO.matchAll()} with {@link EmptyMatchTreatment#DISALLOW}.
 */
public static PTransform<PCollection<String>, PCollection<Metadata>> getFilesByPatterns() {
  PTransform<PCollection<String>, PCollection<Metadata>> matchPatterns =
      new PTransform<PCollection<String>, PCollection<Metadata>>() {
        @Override
        public PCollection<Metadata> expand(PCollection<String> input) {
          return input.apply(
              FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));
        }
      };
  return matchPatterns;
}
 
Example #23
Source File: WriteWithShardingFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the factory's replacement for a {@code WriteFiles} created without explicit
 * sharding is not equal to the original transform.
 */
@Test
public void withNoShardingSpecifiedReturnsNewTransform() {
  ResourceId outputDirectory = LocalResources.fromString("/foo", true /* isDirectory */);

  // A sink that must never actually be asked to create a write operation.
  FileBasedSink<Object, Void, Object> unusedSink =
      new FileBasedSink<Object, Void, Object>(
          StaticValueProvider.of(outputDirectory),
          DynamicFileDestinations.constant(new FakeFilenamePolicy())) {
        @Override
        public WriteOperation<Void, Object> createWriteOperation() {
          throw new IllegalArgumentException("Should not be used");
        }
      };
  PTransform<PCollection<Object>, WriteFilesResult<Void>> original = WriteFiles.to(unusedSink);

  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));

  AppliedPTransform<
          PCollection<Object>,
          WriteFilesResult<Void>,
          PTransform<PCollection<Object>, WriteFilesResult<Void>>>
      originalApplication =
          AppliedPTransform.of("write", objs.expand(), Collections.emptyMap(), original, p);

  Object replacement = factory.getReplacementTransform(originalApplication).getTransform();
  assertThat(replacement, not(equalTo((Object) original)));
}
 
Example #24
Source File: AutoKVWrapper.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a transform that turns each {@code Record} into a {@code KV<String, Record>}.
 *
 * <p>The transform wraps a new {@code AutoKVWrapper} in a {@code
 * RecordParDoTransformCoderProvider} whose coder pairs {@code StringUtf8Coder} keys with {@code
 * SchemaRegistryCoder} values.
 *
 * @param plugin not referenced in this method body
 * @param idGenerator passed to the {@code AutoKVWrapper}; presumably derives the key from the
 *     group context — TODO confirm
 * @param component passed to the {@code AutoKVWrapper}
 * @param branch passed to the {@code AutoKVWrapper}
 * @return the wrapping transform
 */
public static PTransform<PCollection<Record>, PCollection<KV<String, Record>>> of(final String plugin,
        final Function<GroupKeyProvider.GroupContext, String> idGenerator, final String component,
        final String branch) {

    return new RecordParDoTransformCoderProvider<>(KvCoder.of(StringUtf8Coder.of(), SchemaRegistryCoder.of()),
            new AutoKVWrapper(idGenerator, component, branch));
}
 
Example #25
Source File: PipelineRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a root transform directly through a {@link PipelineRunner} and verifies, via the committed
 * "count" counter of the "ScaleByTwo" step, that the pipeline actually executed.
 */
@Test
@Category({NeedsRunner.class, UsesCommittedMetrics.class, UsesCounterMetrics.class})
public void testRunPTransform() {
  final String namespace = PipelineRunnerTest.class.getName();
  final Counter counter = Metrics.counter(namespace, "count");

  final PipelineRunner<? extends PipelineResult> runner =
      PipelineRunner.fromOptions(p.getOptions());
  final PipelineResult result =
      runner.run(
          new PTransform<PBegin, POutput>() {
            @Override
            public POutput expand(PBegin input) {
              PCollection<Double> scaled =
                  input
                      .apply(Create.of(1, 2, 3, 4))
                      .apply("ScaleByTwo", MapElements.via(new ScaleFn<>(2.0, counter)));
              PAssert.that(scaled).containsInAnyOrder(2.0, 4.0, 6.0, 8.0);
              return scaled;
            }
          });

  // Checking counters to verify the pipeline actually ran.
  final MetricsFilter namespaceFilter =
      MetricsFilter.builder().addNameFilter(MetricNameFilter.inNamespace(namespace)).build();
  assertThat(
      result.metrics().queryMetrics(namespaceFilter).getCounters(),
      hasItem(metricsResult(namespace, "count", "ScaleByTwo", 4L, true)));
}
 
Example #26
Source File: RecordBranchMapper.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a record-to-record transform backed by a new {@code RecordBranchMapper}.
 *
 * <p>The mapper receives the {@code RecordBuilderFactory} looked up for {@code plugin}, and the
 * transform uses {@code SchemaRegistryCoder} as its coder.
 *
 * @param plugin the plugin whose {@code RecordBuilderFactory} service is looked up
 * @param fromBranch passed to the mapper as the source branch
 * @param toBranch passed to the mapper as the target branch
 * @return the wrapping transform
 */
public static PTransform<PCollection<Record>, PCollection<Record>> of(final String plugin, final String fromBranch,
        final String toBranch) {
    final RecordBuilderFactory recordBuilderFactory =
            ServiceLookup.lookup(ComponentManager.instance(), plugin, RecordBuilderFactory.class);
    return new RecordParDoTransformCoderProvider<>(SchemaRegistryCoder.of(),
            new RecordBranchMapper(recordBuilderFactory, fromBranch, toBranch));
}
 
Example #27
Source File: QueueMapper.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a mapper for the given component coordinates and wrapped transform.
 *
 * <p>A new {@code LoopState} tracker is created for {@code plugin}, and its id is logged at
 * debug level together with the component's family and name.
 *
 * @param plugin the plugin identifier, also used to create the state tracker
 * @param family the component family
 * @param name the component name
 * @param transform the root transform producing records, stored as is
 */
public QueueMapper(final String plugin, final String family, final String name,
        final PTransform<PBegin, PCollection<Record>> transform) {
    this.plugin = plugin;
    this.family = family;
    this.name = name;
    this.transform = transform;
    // Each mapper instance gets its own tracker, created from the plugin id.
    this.state = LoopState.newTracker(plugin);
    log.debug("Associating state {} to {}#{}", this.state.getId(), family, name);
}
 
Example #28
Source File: ReadTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the {@code ReadPayload} of an applied read transform.
 *
 * <p>The transform is translated to its proto form, with no subtransforms, against a fresh {@code
 * SdkComponents} created from the pipeline's options. The payload of the resulting spec is then
 * parsed as a {@code ReadPayload}.
 *
 * @param transform the applied read transform
 * @return the parsed payload
 * @throws IOException declared by the signature; presumably raised by translation or parsing —
 *     TODO confirm
 */
private static <T> ReadPayload getReadPayload(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform)
    throws IOException {
  // A throwaway components instance: it is only needed to drive the translation.
  SdkComponents components = SdkComponents.create(transform.getPipeline().getOptions());
  return ReadPayload.parseFrom(
      PTransformTranslation.toProto(transform, Collections.emptyList(), components)
          .getSpec()
          .getPayload());
}
 
Example #29
Source File: GroupByKeyTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key over {@code KV<K, V>} into dataset operations.
 *
 * <p>The input dataset is first grouped by key only. Each group is then passed through a {@code
 * GroupAlsoByWindowViaOutputBufferFn} with a buffering reduce function to produce the windowed
 * {@code KV<K, Iterable<V>>} output, which is registered as the dataset of the context's output.
 *
 * @param transform the transform being translated
 * @param context the translation context supplying the input, the output and the options
 */
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform,
    TranslationContext context) {

  @SuppressWarnings("unchecked")
  final PCollection<KV<K, V>> inputCollection = (PCollection<KV<K, V>>) context.getInput();
  Dataset<WindowedValue<KV<K, V>>> inputDataset = context.getDataset(inputCollection);
  WindowingStrategy<?, ?> strategy = inputCollection.getWindowingStrategy();

  // Key and value coders both come from the input's KvCoder.
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCollection.getCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();

  // group by key only
  KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> groupedByKey =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // group also by windows
  WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> windowedOutputCoder =
      WindowedValue.FullWindowedValueCoder.of(
          KvCoder.of(keyCoder, IterableCoder.of(valueCoder)),
          strategy.getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, Iterable<V>>>> groupedOutput =
      groupedByKey.flatMapGroups(
          new GroupAlsoByWindowViaOutputBufferFn<>(
              strategy,
              new InMemoryStateInternalsFactory<>(),
              SystemReduceFn.buffering(valueCoder),
              context.getSerializableOptions()),
          EncoderHelpers.fromBeamCoder(windowedOutputCoder));

  context.putDataset(context.getOutput(), groupedOutput);
}
 
Example #30
Source File: ParDoMultiOverrideFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the replacement transform for an applied multi-output ParDo based on the signature of
 * its {@link DoFn}.
 *
 * <ul>
 *   <li>A splittable {@code DoFn} is replaced by {@code SplittableParDo.forAppliedParDo}.
 *   <li>A {@code DoFn} that declares state, timers or timer families is replaced by a {@code
 *       GbkThenStatefulParDo}.
 *   <li>Any other {@code DoFn} keeps the application's own transform.
 * </ul>
 *
 * @param application the applied ParDo to find a replacement for
 * @return the transform to use in place of the original
 * @throws IOException declared by the signature; presumably raised while reading the ParDo
 *     back through {@code ParDoTranslation} — TODO confirm
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
    AppliedPTransform<
            PCollection<? extends InputT>,
            PCollectionTuple,
            PTransform<PCollection<? extends InputT>, PCollectionTuple>>
        application)
    throws IOException {

  DoFn<InputT, OutputT> fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
  DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());

  if (fnSignature.processElement().isSplittable()) {
    return SplittableParDo.forAppliedParDo((AppliedPTransform) application);
  }

  boolean declaresStateOrTimers =
      fnSignature.stateDeclarations().size() > 0
          || fnSignature.timerDeclarations().size() > 0
          || fnSignature.timerFamilyDeclarations().size() > 0;
  if (!declaresStateOrTimers) {
    return application.getTransform();
  }

  return new GbkThenStatefulParDo(
      fn,
      ParDoTranslation.getMainOutputTag(application),
      ParDoTranslation.getAdditionalOutputTags(application),
      ParDoTranslation.getSideInputs(application),
      ParDoTranslation.getSchemaInformation(application),
      ParDoTranslation.getSideInputMapping(application));
}