Java Code Examples for org.apache.beam.sdk.transforms.ParDo#SingleOutput

The following examples show how to use org.apache.beam.sdk.transforms.ParDo#SingleOutput. Each snippet is drawn from an open-source project; the source file and license are noted above each example.
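As a quick orientation before the examples: ParDo.of(DoFn) returns a ParDo.SingleOutput, which is itself a PTransform that can be stored in a variable, configured further (for example with withSideInputs), and applied later. A minimal sketch, with an illustrative DoFn and class name that are not taken from any of the projects below:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class SingleOutputSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    // ParDo.of(...) yields a ParDo.SingleOutput<String, Integer>:
    // a one-output PTransform wrapping the DoFn.
    ParDo.SingleOutput<String, Integer> lengths =
        ParDo.of(
            new DoFn<String, Integer>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                c.output(c.element().length());
              }
            });

    PCollection<Integer> out =
        pipeline.apply(Create.of("a", "bb", "ccc")).apply("Lengths", lengths);

    pipeline.run().waitUntilFinish();
  }
}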
Example 1
Source File: RequiresStableInputParDoOverrides.java    From beam with Apache License 2.0
@Override
public PTransformReplacement<PCollection<InputT>, PCollection<OutputT>> getReplacementTransform(
    AppliedPTransform<
            PCollection<InputT>, PCollection<OutputT>, ParDo.SingleOutput<InputT, OutputT>>
        appliedTransform) {
  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(appliedTransform),
      new PTransform<PCollection<InputT>, PCollection<OutputT>>() {
        @Override
        public PCollection<OutputT> expand(PCollection<InputT> input) {
          return input
              .apply("Materialize input", Reshuffle.viaRandomKey())
              .apply("ParDo with stable input", appliedTransform.getTransform());
        }
      });
}
 
Example 2
Source File: PTransformMatchers.java    From beam with Apache License 2.0
/**
 * A {@link PTransformMatcher} that matches a {@link ParDo.SingleOutput} containing a {@link DoFn}
 * that is splittable, as signified by {@link ProcessElementMethod#isSplittable()}.
 */
public static PTransformMatcher splittableParDoSingle() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> transform = application.getTransform();
      if (transform instanceof ParDo.SingleOutput) {
        DoFn<?, ?> fn = ((ParDo.SingleOutput<?, ?>) transform).getFn();
        DoFnSignature signature = DoFnSignatures.signatureForDoFn(fn);
        return signature.processElement().isSplittable();
      }
      return false;
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("SplittableParDoSingleMatcher").toString();
    }
  };
}
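The reflective check this matcher performs can be exercised on its own, outside any pipeline surgery. A minimal sketch, using an illustrative DoFn that is not splittable (its @ProcessElement method takes no RestrictionTracker parameter), so the check prints false:

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;

public class SplittableCheckSketch {
  public static void main(String[] args) {
    ParDo.SingleOutput<String, Integer> transform =
        ParDo.of(
            new DoFn<String, Integer>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                c.output(c.element().length());
              }
            });

    // The same introspection the matcher does: pull the DoFn out of the
    // SingleOutput and inspect its reflective signature.
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    System.out.println(signature.processElement().isSplittable()); // false
  }
}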
 
Example 3
Source File: NexmarkUtils.java    From beam with Apache License 2.0
/** Return a transform to keep the CPU busy for the given number of milliseconds on every record. */
public static <T> ParDo.SingleOutput<T, T> cpuDelay(String name, final long delayMs) {
  return ParDo.of(
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          long now = System.currentTimeMillis();
          long end = now + delayMs;
          while (now < end) {
            // Find plaintext which hashes to HASH in lowest MASK bits.
            // Values chosen to roughly take 1ms on typical workstation.
            long p = INIT_PLAINTEXT;
            while (true) {
              long t = Hashing.murmur3_128().hashLong(p).asLong();
              if ((t & MASK) == (HASH & MASK)) {
                break;
              }
              p++;
            }
            now = System.currentTimeMillis();
          }
          c.output(c.element());
        }
      });
}
 
Example 4
Source File: AnnotateImages.java    From beam with Apache License 2.0
/**
 * Applies all necessary transforms to call the Vision API. In order to group requests into
 * batches, we assign keys to the requests, as {@link GroupIntoBatches} works only on {@link KV}s.
 */
@Override
public PCollection<List<AnnotateImageResponse>> expand(PCollection<T> input) {
  ParDo.SingleOutput<T, AnnotateImageRequest> inputToRequestMapper;
  if (contextSideInput != null) {
    inputToRequestMapper =
        ParDo.of(new MapInputToRequest(contextSideInput)).withSideInputs(contextSideInput);
  } else {
    inputToRequestMapper = ParDo.of(new MapInputToRequest(null));
  }
  return input
      .apply(inputToRequestMapper)
      .apply(
          WithKeys.of(
                  (SerializableFunction<AnnotateImageRequest, Integer>)
                      ignored -> new Random().nextInt(desiredRequestParallelism))
              .withKeyType(TypeDescriptors.integers()))
      .apply(GroupIntoBatches.ofSize(batchSize))
      .apply(ParDo.of(new PerformImageAnnotation()));
}
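The batching trick in this expand method is worth seeing in isolation: GroupIntoBatches accepts only keyed input, so small random keys are attached purely to create grouping parallelism. A standalone sketch of that pattern; the class name, key range of 4, and batch size of 2 are all illustrative:

import java.util.Random;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.GroupIntoBatches;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.WithKeys;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class GroupIntoBatchesSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    int parallelism = 4; // number of distinct keys, hence of parallel batch streams

    PCollection<KV<Integer, Iterable<String>>> batches =
        p.apply(Create.of("a", "b", "c", "d", "e"))
            // GroupIntoBatches works only on KVs, so assign an arbitrary
            // small key; withKeyType is needed because the lambda's key
            // type cannot be inferred.
            .apply(
                WithKeys.of(
                        (SerializableFunction<String, Integer>)
                            ignored -> new Random().nextInt(parallelism))
                    .withKeyType(TypeDescriptors.integers()))
            .apply(GroupIntoBatches.ofSize(2));

    // Downstream processing of each batch would go here.
    p.run().waitUntilFinish();
  }
}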
 
Example 5
Source File: PrimitiveParDoSingleFactoryTest.java    From beam with Apache License 2.0
@Test
public void getReplacementTransformGetFn() {
  DoFn<Integer, Long> originalFn = new ToLongFn();
  ParDo.SingleOutput<Integer, Long> originalTransform = ParDo.of(originalFn);
  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original",
              input.expand(),
              input.apply(originalTransform).expand(),
              originalTransform,
              pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacementTransform =
      factory.getReplacementTransform(application);
  ParDoSingle<Integer, Long> parDoSingle =
      (ParDoSingle<Integer, Long>) replacementTransform.getTransform();

  assertThat(parDoSingle.getFn(), equalTo(originalTransform.getFn()));
  assertThat(parDoSingle.getFn(), equalTo(originalFn));
}
 
Example 6
Source File: NexmarkUtils.java    From beam with Apache License 2.0
/** Return a transform to log each element, passing it through unchanged. */
public static <T> ParDo.SingleOutput<T, T> log(final String name) {
  return ParDo.of(
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          LOG.info("{}: {}", name, c.element());
          c.output(c.element());
        }
      });
}
 
Example 7
Source File: SparkPipelineStateTest.java    From beam with Apache License 2.0
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  return ParDo.of(
      new DoFn<String, String>() {

        @ProcessElement
        public void processElement(final ProcessContext c) {
          System.out.println(prefix + " " + c.element());
        }
      });
}
 
Example 8
Source File: SnowflakeIO.java    From beam with Apache License 2.0
private ParDo.SingleOutput<Object, Object> copyToTable(
    SnowflakeService snowflakeService, String stagingBucketDir) {
  return ParDo.of(
      new CopyToTableFn<>(
          getDataSourceProviderFn(),
          getTable(),
          getQuery(),
          stagingBucketDir,
          getStorageIntegrationName(),
          getWriteDisposition(),
          snowflakeService));
}
 
Example 9
Source File: NexmarkUtils.java    From beam with Apache License 2.0
/** Return a transform to cast each element to {@link KnownSize}. */
private static <T extends KnownSize> ParDo.SingleOutput<T, KnownSize> castToKnownSize() {
  return ParDo.of(
      new DoFn<T, KnownSize>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element());
        }
      });
}
 
Example 10
Source File: NexmarkUtils.java    From beam with Apache License 2.0
/** Return a transform to make explicit the timestamp of each element. */
public static <T> ParDo.SingleOutput<T, TimestampedValue<T>> stamp(String name) {
  return ParDo.of(
      new DoFn<T, TimestampedValue<T>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(TimestampedValue.of(c.element(), c.timestamp()));
        }
      });
}
 
Example 11
Source File: NexmarkUtils.java    From beam with Apache License 2.0
/** Return a transform to pass events through unchanged, counting them as they go by. */
public static ParDo.SingleOutput<Event, Event> snoop(final String name) {
  return ParDo.of(
      new DoFn<Event, Event>() {
        final Counter eventCounter = Metrics.counter(name, "events");
        final Counter newPersonCounter = Metrics.counter(name, "newPersons");
        final Counter newAuctionCounter = Metrics.counter(name, "newAuctions");
        final Counter bidCounter = Metrics.counter(name, "bids");
        final Counter endOfStreamCounter = Metrics.counter(name, "endOfStream");

        @ProcessElement
        public void processElement(ProcessContext c) {
          eventCounter.inc();
          if (c.element().newPerson != null) {
            newPersonCounter.inc();
          } else if (c.element().newAuction != null) {
            newAuctionCounter.inc();
          } else if (c.element().bid != null) {
            bidCounter.inc();
          } else {
            endOfStreamCounter.inc();
          }
          info("%s snooping element %s", name, c.element());
          c.output(c.element());
        }
      });
}
 
Example 12
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0
private ParDo.SingleOutput<Sample, Record> toRecord() {
    return ParDo.of(new DoFn<Sample, Record>() {

        @ProcessElement
        public void toData(final ProcessContext sample) {
            final Sample element = sample.element();
            final RecordBuilderFactory builderFactory = new AvroRecordBuilderFactoryProvider().apply(null);
            sample.output(builderFactory.newRecordBuilder().withString("data", element.getData()).build());
        }
    });
}
 
Example 13
Source File: StructuredStreamingPipelineStateTest.java    From beam with Apache License 2.0
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  return ParDo.of(
      new DoFn<String, String>() {

        @ProcessElement
        public void processElement(final ProcessContext c) {
          System.out.println(prefix + " " + c.element());
        }
      });
}
 
Example 14
Source File: PrimitiveParDoSingleFactoryTest.java    From beam with Apache License 2.0
@Test
public void getReplacementTransformGetSideInputs() {
  PCollectionView<Long> sideLong =
      pipeline
          .apply("LongSideInputVals", Create.of(-1L, -2L, -4L))
          .apply("SideLongView", Sum.longsGlobally().asSingletonView());
  PCollectionView<List<String>> sideStrings =
      pipeline
          .apply("StringSideInputVals", Create.of("foo", "bar", "baz"))
          .apply("SideStringsView", View.asList());
  ParDo.SingleOutput<Integer, Long> originalTransform =
      ParDo.of(new ToLongFn()).withSideInputs(sideLong, sideStrings);

  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original",
              input.expand(),
              input.apply(originalTransform).expand(),
              originalTransform,
              pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacementTransform =
      factory.getReplacementTransform(application);
  ParDoSingle<Integer, Long> parDoSingle =
      (ParDoSingle<Integer, Long>) replacementTransform.getTransform();
  assertThat(parDoSingle.getSideInputs().values(), containsInAnyOrder(sideStrings, sideLong));
}
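Side inputs are attached as part of a SingleOutput's configuration and read back inside the DoFn via c.sideInput; that configuration is exactly what this test asserts survives replacement. A minimal usage sketch with illustrative names:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // A singleton view holding the sum of a small collection.
    final PCollectionView<Long> totalView =
        p.apply("Nums", Create.of(1L, 2L, 3L))
            .apply(Sum.longsGlobally().asSingletonView());

    // withSideInputs makes the view part of the SingleOutput's configuration.
    ParDo.SingleOutput<String, String> withTotal =
        ParDo.of(
                new DoFn<String, String>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    c.output(c.element() + ":" + c.sideInput(totalView));
                  }
                })
            .withSideInputs(totalView);

    PCollection<String> out = p.apply("Words", Create.of("a", "b")).apply(withTotal);
    p.run().waitUntilFinish();
  }
}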
 
Example 15
Source File: Utils.java    From beam with Apache License 2.0
static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> appliedTransform) {
  PTransform<?, ?> transform = appliedTransform.getTransform();
  if (transform instanceof ParDo.MultiOutput) {
    ParDo.MultiOutput multiParDo = (ParDo.MultiOutput) transform;
    return (List) multiParDo.getSideInputs().values().stream().collect(Collectors.toList());
  } else if (transform instanceof ParDo.SingleOutput) {
    ParDo.SingleOutput singleParDo = (ParDo.SingleOutput) transform;
    return (List) singleParDo.getSideInputs().values().stream().collect(Collectors.toList());
  }
  return Collections.emptyList();
}
 
Example 16
Source File: TransformTransform.java    From hop with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similarly for the output: create a TupleTag list for the target transforms...
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    List<TupleTag<HopRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new transform function; it initializes the transform
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need; just make sure to retrieve the
    // PCollections using the correct tuple ID (see HopBeamUtil.createTargetTupleId()).
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }

}
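The pivotal step above is withOutputTags, which converts the ParDo.SingleOutput into a ParDo.MultiOutput whose application yields a PCollectionTuple. A stripped-down sketch of that conversion; the DoFn, tag names, and routing logic are illustrative:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

public class MultiOutputSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // Anonymous subclasses so the tags carry reified type information.
    final TupleTag<String> mainTag = new TupleTag<String>("main") {};
    final TupleTag<String> oddLengthTag = new TupleTag<String>("oddLength") {};

    ParDo.SingleOutput<String, String> single =
        ParDo.of(
            new DoFn<String, String>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                if (c.element().length() % 2 == 1) {
                  c.output(oddLengthTag, c.element()); // routed to the extra output
                } else {
                  c.output(c.element()); // routed to the main output
                }
              }
            });

    // The conversion: SingleOutput + output tags = MultiOutput.
    ParDo.MultiOutput<String, String> multi =
        single.withOutputTags(mainTag, TupleTagList.of(oddLengthTag));

    PCollectionTuple tuple = p.apply(Create.of("a", "bb", "ccc")).apply(multi);
    PCollection<String> main = tuple.get(mainTag);
    PCollection<String> odd = tuple.get(oddLengthTag);

    p.run().waitUntilFinish();
  }
}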
 
Example 17
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0
ParDo.SingleOutput<KV<K, InputT>, OutputT> getOriginalParDo() {
  return originalParDo;
}
 
Example 18
Source File: SplittableParDoOverrides.java    From beam with Apache License 2.0
public ParDoSingleViaMulti(
    DataflowRunner ignored, ParDo.SingleOutput<InputT, OutputT> original) {
  this.original = original;
}
 
Example 19
Source File: StepTransform.java    From kettle-beam with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<KettleRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init( stepPluginClasses, xpPluginClasses );

    // Similarly for the output: create a TupleTag list for the target steps...
    //
    TupleTag<KettleRow> mainOutputTupleTag = new TupleTag<KettleRow>( KettleBeamUtil.createMainOutputTupleId( stepname ) ) {
    };
    List<TupleTag<KettleRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = KettleBeamUtil.createTargetTupleId( stepname, targetStep );
      TupleTag<KettleRow> tupleTag = new TupleTag<KettleRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new step function; it initializes the step
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, stepPluginClasses, xpPluginClasses,
      stepname, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual step functionality
    //
    ParDo.SingleOutput<KettleRow, KettleRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<KettleRow, KettleRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do step function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need; just make sure to retrieve the
    // PCollections using the correct tuple ID (see KettleBeamUtil.createTargetTupleId()).
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in step '" + stepname + "'", e );
    throw new RuntimeException( "Error transforming data in step", e );
  }

}
 
Example 20
Source File: TransformBatchTransform.java    From hop with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similarly for the output: create a TupleTag list for the target transforms...
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    List<TupleTag<HopRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new transform function; it initializes the transform
    //
    StepBatchFn stepBatchFn = new StepBatchFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepBatchFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need; just make sure to retrieve the
    // PCollections using the correct tuple ID (see HopBeamUtil.createTargetTupleId()).
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }

}