Java Code Examples for org.apache.beam.sdk.transforms.ParDo#of()

The following examples show how to use org.apache.beam.sdk.transforms.ParDo#of(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PrimitiveParDoSingleFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the transform produced by the factory reports a {@link DoFn} equal to the one the
 * original {@code ParDo.SingleOutput} was created with.
 */
@Test
public void getReplacementTransformGetFn() {
  DoFn<Integer, Long> toLongFn = new ToLongFn();
  ParDo.SingleOutput<Integer, Long> singleOutput = ParDo.of(toLongFn);
  PCollection<? extends Integer> numbers = pipeline.apply(Create.of(1, 2, 3));

  // Describe the original ParDo as an application within the pipeline.
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      appliedOriginal =
          AppliedPTransform.of(
              "original",
              numbers.expand(),
              numbers.apply(singleOutput).expand(),
              singleOutput,
              pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacement =
      factory.getReplacementTransform(appliedOriginal);
  ParDoSingle<Integer, Long> replaced = (ParDoSingle<Integer, Long>) replacement.getTransform();

  // The replacement must expose the same function, both via the transform and directly.
  assertThat(replaced.getFn(), equalTo(singleOutput.getFn()));
  assertThat(replaced.getFn(), equalTo(toLongFn));
}
 
Example 2
Source File: DisplayDataEvaluatorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that display data registered by a {@link DoFn} wrapped in a {@code ParDo} is reported
 * by {@code displayDataForPrimitiveTransforms}.
 */
@Test
public void testPrimitiveTransform() {
  // A no-op DoFn whose only purpose is to contribute the "foo" display item.
  DoFn<Integer, Integer> fnWithDisplayData =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {}

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo", "bar"));
        }
      };
  PTransform<? super PCollection<Integer>, ? super PCollection<Integer>> transformUnderTest =
      ParDo.of(fnWithDisplayData);

  Set<DisplayData> collected =
      DisplayDataEvaluator.create().displayDataForPrimitiveTransforms(transformUnderTest);

  assertThat(collected, hasItem(hasDisplayItem("foo")));
}
 
Example 3
Source File: AnnotateImages.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the chain of transforms that calls the Vision API. {@link GroupIntoBatches} works only
 * on {@link KV}s, so every request is given a key before the requests are grouped into batches.
 */
@Override
public PCollection<List<AnnotateImageResponse>> expand(PCollection<T> input) {
  // Attach the context side input to the mapping step only when one is present.
  ParDo.SingleOutput<T, AnnotateImageRequest> toRequests;
  if (contextSideInput == null) {
    toRequests = ParDo.of(new MapInputToRequest(null));
  } else {
    toRequests =
        ParDo.of(new MapInputToRequest(contextSideInput)).withSideInputs(contextSideInput);
  }

  // Each request gets a random key bounded by desiredRequestParallelism.
  SerializableFunction<AnnotateImageRequest, Integer> randomKey =
      ignored -> new Random().nextInt(desiredRequestParallelism);

  PCollection<AnnotateImageRequest> requests = input.apply(toRequests);
  return requests
      .apply(WithKeys.of(randomKey).withKeyType(TypeDescriptors.integers()))
      .apply(GroupIntoBatches.ofSize(batchSize))
      .apply(ParDo.of(new PerformImageAnnotation()));
}
 
Example 4
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform to log each element, passing it through unchanged.
 *
 * @param name label written in front of every logged element
 * @return a {@code ParDo} that logs each input element at INFO level and then emits it as-is
 */
public static <T> ParDo.SingleOutput<T, T> log(final String name) {
  return ParDo.of(
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // NOTE(review): assumes LOG is an SLF4J (or Log4j 2) logger. Their parameterized
          // messages use "{}" placeholders; printf-style "%s" is never substituted, so the
          // name and element would be missing from the log line.
          LOG.info("{}: {}", name, c.element());
          c.output(c.element());
        }
      });
}
 
Example 5
Source File: TypedPValueTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code finishSpecifying} to throw an {@link IllegalStateException} when no coder can
 * be inferred for the output of the applied {@code ParDo}.
 */
@Test
public void testFinishSpecifyingShouldFailIfNoCoderInferrable() {
  p.enableAbandonedNodeEnforcement(false);
  PCollection<Integer> ints = p.apply(Create.of(1, 2, 3));
  ParDo.SingleOutput<Integer, EmptyClass> toEmptyClass = ParDo.of(new EmptyClassDoFn());
  PCollection<EmptyClass> withoutCoder = ints.apply(toEmptyClass);

  // Both message fragments must be present in the failure.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");
  thrown.expectMessage("Inferring a Coder from the CoderRegistry failed");

  withoutCoder.finishSpecifying(ints, toEmptyClass);
}
 
Example 6
Source File: StructuredStreamingPipelineStateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code ParDo} that prints every element to standard output, preceded by the given
 * prefix and a space. The function itself emits no output elements.
 */
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  final DoFn<String, String> printer =
      new DoFn<String, String>() {

        @ProcessElement
        public void processElement(final ProcessContext context) {
          final String line = prefix + " " + context.element();
          System.out.println(line);
        }
      };
  return ParDo.of(printer);
}
 
Example 7
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Return a transform that re-emits each element typed as {@link KnownSize}. */
private static <T extends KnownSize> ParDo.SingleOutput<T, KnownSize> castToKnownSize() {
  DoFn<T, KnownSize> upcast =
      new DoFn<T, KnownSize>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          // T is bounded by KnownSize, so this is a plain widening assignment.
          KnownSize widened = context.element();
          context.output(widened);
        }
      };
  return ParDo.of(upcast);
}
 
Example 8
Source File: SparkPipelineStateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@code ParDo} whose function writes {@code prefix}, a space and the element to
 * standard output for each input. Nothing is emitted downstream by the function.
 */
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  final DoFn<String, String> printingFn =
      new DoFn<String, String>() {

        @ProcessElement
        public void processElement(final ProcessContext ctx) {
          final String element = ctx.element();
          System.out.println(prefix + " " + element);
        }
      };
  return ParDo.of(printingFn);
}
 
Example 9
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform that pairs each element with its timestamp as a {@link TimestampedValue}.
 * The {@code name} argument is not used by the returned transform.
 */
public static <T> ParDo.SingleOutput<T, TimestampedValue<T>> stamp(String name) {
  DoFn<T, TimestampedValue<T>> stamper =
      new DoFn<T, TimestampedValue<T>>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          TimestampedValue<T> stamped =
              TimestampedValue.of(context.element(), context.timestamp());
          context.output(stamped);
        }
      };
  return ParDo.of(stamper);
}
 
Example 10
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform that drops every element, incrementing the {@code "discarded"} counter in
 * the {@code name} namespace once per element.
 */
public static <T> ParDo.SingleOutput<T, Void> devNull(final String name) {
  DoFn<T, Void> discarder =
      new DoFn<T, Void>() {
        final Counter discardedCounterMetric = Metrics.counter(name, "discarded");

        @ProcessElement
        public void processElement(ProcessContext context) {
          // Count the element; nothing is emitted.
          discardedCounterMetric.inc();
        }
      };
  return ParDo.of(discarder);
}
 
Example 11
Source File: NexmarkUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Return a transform that forwards events unchanged while counting them. Every event bumps the
 * {@code "events"} counter plus exactly one of {@code "newPersons"}, {@code "newAuctions"},
 * {@code "bids"} or {@code "endOfStream"}, depending on which field of the event is set.
 */
public static ParDo.SingleOutput<Event, Event> snoop(final String name) {
  DoFn<Event, Event> snooper =
      new DoFn<Event, Event>() {
        final Counter eventCounter = Metrics.counter(name, "events");
        final Counter newPersonCounter = Metrics.counter(name, "newPersons");
        final Counter newAuctionCounter = Metrics.counter(name, "newAuctions");
        final Counter bidCounter = Metrics.counter(name, "bids");
        final Counter endOfStreamCounter = Metrics.counter(name, "endOfStream");

        @ProcessElement
        public void processElement(ProcessContext context) {
          Event event = context.element();
          eventCounter.inc();

          // Classify by the first populated field; none populated counts as end of stream.
          if (event.newPerson != null) {
            newPersonCounter.inc();
          } else if (event.newAuction != null) {
            newAuctionCounter.inc();
          } else if (event.bid != null) {
            bidCounter.inc();
          } else {
            endOfStreamCounter.inc();
          }

          info("%s snooping element %s", name, event);
          context.output(event);
        }
      };
  return ParDo.of(snooper);
}
 
Example 12
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code ParDo} turning each {@code Sample} into a {@code Record} that carries the
 * sample's data in a string entry named {@code "data"}.
 */
private ParDo.SingleOutput<Sample, Record> toRecord() {
    final DoFn<Sample, Record> sampleToRecord = new DoFn<Sample, Record>() {

        @ProcessElement
        public void toData(final ProcessContext sample) {
            final Sample current = sample.element();
            // A builder factory is obtained for every element.
            final RecordBuilderFactory factory = new AvroRecordBuilderFactoryProvider().apply(null);
            final Record record = factory.newRecordBuilder().withString("data", current.getData()).build();
            sample.output(record);
        }
    };
    return ParDo.of(sampleToRecord);
}
 
Example 13
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code ParDo} that reads the {@code "__default__"} record array of each element and
 * emits a {@code SampleLength} holding the length of the first record's {@code "data"} string.
 */
private ParDo.SingleOutput<Record, SampleLength> toSampleLength() {
    final DoFn<Record, SampleLength> measure = new DoFn<Record, SampleLength>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            final Collection<Record> records = ctx.element().getArray(Record.class, "__default__");
            // Only the first record of the array is inspected.
            final Record first = records.iterator().next();
            final int length = first.getString("data").length();
            ctx.output(new SampleLength(length));
        }
    };
    return ParDo.of(measure);
}
 
Example 14
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/** Creates a {@code ParDo} that emits the {@code len} field of each {@code SampleLength}. */
private ParDo.SingleOutput<SampleLength, Integer> toInt() {
    final DoFn<SampleLength, Integer> extractLength = new DoFn<SampleLength, Integer>() {

        @ProcessElement
        public void toInt(final ProcessContext pc) {
            final SampleLength sampleLength = pc.element();
            pc.output(sampleLength.len);
        }
    };
    return ParDo.of(extractLength);
}
 
Example 15
Source File: SnowflakeIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@code ParDo} that wraps a {@code CopyToTableFn} configured from this object's
 * getters together with the supplied arguments.
 *
 * @param snowflakeService service instance handed to the {@code CopyToTableFn}
 * @param stagingBucketDir staging bucket directory handed to the {@code CopyToTableFn}
 * @return a single-output {@code ParDo} around the configured {@code CopyToTableFn}
 */
private ParDo.SingleOutput<Object, Object> copyToTable(
    SnowflakeService snowflakeService, String stagingBucketDir) {
  return ParDo.of(
      new CopyToTableFn<>(
          getDataSourceProviderFn(),
          getTable(),
          getQuery(),
          stagingBucketDir,
          getStorageIntegrationName(),
          getWriteDisposition(),
          snowflakeService));
}
 
Example 16
Source File: TransformTransform.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the step function to the input as a multi-output {@code ParDo}: one main output plus
 * one tagged output per entry in {@code targetSteps}.
 *
 * @param input the rows to transform
 * @return the tuple of output collections; retrieve them with the tuple IDs produced by
 *         {@code HopBeamUtil.createMainOutputTupleId()} / {@code HopBeamUtil.createTargetTupleId()}
 * @throws RuntimeException wrapping any exception raised while building or applying the
 *         transform, after the error counter is incremented and the failure is logged
 */
@Override public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similar for the output : create a TupleTag list for the target transforms...
    // Starting from the empty list avoids a null sentinel and covers the "no targets" case.
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    TupleTagList targetTupleTagList = TupleTagList.empty();
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTagList = targetTupleTagList.and( tupleTag );
    }

    // Create a new transform function, initializes the transform
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream.
    // In the resulting tuple is everything we need to find.
    // Just make sure to retrieve the PCollections using the correct Tuple ID
    // Use HopBeamUtil.createTargetTupleId()... to make sure
    //
    return input.apply( multiOutput );
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }
}
 
Example 17
Source File: Transforms.java    From nomulus with Apache License 2.0 4 votes vote down vote up
/**
 * Returns CommitLog files with timestamps between {@code fromTime} (inclusive) and {@code
 * toTime} (exclusive).
 *
 * @param fromTime lower bound of the accepted time range, inclusive
 * @param toTime upper bound of the accepted time range, exclusive
 * @return a transform wrapping a {@code FilterCommitLogFileByTime} built from the two bounds
 */
public static PTransform<PCollection<? extends String>, PCollection<String>>
    filterCommitLogsByTime(DateTime fromTime, DateTime toTime) {
  return ParDo.of(new FilterCommitLogFileByTime(fromTime, toTime));
}
 
Example 18
Source File: Monitor.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a monitor, storing the given name and prefix and preparing a fresh
 * {@code MonitorDoFn} together with the {@code ParDo} transform that wraps it.
 *
 * @param name value stored in the {@code name} field
 * @param prefix value stored in the {@code prefix} field
 */
public Monitor(String name, String prefix) {
  this.name = name;
  this.prefix = prefix;
  this.doFn = new MonitorDoFn();
  this.transform = ParDo.of(this.doFn);
}
 
Example 19
Source File: StepTransform.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the step function to the input as a multi-output {@code ParDo}: one main output plus
 * one tagged output per entry in {@code targetSteps}.
 *
 * @param input the rows to transform
 * @return the tuple of output collections; retrieve them with the tuple IDs produced by
 *         {@code KettleBeamUtil.createMainOutputTupleId()} / {@code KettleBeamUtil.createTargetTupleId()}
 * @throws RuntimeException wrapping any exception raised while building or applying the
 *         transform, after the error counter is incremented and the failure is logged
 */
@Override public PCollectionTuple expand( PCollection<KettleRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init( stepPluginClasses, xpPluginClasses );

    // Similar for the output : create a TupleTag list for the target steps...
    // Starting from the empty list avoids a null sentinel and covers the "no targets" case.
    //
    TupleTag<KettleRow> mainOutputTupleTag = new TupleTag<KettleRow>( KettleBeamUtil.createMainOutputTupleId( stepname ) ) {
    };
    TupleTagList targetTupleTagList = TupleTagList.empty();
    for ( String targetStep : targetSteps ) {
      String tupleId = KettleBeamUtil.createTargetTupleId( stepname, targetStep );
      TupleTag<KettleRow> tupleTag = new TupleTag<KettleRow>( tupleId ) {
      };
      targetTupleTagList = targetTupleTagList.and( tupleTag );
    }

    // Create a new step function, initializes the step
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, stepPluginClasses, xpPluginClasses,
      stepname, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual step functionality
    //
    ParDo.SingleOutput<KettleRow, KettleRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<KettleRow, KettleRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do step function to the main input stream.
    // In the resulting tuple is everything we need to find.
    // Just make sure to retrieve the PCollections using the correct Tuple ID
    // Use KettleBeamUtil.createTargetTupleId()... to make sure
    //
    return input.apply( multiOutput );
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in step '" + stepname + "'", e );
    throw new RuntimeException( "Error transforming data in step", e );
  }
}
 
Example 20
Source File: HCatToRow.java    From beam with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a {@link PTransform} turning incoming {@link HCatRecord HCatRecords} into {@link Row
 * Rows} according to the supplied schema.
 *
 * <p>Runtime errors will happen if the schema given here does not match the actual record
 * schema, or if the internal representation does not match the schema.
 */
private static PTransform<PCollection<? extends HCatRecord>, PCollection<Row>> forSchema(
    Schema schema) {
  HCatToRowFn toRowFn = new HCatToRowFn(schema);
  return ParDo.of(toRowFn);
}