Java Code Examples for org.apache.beam.sdk.values.TupleTagList#empty()

The following examples show how to use org.apache.beam.sdk.values.TupleTagList#empty(). You can go to the original project or source file by following the links above each example.
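TupleTagList.empty() returns an immutable, zero-element tag list; each call to and() returns a new list with one more tag appended. A minimal sketch of that accumulate-and-reassign pattern (the tag names here are illustrative only):

import java.util.Arrays;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

// TupleTagList is immutable: and() returns a new list, so reassign the variable.
TupleTagList tags = TupleTagList.empty();
for (String name : Arrays.asList("main", "errors", "late")) {
  tags = tags.and(new TupleTag<String>(name) {});
}
// tags.size() == 3; tags.getAll() returns the tags in insertion order.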
Example 1
Source File: Partition.java    From beam with Apache License 2.0
/**
 * Constructs a PartitionDoFn.
 *
 * @throws IllegalArgumentException if {@code numPartitions <= 0}
 */
private PartitionDoFn(
    int numPartitions,
    Contextful<Contextful.Fn<X, Integer>> ctxFn,
    Object originalFnClassForDisplayData) {
  this.ctxFn = ctxFn;
  this.originalFnClassForDisplayData = originalFnClassForDisplayData;
  if (numPartitions <= 0) {
    throw new IllegalArgumentException("numPartitions must be > 0");
  }

  this.numPartitions = numPartitions;

  // Build one output tag per partition; TupleTagList is immutable, so reassign on each and().
  TupleTagList buildOutputTags = TupleTagList.empty();
  for (int partition = 0; partition < numPartitions; partition++) {
    buildOutputTags = buildOutputTags.and(new TupleTag<X>());
  }
  outputTags = buildOutputTags;
}
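Inside the DoFn's processElement (not shown above), each element is routed to the tag at its computed partition index. A simplified sketch of that routing, not Beam's verbatim implementation; the partition value is assumed to come from the partition function:

// Simplified sketch: emit the element to the output tag selected by its partition index.
@ProcessElement
public void processElement(@Element X element, MultiOutputReceiver receiver) {
  int partition = 0; // assumed: computed by ctxFn, in the range [0, numPartitions)
  @SuppressWarnings("unchecked")
  TupleTag<X> tag = (TupleTag<X>) outputTags.get(partition);
  receiver.get(tag).output(element);
}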
 
Example 2
Source File: CoGbkResultSchema.java    From beam with Apache License 2.0
public static CoGbkResultSchema of(List<TupleTag<?>> tags) {
  // Accumulate the tags one and() call at a time; each call returns a new list.
  TupleTagList tupleTags = TupleTagList.empty();
  for (TupleTag<?> tag : tags) {
    tupleTags = tupleTags.and(tag);
  }
  return new CoGbkResultSchema(tupleTags);
}
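The same list can also be built in a single call, since TupleTagList.of() accepts a List<TupleTag<?>> directly; the loop above is the equivalent incremental form.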
 
Example 3
Source File: TransformTransform.java    From hop with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similarly, for the output: create a TupleTag list for the target transforms...
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    List<TupleTag<HopRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new transform function, which initializes the transform
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need.
    // Just make sure to retrieve the PCollections using the correct tuple ID,
    // as generated by HopBeamUtil.createTargetTupleId().
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }

}
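The caller then pulls each output out of the returned tuple using the same tags. A short retrieval sketch, assuming mainOutputTupleTag and targetTupleTags from the method above are still in scope:

// Hypothetical retrieval, assuming the tags built in expand() are available.
PCollection<HopRow> mainOutput = collectionTuple.get( mainOutputTupleTag );
for ( TupleTag<HopRow> targetTag : targetTupleTags ) {
  PCollection<HopRow> targetOutput = collectionTuple.get( targetTag );
  // wire targetOutput into the corresponding target transform...
}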
 
Example 4
Source File: TransformBatchTransform.java    From hop with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similarly, for the output: create a TupleTag list for the target transforms...
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    List<TupleTag<HopRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new transform function, which initializes the transform
    //
    StepBatchFn stepBatchFn = new StepBatchFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepBatchFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need.
    // Just make sure to retrieve the PCollections using the correct tuple ID,
    // as generated by HopBeamUtil.createTargetTupleId().
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }

}
 
Example 5
Source File: StepBatchTransform.java    From kettle-beam with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<KettleRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init( stepPluginClasses, xpPluginClasses );

    // Similarly, for the output: create a TupleTag list for the target steps...
    //
    TupleTag<KettleRow> mainOutputTupleTag = new TupleTag<KettleRow>( KettleBeamUtil.createMainOutputTupleId( stepname ) ) {
    };
    List<TupleTag<KettleRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = KettleBeamUtil.createTargetTupleId( stepname, targetStep );
      TupleTag<KettleRow> tupleTag = new TupleTag<KettleRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new step function, which initializes the step
    //
    StepBatchFn stepBatchFn = new StepBatchFn( variableValues, metastoreJson, stepPluginClasses, xpPluginClasses,
      stepname, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual step functionality
    //
    ParDo.SingleOutput<KettleRow, KettleRow> parDoStepFn = ParDo.of( stepBatchFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<KettleRow, KettleRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do step function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need.
    // Just make sure to retrieve the PCollections using the correct tuple ID,
    // as generated by KettleBeamUtil.createTargetTupleId().
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in step '" + stepname + "'", e );
    throw new RuntimeException( "Error transforming data in step", e );
  }

}
 
Example 6
Source File: StepTransform.java    From kettle-beam with Apache License 2.0
@Override public PCollectionTuple expand( PCollection<KettleRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init( stepPluginClasses, xpPluginClasses );

    // Similarly, for the output: create a TupleTag list for the target steps...
    //
    TupleTag<KettleRow> mainOutputTupleTag = new TupleTag<KettleRow>( KettleBeamUtil.createMainOutputTupleId( stepname ) ) {
    };
    List<TupleTag<KettleRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = KettleBeamUtil.createTargetTupleId( stepname, targetStep );
      TupleTag<KettleRow> tupleTag = new TupleTag<KettleRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new step function, which initializes the step
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, stepPluginClasses, xpPluginClasses,
      stepname, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual step functionality
    //
    ParDo.SingleOutput<KettleRow, KettleRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<KettleRow, KettleRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do step function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need.
    // Just make sure to retrieve the PCollections using the correct tuple ID,
    // as generated by KettleBeamUtil.createTargetTupleId().
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in step '" + stepname + "'", e );
    throw new RuntimeException( "Error transforming data in step", e );
  }

}
 
Example 7
Source File: CoGbkResult.java    From beam with Apache License 2.0
/** Returns an empty {@link CoGbkResult}. */
public static <V> CoGbkResult empty() {
  return new CoGbkResult(
      new CoGbkResultSchema(TupleTagList.empty()), new ArrayList<Iterable<?>>());
}
 
Example 8
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0
KeyedPCollectionTuple(Pipeline pipeline) {
  this(pipeline, new ArrayList<>(), TupleTagList.empty(), null);
}
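Here TupleTagList.empty() seeds a keyed tuple with no inputs; as keyed PCollections are added through the tuple's and() method, the tag list grows alongside the collection list.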