com.google.cloud.dataflow.sdk.values.TupleTag Java Examples

The following examples show how to use com.google.cloud.dataflow.sdk.values.TupleTag. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlinkStreamingTransformTranslators.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns an integer label to every output tag.
 *
 * <p>The main tag is labelled {@code MAIN_TAG_INDEX}; every secondary tag that has
 * no label yet receives the next consecutive integer, following the iteration
 * order of {@code secondaryTags}.
 *
 * @param mainTag       tag of the main output
 * @param secondaryTags tags of the remaining outputs
 * @return a map from each tag to its label
 */
private Map<TupleTag<?>, Integer> transformTupleTagsToLabels(TupleTag<?> mainTag, Set<TupleTag<?>> secondaryTags) {
	Map<TupleTag<?>, Integer> labels = Maps.newHashMap();
	labels.put(mainTag, MAIN_TAG_INDEX);

	int nextLabel = MAIN_TAG_INDEX + 1;
	for (TupleTag<?> candidate : secondaryTags) {
		if (labels.containsKey(candidate)) {
			// already labelled (e.g. it is the main tag): keep the existing label
			continue;
		}
		labels.put(candidate, nextLabel);
		nextLabel++;
	}
	return labels;
}
 
Example #2
Source File: FlinkDoFnFunction.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a minimal {@link WindowingInternals} implementation made of fixed answers and stubs.
 *
 * <p>State and timer internals are reported as {@code null}, windowed output and
 * view data are silently dropped, the only window reported is
 * {@code GlobalWindow.INSTANCE}, and side inputs are rejected with an exception.
 *
 * @return a new anonymous {@code WindowingInternals} instance on every call
 */
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
	return new WindowingInternals<IN, OUT>() {
		// No state internals are provided; callers receive null.
		@Override
		public StateInternals stateInternals() {
			return null;
		}

		// Intentionally empty: the windowed value passed in is discarded.
		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {

		}

		// No timer internals are provided; callers receive null.
		@Override
		public TimerInternals timerInternals() {
			return null;
		}

		// Always reports exactly one window: the global window.
		@Override
		public Collection<? extends BoundedWindow> windows() {
			return ImmutableList.of(GlobalWindow.INSTANCE);
		}

		// Always reports the constant PaneInfo.NO_FIRING.
		@Override
		public PaneInfo pane() {
			return PaneInfo.NO_FIRING;
		}

		// Intentionally empty: the view data passed in is discarded.
		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
		}

		// Side inputs cannot be read through this object; every call throws.
		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}
 
Example #3
Source File: FlinkParDoBoundMultiWrapper.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Emits {@code output} to the side output identified by {@code tag}.
 *
 * <p>The timestamp is checked against the input element first. The value is then
 * wrapped in a {@link RawUnionValue} carrying the label registered for the tag and
 * collected with the windows and pane of the input element. When no label is
 * registered for the tag, nothing is emitted.
 *
 * @param inElement the input element currently being processed
 * @param output    the value to emit
 * @param timestamp the timestamp to attach to the emitted value
 * @param collector the collector receiving the tagged value
 * @param tag       the tag identifying the side output
 */
@Override
public <T> void sideOutputWithTimestampHelper(WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<RawUnionValue>> collector, TupleTag<T> tag) {
	checkTimestamp(inElement, timestamp);

	Integer label = outputLabels.get(tag);
	if (label == null) {
		// unknown tag: drop the value
		return;
	}

	RawUnionValue tagged = new RawUnionValue(label, output);
	collector.collect(makeWindowedValue(tagged, timestamp, inElement.getWindows(), inElement.getPane()));
}
 
Example #4
Source File: FlinkMultiOutputDoFnFunction.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Emits {@code value} to the side output identified by {@code tag}.
 *
 * <p>The value is wrapped in a {@link RawUnionValue} carrying the index that
 * {@code outputMap} holds for the tag. Tags without an entry are ignored.
 *
 * @param tag   the tag identifying the side output
 * @param value the value to emit
 */
@Override
@SuppressWarnings("unchecked")
public <T> void sideOutput(TupleTag<T> tag, T value) {
	Integer unionIndex = outputMap.get(tag);
	if (unionIndex == null) {
		// no index known for this tag: nothing to emit
		return;
	}
	outCollector.collect(new RawUnionValue(unionIndex, value));
}
 
Example #5
Source File: FlinkParDoBoundWrapper.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link WindowingInternals} view for a single input element.
 *
 * <p>Windowed output is forwarded to {@code collector}; windows and pane are taken
 * from {@code inElement}. State internals, timer internals, view data and side
 * inputs are not available and every attempt to use them throws.
 *
 * @param inElement the input element whose windows and pane are exposed
 * @param collector the collector that receives windowed output
 * @return a new anonymous {@code WindowingInternals} bound to the two arguments
 */
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(final WindowedValue<IN> inElement, final Collector<WindowedValue<OUT>> collector) {
	return new WindowingInternals<IN, OUT>() {
		// NOTE(review): NullPointerException is kept as the signalled type so existing
		// callers are unaffected, although it is an unusual choice for "not supported".
		@Override
		public StateInternals stateInternals() {
			throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
		}

		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
			collector.collect(makeWindowedValue(output, timestamp, windows, pane));
		}

		// The message names the type this method returns (TimerInternals).
		@Override
		public TimerInternals timerInternals() {
			throw new NullPointerException("TimerInternals are not available for ParDo.Bound().");
		}

		@Override
		public Collection<? extends BoundedWindow> windows() {
			return inElement.getWindows();
		}

		@Override
		public PaneInfo pane() {
			return inElement.getPane();
		}

		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
			throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
		}

		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}
 
Example #6
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code CoGroupByKey} into a Flink {@code CoGroupOperator}.
 *
 * <p>Only the keyed collections at positions 0 and 1 of the input are read. Both are
 * co-grouped on their {@code "key"} field and aggregated by a
 * {@code FlinkCoGroupKeyedListAggregator}; the resulting data set is registered as
 * the output of {@code transform}.
 *
 * @param transform the transform being translated
 * @param context   the translation context supplying inputs, type information and outputs
 */
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
	KeyedPCollectionTuple<K> keyedInputs = context.getInput(transform);

	CoGbkResultSchema resultSchema = keyedInputs.getCoGbkResultSchema();
	List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> taggedInputs = keyedInputs.getKeyedCollections();

	// the two sides of the co-group
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> leftTagged = taggedInputs.get(0);
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> rightTagged = taggedInputs.get(1);

	TupleTag<?> leftTag = leftTagged.getTupleTag();
	TupleTag<?> rightTag = rightTagged.getTupleTag();

	PCollection<? extends KV<K, ?>> leftCollection = leftTagged.getCollection();
	PCollection<? extends KV<K, ?>> rightCollection = rightTagged.getCollection();

	DataSet<KV<K,V1>> leftData = context.getInputDataSet(leftCollection);
	DataSet<KV<K,V2>> rightData = context.getInputDataSet(rightCollection);

	TypeInformation<KV<K,CoGbkResult>> resultType = context.getOutputTypeInfo();

	FlinkCoGroupKeyedListAggregator<K,V1,V2> coGroupFunction =
			new FlinkCoGroupKeyedListAggregator<>(resultSchema, leftTag, rightTag);

	// both sides are grouped on the "key" field of their KV elements
	Keys.ExpressionKeys<KV<K,V1>> leftKeys = new Keys.ExpressionKeys<>(new String[]{"key"}, leftData.getType());
	Keys.ExpressionKeys<KV<K,V2>> rightKeys = new Keys.ExpressionKeys<>(new String[]{"key"}, rightData.getType());

	DataSet<KV<K, CoGbkResult>> coGrouped = new CoGroupOperator<>(
			leftData, rightData,
			leftKeys, rightKeys,
			coGroupFunction, resultType, null, transform.getName());
	context.setOutputDataSet(context.getOutput(transform), coGrouped);
}
 
Example #7
Source File: ParDoMultiOutputITCase.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and runs a batch pipeline that exercises a ParDo with several outputs.
 *
 * <p>Six words are fed into a single {@code DoFn} that emits to the main output and
 * to side outputs; only the "marked words" side output is written to
 * {@code resultPath}.
 *
 * @throws Exception declared by the overridden method
 */
@Override
protected void testProgram() throws Exception {
	Pipeline p = FlinkTestPipeline.createForBatch();

	PCollection<String> words = p.apply(Create.of("Hello", "Whatupmyman", "hey", "SPECIALthere", "MAAA", "MAAFOOO"));

	// Select words whose length is at most the cut off,
	// plus the lengths of words that are above the cut off.
	// Also select words starting with "MAA".
	final int wordLengthCutOff = 3;
	// Create tags to use for the main and the two registered side outputs.
	final TupleTag<String> wordsBelowCutOffTag = new TupleTag<String>(){};
	final TupleTag<Integer> wordLengthsAboveCutOffTag = new TupleTag<Integer>(){};
	final TupleTag<String> markedWordsTag = new TupleTag<String>(){};

	PCollectionTuple results =
			words.apply(ParDo
					.withOutputTags(wordsBelowCutOffTag, TupleTagList.of(wordLengthsAboveCutOffTag)
							.and(markedWordsTag))
					.of(new DoFn<String, String>() {
						// This tag is created inside the DoFn and is NOT passed to
						// withOutputTags above, so it is an unregistered side output.
						final TupleTag<String> specialWordsTag = new TupleTag<String>() {
						};

						public void processElement(ProcessContext c) {
							String word = c.element();
							if (word.length() <= wordLengthCutOff) {
								// short word: goes to the main output
								c.output(word);
							} else {
								// long word: only its length is emitted
								c.sideOutput(wordLengthsAboveCutOffTag, word.length());
							}
							if (word.startsWith("MAA")) {
								c.sideOutput(markedWordsTag, word);
							}

							if (word.startsWith("SPECIAL")) {
								// emits to the unregistered tag
								c.sideOutput(specialWordsTag, word);
							}
						}
					}));

	// Extract the PCollection results, by tag.
	// Only markedWords is used below; the other two are fetched but not consumed.
	PCollection<String> wordsBelowCutOff = results.get(wordsBelowCutOffTag);
	PCollection<Integer> wordLengthsAboveCutOff = results.get
			(wordLengthsAboveCutOffTag);
	PCollection<String> markedWords = results.get(markedWordsTag);

	markedWords.apply(TextIO.Write.to(resultPath));

	p.run();
}
 
Example #8
Source File: FileToState.java    From policyscanner with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance that remembers the given tag as its error output tag.
 *
 * @param tag the tag stored in {@code errorOutputTag}
 */
public FileToState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}
 
Example #9
Source File: FlinkDoFnFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Forwards to {@code sideOutput(tag, output)}.
 *
 * <p>The {@code timestamp} argument is not passed on and is therefore ignored.
 *
 * @param tag       the tag identifying the side output
 * @param output    the value to emit
 * @param timestamp unused
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutput(tag, output);
}
 
Example #10
Source File: FlinkDoFnFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Deliberately does nothing: the value handed to a side output is discarded.
 *
 * @param tag    the tag identifying the side output (unused)
 * @param output the value that would be emitted (unused)
 */
@Override
public <T> void sideOutput(TupleTag<T> tag, T output) {
	// ignore the side output, this can happen when a user does not register
	// side outputs but then outputs using a freshly created TupleTag.
}
 
Example #11
Source File: FlinkCoGroupKeyedListAggregator.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the result schema and the tags of the two co-grouped inputs.
 *
 * @param schema    the schema stored in {@code this.schema}
 * @param tupleTag1 the tag stored in {@code this.tupleTag1}
 * @param tupleTag2 the tag stored in {@code this.tupleTag2}
 */
public FlinkCoGroupKeyedListAggregator(CoGbkResultSchema schema, TupleTag<?> tupleTag1, TupleTag<?> tupleTag2) {
	// tags of the two inputs
	this.tupleTag1 = tupleTag1;
	this.tupleTag2 = tupleTag2;
	// schema of the combined result
	this.schema = schema;
}
 
Example #12
Source File: FlinkMultiOutputDoFnFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Forwards to {@code sideOutput(tag, output)}.
 *
 * <p>The {@code timestamp} argument is not passed on and is therefore ignored.
 *
 * @param tag       the tag identifying the side output
 * @param output    the value to emit
 * @param timestamp unused
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutput(tag, output);
}
 
Example #13
Source File: FlinkMultiOutputDoFnFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the function to run, the pipeline options and the tag-to-index map.
 *
 * @param doFn      the function stored in {@code this.doFn}
 * @param options   the options stored in {@code this.options}
 * @param outputMap the map from output tag to integer index stored in {@code this.outputMap}
 */
public FlinkMultiOutputDoFnFunction(
		DoFn<IN, OUT> doFn,
		PipelineOptions options,
		Map<TupleTag<?>, Integer> outputMap) {
	this.outputMap = outputMap;
	this.options = options;
	this.doFn = doFn;
}
 
Example #14
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a multi-output ParDo into a Flink {@code MapPartitionOperator} followed
 * by one pruning {@code FlatMapOperator} per output.
 *
 * <p>Every output tag is mapped to an integer index (main output at 0). All outputs
 * are emitted as {@link RawUnionValue}s encoded by a single {@link UnionCoder}, so
 * the coder list handed to the {@code UnionCoder} is built in the same pass that
 * assigns the indices. That way position {@code i} of the coder list always belongs
 * to the output with index {@code i}, whatever order the output map iterates in.
 *
 * @param transform the transform being translated
 * @param context   the translation context supplying inputs, type information and outputs
 * @throws IllegalStateException if the main output tag has no entry among the outputs
 */
@Override
public void translateNode(ParDo.BoundMulti<IN, OUT> transform, FlinkBatchTranslationContext context) {
	DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform));

	final DoFn<IN, OUT> doFn = transform.getFn();

	Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll();

	TupleTag<?> mainOutputTag = transform.getMainOutputTag();
	PCollection<?> mainOutput = outputs.get(mainOutputTag);
	if (mainOutput == null) {
		// Without the main output, index 0 would have no coder and every later index would be shifted.
		throw new IllegalStateException(
				"Main output tag " + mainOutputTag + " is missing from the outputs of " + transform.getName());
	}

	Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
	// output Coders, kept in index order so they line up with the union tags
	List<Coder<?>> outputCoders = Lists.newArrayList();

	// put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this
	outputMap.put(mainOutputTag, 0);
	outputCoders.add(mainOutput.getCoder());

	int count = 1;
	for (Map.Entry<TupleTag<?>, PCollection<?>> output : outputs.entrySet()) {
		if (!outputMap.containsKey(output.getKey())) {
			outputMap.put(output.getKey(), count++);
			// appended at position count - 1, i.e. exactly the index just assigned
			outputCoders.add(output.getValue().getCoder());
		}
	}

	// create a UnionCoder for our tagged outputs
	UnionCoder unionCoder = UnionCoder.of(outputCoders);

	@SuppressWarnings("unchecked")
	TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder);

	@SuppressWarnings("unchecked")
	FlinkMultiOutputDoFnFunction<IN, OUT> doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap);
	MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());

	transformSideInputs(transform.getSideInputs(), outputDataSet, context);

	// one pruning operator per output: each keeps only the union values carrying its index
	for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
		TypeInformation<Object> outputType = context.getTypeInfo(output.getValue());
		int outputTag = outputMap.get(output.getKey());
		FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag);
		FlatMapOperator<RawUnionValue, Object> pruningOperator = new
				FlatMapOperator<>(outputDataSet, outputType,
				pruningFunction, output.getValue().getName());
		context.setOutputDataSet(output.getValue(), pruningOperator);
	}
}
 
Example #15
Source File: FlinkParDoBoundWrapper.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Rejects every side output: this implementation always throws.
 *
 * @param inElement    the input element currently being processed (unused)
 * @param output       the value that was to be emitted (unused)
 * @param timestamp    the timestamp that was to be attached (unused)
 * @param outCollector the collector of the main output (unused)
 * @param tag          the tag identifying the side output (unused)
 * @throws RuntimeException always
 */
@Override
public <T> void sideOutputWithTimestampHelper(WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<OUT>> outCollector, TupleTag<T> tag) {
	// Side outputs are not silently dropped here: emitting to one is reported as an error.
	throw new RuntimeException("sideOutput() is not available in ParDo.Bound().");
}
 
Example #16
Source File: FlinkAbstractParDoWrapper.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a value emitted to a side output; the behaviour is defined by the implementing class.
 *
 * @param inElement    the input element during whose processing the value was emitted
 * @param output       the emitted value
 * @param timestamp    the timestamp requested for the emitted value
 * @param outCollector the collector available for emitting results
 * @param tag          the tag identifying the side output
 * @param <T>          the type of the emitted value
 */
public abstract <T> void sideOutputWithTimestampHelper(
WindowedValue<IN> inElement,
T output,
Instant timestamp,
Collector<WindowedValue<OUTFL>> outCollector,
TupleTag<T> tag);
 
Example #17
Source File: FlinkAbstractParDoWrapper.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Delegates to {@code sideOutputWithTimestampHelper}, adding the current
 * {@code element} and {@code collector} to the arguments received.
 *
 * @param tag       the tag identifying the side output
 * @param output    the value to emit
 * @param timestamp the timestamp to attach to the emitted value
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutputWithTimestampHelper(element, output, timestamp, collector, tag);
}
 
Example #18
Source File: FlinkAbstractParDoWrapper.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Emits {@code output} to the given side output using the timestamp of the
 * current {@code element}.
 *
 * @param tag    the tag identifying the side output
 * @param output the value to emit
 */
@Override
public <T> void sideOutput(TupleTag<T> tag, T output) {
	sideOutputWithTimestamp(tag, output, this.element.getTimestamp());
}
 
Example #19
Source File: FlinkParDoBoundMultiWrapper.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a wrapper for a ParDo with several outputs.
 *
 * <p>Options, windowing strategy and function are handed to the superclass
 * constructor; the main tag and the tag-to-label map are passed through
 * {@code Preconditions.checkNotNull} before being stored.
 *
 * @param options           the pipeline options
 * @param windowingStrategy the windowing strategy
 * @param doFn              the function to wrap
 * @param mainTag           the tag of the main output
 * @param tagsToLabels      the map from output tag to integer label
 */
public FlinkParDoBoundMultiWrapper(
		PipelineOptions options,
		WindowingStrategy<?, ?> windowingStrategy,
		DoFn<IN, OUT> doFn,
		TupleTag<?> mainTag,
		Map<TupleTag<?>, Integer> tagsToLabels) {
	super(options, windowingStrategy, doFn);

	// both pieces of tag information are checked before being kept
	this.mainTag = Preconditions.checkNotNull(mainTag);
	this.outputLabels = Preconditions.checkNotNull(tagsToLabels);
}
 
Example #20
Source File: DistributeWorkDataDoFn.java    From data-timeseries-java with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the work packet configuration and the output tag.
 *
 * @param workPacketView the configuration stored in {@code this.workPacketView}
 * @param tag            the tag stored in {@code this.tag}
 */
public DistributeWorkDataDoFn(WorkPacketConfig workPacketView, TupleTag<Integer> tag) {
  this.tag = tag;
  this.workPacketView = workPacketView;
}
 
Example #21
Source File: CreateWorkPacketsDoFn.java    From data-timeseries-java with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the work packet configuration and the counter tag.
 *
 * @param workPacketView the configuration stored in {@code this.workPacketView}
 * @param counter        the tag stored in {@code this.counter}
 */
public CreateWorkPacketsDoFn(WorkPacketConfig workPacketView, TupleTag<Integer> counter) {
  this.counter = counter;
  this.workPacketView = workPacketView;
}
 
Example #22
Source File: ExtractState.java    From policyscanner with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance that remembers the given tag as its error output tag.
 *
 * @param tag the tag stored in {@code errorOutputTag}
 */
public ExtractState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}