org.apache.flink.api.java.operators.FlatMapOperator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.FlatMapOperator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: FlinkBatchPortablePipelineTranslator.java From beam with Apache License 2.0

6 votes

/**
 * Wraps {@code taggedDataset} in a flat-map operator driven by a
 * {@link FlinkExecutableStagePruningFunction} for {@code unionTag} and registers the result
 * with the translation context under {@code collectionId}.
 *
 * @param taggedDataset data set of tagged union values to prune
 * @param context translation context supplying the pipeline options and receiving the result
 * @param unionTag tag handed to the pruning function; also used in the operator name
 * @param outputCoder coder from which the output type information is derived
 * @param transformName not used by this method
 * @param collectionId id under which the pruned data set is registered
 */
private static void pruneOutput(
    DataSet<RawUnionValue> taggedDataset,
    BatchTranslationContext context,
    int unionTag,
    Coder<WindowedValue<?>> outputCoder,
    String transformName,
    String collectionId) {
  final TypeInformation<WindowedValue<?>> prunedType = new CoderTypeInformation<>(outputCoder);
  final FlinkExecutableStagePruningFunction tagFilter =
      new FlinkExecutableStagePruningFunction(unionTag, context.getPipelineOptions());
  final String operatorName = String.format("ExtractOutput[%s]", unionTag);

  final FlatMapOperator<RawUnionValue, WindowedValue<?>> prunedDataSet =
      new FlatMapOperator<>(taggedDataset, prunedType, tagFilter, operatorName);
  context.addDataSet(collectionId, prunedDataSet);
}

Example #2

Source File: SampleITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Samples the source data set with {@code DataSetUtils.sample} using the given fraction and
 * seed, collects the sample and passes it to {@code containsResultAsText} together with
 * {@code getSourceStrings()}.
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(executionEnv);

	final MapPartitionOperator<String, String> sampleOperator =
		DataSetUtils.sample(source, withReplacement, fraction, seed);
	final List<String> sampledValues = sampleOperator.collect();

	containsResultAsText(sampledValues, getSourceStrings());
}

Example #3

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Creates the input for the sampling tests: a flat map over the 3-tuple collection data set
 * that emits the {@code f2} (String) field of every tuple.
 */
private FlatMapOperator<Tuple3<Integer, Long, String>, String> getSourceDataSet(ExecutionEnvironment env) {
	// An anonymous class (rather than a lambda) keeps the generic signature visible on the function object.
	final FlatMapFunction<Tuple3<Integer, Long, String>, String> thirdFieldExtractor =
		new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
			@Override
			public void flatMap(Tuple3<Integer, Long, String> tuple, Collector<String> collector) throws Exception {
				collector.collect(tuple.f2);
			}
		};
	return CollectionDataSets.get3TupleDataSet(env).flatMap(thirdFieldExtractor);
}

Example #4

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Draws a fixed-size sample from the source data set with {@code DataSetUtils.sampleWithSize},
 * asserts that exactly {@code numSamples} elements were collected and passes the sample to
 * {@code containsResultAsText} together with {@code getSourceStrings()}.
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> source = getSourceDataSet(executionEnv);

	final DataSet<String> sampleDataSet =
		DataSetUtils.sampleWithSize(source, withReplacement, numSamples, seed);
	final List<String> sampledValues = sampleDataSet.collect();

	assertEquals(numSamples, sampledValues.size());
	containsResultAsText(sampledValues, getSourceStrings());
}

Example #5

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Runs a fraction-based sample ({@code DataSetUtils.sample}) over the source data set and
 * hands the collected sample plus {@code getSourceStrings()} to {@code containsResultAsText}.
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> input = getSourceDataSet(executionEnv);

	final MapPartitionOperator<String, String> sampler =
		DataSetUtils.sample(input, withReplacement, fraction, seed);
	final List<String> drawn = sampler.collect();

	containsResultAsText(drawn, getSourceStrings());
}

Example #6

Source File: BroadcastBranchingITCase.java From flink with Apache License 2.0

5 votes

/**
 * Builds a plan in which the result of the first join feeds both a second join and a
 * broadcast set, collects the final flat-map output and compares it with {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(1);

	// (id, a, b, c): parameters of the second degree polynomial P(x) = ax^2 + bx + c with that id
	final DataSet<Tuple4<String, Integer, Integer, Integer>> polynomials = environment.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// (id, x): x values to be evaluated with the polynomial of the same id
	final DataSet<Tuple2<String, Integer>> xValues = environment.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// (id, y): y values to be evaluated with the polynomial of the same id
	final DataSet<Tuple2<String, Integer>> yValues = environment.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// Jn1: match x and y values on id, emitting (id, x, y) triples
	final JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> arguments =
			xValues.join(yValues).where(0).equalTo(0).with(new Jn1());

	// Jn2: match polynomial and arguments by id, compute p = min(P(x),P(y)), emit (id, p) tuples
	final JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> minima =
			arguments.join(polynomials).where(0).equalTo(0).with(new Jn2());

	// Mp1: select (id, x, y) triples where x = y; its output z (=x=y) is broadcast to Mp2
	final FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> broadcastValues =
			arguments.flatMap(new Mp1());

	// Mp2: filter out all p values which can be divided by z
	final List<Tuple2<String, Integer>> collected =
			minima.flatMap(new Mp2()).withBroadcastSet(broadcastValues, "z").collect();

	JavaProgramTestBase.compareResultAsText(collected, RESULT);
}

Example #7

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Translates a {@code Create.Values} transform into a Flink data set.
 *
 * <p>Every element is encoded to a byte array with the output's {@link Coder}; the byte arrays
 * are handed to a {@code FlinkCreateFunction} that is applied as a flat map over a
 * single-element data set, and the resulting operator is registered as the transform's output.
 *
 * @param transform the transform whose elements are materialized
 * @param context the batch translation context providing type information, the execution
 *                environment and the output registration
 * @throws RuntimeException if an element cannot be encoded; the underlying
 *                          {@link IOException} is attached as the cause
 */
@Override
public void translateNode(Create.Values<OUT> transform, FlinkBatchTranslationContext context) {
	TypeInformation<OUT> typeInformation = context.getOutputTypeInfo();
	Iterable<OUT> elements = transform.getElements();

	// we need to serialize the elements to byte arrays, since they might contain
	// elements that are not serializable by Java serialization. We deserialize them
	// in the FlatMap function using the Coder.

	List<byte[]> serializedElements = Lists.newArrayList();
	Coder<OUT> coder = context.getOutput(transform).getCoder();
	for (OUT element: elements) {
		ByteArrayOutputStream bao = new ByteArrayOutputStream();
		try {
			coder.encode(element, bao, Coder.Context.OUTER);
			serializedElements.add(bao.toByteArray());
		} catch (IOException e) {
			// Chain the original exception so its stack trace is not lost; the message is unchanged.
			throw new RuntimeException("Could not serialize Create elements using Coder: " + e, e);
		}
	}

	// A single dummy element drives exactly one invocation of the create function's flat map.
	DataSet<Integer> initDataSet = context.getExecutionEnvironment().fromElements(1);
	FlinkCreateFunction<Integer, OUT> flatMapFunction = new FlinkCreateFunction<>(serializedElements, coder);
	FlatMapOperator<Integer, OUT> outputDataSet = new FlatMapOperator<>(initDataSet, typeInformation, flatMapFunction, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

Example #8

Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0

5 votes

/**
 * Applies a {@link FlinkMultiOutputPruningFunction} for {@code integerTag} to the tagged data
 * set as a flat map named after {@code collection} and registers the result as the output
 * data set of {@code collection}.
 *
 * @param taggedDataSet data set of windowed tagged union values
 * @param context translation context supplying type information and pipeline options
 * @param integerTag tag handed to the pruning function
 * @param collection the collection whose output data set is being produced
 */
private <T> void pruneOutput(
    DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
    FlinkBatchTranslationContext context,
    int integerTag,
    PCollection<T> collection) {
  final TypeInformation<WindowedValue<T>> prunedType = context.getTypeInfo(collection);
  final FlinkMultiOutputPruningFunction<T> tagFilter =
      new FlinkMultiOutputPruningFunction<>(integerTag, context.getPipelineOptions());
  final String operatorName = collection.getName();

  final FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> prunedDataSet =
      new FlatMapOperator<>(taggedDataSet, prunedType, tagFilter, operatorName);
  context.setOutputDataSet(collection, prunedDataSet);
}

Example #9

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Returns a flat map over the 3-tuple collection data set that forwards only the {@code f2}
 * field of each tuple.
 */
private FlatMapOperator<Tuple3<Integer, Long, String>, String> getSourceDataSet(ExecutionEnvironment env) {
	// Kept as an anonymous class so the function object carries its generic signature.
	final FlatMapFunction<Tuple3<Integer, Long, String>, String> emitStringField =
		new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
			@Override
			public void flatMap(Tuple3<Integer, Long, String> record, Collector<String> sink) throws Exception {
				sink.collect(record.f2);
			}
		};
	return CollectionDataSets.get3TupleDataSet(env).flatMap(emitStringField);
}

Example #10

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Samples {@code numSamples} elements from the source data set via
 * {@code DataSetUtils.sampleWithSize}, asserts the collected size equals {@code numSamples}
 * and passes the sample with {@code getSourceStrings()} to {@code containsResultAsText}.
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> input = getSourceDataSet(executionEnv);

	final DataSet<String> sampler =
		DataSetUtils.sampleWithSize(input, withReplacement, numSamples, seed);
	final List<String> drawn = sampler.collect();

	assertEquals(numSamples, drawn.size());
	containsResultAsText(drawn, getSourceStrings());
}

Example #11

Source File: SampleITCase.java From flink with Apache License 2.0

5 votes

/**
 * Applies {@code DataSetUtils.sample} with the given replacement mode, fraction and seed to
 * the source data set, then passes the collected values and {@code getSourceStrings()} to
 * {@code containsResultAsText}.
 */
private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
	final ExecutionEnvironment flinkEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> strings = getSourceDataSet(flinkEnv);

	final MapPartitionOperator<String, String> fractionSample =
		DataSetUtils.sample(strings, withReplacement, fraction, seed);
	final List<String> collected = fractionSample.collect();

	containsResultAsText(collected, getSourceStrings());
}

Example #12

Source File: BroadcastBranchingITCase.java From flink with Apache License 2.0

5 votes

/**
 * Assembles two sources joined into (id, x, y) triples, branches that join result into a
 * second join and a broadcast set, collects the final output and compares it with
 * {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment flinkEnv = ExecutionEnvironment.getExecutionEnvironment();
	flinkEnv.setParallelism(1);

	// Source 1: parameters a,b,c of second degree polynomials P(x) = ax^2 + bx + c, identified by id
	final DataSet<Tuple4<String, Integer, Integer, Integer>> coefficients = flinkEnv.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// Source 2: x values to be evaluated with the polynomial identified by id
	final DataSet<Tuple2<String, Integer>> xs = flinkEnv.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// Source 3: y values to be evaluated with the polynomial identified by id
	final DataSet<Tuple2<String, Integer>> ys = flinkEnv.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// Jn1 matches x and y values on id and emits (id, x, y) triples
	final JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> xyById =
			xs.join(ys).where(0).equalTo(0).with(new Jn1());

	// Jn2 matches polynomial and arguments by id, computes p = min(P(x),P(y)) and emits (id, p) tuples
	final JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> pById =
			xyById.join(coefficients).where(0).equalTo(0).with(new Jn2());

	// Mp1 selects (id, x, y) triples where x = y and broadcasts z (=x=y) to Mp2
	final FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> zBroadcast =
			xyById.flatMap(new Mp1());

	// Mp2 filters out all p values which can be divided by z
	final List<Tuple2<String, Integer>> output =
			pById.flatMap(new Mp2()).withBroadcastSet(zBroadcast, "z").collect();

	JavaProgramTestBase.compareResultAsText(output, RESULT);
}

Example #13

Source File: SampleITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Produces the String data set used by the sampler checks by flat-mapping the 3-tuple
 * collection data set onto each tuple's {@code f2} field.
 */
private FlatMapOperator<Tuple3<Integer, Long, String>, String> getSourceDataSet(ExecutionEnvironment env) {
	// Anonymous class on purpose: the function object keeps its generic signature.
	final FlatMapFunction<Tuple3<Integer, Long, String>, String> projectToString =
		new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
			@Override
			public void flatMap(Tuple3<Integer, Long, String> element, Collector<String> output) throws Exception {
				output.collect(element.f2);
			}
		};
	return CollectionDataSets.get3TupleDataSet(env).flatMap(projectToString);
}

Example #14

Source File: SampleITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Collects a {@code DataSetUtils.sampleWithSize} sample of the source data set, asserts that
 * its size is {@code numSamples} and passes it with {@code getSourceStrings()} to
 * {@code containsResultAsText}.
 */
private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
	final ExecutionEnvironment flinkEnv = ExecutionEnvironment.getExecutionEnvironment();
	final FlatMapOperator<Tuple3<Integer, Long, String>, String> strings = getSourceDataSet(flinkEnv);

	final DataSet<String> fixedSizeSample =
		DataSetUtils.sampleWithSize(strings, withReplacement, numSamples, seed);
	final List<String> collected = fixedSizeSample.collect();

	assertEquals(numSamples, collected.size());
	containsResultAsText(collected, getSourceStrings());
}

Example #15

Source File: BroadcastBranchingITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Executes the branching broadcast plan (three sources, two joins, two flat maps with one
 * broadcast set) at parallelism 1 and compares the collected output with {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
	execEnv.setParallelism(1);

	// Sc1: parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id
	final DataSet<Tuple4<String, Integer, Integer, Integer>> source1 = execEnv.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// Sc2: x values to be evaluated with the polynomial identified by id
	final DataSet<Tuple2<String, Integer>> source2 = execEnv.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// Sc3: y values to be evaluated with the polynomial identified by id
	final DataSet<Tuple2<String, Integer>> source3 = execEnv.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// Jn1 matches x and y values on id and emits (id, x, y) triples
	final JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> join1 =
			source2.join(source3).where(0).equalTo(0).with(new Jn1());

	// Jn2 matches polynomial and arguments by id, computes p = min(P(x),P(y)) and emits (id, p) tuples
	final JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> join2 =
			join1.join(source1).where(0).equalTo(0).with(new Jn2());

	// Mp1 selects (id, x, y) triples where x = y and broadcasts z (=x=y) to Mp2
	final FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> map1 =
			join1.flatMap(new Mp1());

	// Mp2 filters out all p values which can be divided by z
	final List<Tuple2<String, Integer>> actual =
			join2.flatMap(new Mp2()).withBroadcastSet(map1, "z").collect();

	JavaProgramTestBase.compareResultAsText(actual, RESULT);
}

Example #16

Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0

4 votes

/**
 * Translates a transform from {@code KV<K, InputT>} to {@code KV<K, Iterable<InputT>>} into a
 * Flink flat map, a grouping and a group reduce, and registers the result as the transform's
 * output data set.
 *
 * @param transform the transform being translated
 * @param context the batch translation context used to look up inputs, outputs, options and
 *     consumer counts
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkBatchTranslationContext context) {
  // Total number of consumers across all outputs of this transform.
  final int numConsumers =
      context.getOutputs(transform).values().stream().mapToInt(context::getNumConsumers).sum();
  final boolean multipleConsumers = numConsumers > 1;
  // The reduce function is told to produce a re-iterable result when there is more than one
  // consumer, or when the pipeline option explicitly requests it.
  final boolean reIterableResult =
      multipleConsumers
          || context
              .getPipelineOptions()
              .as(FlinkPipelineOptions.class)
              .getReIterableGroupByKeyResult();
  final DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));
  // NOTE(review): unchecked cast; assumes the input coder is always a KvCoder - confirm.
  final KvCoder<K, InputT> inputCoder =
      (KvCoder<K, InputT>) context.getInput(transform).getCoder();
  final WindowingStrategy<?, ?> windowingStrategy =
      context.getInput(transform).getWindowingStrategy();
  final String fullName = getCurrentTransformName(context);
  // Apply FlinkExplodeWindowsFunction as a flat map (input and output share the same type),
  // then group with a selector built from the key coder and the window coder.
  // NOTE(review): presumably this emits one value per window so grouping is per (key, window)
  // - confirm against FlinkExplodeWindowsFunction and WindowedKvKeySelector.
  final UnsortedGrouping<WindowedValue<KV<K, InputT>>> inputGrouping =
      new FlatMapOperator<>(
              inputDataSet,
              inputDataSet.getType(),
              new FlinkExplodeWindowsFunction<>(),
              "ExplodeWindows: " + fullName)
          .groupBy(
              new WindowedKvKeySelector<>(
                  inputCoder.getKeyCoder(), windowingStrategy.getWindowFn().windowCoder()));
  // Output type: windowed KV of the input key coder and an iterable of the input value coder.
  final TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              KvCoder.of(
                  inputCoder.getKeyCoder(), IterableCoder.of(inputCoder.getValueCoder())),
              windowingStrategy.getWindowFn().windowCoder()));
  // Group reduce over the grouping; the operator carries the transform's full name.
  final DataSet<WindowedValue<KV<K, Iterable<InputT>>>> outputDataSet =
      new GroupReduceOperator<>(
              inputGrouping,
              outputTypeInfo,
              new FlinkNonMergingReduceFunction<>(windowingStrategy, reIterableResult),
              fullName)
          .returns(outputTypeInfo);
  context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}

Example #17

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Translates a multi-output {@code ParDo} into a Flink map-partition operator that emits
 * tagged union values, followed by one pruning flat map per output.
 *
 * <p>Each output tag is assigned an integer index (the main output always gets 0). The
 * {@code UnionCoder} is built with its component coders ordered by that same index, so the
 * coder used for a union value always matches the output it was tagged for, regardless of the
 * iteration order of the outputs map.
 *
 * @param transform the multi-output ParDo being translated
 * @param context the batch translation context used to look up inputs, outputs, type
 *                information and pipeline options
 */
@Override
public void translateNode(ParDo.BoundMulti<IN, OUT> transform, FlinkBatchTranslationContext context) {
	DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform));

	final DoFn<IN, OUT> doFn = transform.getFn();

	Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll();

	Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
	// put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this
	outputMap.put(transform.getMainOutputTag(), 0);
	int count = 1;
	for (TupleTag<?> tag: outputs.keySet()) {
		if (!outputMap.containsKey(tag)) {
			outputMap.put(tag, count++);
		}
	}

	// collect all output Coders and create a UnionCoder for our tagged outputs. The coder at
	// position i has to belong to the output with union tag i, so sort the coders by the index
	// assigned above instead of relying on the iteration order of the outputs map (which only
	// matches when the main output happens to come first).
	Map<Integer, Coder<?>> codersByIndex = Maps.newTreeMap();
	for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
		codersByIndex.put(outputMap.get(output.getKey()), output.getValue().getCoder());
	}
	List<Coder<?>> outputCoders = Lists.newArrayList(codersByIndex.values());

	UnionCoder unionCoder = UnionCoder.of(outputCoders);

	@SuppressWarnings("unchecked")
	TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder);

	@SuppressWarnings("unchecked")
	FlinkMultiOutputDoFnFunction<IN, OUT> doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap);
	MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());

	transformSideInputs(transform.getSideInputs(), outputDataSet, context);

	// one pruning flat map per output, each configured with that output's union tag index
	for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
		TypeInformation<Object> outputType = context.getTypeInfo(output.getValue());
		int outputTag = outputMap.get(output.getKey());
		FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag);
		FlatMapOperator<RawUnionValue, Object> pruningOperator = new
				FlatMapOperator<>(outputDataSet, outputType,
				pruningFunction, output.getValue().getName());
		context.setOutputDataSet(output.getValue(), pruningOperator);

	}
}

Example #18

Source File: DataSet.java From flink with Apache License 2.0

3 votes

/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>The supplied function is invoked for each element of the DataSet and may emit any number
 * of elements, including none. A {@link org.apache.flink.api.common.functions.RichFlatMapFunction}
 * may be passed as well.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code flatMapper} is {@code null}.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
	if (flatMapper == null) {
		throw new NullPointerException("FlatMap function must not be null.");
	}

	// NOTE(review): presumably derived from the call stack, so this call stays directly in this method - confirm.
	final String location = Utils.getCallLocationName();
	final TypeInformation<R> producedType =
		TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), location, true);
	final FlatMapFunction<T, R> cleanedFunction = clean(flatMapper);
	return new FlatMapOperator<>(this, producedType, cleanedFunction, location);
}

Example #19

Source File: DataSet.java From Flink-CEPplus with Apache License 2.0

3 votes

/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>For each element of the DataSet the given function is called; every call can return any
 * number of elements including none. The function may also be a
 * {@link org.apache.flink.api.common.functions.RichFlatMapFunction}.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code flatMapper} is {@code null}.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
	if (flatMapper == null) {
		throw new NullPointerException("FlatMap function must not be null.");
	}

	// NOTE(review): presumably derived from the call stack, so this call stays directly in this method - confirm.
	final String callSite = Utils.getCallLocationName();
	final TypeInformation<R> outputType =
		TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), callSite, true);
	final FlatMapFunction<T, R> cleaned = clean(flatMapper);
	return new FlatMapOperator<>(this, outputType, cleaned, callSite);
}

Example #20

Source File: DataSet.java From flink with Apache License 2.0

3 votes

/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>The transformation calls the given function once per element of the DataSet. Each call
 * can return any number of elements including none. A
 * {@link org.apache.flink.api.common.functions.RichFlatMapFunction} can be supplied too.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code flatMapper} is {@code null}.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
	if (flatMapper == null) {
		throw new NullPointerException("FlatMap function must not be null.");
	}

	// NOTE(review): presumably derived from the call stack, so this call stays directly in this method - confirm.
	final String operatorLocation = Utils.getCallLocationName();
	final TypeInformation<R> returnType =
		TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), operatorLocation, true);
	final FlatMapFunction<T, R> closureCleaned = clean(flatMapper);
	return new FlatMapOperator<>(this, returnType, closureCleaned, operatorLocation);
}