Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#union()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#union(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You can also check the related API usage in the sidebar.
Example 1
Source File: Sources.java    From stateful-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Combines every stream in {@code sources} into one stream.
 *
 * <p>A single source is returned unchanged; otherwise the first source is unioned with all
 * remaining ones, in the collection's iteration order.
 *
 * @param sources the streams to combine
 * @return the only stream in {@code sources}, or the union of all of them
 * @throws IllegalStateException if {@code sources} is empty
 */
private static <T> DataStream<T> union(Collection<DataStream<T>> sources) {
  if (sources.isEmpty()) {
    throw new IllegalStateException("There are no routers defined.");
  }
  final int total = sources.size();
  final Iterator<DataStream<T>> remaining = sources.iterator();
  final DataStream<T> head = remaining.next();
  if (total == 1) {
    return head;
  }
  // Generic arrays cannot be instantiated directly, hence the raw array creation.
  @SuppressWarnings("unchecked")
  final DataStream<T>[] tail = new DataStream[total - 1];
  int slot = 0;
  while (slot < tail.length) {
    tail[slot++] = remaining.next();
  }
  return head.union(tail);
}
 
Example 2
Source File: Sources.java    From flink-statefun with Apache License 2.0 6 votes vote down vote up
/**
 * Unions all of the given streams into a single stream.
 *
 * <p>When only one stream is supplied it is handed back as is. With more than one, the first
 * stream (in iteration order) is unioned with an array holding the others.
 *
 * @param sources the streams to union
 * @return the sole stream, or the union of all supplied streams
 * @throws IllegalStateException if {@code sources} is empty
 */
private static <T> DataStream<T> union(Collection<DataStream<T>> sources) {
  if (sources.isEmpty()) {
    throw new IllegalStateException("There are no routers defined.");
  }
  final int n = sources.size();
  final Iterator<DataStream<T>> cursor = sources.iterator();
  final DataStream<T> leading = cursor.next();
  if (n != 1) {
    // A generic array has to be created through its raw component type.
    @SuppressWarnings("unchecked")
    final DataStream<T>[] others = new DataStream[n - 1];
    for (int idx = 0; idx < others.length; idx++) {
      others[idx] = cursor.next();
    }
    return leading.union(others);
  }
  return leading;
}
 
Example 3
Source File: Union.java    From da-streamingledger with Apache License 2.0 6 votes vote down vote up
/**
 * Union differently typed {@link DataStream}s into single {@code DataStream}.
 *
 * <p>The resulting {@code DataStream} is of type {@link TaggedElement} where
 * {@link TaggedElement#getDataStreamTag()} corresponds to the list position of the source {@code DataStream} in
 * {@code inputs} that produced that element, and {@link TaggedElement#getElement()} is the element produced.
 *
 * <p>If only one tagged stream results from {@code inputs}, it is returned directly without a union.
 *
 * @param inputs the input data streams to union; must not be empty (enforced via {@code checkArgument}).
 * @return a {@code DataStream} that corresponds to the union of all the input {@link DataStream}s
 */
public static DataStream<TaggedElement> apply(List<DataStream<?>> inputs) {
    checkArgument(!inputs.isEmpty(), "union requires at least one input data stream.");

    List<DataStream<TaggedElement>> taggedInputs = tagInputStreams(inputs);
    if (taggedInputs.size() == 1) {
        return taggedInputs.get(0);
    }
    DataStream<TaggedElement> first = taggedInputs.get(0);
    List<DataStream<TaggedElement>> restList = taggedInputs.subList(1, taggedInputs.size());

    // Generic arrays cannot be created directly, so a raw array is created and cast.
    // "rawtypes" (not "raw") is the token that actually suppresses the raw-type warning.
    @SuppressWarnings({"unchecked", "rawtypes"})
    DataStream<TaggedElement>[] restArray = (DataStream<TaggedElement>[]) new DataStream[restList.size()];
    DataStream<TaggedElement>[] rest = restList.toArray(restArray);
    return first.union(rest);
}
 
Example 4
Source File: FromCollection.java    From blog_demos with Apache License 2.0 5 votes vote down vote up
/**
 * Demo job: builds two in-memory {@code DataStream}s of (word, count) tuples, unions them,
 * sums the counts per word and prints the running totals.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Use a parallelism of 1
    env.setParallelism(1);

    // Create a List holding two Tuple2 elements
    // (diamond instead of raw Tuple2 keeps the element type checked by the compiler)
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 1));

    // Create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // Create a DataStream from several Tuple2 elements
    DataStream<Tuple2<String, Integer>> fromElementDataStream = env.fromElements(
            new Tuple2<>("ccc", 1),
            new Tuple2<>("ddd", 1),
            new Tuple2<>("aaa", 1)
    );

    // Merge the two DataStreams into one with union
    DataStream<Tuple2<String, Integer>> unionDataStream = fromCollectionDataStream.union(fromElementDataStream);

    // Count the occurrences of each word: key by tuple field 0, sum tuple field 1
    unionDataStream
            .keyBy(0)
            .sum(1)
            .print();

    env.execute("API DataSource demo : collection");
}
 
Example 5
Source File: SiddhiStream.java    From flink-siddhi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one stream by converting the first registered stream and unioning it, one at a time,
 * with the converted stream of every id in {@code unionStreamIds}.
 *
 * @return the converted first stream unioned with all converted union streams
 */
@Override
protected DataStream<Tuple2<StreamRoute, Object>> toDataStream() {
    final String headId = firstStreamId;
    final List<String> otherIds = this.unionStreamIds;

    DataStream<Tuple2<StreamRoute, Object>> merged = convertDataStream(
        getCepEnvironment().<T>getDataStream(headId),
        this.firstStreamId);

    for (String otherId : otherIds) {
        merged = merged.union(
            convertDataStream(getCepEnvironment().<T>getDataStream(otherId), otherId));
    }
    return merged;
}
 
Example 6
Source File: SiddhiStream.java    From bahir-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the first stream and every stream listed in {@code unionStreamIds}, then chains
 * them together with successive {@code union} calls.
 *
 * @return a single stream covering the first stream and all union streams
 */
@Override
protected DataStream<Tuple2<String, Object>> toDataStream() {
    final String primaryId = firstStreamId;
    final List<String> additionalIds = this.unionStreamIds;

    DataStream<Tuple2<String, Object>> result = convertDataStream(
        getCepEnvironment().<T>getDataStream(primaryId),
        this.firstStreamId);

    for (String additionalId : additionalIds) {
        result = result.union(
            convertDataStream(getCepEnvironment().<T>getDataStream(additionalId), additionalId));
    }
    return result;
}
 
Example 7
Source File: BaseEvalClassStreamOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Links this operator to the first of {@code inputs} and sets its output to the union of
 * per-window metrics and metrics merged across all data.
 *
 * <p>Depending on the evaluation type derived from the operator's params, the label column is
 * paired with either the prediction column or the prediction-detail column. The selected
 * columns are windowed over {@code TIME_INTERVAL} seconds and summarised per window.
 *
 * @param inputs upstream operators; the first one is used
 * @return this operator
 * @throws RuntimeException if the evaluation type is neither {@code PRED_RESULT} nor
 *     {@code PRED_DETAIL}
 */
@Override
public T linkFrom(StreamOperator<?>... inputs) {
    StreamOperator<?> source = checkAndGetFirst(inputs);
    String labelCol = this.get(MultiEvaluationStreamParams.LABEL_COL);
    String posLabel = this.get(BinaryEvaluationStreamParams.POS_LABEL_VAL_STR);
    Integer windowSeconds = this.get(MultiEvaluationStreamParams.TIME_INTERVAL);

    ClassificationEvaluationUtil.Type evalType =
        ClassificationEvaluationUtil.judgeEvaluationType(this.getParams());

    // Metrics summarised once per time window.
    DataStream<BaseMetricsSummary> perWindow;

    switch (evalType) {
        case PRED_RESULT: {
            String predictionCol = this.get(MultiEvaluationStreamParams.PREDICTION_COL);
            TableUtil.assertSelectedColExist(source.getColNames(), labelCol, predictionCol);

            LabelPredictionWindow windowFn = new LabelPredictionWindow(binary, posLabel);
            perWindow = source
                .select(new String[] {labelCol, predictionCol})
                .getDataStream()
                .timeWindowAll(Time.of(windowSeconds, TimeUnit.SECONDS))
                .apply(windowFn);
            break;
        }
        case PRED_DETAIL: {
            String detailCol = this.get(MultiEvaluationStreamParams.PREDICTION_DETAIL_COL);
            TableUtil.assertSelectedColExist(source.getColNames(), labelCol, detailCol);

            PredDetailLabel windowFn = new PredDetailLabel(posLabel, binary);
            perWindow = source
                .select(new String[] {labelCol, detailCol})
                .getDataStream()
                .timeWindowAll(Time.of(windowSeconds, TimeUnit.SECONDS))
                .apply(windowFn);
            break;
        }
        default: {
            throw new RuntimeException("Error Input");
        }
    }

    // Merge the per-window summaries in a single task (parallelism 1).
    DataStream<BaseMetricsSummary> merged = perWindow
        .map(new EvaluationUtil.AllDataMerge())
        .setParallelism(1);

    DataStream<Row> windowRows = perWindow.map(
        new EvaluationUtil.SaveDataStream(ClassificationEvaluationUtil.WINDOW.f0));
    DataStream<Row> mergedRows = merged.map(
        new EvaluationUtil.SaveDataStream(ClassificationEvaluationUtil.ALL.f0));

    DataStream<Row> combined = windowRows.union(mergedRows);

    this.setOutput(
        combined,
        new String[] {ClassificationEvaluationUtil.STATISTICS_OUTPUT, DATA_OUTPUT},
        new TypeInformation[] {Types.STRING, Types.STRING});

    return (T) this;
}
 
Example 8
Source File: WordCount.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Streaming word count: reads text from every {@code --input} file (unioned into one stream)
 * or from the built-in sample data, counts the words, and writes the counts to
 * {@code --output} or prints them to stdout.
 *
 * @param args command-line arguments; {@code --input} may be given several times
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

		// parse the command-line arguments
		final MultipleParameterTool cliParams = MultipleParameterTool.fromArgs(args);

		// obtain the execution environment and expose the parameters in the web interface
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setGlobalJobParameters(cliParams);

		// assemble the input lines
		DataStream<String> lines = null;
		if (cliParams.has("input")) {
			// start from the first file, then union every further file onto the stream
			for (String path : cliParams.getMultiParameterRequired("input")) {
				lines = (lines == null)
					? env.readTextFile(path)
					: lines.union(env.readTextFile(path));
			}
			Preconditions.checkNotNull(lines, "Input DataStream should not be null.");
		} else {
			System.out.println("Executing WordCount example with default input data set.");
			System.out.println("Use --input to specify file input.");
			// fall back to the bundled sample text
			lines = env.fromElements(WordCountData.WORDS);
		}

		// tokenize each line into (word, 1) pairs, key by tuple field 0, sum tuple field 1
		DataStream<Tuple2<String, Integer>> wordCounts = lines
			.flatMap(new Tokenizer())
			.keyBy(0)
			.sum(1);

		// emit the result
		if (!cliParams.has("output")) {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			wordCounts.print();
		} else {
			wordCounts.writeAsText(cliParams.get("output"));
		}

		// run the job
		env.execute("Streaming WordCount");
	}