Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#getParallelism()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#getParallelism(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: RouterTranslator.java    From stateful-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Attaches a single routing operator to an ingress {@linkplain DataStream} (the stream created
 * as a result of {@linkplain IngressSpec} translation). The operator is given the same
 * parallelism as the {@linkplain DataStream} it is attached to, so that per key ordering is
 * kept.
 *
 * @param id identifier of the ingress the routing operator is created for
 * @param sourceStream the stream that was created for that ingress
 * @return the stream produced by the routing operator, typed as {@code Message}
 */
@SuppressWarnings("unchecked")
private DataStream<Message> createRoutersForSource(
    IngressIdentifier<?> id, DataStream<?> sourceStream) {
  // Both wildcard types are narrowed to Object so one operator type can serve any ingress.
  final IngressIdentifier<Object> ingressId = (IngressIdentifier<Object>) id;
  final DataStream<Object> ingressStream = (DataStream<Object>) sourceStream;

  final IngressRouterOperator<Object> routerOperator = new IngressRouterOperator<>(ingressId);
  final TypeInformation<Message> messageType = universe.types().registerType(Message.class);

  // The ingress name is appended so that each router can be told apart by its operator name.
  final String routerName =
      StatefulFunctionsJobConstants.ROUTER_NAME + " (" + ingressId.name() + ")";

  return ingressStream
      .transform(routerName, messageType, routerOperator)
      // mirror the parallelism of the stream the operator is attached to
      .setParallelism(ingressStream.getParallelism())
      .returns(messageType);
}
 
Example 2
Source File: RouterTranslator.java    From flink-statefun with Apache License 2.0 6 votes vote down vote up
/**
 * Attaches a single routing operator to an ingress {@linkplain DataStream} (the stream created
 * as a result of {@linkplain IngressSpec} translation). The operator is given the same
 * parallelism as the {@linkplain DataStream} it is attached to, so that per key ordering is
 * kept.
 *
 * @param id identifier of the ingress the routing operator is created for
 * @param sourceStream the stream that was created for that ingress
 * @return the stream produced by the routing operator, typed as {@code Message}
 */
@SuppressWarnings("unchecked")
private DataStream<Message> createRoutersForSource(
    IngressIdentifier<?> id, DataStream<?> sourceStream) {
  // Both wildcard types are narrowed to Object so one operator type can serve any ingress.
  final IngressIdentifier<Object> ingressId = (IngressIdentifier<Object>) id;
  final DataStream<Object> ingressStream = (DataStream<Object>) sourceStream;

  // The operator is built from this translator's configuration and the ingress identifier.
  final IngressRouterOperator<Object> routerOperator =
      new IngressRouterOperator<>(configuration, ingressId);
  final TypeInformation<Message> messageType = universe.types().registerType(Message.class);

  // The ingress name is appended so that each router can be told apart by its operator name.
  final String routerName =
      StatefulFunctionsJobConstants.ROUTER_NAME + " (" + ingressId.name() + ")";

  return ingressStream
      .transform(routerName, messageType, routerOperator)
      // mirror the parallelism of the stream the operator is attached to
      .setParallelism(ingressStream.getParallelism())
      .returns(messageType);
}
 
Example 3
Source File: SummaryTreeReduce.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the aggregation pipeline over the given edge stream: a windowed partial fold per key,
 * followed by the stages added by {@code enhance}, a global windowed reduce with the combine
 * function, and a final flat-map at parallelism 1.
 *
 * <p>If {@code degree} is still {@code -1} when this method is called, it is replaced by the
 * parallelism of {@code edgeStream}.
 *
 * @param edgeStream the stream of edges to aggregate
 * @return the aggregated stream, mapped through {@code getTransform()} when that is non-null
 */
@SuppressWarnings("unchecked")
@Override
public DataStream<T> run(final DataStream<Edge<K, EV>> edgeStream) {
	// type information for edges paired with an Integer
	TypeInformation<Tuple2<Integer, Edge<K, EV>>> taggedEdgeType = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, edgeStream.getType());

	// fields 0 and 2 of the edge tuple type feed the extraction of the partial aggregate type
	TupleTypeInfo edgeTupleType = (TupleTypeInfo) edgeStream.getType();
	TypeInformation<S> summaryType = TypeExtractor.createTypeInfo(EdgesFold.class, getUpdateFun().getClass(), 2, edgeTupleType.getTypeAt(0), edgeTupleType.getTypeAt(2));
	TypeInformation<Tuple2<Integer, S>> taggedSummaryType = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, summaryType);

	// -1 means "not set": fall back to the parallelism of the input stream
	if (degree == -1) {
		degree = edgeStream.getParallelism();
	}

	// every windowed stage below uses the same window size
	final Time window = Time.of(timeMillis, TimeUnit.MILLISECONDS);

	// stage 1: fold the edges of each key into a partial aggregate, once per window
	DataStream<S> partials = edgeStream
			.map(new PartitionMapper<>()).returns(taggedEdgeType)
			.setParallelism(degree)
			.keyBy(0)
			.timeWindow(window)
			.fold(getInitialValue(), new PartialAgg<>(getUpdateFun(), summaryType)).setParallelism(degree);

	// stage 2: pair the partial aggregates with an Integer again and hand them to enhance()
	DataStream<Tuple2<Integer, S>> taggedPartials =
			partials.map(new PartitionMapper<>()).setParallelism(degree).returns(taggedSummaryType);
	DataStream<Tuple2<Integer, S>> tree = enhance(taggedPartials, taggedSummaryType);

	// stage 3: strip the Integer, combine everything in one global window, emit at parallelism 1
	DataStream<S> combined = tree.map(new PartitionStripper<>()).setParallelism(tree.getParallelism())
			.timeWindowAll(window)
			.reduce(getCombineFun())
			.flatMap(getAggregator(edgeStream)).setParallelism(1);

	// the optional transform is applied last; without one the stream is returned as-is
	if (getTransform() != null) {
		return combined.map(getTransform());
	}
	return (DataStream<T>) combined;
}
 
Example 4
Source File: SummaryTreeReduce.java    From gelly-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Appends aggregation stages to {@code input} until the stream's parallelism is 2 or less.
 * Each stage halves the parallelism of the previous one: records are keyed by half of their
 * Integer field, re-mapped, keyed by field 0 and reduced per time window with the combine
 * function.
 *
 * @param input   the stream to extend; returned unchanged if its parallelism is already &lt;= 2
 * @param aggType type information declared for the output of every re-mapping step
 * @return the output of the last stage that was appended, or {@code input} if none was
 */
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) {
	DataStream<Tuple2<Integer, S>> level = input;

	// one loop pass per added stage; stops once at most two parallel instances remain
	while (level.getParallelism() > 2) {
		final int halved = level.getParallelism() / 2;

		// collapse two partitions into one
		KeySelector<Tuple2<Integer, S>, Integer> pairSelector = new KeySelector<Tuple2<Integer, S>, Integer>() {
			@Override
			public Integer getKey(Tuple2<Integer, S> record) throws Exception {
				return record.f0 / 2;
			}
		};
		DataStream<Tuple2<Integer, S>> paired = level.keyBy(pairSelector);

		// repartition stream to p / 2 aggregators
		KeyedStream<Tuple2<Integer, S>, Integer> rekeyed = paired
				.map(new PartitionReMapper()).returns(aggType)
				.setParallelism(halved)
				.keyBy(0);

		// window again on event time and aggregate
		final Time window = Time.of(timeMillis, TimeUnit.MILLISECONDS);
		level = rekeyed
				.timeWindow(window)
				.reduce(new AggregationWrapper<>(getCombineFun()))
				.setParallelism(halved);
	}

	return level;
}