org.apache.flink.streaming.api.functions.source.ParallelSourceFunction Java Examples

The following examples show how to use org.apache.flink.streaming.api.functions.source.ParallelSourceFunction. They are taken from open source projects; the originating project and source file are noted above each example.
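
For orientation, the sketch below shows the basic shape of a ParallelSourceFunction. Unlike a plain SourceFunction, a source implementing this interface is executed with the configured parallelism, so every parallel subtask runs its own copy of run(). The class and field names here are chosen purely for illustration.

import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext;

public class CountingSource implements ParallelSourceFunction<Long> {

    // volatile so that cancel(), called from a different thread, is visible to run()
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        long value = 0;
        while (running) {
            // each parallel subtask emits its own independent sequence
            ctx.collect(value++);
            Thread.sleep(100);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}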
Example #1
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0
/**
 * Adds a data source with custom type information, thus opening a
 * {@link DataStream}. Only in very special cases does the user need to
 * supply type information explicitly. Otherwise use
 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}
 *
 * @param function
 * 		the user defined function
 * @param sourceName
 * 		Name of the data source
 * @param <OUT>
 * 		type of the returned stream
 * @param typeInfo
 * 		the user defined type information for the stream
 * @return the data stream constructed
 */
@SuppressWarnings("unchecked")
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {

	if (function instanceof ResultTypeQueryable) {
		// the source knows its own output type; this takes precedence over the argument
		typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
	}
	if (typeInfo == null) {
		try {
			// fall back to reflective extraction from the SourceFunction type parameter
			typeInfo = TypeExtractor.createTypeInfo(
					SourceFunction.class,
					function.getClass(), 0, null, null);
		} catch (final InvalidTypesException e) {
			// defer the failure: the error is only raised if the type is actually needed
			typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
		}
	}

	boolean isParallel = function instanceof ParallelSourceFunction;

	clean(function);

	final StreamSource<OUT, ?> sourceOperator = new StreamSource<>(function);
	return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
}
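
For reference, a call site for this overload could look like the sketch below, assuming env is a StreamExecutionEnvironment and MyGenericSource is a hypothetical user-defined SourceFunction (not part of the snippet above). Supplying the TypeInformation explicitly is only needed when the extractor cannot infer the output type, e.g. for sources with generic type parameters.

DataStream<Tuple2<String, Integer>> stream = env.addSource(
        new MyGenericSource<Tuple2<String, Integer>>(),   // hypothetical generic source
        "typed-source",
        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));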
 
Example #2
Source File: StreamExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0
/**
 * Adds a data source with custom type information, thus opening a
 * {@link DataStream}. Only in very special cases does the user need to
 * supply type information explicitly. Otherwise use
 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}
 *
 * @param function
 * 		the user defined function
 * @param sourceName
 * 		Name of the data source
 * @param <OUT>
 * 		type of the returned stream
 * @param typeInfo
 * 		the user defined type information for the stream
 * @return the data stream constructed
 */
@SuppressWarnings("unchecked")
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {

	if (typeInfo == null) {
		if (function instanceof ResultTypeQueryable) {
			typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
		} else {
			try {
				typeInfo = TypeExtractor.createTypeInfo(
						SourceFunction.class,
						function.getClass(), 0, null, null);
			} catch (final InvalidTypesException e) {
				typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
			}
		}
	}

	boolean isParallel = function instanceof ParallelSourceFunction;

	clean(function);
	StreamSource<OUT, ?> sourceOperator;
	if (function instanceof StoppableFunction) {
		// legacy stoppable sources are wrapped in a StoppableStreamSource so the job can be stopped gracefully
		sourceOperator = new StoppableStreamSource<>(cast2StoppableSourceFunction(function));
	} else {
		sourceOperator = new StreamSource<>(function);
	}

	return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
}
 
Example #3
Source File: ParrelSourceFunctionDemo.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism to 2
    env.setParallelism(2);

    DataStream<Tuple2<Integer,Integer>> dataStream = env.addSource(new ParallelSourceFunction<Tuple2<Integer, Integer>>() {

        private volatile boolean isRunning = true;

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
            int i = 0;
            while (isRunning) {
                ctx.collect(new Tuple2<>(i++ % 5, 1));
                Thread.sleep(1000);
                if(i>9){
                    break;
                }
            }
        }

        @Override
        public void cancel() {
            isRunning = false;
        }
    });

    dataStream
            .keyBy(0)
            .timeWindow(Time.seconds(2))
            .sum(1)
            .print();

    env.execute("Customize DataSource demo : ParallelSourceFunction");
}
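
Note that keyBy(0) and timeWindow(Time.seconds(2)) used above are deprecated in newer Flink releases. A sketch of the same pipeline with the replacement APIs (assuming the same Tuple2 stream and processing time, which timeWindow uses by default here) would be:

dataStream
        .keyBy(value -> value.f0)
        .window(TumblingProcessingTimeWindows.of(Time.seconds(2)))
        .sum(1)
        .print();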
 
Example #4
Source File: TestableStreamingJobTest.java    From flink-training-exercises with Apache License 2.0
@Test
public void testCompletePipeline() throws Exception {

	// Arrange
	ParallelSourceFunction<Long> source =
			new ParallelCollectionSource(Arrays.asList(1L, 10L, -10L));
	SinkCollectingLongs sink = new SinkCollectingLongs();
	TestableStreamingJob job = new TestableStreamingJob(source, sink);

	// Act
	job.execute();

	// Assert
	assertThat(sink.result).containsExactlyInAnyOrder(2L, 11L, -9L);
}
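
ParallelCollectionSource and SinkCollectingLongs are test helpers from the flink-training-exercises project. Purely to illustrate the idea (this is a sketch, not that project's actual implementation), a parallel collection source could partition the input by subtask index so that the collection is emitted exactly once across all parallel instances:

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext;

public class ParallelCollectionSource extends RichParallelSourceFunction<Long> {

    private final List<Long> input;

    public ParallelCollectionSource(List<Long> input) {
        this.input = new ArrayList<>(input);
    }

    @Override
    public void run(SourceContext<Long> ctx) {
        int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
        int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
        for (int i = 0; i < input.size(); i++) {
            // each subtask emits only the elements assigned to it
            if (i % parallelism == subtaskIndex) {
                ctx.collect(input.get(i));
            }
        }
    }

    @Override
    public void cancel() {
        // bounded source: run() returns on its own, nothing to interrupt
    }
}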
 
Example #5
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
	final List<ResourceSpec> resourceSpecs,
	@Nullable final List<Integer> managedMemoryWeights) throws Exception {

	final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) {
			// no-op: the test only builds the JobGraph, the source is never executed
		}

		@Override
		public void cancel() {
		}
	});
	opMethod.invoke(source, resourceSpecs.get(0));

	// CHAIN(source -> map1) in default slot sharing group
	final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map1, resourceSpecs.get(1));

	// CHAIN(map2) in default slot sharing group
	final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
	opMethod.invoke(map2, resourceSpecs.get(2));

	// CHAIN(map3) in test slot sharing group
	final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
	opMethod.invoke(map3, resourceSpecs.get(3));

	if (managedMemoryWeights != null) {
		source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
		map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
		map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
		map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
	}

	return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
 
Example #6
Source File: Sink2ES7Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);

    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(parameterTool.get(ELASTICSEARCH_HOSTS));
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 1);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);


    DataStreamSource<MetricEvent> data = env.addSource(new ParallelSourceFunction<MetricEvent>() {
        @Override
        public void run(SourceContext<MetricEvent> context) throws Exception {
            while (true) {
                // emit synthetic test data

                Map<String, Object> fields = new HashMap<>();
                fields.put("system", 10);
                fields.put("user", 20);
                fields.put("idle", 70);

                Map<String, String> tags = new HashMap<>();
                tags.put("cluster_name", "zhisheng");
                tags.put("host_ip", "11.0.11.0");

                MetricEvent metricEvent = MetricEvent.builder()
                        .name("cpu")
                        .timestamp(System.currentTimeMillis())
                        .fields(fields)
                        .tags(tags)
                        .build();

                context.collect(metricEvent);
                Thread.sleep(200);
            }
        }

        @Override
        public void cancel() {
        }
    });

    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, data,
            (MetricEvent metric, RuntimeContext runtimeContext, RequestIndexer requestIndexer) -> {
                requestIndexer.add(Requests.indexRequest()
                        .index(ZHISHENG + "_" + metric.getName())
                        .source(GsonUtil.toJSONBytes(metric), XContentType.JSON));
            },
            parameterTool);

    env.execute("flink learning connectors es7");
}
 
Example #7
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0
/**
 * Adds a data source with custom type information, thus opening a
 * {@link DataStream}. Only in very special cases does the user need to
 * supply type information explicitly. Otherwise use
 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}
 *
 * @param function
 * 		the user defined function
 * @param sourceName
 * 		Name of the data source
 * @param <OUT>
 * 		type of the returned stream
 * @param typeInfo
 * 		the user defined type information for the stream
 * @return the data stream constructed
 */
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {

	// type resolution (ResultTypeQueryable check plus TypeExtractor fallback) is factored into the getTypeInfo(...) helper
	TypeInformation<OUT> resolvedTypeInfo = getTypeInfo(function, sourceName, SourceFunction.class, typeInfo);

	boolean isParallel = function instanceof ParallelSourceFunction;

	clean(function);

	final StreamSource<OUT, ?> sourceOperator = new StreamSource<>(function);
	return new DataStreamSource<>(this, resolvedTypeInfo, sourceOperator, isParallel, sourceName);
}