org.apache.flink.streaming.api.datastream.DataStreamSource Java Examples

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStreamSource. They are taken from open-source projects; the project and source file each example comes from are noted above it.
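A DataStreamSource is the DataStream subtype returned by the source-creating methods of StreamExecutionEnvironment. As a quick orientation before the examples, a minimal sketch of two common ways to obtain one (element values and the job name are illustrative):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// a non-parallel source built from a fixed set of elements
DataStreamSource<String> words = env.fromElements("flink", "datastream", "source");

// a source built from a ready-made generator
DataStreamSource<Long> numbers = env.generateSequence(0, 1000);

words.print();
env.execute("DataStreamSource orientation");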
Example #1
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0
/**
 * Creates a data stream from the given non-empty collection. The type of the data stream is that of the
 * elements in the collection.
 *
 * <p>The framework will try and determine the exact type from the collection elements. In case of generic
 * elements, it may be necessary to manually supply the type information via
 * {@link #fromCollection(java.util.Collection, org.apache.flink.api.common.typeinfo.TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data stream source, i.e. a data stream source with
 * parallelism one.
 *
 * @param data
 * 		The collection of elements to create the data stream from.
 * @param <OUT>
 *     The generic type of the returned data stream.
 * @return
 *     The data stream representing the given collection
 */
public <OUT> DataStreamSource<OUT> fromCollection(Collection<OUT> data) {
	Preconditions.checkNotNull(data, "Collection must not be null");
	if (data.isEmpty()) {
		throw new IllegalArgumentException("Collection must not be empty");
	}

	OUT first = data.iterator().next();
	if (first == null) {
		throw new IllegalArgumentException("Collection must not contain null elements");
	}

	TypeInformation<OUT> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(first);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + first.getClass()
				+ "; please specify the TypeInformation manually via "
				+ "StreamExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}
	return fromCollection(data, typeInfo);
}
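For comparison with the implementation above, a minimal caller-side sketch (values illustrative). The resulting source always runs with parallelism one, and the collection must be non-empty because the element type is extracted from its first element:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

List<Integer> input = Arrays.asList(1, 2, 3, 4, 5);
DataStreamSource<Integer> numbers = env.fromCollection(input);

numbers.print();
env.execute("fromCollection example");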
 
Example #2
Source File: StreamTableEnvironmentImplTest.java    From flink with Apache License 2.0
private StreamTableEnvironmentImpl getStreamTableEnvironment(
		StreamExecutionEnvironment env,
		DataStreamSource<Integer> elements) {
	TableConfig config = new TableConfig();
	CatalogManager catalogManager = CatalogManagerMocks.createEmptyCatalogManager();
	ModuleManager moduleManager = new ModuleManager();
	return new StreamTableEnvironmentImpl(
		catalogManager,
		moduleManager,
		new FunctionCatalog(config, catalogManager, moduleManager),
		config,
		env,
		new TestPlanner(elements.getTransformation()),
		new ExecutorMock(),
		true
	);
}
 
Example #3
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;

        // these hard-coded parameters could live in a configuration file and be read via parameterTool
        final RMQConnectionConfig connectionConfig = new RMQConnectionConfig
                .Builder().setHost("localhost").setVirtualHost("/")
                .setPort(5672).setUserName("admin").setPassword("admin")
                .build();

        DataStreamSource<String> zhisheng = env.addSource(new RMQSource<>(connectionConfig,
                "zhisheng",
                true,
                new SimpleStringSchema()))
                .setParallelism(1);
        zhisheng.print();

        // to guarantee exactly-once or at-least-once delivery, checkpointing must be enabled
//        env.enableCheckpointing(10000);
        env.execute("flink learning connectors rabbitmq");
    }
 
Example #4
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0
/**
 * Creates a new data stream that contains the given elements. The elements must all be of the
 * same type, for example, all {@link String} or all {@link Integer}.
 *
 * <p>The framework will try and determine the exact type from the elements. In case of generic
 * elements, it may be necessary to manually supply the type information via
 * {@link #fromCollection(java.util.Collection, org.apache.flink.api.common.typeinfo.TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data stream source, i.e. a data
 * stream source with a degree of parallelism one.
 *
 * @param data
 * 		The array of elements to create the data stream from.
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream representing the given array of elements
 */
@SafeVarargs
public final <OUT> DataStreamSource<OUT> fromElements(OUT... data) {
	if (data.length == 0) {
		throw new IllegalArgumentException("fromElements needs at least one element as argument");
	}

	TypeInformation<OUT> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(data[0]);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
				+ "; please specify the TypeInformation manually via "
				+ "StreamExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}
	return fromCollection(Arrays.asList(data), typeInfo);
}
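A caller-side sketch of fromElements (values illustrative); all elements must be of the same type, which is extracted from the first one:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

DataStreamSource<String> lines = env.fromElements("to", "be", "or", "not", "to", "be");

lines.print();
env.execute("fromElements example");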
 
Example #5
Source File: ElasticsearchSinkTestBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	Map<String, String> userConfig = new HashMap<>();
	userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	userConfig.put("cluster.name", "invalid-cluster-name");

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// test passes
		return;
	}

	fail();
}
 
Example #6
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception{
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("zookeeper.connect", "localhost:2181");
    props.put("group.id", "metric-group");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  // key deserializer
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  // value deserializer
    props.put("auto.offset.reset", "latest");  // start from the latest offset when there is no committed offset

    DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
            "metric",  // Kafka topic
            new SimpleStringSchema(),  // String deserialization schema
            props)).setParallelism(1);

    dataStreamSource.print(); // print the data read from Kafka to the console

    env.execute("Flink add data source");
}
 
Example #7
Source File: CassandraTupleSinkExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<String, Integer>> source = env.fromCollection(collection);

    CassandraSink.addSink(source)
            .setQuery(INSERT)
            .setClusterBuilder(new ClusterBuilder() {
                @Override
                protected Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    env.execute("WriteTupleIntoCassandra");
}
 
Example #8
Source File: AsyncWaitOperatorBenchmark.java    From flink-benchmarks with Apache License 2.0
private DataStream<Long> createAsyncOperator(DataStreamSource<Long> source) {
	switch (outputMode) {
		case ORDERED:
			return AsyncDataStream.orderedWait(
					source,
					new BenchmarkAsyncFunctionExecutor(),
					0,
					TimeUnit.MILLISECONDS);
		case UNORDERED:
			return AsyncDataStream.unorderedWait(
					source,
					new BenchmarkAsyncFunctionExecutor(),
					0,
					TimeUnit.MILLISECONDS);
		default:
			throw new UnsupportedOperationException("Unknown mode");
	}
}
 
Example #9
Source File: StreamExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new data stream that contains the given elements. The framework will determine the type according to
 * the base type supplied by the user. The elements must be of that base type or a subclass of it.
 * The sequence of elements must not be empty.
 * Note that this operation will result in a non-parallel data stream source, i.e. a data stream source with a
 * degree of parallelism one.
 *
 * @param type
 * 		The base class of the elements in the array.
 * @param data
 * 		The array of elements to create the data stream from.
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream representing the given array of elements
 */
@SafeVarargs
public final <OUT> DataStreamSource<OUT> fromElements(Class<OUT> type, OUT... data) {
	if (data.length == 0) {
		throw new IllegalArgumentException("fromElements needs at least one element as argument");
	}

	TypeInformation<OUT> typeInfo;
	try {
		typeInfo = TypeExtractor.getForClass(type);
	}
	catch (Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
				+ "; please specify the TypeInformation manually via "
				+ "StreamExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}
	return fromCollection(Arrays.asList(data), typeInfo);
}
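This base-type variant is useful when the elements are different subclasses of a common type, which would trip up extraction from the first element alone; a hedged caller-side sketch (values illustrative):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Integer, Long and Double all extend Number, so the base type is given explicitly
DataStreamSource<Number> mixed = env.fromElements(Number.class, 1, 2L, 3.0);

mixed.print();
env.execute("fromElements with explicit base type");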
 
Example #10
Source File: StreamExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a data stream from the given non-empty collection.
 *
 * <p>Note that this operation will result in a non-parallel data stream source,
 * i.e., a data stream source with parallelism one.
 *
 * @param data
 * 		The collection of elements to create the data stream from
 * @param typeInfo
 * 		The TypeInformation for the produced data stream
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream representing the given collection
 */
public <OUT> DataStreamSource<OUT> fromCollection(Collection<OUT> data, TypeInformation<OUT> typeInfo) {
	Preconditions.checkNotNull(data, "Collection must not be null");

	// must not have null elements and mixed elements
	FromElementsFunction.checkCollection(data, typeInfo.getTypeClass());

	SourceFunction<OUT> function;
	try {
		function = new FromElementsFunction<>(typeInfo.createSerializer(getConfig()), data);
	}
	catch (IOException e) {
		throw new RuntimeException(e.getMessage(), e);
	}
	return addSource(function, "Collection Source", typeInfo).setParallelism(1);
}
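When the element type is generic (for example a Tuple2), extraction from a single object cannot recover the type parameters, so the TypeInformation is supplied explicitly; a minimal caller-side sketch (values illustrative):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

List<Tuple2<String, Integer>> input = Arrays.asList(Tuple2.of("a", 1), Tuple2.of("b", 2));

DataStreamSource<Tuple2<String, Integer>> pairs = env.fromCollection(
        input,
        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

pairs.print();
env.execute("fromCollection with explicit TypeInformation");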
 
Example #11
Source File: InputBenchmark.java    From flink-benchmarks with Apache License 2.0
@Benchmark
public void mapRebalanceMapSink(FlinkEnvironmentContext context) throws Exception {

	StreamExecutionEnvironment env = context.env;
	env.enableCheckpointing(CHECKPOINT_INTERVAL_MS);
	env.setParallelism(1);

	DataStreamSource<Long> source = env.addSource(new LongSource(RECORDS_PER_INVOCATION));
	source
		.map(new MultiplyByTwo())
		.rebalance()
		.map((Long in) -> in)
		.addSink(new DiscardingSink<>());

	env.execute();
}
 
Example #12
Source File: StreamTableEnvironmentImplTest.java    From flink with Apache License 2.0
@Test
public void testRetractStreamDoesNotOverwriteTableConfig() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStreamSource<Integer> elements = env.fromElements(1, 2, 3);

	StreamTableEnvironmentImpl tEnv = getStreamTableEnvironment(env, elements);

	Time minRetention = Time.minutes(1);
	Time maxRetention = Time.minutes(10);
	tEnv.getConfig().setIdleStateRetentionTime(minRetention, maxRetention);
	Table table = tEnv.fromDataStream(elements);
	tEnv.toRetractStream(table, Row.class);

	assertThat(
		tEnv.getConfig().getMinIdleStateRetentionTime(),
		equalTo(minRetention.toMilliseconds()));
	assertThat(
		tEnv.getConfig().getMaxIdleStateRetentionTime(),
		equalTo(maxRetention.toMilliseconds()));
}
 
Example #13
Source File: ActiveMQConnectorITCase.java    From bahir-flink with Apache License 2.0
private void createProducerTopology(StreamExecutionEnvironment env, AMQSinkConfig<String> config) {
    DataStreamSource<String> stream = env.addSource(new SourceFunction<String>() {
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            for (int i = 0; i < MESSAGES_NUM; i++) {
                ctx.collect("amq-" + i);
            }
        }

        @Override
        public void cancel() {}
    });


    AMQSink<String> sink = new AMQSink<>(config);
    stream.addSink(sink);
}
 
Example #14
Source File: DataStreamTestEnvironment.java    From flink-spector with Apache License 2.0
/**
 * Creates a data stream from the given non-empty collection. The type of the data stream is that of the elements in the collection.
 *
 * <p>The framework will try and determine the exact type from the collection elements. In case of generic
 * elements, it may be necessary to manually supply the type information via {@link #fromCollection(java.util.Collection,
 * org.apache.flink.api.common.typeinfo.TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data stream source, i.e. a data stream source with
 * parallelism one.
 *
 * @param <OUT>        The generic type of the returned data stream.
 * @param data         The collection of elements to create the data stream from.
 * @param flushWindows Specifies whether open windows should be flushed on termination.
 * @return The data stream representing the given collection
 */
public <OUT> DataStreamSource<OUT> fromCollectionWithTimestamp(Collection<StreamRecord<OUT>> data, Boolean flushWindows) {
	Preconditions.checkNotNull(data, "Collection must not be null");
	if(data.isEmpty()) {
		throw new IllegalArgumentException("Collection must not be empty");
	}

	StreamRecord<OUT> first = data.iterator().next();
	if(first == null) {
		throw new IllegalArgumentException("Collection must not contain null elements");
	}

	TypeInformation<OUT> typeInfo;
	try {
		typeInfo = TypeExtractor.getForObject(first.getValue());
	}
	catch(Exception e) {
		throw new RuntimeException("Could not create TypeInformation for type " + first.getValue().getClass()
				+ "; please specify the TypeInformation manually via "
				+ "StreamExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
	}
	return fromCollectionWithTimestamp(data, typeInfo, flushWindows);
}
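A hedged caller-side sketch, assuming a flink-spector DataStreamTestEnvironment named testEnv is already in scope; each element is wrapped in a StreamRecord that carries its event timestamp:

List<StreamRecord<String>> records = Arrays.asList(
        new StreamRecord<>("first", 1L),
        new StreamRecord<>("second", 2L));

// flushWindows = true flushes any still-open windows when the bounded source terminates
DataStreamSource<String> timed = testEnv.fromCollectionWithTimestamp(records, true);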
 
Example #15
Source File: Sink2ES6Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(parameterTool.get(ELASTICSEARCH_HOSTS));
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 5);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);

    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, data,
            (MetricEvent metric, RuntimeContext runtimeContext, RequestIndexer requestIndexer) -> {
                requestIndexer.add(Requests.indexRequest()
                        .index(ZHISHENG + "_" + metric.getName())
                        .type(ZHISHENG)
                        .source(GsonUtil.toJSONBytes(metric), XContentType.JSON));
            },
            parameterTool);
    env.execute("flink learning connectors es6");
}
 
Example #16
Source File: KuduSinkTest.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());

    // submit the job; without this call the source and sink never run
    env.execute("flink learning connectors kudu");
}
 
Example #17
Source File: CassandraTupleSinkExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<String, Integer>> source = env.fromCollection(collection);

    CassandraSink.addSink(source)
            .setQuery(INSERT)
            .setClusterBuilder(new ClusterBuilder() {
                @Override
                protected Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    env.execute("WriteTupleIntoCassandra");
}
 
Example #18
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the one used by the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
 
Example #19
Source File: CassandraPojoSinkExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Message> source = env.fromCollection(messages);

    CassandraSink.addSink(source)
            .setClusterBuilder(new ClusterBuilder() {
                @Override
                protected Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .setMapperOptions(() -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)})
            .build();

    env.execute("Cassandra Sink example");
}
 
Example #20
Source File: SlidingWindow.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // build the input data
        List<Tuple2<String, Long>> data = new ArrayList<>();
        Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
        Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
        data.add(a);
        data.add(b);
        DataStreamSource<Tuple2<String, Long>> input = env.fromCollection(data);

        // processing-time sliding window: 10 s window size, sliding every 1 s
        input.keyBy(x -> x.f1)
                .timeWindow(Time.seconds(10), Time.seconds(1))
                .reduce(new MyWindowFunction());

        env.execute();
    }
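MyWindowFunction is not shown in this snippet; a hypothetical reduce function it might stand for, purely for illustration, keeping one element per window:

public static class MyWindowFunction implements ReduceFunction<Tuple2<String, Long>> {
    @Override
    public Tuple2<String, Long> reduce(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        // keep the element whose text field sorts first within the window
        return a.f0.compareTo(b.f0) <= 0 ? a : b;
    }
}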
 
Example #21
Source File: Sink2ES6Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<HttpHost> esAddresses = ESSinkUtil.getEsAddresses(parameterTool.get(ELASTICSEARCH_HOSTS));
    int bulkSize = parameterTool.getInt(ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
    int sinkParallelism = parameterTool.getInt(STREAM_SINK_PARALLELISM, 5);

    log.info("-----esAddresses = {}, parameterTool = {}, ", esAddresses, parameterTool);

    ESSinkUtil.addSink(esAddresses, bulkSize, sinkParallelism, data,
            (MetricEvent metric, RuntimeContext runtimeContext, RequestIndexer requestIndexer) -> {
                requestIndexer.add(Requests.indexRequest()
                        .index(ZHISHENG + "_" + metric.getName())
                        .type(ZHISHENG)
                        .source(GsonUtil.toJSONBytes(metric), XContentType.JSON));
            },
            parameterTool);
    env.execute("flink learning connectors es6");
}
 
Example #22
Source File: HiveTableSourceITCase.java    From flink with Apache License 2.0
@Override
public DataStream<RowData> getDataStream(StreamExecutionEnvironment execEnv) {
	DataStreamSource<RowData> dataStream = (DataStreamSource<RowData>) super.getDataStream(execEnv);
	int parallelism = dataStream.getTransformation().getParallelism();
	assertEquals(inferParallelism ? 1 : 2, parallelism);
	return dataStream;
}
 
Example #23
Source File: CombinedWindowFunctionDemo.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // build the input data
        List<Tuple2<Long, Long>> data = new ArrayList<>();
        Tuple2<Long, Long> a = new Tuple2<>(1L, 1L);
        Tuple2<Long, Long> b = new Tuple2<>(3L, 1L);
        data.add(a);
        data.add(b);
        DataStreamSource<Tuple2<Long, Long>> input = env.fromCollection(data);


        input.keyBy(x -> x.f1)
                .timeWindow(Time.seconds(10), Time.seconds(1))
                // the first function is a ReduceFunction that keeps the window's minimum element
                .reduce((r1, r2) -> {
                    return r1.f0 < r2.f0 ? r1 : r2;
                    // the second function is a ProcessWindowFunction that exposes the window's time metadata
                }, new ProcessWindowFunction<Tuple2<Long, Long>, String, Long, TimeWindow>() {
                    @Override
                    public void process(Long aLong, Context context, Iterable<Tuple2<Long, Long>> elements, Collector<String> out) throws Exception {
                        out.collect("window: " + context.window());
                    }
                }).print();

        env.execute();
    }
 
Example #24
Source File: FilterEvent.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);  // get the full metric stream from Kafka
    SingleOutputStreamOperator<MetricEvent> machineData = data.filter(m -> "machine".equals(m.getTags().get("type")));  // keep only machine metrics
    SingleOutputStreamOperator<MetricEvent> dockerData = data.filter(m -> "docker".equals(m.getTags().get("type")));    // keep only container (Docker) metrics
    SingleOutputStreamOperator<MetricEvent> applicationData = data.filter(m -> "application".equals(m.getTags().get("type")));  // keep only application metrics
    SingleOutputStreamOperator<MetricEvent> middlewareData = data.filter(m -> "middleware".equals(m.getTags().get("type")));    // keep only middleware metrics

}
 
Example #25
Source File: StreamingWCJava02App.java    From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    // parse the port argument
    int port = 0;
    try {
        ParameterTool tool = ParameterTool.fromArgs(args);
        port = tool.getInt("port");
    } catch (Exception e) {
        System.err.println("Port undefined, use 9999.");
        port = 9999;
    }
    
    // get the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // read data from the socket, using the port parsed above
    DataStreamSource<String> dataStreamSource = env.socketTextStream("localhost", port);

    // apply the transformations
    dataStreamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) throws Exception {
            String[] tokens = value.toLowerCase().split(",");
            for (String token : tokens) {
                if (token.length() > 0) {
                    collector.collect(new Tuple2<String, Integer>(token, 1));
                }
            }
        }
    }).keyBy(0)
    .timeWindow(Time.seconds(5))
    .sum(1)
    .print()
    .setParallelism(1);

    env.execute("StreamingWCJavaApp");
}
 
Example #26
Source File: RedisSinkITCase.java    From bahir-flink with Apache License 2.0
@Test
public void testRedisSetDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
        new RedisCommandMapper(RedisCommand.SADD));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type");

    assertEquals(NUM_ELEMENTS, jedis.scard(REDIS_KEY));

    jedis.del(REDIS_KEY);
}
 
Example #27
Source File: KafkaShortRetentionTestBase.java    From flink with Apache License 2.0
/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	try {
		env.execute("Test auto offset reset none");
	} catch (Throwable e) {
		// check if correct exception has been thrown
		if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset")  // kafka 0.8
			&& !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
				) {
			throw e;
		}
	}

	kafkaServer.deleteTestTopic(topic);
}
 
Example #28
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        DataStreamSource<String> data = env.readTextFile("file:///usr/local/blink-1.5.1/README.txt");
        data.print();

        // both path formats work; writing to HDFS is also supported
//        data.writeAsText("file:///usr/local/blink-1.5.1/README1.txt");
        data.writeAsText("/usr/local/blink-1.5.1/README1.txt");

        env.execute();
    }
 
Example #29
Source File: StreamExecutionEnvironmentTest.java    From flink with Apache License 2.0
@SuppressWarnings("unchecked")
private static <T> SourceFunction<T> getFunctionFromDataSource(DataStreamSource<T> dataStreamSource) {
	dataStreamSource.addSink(new DiscardingSink<T>());
	AbstractUdfStreamOperator<?, ?> operator =
			(AbstractUdfStreamOperator<?, ?>) getOperatorFromDataStream(dataStreamSource);
	return (SourceFunction<T>) operator.getUserFunction();
}
 
Example #30
Source File: KafkaConfigUtil.java    From flink-learning with Apache License 2.0
/**
 * @param env   the stream execution environment
 * @param topic the Kafka topic to consume
 * @param time  the timestamp (in milliseconds) to start consuming from; 0 means "do not reset offsets"
 * @return a DataStreamSource of MetricEvent read from Kafka
 * @throws IllegalAccessException
 */
public static DataStreamSource<MetricEvent> buildSource(StreamExecutionEnvironment env, String topic, Long time) throws IllegalAccessException {
    ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters();
    Properties props = buildKafkaProps(parameterTool);
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            topic,
            new MetricSchema(),
            props);
    // reset the offsets to the given time
    if (time != 0L) {
        Map<KafkaTopicPartition, Long> partitionOffset = buildOffsetByTime(props, parameterTool, time);
        consumer.setStartFromSpecificOffsets(partitionOffset);
    }
    return env.addSource(consumer);
}
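A hedged sketch of calling this helper from a main method that declares throws Exception (the topic name and look-back window are illustrative; ExecutionEnvUtil, MetricEvent and parameterTool belong to the same flink-learning project):

StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);

// replay the topic starting one hour in the past
long oneHourAgo = System.currentTimeMillis() - 60 * 60 * 1000L;
DataStreamSource<MetricEvent> metrics = KafkaConfigUtil.buildSource(env, "metrics", oneHourAgo);

metrics.print();
env.execute("replay metrics from Kafka");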