Java Code Examples for org.apache.flink.streaming.api.datastream.DataStreamSource#addSink()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStreamSource#addSink() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	Map<String, String> userConfig = new HashMap<>();
	userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	userConfig.put("cluster.name", "invalid-cluster-name");

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// test passes
		return;
	}

	fail();
}
 
Example 2
@Test
public void testGetStreamGraph() {
	try {
		TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO;
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStreamSource<Integer> dataStream1 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream1.addSink(new DiscardingSink<Integer>());
		assertEquals(2, env.getStreamGraph().getStreamNodes().size());

		DataStreamSource<Integer> dataStream2 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream2.addSink(new DiscardingSink<Integer>());
		assertEquals(2, env.getStreamGraph().getStreamNodes().size());

		DataStreamSource<Integer> dataStream3 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream3.addSink(new DiscardingSink<Integer>());
		// Does not clear the transformations.
		env.getExecutionPlan();
		DataStreamSource<Integer> dataStream4 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream4.addSink(new DiscardingSink<Integer>());
		assertEquals(4, env.getStreamGraph("TestJob").getStreamNodes().size());
	} catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 3
Source Project: flink   File: ElasticsearchSinkTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			SourceSinkDataTestKit.getJsonSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// every ES version throws a different exception in case of timeouts, so don't bother asserting on the exception
		// test passes
		return;
	}

	fail();
}
 
Example 4
private void createProducerTopology(StreamExecutionEnvironment env, AMQSinkConfig<String> config) {
    DataStreamSource<String> stream = env.addSource(new SourceFunction<String>() {
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            for (int i = 0; i < MESSAGES_NUM; i++) {
                ctx.collect("amq-" + i);
            }
        }

        @Override
        public void cancel() {}
    });


    AMQSink<String> sink = new AMQSink<>(config);
    stream.addSink(sink);
}
 
Example 5
Source Project: flink-learning   File: Main.java    License: Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    //请将下面的这些字段弄成常量
    InfluxDBConfig config = InfluxDBConfig.builder()
            .url(parameterTool.get("influxdb.url"))
            .username(parameterTool.get("influxdb.username"))
            .password(parameterTool.get("influxdb.password"))
            .database(parameterTool.get("influxdb.database"))
            .batchActions(parameterTool.getInt("influxdb.batchActions"))
            .flushDuration(parameterTool.getInt("influxdb.flushDuration"))
            .enableGzip(parameterTool.getBoolean("influxdb.enableGzip"))
            .createDatabase(parameterTool.getBoolean("influxdb.createDatabase"))
            .build();

    data.addSink(new InfluxDBSink(config));

    env.execute("flink InfluxDB connector");
}
 
Example 6
Source Project: flink-learning   File: KuduSinkTest.java    License: Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());
}
 
Example 7
Source Project: flink   File: ElasticsearchSinkTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	Map<String, String> userConfig = new HashMap<>();
	userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	userConfig.put("cluster.name", "invalid-cluster-name");

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// test passes
		return;
	}

	fail();
}
 
Example 8
Source Project: flink-learning   File: KuduSinkTest.java    License: Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());
}
 
Example 9
@Test
public void testTableSourceUsingDescriptor() throws Exception {
    StreamExecutionEnvironment execEnvWrite = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnvWrite.setParallelism(1);

    Stream stream = Stream.of(SETUP_UTILS.getScope(), "testJsonTableSource1");
    SETUP_UTILS.createTestStream(stream.getStreamName(), 1);

    // read data from the stream using Table reader
    TableSchema tableSchema = TableSchema.builder()
            .field("user", DataTypes.STRING())
            .field("uri", DataTypes.STRING())
            .field("accessTime", DataTypes.TIMESTAMP(3).bridgedTo(Timestamp.class))
            .build();
    TypeInformation<Row> typeInfo = (RowTypeInfo) TypeConversions.fromDataTypeToLegacyInfo(tableSchema.toRowDataType());

    PravegaConfig pravegaConfig = SETUP_UTILS.getPravegaConfig();

    // Write some data to the stream
    DataStreamSource<Row> dataStream = execEnvWrite
            .addSource(new TableEventSource(EVENT_COUNT_PER_SOURCE));

    FlinkPravegaWriter<Row> pravegaSink = FlinkPravegaWriter.<Row>builder()
            .withPravegaConfig(pravegaConfig)
            .forStream(stream)
            .withSerializationSchema(new JsonRowSerializationSchema.Builder(typeInfo).build())
            .withEventRouter((Row event) -> "fixedkey")
            .build();

    dataStream.addSink(pravegaSink);
    Assert.assertNotNull(execEnvWrite.getExecutionPlan());
    execEnvWrite.execute("PopulateRowData");

    testTableSourceStreamingDescriptor(stream, pravegaConfig);
    testTableSourceBatchDescriptor(stream, pravegaConfig);
}
 
Example 10
/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	try {
		env.execute("Test auto offset reset none");
	} catch (Throwable e) {
		// check if correct exception has been thrown
		if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset")  // kafka 0.8
			&& !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
				) {
			throw e;
		}
	}

	kafkaServer.deleteTestTopic(topic);
}
 
Example 11
@Test
public void testWriteThenRead() throws Exception {
    String tp = newTopic();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    DataStreamSource ds = see.fromCollection(fooList);
    ds.addSink(
            new FlinkPulsarSink(
                    serviceUrl, adminUrl, Optional.of(tp), getSinkProperties(), TopicKeyExtractor.NULL,
                    SchemaData.Foo.class));

    see.execute("write first");

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.setParallelism(1);

    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.connect(getPulsarDescriptor(tp))
            .inAppendMode()
            .registerTableSource(tp);

    Table t = tEnv.scan(tp).select("i, f, bar");
    tEnv.toAppendStream(t, t.getSchema().toRowType())
            .map(new FailingIdentityMapper<Row>(fooList.size()))
            .addSink(new SingletonStreamSink.StringSink<>()).setParallelism(1);

    try {
        env.execute("count elements from topics");
    } catch (Exception e) {

    }
    SingletonStreamSink.compareWithList(fooList.subList(0, fooList.size() - 1).stream().map(Objects::toString).collect(Collectors.toList()));
}
 
Example 12
Source Project: bahir-flink   File: RedisSinkITCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testRedisSetDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
        new RedisCommandMapper(RedisCommand.SADD));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type");

    assertEquals(NUM_ELEMENTS, jedis.scard(REDIS_KEY));

    jedis.del(REDIS_KEY);
}
 
Example 13
Source Project: bahir-flink   File: RedisSinkITCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testRedisStringDataTypeWithTTL() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionString());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
            new RedisCommandMapperWithTTL(RedisCommand.SETEX));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type With TTL");

    assertEquals(TEST_MESSAGE_LENGTH, jedis.strlen(REDIS_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_KEY));

    jedis.del(REDIS_KEY);
}
 
Example 14
Source Project: bahir-flink   File: RedisSinkITCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testRedisHyperLogLogDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
        new RedisCommandMapper(RedisCommand.PFADD));

    source.addSink(redisSink);
    env.execute("Test Redis Hyper Log Log Data Type");

    assertEquals(NUM_ELEMENTS, Long.valueOf(jedis.pfcount(REDIS_KEY)));

    jedis.del(REDIS_KEY);
}
 
Example 15
@Test
public void testSources() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
		}

		@Override
		public void cancel() {
		}
	};
	DataStreamSource<Integer> src1 = env.addSource(srcFun);
	src1.addSink(new DiscardingSink<Integer>());
	assertEquals(srcFun, getFunctionFromDataSource(src1));

	List<Long> list = Arrays.asList(0L, 1L, 2L);

	DataStreamSource<Long> src2 = env.generateSequence(0, 2);
	assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

	DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
	assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

	DataStreamSource<Long> src4 = env.fromCollection(list);
	assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}
 
Example 16
@SuppressWarnings("unchecked")
private static <T> SourceFunction<T> getFunctionFromDataSource(DataStreamSource<T> dataStreamSource) {
	dataStreamSource.addSink(new DiscardingSink<T>());
	AbstractUdfStreamOperator<?, ?> operator =
			(AbstractUdfStreamOperator<?, ?>) getOperatorFromDataStream(dataStreamSource);
	return (SourceFunction<T>) operator.getUserFunction();
}
 
Example 17
@Test
public void testSources() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
		}

		@Override
		public void cancel() {
		}
	};
	DataStreamSource<Integer> src1 = env.addSource(srcFun);
	src1.addSink(new DiscardingSink<Integer>());
	assertEquals(srcFun, getFunctionFromDataSource(src1));

	List<Long> list = Arrays.asList(0L, 1L, 2L);

	DataStreamSource<Long> src2 = env.generateSequence(0, 2);
	assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

	DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
	assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

	DataStreamSource<Long> src4 = env.fromCollection(list);
	assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}
 
Example 18
Source Project: bahir-flink   File: RedisSinkITCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testRedisHashDataTypeWithTTLFromOpt() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionHash());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
            new RedisAdditionalTTLMapperFromOpt(RedisCommand.HSET));

    source.addSink(redisSink);
    env.execute("Test Redis Hash Data Type 2");

    assertEquals(NUM_ELEMENTS, jedis.hlen(REDIS_ADDITIONAL_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_ADDITIONAL_KEY));

    jedis.del(REDIS_ADDITIONAL_KEY);
}
 
Example 19
public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}
 
Example 20
public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}