Java Code Examples for org.apache.flink.streaming.api.datastream.DataStreamSource#addSink()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStreamSource#addSink() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ElasticsearchSinkTestBase.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	Map<String, String> userConfig = new HashMap<>();
	userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	userConfig.put("cluster.name", "invalid-cluster-name");

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// test passes
		return;
	}

	fail();
}

Example 2

Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0

6 votes

@Test
public void testGetStreamGraph() {
	try {
		TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO;
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStreamSource<Integer> dataStream1 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream1.addSink(new DiscardingSink<Integer>());
		assertEquals(2, env.getStreamGraph().getStreamNodes().size());

		DataStreamSource<Integer> dataStream2 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream2.addSink(new DiscardingSink<Integer>());
		assertEquals(2, env.getStreamGraph().getStreamNodes().size());

		DataStreamSource<Integer> dataStream3 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream3.addSink(new DiscardingSink<Integer>());
		// Does not clear the transformations.
		env.getExecutionPlan();
		DataStreamSource<Integer> dataStream4 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
		dataStream4.addSink(new DiscardingSink<Integer>());
		assertEquals(4, env.getStreamGraph("TestJob").getStreamNodes().size());
	} catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}

Example 3

Source File: KuduSinkTest.java From flink-learning with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());
}

Example 4

Source File: ElasticsearchSinkTestBase.java From flink with Apache License 2.0

6 votes

/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			SourceSinkDataTestKit.getJsonSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// every ES version throws a different exception in case of timeouts, so don't bother asserting on the exception
		// test passes
		return;
	}

	fail();
}

Example 5

Source File: ActiveMQConnectorITCase.java From bahir-flink with Apache License 2.0

6 votes

private void createProducerTopology(StreamExecutionEnvironment env, AMQSinkConfig<String> config) {
    DataStreamSource<String> stream = env.addSource(new SourceFunction<String>() {
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            for (int i = 0; i < MESSAGES_NUM; i++) {
                ctx.collect("amq-" + i);
            }
        }

        @Override
        public void cancel() {}
    });


    AMQSink<String> sink = new AMQSink<>(config);
    stream.addSink(sink);
}

Example 6

Source File: Main.java From flink-learning with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    //请将下面的这些字段弄成常量
    InfluxDBConfig config = InfluxDBConfig.builder()
            .url(parameterTool.get("influxdb.url"))
            .username(parameterTool.get("influxdb.username"))
            .password(parameterTool.get("influxdb.password"))
            .database(parameterTool.get("influxdb.database"))
            .batchActions(parameterTool.getInt("influxdb.batchActions"))
            .flushDuration(parameterTool.getInt("influxdb.flushDuration"))
            .enableGzip(parameterTool.getBoolean("influxdb.enableGzip"))
            .createDatabase(parameterTool.getBoolean("influxdb.createDatabase"))
            .build();

    data.addSink(new InfluxDBSink(config));

    env.execute("flink InfluxDB connector");
}

Example 7

Source File: KuduSinkTest.java From flink-learning with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());
}

Example 8

Source File: ElasticsearchSinkTestBase.java From flink with Apache License 2.0

6 votes

/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

	Map<String, String> userConfig = new HashMap<>();
	userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	userConfig.put("cluster.name", "invalid-cluster-name");

	source.addSink(createElasticsearchSinkForNode(
			1,
			"invalid-cluster-name",
			new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
			"123.123.123.123")); // incorrect ip address

	try {
		env.execute("Elasticsearch Sink Test");
	} catch (JobExecutionException expectedException) {
		// test passes
		return;
	}

	fail();
}

Example 9

Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0

5 votes

@Test
public void testRedisSetDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
        new RedisCommandMapper(RedisCommand.SADD));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type");

    assertEquals(NUM_ELEMENTS, jedis.scard(REDIS_KEY));

    jedis.del(REDIS_KEY);
}

Example 10

Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0

5 votes

@Test
public void testRedisHashDataTypeWithTTLFromOpt() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionHash());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
            new RedisAdditionalTTLMapperFromOpt(RedisCommand.HSET));

    source.addSink(redisSink);
    env.execute("Test Redis Hash Data Type 2");

    assertEquals(NUM_ELEMENTS, jedis.hlen(REDIS_ADDITIONAL_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_ADDITIONAL_KEY));

    jedis.del(REDIS_ADDITIONAL_KEY);
}

Example 11

Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0

5 votes

@Test
public void testSources() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
		}

		@Override
		public void cancel() {
		}
	};
	DataStreamSource<Integer> src1 = env.addSource(srcFun);
	src1.addSink(new DiscardingSink<Integer>());
	assertEquals(srcFun, getFunctionFromDataSource(src1));

	List<Long> list = Arrays.asList(0L, 1L, 2L);

	DataStreamSource<Long> src2 = env.generateSequence(0, 2);
	assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

	DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
	assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

	DataStreamSource<Long> src4 = env.fromCollection(list);
	assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}

Example 12

Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0

5 votes

@SuppressWarnings("unchecked")
private static <T> SourceFunction<T> getFunctionFromDataSource(DataStreamSource<T> dataStreamSource) {
	dataStreamSource.addSink(new DiscardingSink<T>());
	AbstractUdfStreamOperator<?, ?> operator =
			(AbstractUdfStreamOperator<?, ?>) getOperatorFromDataStream(dataStreamSource);
	return (SourceFunction<T>) operator.getUserFunction();
}

Example 13

Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0

5 votes

@Test
public void testSources() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
		}

		@Override
		public void cancel() {
		}
	};
	DataStreamSource<Integer> src1 = env.addSource(srcFun);
	src1.addSink(new DiscardingSink<Integer>());
	assertEquals(srcFun, getFunctionFromDataSource(src1));

	List<Long> list = Arrays.asList(0L, 1L, 2L);

	DataStreamSource<Long> src2 = env.generateSequence(0, 2);
	assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

	DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
	assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

	DataStreamSource<Long> src4 = env.fromCollection(list);
	assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}

Example 14

Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0

5 votes

@Test
public void testRedisHyperLogLogDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
        new RedisCommandMapper(RedisCommand.PFADD));

    source.addSink(redisSink);
    env.execute("Test Redis Hyper Log Log Data Type");

    assertEquals(NUM_ELEMENTS, Long.valueOf(jedis.pfcount(REDIS_KEY)));

    jedis.del(REDIS_KEY);
}

Example 15

Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0

5 votes

@Test
public void testRedisStringDataTypeWithTTL() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionString());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig,
            new RedisCommandMapperWithTTL(RedisCommand.SETEX));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type With TTL");

    assertEquals(TEST_MESSAGE_LENGTH, jedis.strlen(REDIS_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_KEY));

    jedis.del(REDIS_KEY);
}

Example 16

Source File: FlinkPulsarTableITest.java From pulsar-flink with Apache License 2.0

5 votes

@Test
public void testWriteThenRead() throws Exception {
    String tp = newTopic();
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    DataStreamSource ds = see.fromCollection(fooList);
    ds.addSink(
            new FlinkPulsarSink(
                    serviceUrl, adminUrl, Optional.of(tp), getSinkProperties(), TopicKeyExtractor.NULL,
                    SchemaData.Foo.class));

    see.execute("write first");

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.setParallelism(1);

    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.connect(getPulsarDescriptor(tp))
            .inAppendMode()
            .registerTableSource(tp);

    Table t = tEnv.scan(tp).select("i, f, bar");
    tEnv.toAppendStream(t, t.getSchema().toRowType())
            .map(new FailingIdentityMapper<Row>(fooList.size()))
            .addSink(new SingletonStreamSink.StringSink<>()).setParallelism(1);

    try {
        env.execute("count elements from topics");
    } catch (Exception e) {

    }
    SingletonStreamSink.compareWithList(fooList.subList(0, fooList.size() - 1).stream().map(Objects::toString).collect(Collectors.toList()));
}

Example 17

Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	try {
		env.execute("Test auto offset reset none");
	} catch (Throwable e) {
		// check if correct exception has been thrown
		if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset")  // kafka 0.8
			&& !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
				) {
			throw e;
		}
	}

	kafkaServer.deleteTestTopic(topic);
}

Example 18

Source File: FlinkPravegaTableITCase.java From flink-connectors with Apache License 2.0

5 votes

@Test
public void testTableSourceUsingDescriptor() throws Exception {
    StreamExecutionEnvironment execEnvWrite = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnvWrite.setParallelism(1);

    Stream stream = Stream.of(SETUP_UTILS.getScope(), "testJsonTableSource1");
    SETUP_UTILS.createTestStream(stream.getStreamName(), 1);

    // read data from the stream using Table reader
    TableSchema tableSchema = TableSchema.builder()
            .field("user", DataTypes.STRING())
            .field("uri", DataTypes.STRING())
            .field("accessTime", DataTypes.TIMESTAMP(3).bridgedTo(Timestamp.class))
            .build();
    TypeInformation<Row> typeInfo = (RowTypeInfo) TypeConversions.fromDataTypeToLegacyInfo(tableSchema.toRowDataType());

    PravegaConfig pravegaConfig = SETUP_UTILS.getPravegaConfig();

    // Write some data to the stream
    DataStreamSource<Row> dataStream = execEnvWrite
            .addSource(new TableEventSource(EVENT_COUNT_PER_SOURCE));

    FlinkPravegaWriter<Row> pravegaSink = FlinkPravegaWriter.<Row>builder()
            .withPravegaConfig(pravegaConfig)
            .forStream(stream)
            .withSerializationSchema(new JsonRowSerializationSchema.Builder(typeInfo).build())
            .withEventRouter((Row event) -> "fixedkey")
            .build();

    dataStream.addSink(pravegaSink);
    Assert.assertNotNull(execEnvWrite.getExecutionPlan());
    execEnvWrite.execute("PopulateRowData");

    testTableSourceStreamingDescriptor(stream, pravegaConfig);
    testTableSourceBatchDescriptor(stream, pravegaConfig);
}

Example 19

Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0

4 votes

public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}

Example 20

Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0

4 votes

public void runAutoOffsetResetTest() throws Exception {
	final String topic = "auto-offset-reset-test";

	final int parallelism = 1;
	final int elementsPerPartition = 50000;

	Properties tprops = new Properties();
	tprops.setProperty("retention.ms", "250");
	kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

	// ----------- add producer dataflow ----------

	DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<String> ctx) throws InterruptedException {
			int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
			int limit = cnt + elementsPerPartition;

			while (running && !stopProducer && cnt < limit) {
				ctx.collect("element-" + cnt);
				cnt++;
				Thread.sleep(10);
			}
			LOG.info("Stopping producer");
		}

		@Override
		public void cancel() {
			running = false;
		}
	});
	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);
	kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

	// ----------- add consumer dataflow ----------

	NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	tryExecute(env, "run auto offset reset test");

	kafkaServer.deleteTestTopic(topic);
}