org.apache.flink.api.common.serialization.SimpleStringSchema Java Examples

The following examples show how to use org.apache.flink.api.common.serialization.SimpleStringSchema. They are taken from open-source projects; each example notes its source file and originating project.
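Before the project examples, a minimal, self-contained sketch of the schema itself may help: SimpleStringSchema converts between Java strings and byte arrays, defaulting to UTF-8 unless another Charset is passed to the constructor, and it never signals end-of-stream. The demo class below is hypothetical; only the SimpleStringSchema API it calls comes from Flink.

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaDemo {
	public static void main(String[] args) throws Exception {
		// Defaults to UTF-8; any Charset can be passed to the constructor.
		SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

		// serialize: String -> byte[] (the sink/producer direction)
		byte[] bytes = schema.serialize("hello flink");

		// deserialize: byte[] -> String (the source/consumer direction)
		String roundTripped = schema.deserialize(bytes);
		System.out.println(roundTripped); // prints "hello flink"

		// SimpleStringSchema never marks an element as end-of-stream.
		System.out.println(schema.isEndOfStream(roundTripped)); // prints "false"
	}
}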
Example #1
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;

        // The hard-coded parameters below could be placed in a config file and then fetched via parameterTool
        final RMQConnectionConfig connectionConfig = new RMQConnectionConfig
                .Builder().setHost("localhost").setVirtualHost("/")
                .setPort(5672).setUserName("admin").setPassword("admin")
                .build();

        DataStreamSource<String> zhisheng = env.addSource(new RMQSource<>(connectionConfig,
                "zhisheng",
                true,
                new SimpleStringSchema()))
                .setParallelism(1);
        zhisheng.print();

        // To guarantee exactly-once or at-least-once semantics, checkpointing must be enabled
//        env.enableCheckpointing(10000);
        env.execute("flink learning connectors rabbitmq");
    }
 
Example #2
Source File: FlinkPulsarITest.java    From pulsar-flink with Apache License 2.0
@Test
public void testRunFailedOnWrongServiceUrl() {

    try {
        Properties props = MapUtils.toProperties(Collections.singletonMap(TOPIC_SINGLE_OPTION_KEY, "tp"));

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        FlinkPulsarSource<String> source =
                new FlinkPulsarSource<String>("sev", "admin", new SimpleStringSchema(), props).setStartFromEarliest();

        DataStream<String> stream = see.addSource(source);
        stream.print();
        see.execute("wrong service url");
    } catch (Exception e) {
        final Optional<Throwable> optionalThrowable = ExceptionUtils.findThrowableWithMessage(e, "authority component is missing");
        assertTrue(optionalThrowable.isPresent());
        assertTrue(optionalThrowable.get() instanceof PulsarClientException);
    }
}
 
Example #3
Source File: ConsumeFromKinesis.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example #4
Source File: KafkaShortRetentionTestBase.java    From Flink-CEPplus with Apache License 2.0
public void runFailOnAutoOffsetResetNoneEager() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception

	try {
		kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
		fail("should fail with an exception");
	}
	catch (IllegalArgumentException e) {
		// expected
		assertTrue(e.getMessage().contains("none"));
	}

	kafkaServer.deleteTestTopic(topic);
}
 
Example #5
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the one used by the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
 
Example #6
Source File: KinesisConsumerMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    env.execute();
}
 
Example #7
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// make sure that all callbacks have not been completed
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}
 
Example #8
Source File: ConsumeFromKinesis.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example #9
Source File: EmulatedPubSubSinkTest.java    From flink with Apache License 2.0
@Test(expected = Exception.class)
public void testPubSubSinkThrowsExceptionOnFailure() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(100);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());

	// Create test stream
	//use source function to prevent the job from shutting down before a checkpoint has been made
	env.addSource(new SingleInputSourceFunction())

		.map((MapFunction<String, String>) StringUtils::reverse)
		.addSink(PubSubSink.newBuilder()
							.withSerializationSchema(new SimpleStringSchema())
							.withProjectName(PROJECT_NAME)
							.withTopicName(TOPIC_NAME)
							// Specific for emulator
							.withHostAndPortForEmulator("unknown-host-to-force-sink-crash:1234")
							.withCredentials(NoCredentials.getInstance())
							.build()).name("PubSub sink");

	// Run
	env.execute();
}
 
Example #10
Source File: ConsumeFromDynamoDBStreams.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties dynamodbStreamsConsumerConfig = new Properties();
	final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
			streamName,
			new SimpleStringSchema(),
			dynamodbStreamsConsumerConfig));

	dynamodbStreams.print();

	see.execute();
}
 
Example #11
Source File: KafkaConsumer08Test.java    From flink with Apache License 2.0
@Test
public void testCreateSourceWithoutCluster() {
	try {
		Properties props = new Properties();
		props.setProperty("zookeeper.connect", "localhost:56794");
		props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
		props.setProperty("group.id", "non-existent-group");
		props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	}
	catch (Exception e) {
		assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
	}
}
 
Example #12
Source File: KafkaConsumer08Test.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCreateSourceWithoutCluster() {
	try {
		Properties props = new Properties();
		props.setProperty("zookeeper.connect", "localhost:56794");
		props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
		props.setProperty("group.id", "non-existent-group");
		props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	}
	catch (Exception e) {
		assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
	}
}
 
Example #13
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the constructor defaults missing key/value serializers in the config to the byte-array serializer.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set missing key value deserializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
 
Example #14
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Tests that the partitions list is deterministic and correctly provided to the custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
 
Example #15
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, the exception is rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #16
Source File: FlinkKinesisProducerTest.java    From flink with Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, the exception is rethrown.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKinesisProducer<String> producer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	producer.getPendingRecordFutures().get(0).setException(new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(e, "artificial async exception").isPresent());

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #17
Source File: FlinkKafkaProducerBaseTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
 
Example #18
Source File: ShardConsumerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	new ShardConsumer<>(
		fetcher,
		shardIndex,
		subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example #19
Source File: ShardConsumerTest.java    From flink with Apache License 2.0
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	new ShardConsumer<>(
		fetcher,
		shardIndex,
		subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example #20
Source File: FlinkKinesisProducerTest.java    From flink with Apache License 2.0
@Test
public void testConfigureWithNonSerializableCustomPartitionerFails() {
	exception.expect(IllegalArgumentException.class);
	exception.expectMessage("The provided custom partitioner is not serializable");

	new FlinkKinesisProducer<>(new SimpleStringSchema(), TestUtils.getStandardProperties())
		.setCustomPartitioner(new NonSerializableCustomPartitioner());
}
 
Example #21
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the one used by the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the original blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
 
Example #22
Source File: Main.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
        Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

        SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
                parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the one used by the utility class above
                new SimpleStringSchema(),
                props))
                .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize JSON
                .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                    @Override
                    public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                        // collect the product's id and price fields
                        out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                    }
                });
//        product.print();

        // single Redis instance
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
        product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper()));

        // The Redis host info would normally come from a config file
        // Redis cluster
/*        FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
                .setNodes(new HashSet<InetSocketAddress>(
                        Arrays.asList(new InetSocketAddress("redis1", 6379)))).build();*/

        //Redis Sentinels
/*        FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
                .setMasterName("master")
                .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
                .setPassword("")
                .setDatabase(1).build();*/

        env.execute("flink redis connector");
    }
 
Example #23
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, the exception is rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #24
Source File: FlinkKafkaProducerBaseTest.java    From flink with Apache License 2.0
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, the exception is rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example #25
Source File: TuningKeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the log's primary-key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example #26
Source File: KeyedStateDeduplication.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // spinning-disk + memory tuning profile; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint interval of 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }
 
Example #27
Source File: KafkaShortRetentionTestBase.java    From flink with Apache License 2.0
/**
 * Ensures that the consumer fails properly if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
	env.getConfig().disableSysoutLogging();

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
	FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

	DataStreamSource<String> consuming = env.addSource(source);
	consuming.addSink(new DiscardingSink<String>());

	try {
		env.execute("Test auto offset reset none");
	} catch (Throwable e) {
		// check if correct exception has been thrown
		if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset")  // kafka 0.8
			&& !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
				) {
			throw e;
		}
	}

	kafkaServer.deleteTestTopic(topic);
}
 
Example #28
Source File: KafkaConsumer08Test.java    From flink with Apache License 2.0
@Test
public void testAllBoostrapServerHostsAreInvalid() {
	try {
		String unknownHost = "foobar:11111";

		URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

		PowerMockito.mockStatic(InetAddress.class);
		when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

		String zookeeperConnect = "localhost:56794";
		String groupId = "non-existent-group";
		Properties props = createKafkaProps(zookeeperConnect, unknownHost, groupId);

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	} catch (Exception expected) {
		assertTrue("Exception should be thrown containing 'all bootstrap servers invalid' message!",
				expected.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
						+ "' config are invalid"));
	}
}
 
Example #29
Source File: KafkaConsumer08Test.java    From flink with Apache License 2.0
@Test
public void testAtLeastOneBootstrapServerHostIsValid() throws Exception {
	try {
		String zookeeperConnect = "localhost:56794";
		String unknownHost = "foobar:11111";
		// we declare one valid bootstrap server, namely the one with 'localhost'
		String bootstrapServers = unknownHost + ", localhost:22222";

		URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

		PowerMockito.mockStatic(InetAddress.class);
		when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

		String groupId = "non-existent-group";
		Properties props = createKafkaProps(zookeeperConnect, bootstrapServers, groupId);
		DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer(
			"no op topic",
			new SimpleStringSchema(),
			props);
		consumer.open(new Configuration());

		// no exception should be thrown, because we have one valid bootstrap server; test passes if we reach here
	} catch (Exception e) {
		assertFalse("No exception should be thrown containing 'all bootstrap servers invalid' message!",
			e.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
				+ "' config are invalid"));
	}
}
 
Example #30
Source File: EmulatedPubSubSinkTest.java    From flink with Apache License 2.0
@Test
public void testFlinkSink() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	List<String> input = Arrays.asList("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eigth", "Nine", "Ten");

	// Create test stream
	DataStream<String> theData = env
		.fromCollection(input)
		.name("Test input")
		.map((MapFunction<String, String>) StringUtils::reverse);

	// Sink into pubsub
	theData
		.addSink(PubSubSink.newBuilder()
							.withSerializationSchema(new SimpleStringSchema())
							.withProjectName(PROJECT_NAME)
							.withTopicName(TOPIC_NAME)
							// Specific for emulator
							.withHostAndPortForEmulator(getPubSubHostPort())
							.withCredentials(NoCredentials.getInstance())
							.build())
		.name("PubSub sink");

	// Run
	env.execute();

	// Now get the result from PubSub and verify if everything is there
	List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, SUBSCRIPTION_NAME, 100);

	assertEquals("Wrong number of elements", input.size(), receivedMessages.size());

	// Check output strings
	List<String> output = new ArrayList<>();
	receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));

	for (String test : input) {
		assertTrue("Missing " + test, output.contains(StringUtils.reverse(test)));
	}
}