Java Code Examples for org.apache.flink.api.common.serialization.SimpleStringSchema

The following examples show how to use org.apache.flink.api.common.serialization.SimpleStringSchema. These examples are extracted from open source projects; the source project, source file, and license are listed above each example.
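Before the connector-specific examples, it helps to see what SimpleStringSchema does in isolation: it is a trivial (de)serialization schema that converts between Java Strings and their byte representation (UTF-8 by default, or a charset passed to the constructor). Below is a minimal standalone sketch; the class name SimpleStringSchemaDemo and the sample string are illustrative only.

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaDemo {

	public static void main(String[] args) {
		// UTF-8 is the default; another charset can be passed explicitly
		SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

		byte[] bytes = schema.serialize("hello flink");  // String -> byte[]
		String text = schema.deserialize(bytes);         // byte[] -> String

		System.out.println(text);                        // hello flink
		// SimpleStringSchema never marks an element as end-of-stream
		System.out.println(schema.isEndOfStream(text));  // false
	}
}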
Example 1
Source Project: Flink-CEPplus   Source File: KafkaConsumer08Test.java    License: Apache License 2.0
@Test
public void testCreateSourceWithoutCluster() {
	try {
		Properties props = new Properties();
		props.setProperty("zookeeper.connect", "localhost:56794");
		props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
		props.setProperty("group.id", "non-existent-group");
		props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	}
	catch (Exception e) {
		assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
	}
}
 
Example 2
Source Project: Flink-CEPplus   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * This test is meant to assure that testAtLeastOnceProducer is valid by testing that if flushing is disabled,
 * the snapshot method does indeed finish without waiting for pending records;
 * we set a timeout because the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testDoesNotWaitForPendingRecordsIfFlushingDisabled() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
	producer.setFlushOnCheckpoint(false);

	final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();

	final OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg"));

	// make sure that all callbacks have not been completed
	verify(mockProducer, times(1)).send(any(ProducerRecord.class), any(Callback.class));

	// should return even if there are pending records
	testHarness.snapshot(123L, 123L);

	testHarness.close();
}
 
Example 3
Source Project: Flink-CEPplus   Source File: KafkaShortRetentionTestBase.java    License: Apache License 2.0
public void runFailOnAutoOffsetResetNoneEager() throws Exception {
	final String topic = "auto-offset-reset-none-test";
	final int parallelism = 1;

	kafkaServer.createTestTopic(topic, parallelism, 1);

	// ----------- add consumer ----------

	Properties customProps = new Properties();
	customProps.putAll(standardProps);
	customProps.putAll(secureProps);
	customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception

	try {
		kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
		fail("should fail with an exception");
	}
	catch (IllegalArgumentException e) {
		// expected
		assertTrue(e.getMessage().contains("none"));
	}

	kafkaServer.deleteTestTopic(topic);
}
 
Example 4
Source Project: flink   Source File: EmulatedPubSubSinkTest.java    License: Apache License 2.0
@Test(expected = Exception.class)
public void testPubSubSinkThrowsExceptionOnFailure() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.enableCheckpointing(100);
	env.setParallelism(1);
	env.setRestartStrategy(RestartStrategies.noRestart());

	// Create test stream
	// use a source function to prevent the job from shutting down before a checkpoint has been made
	env.addSource(new SingleInputSourceFunction())

		.map((MapFunction<String, String>) StringUtils::reverse)
		.addSink(PubSubSink.newBuilder()
							.withSerializationSchema(new SimpleStringSchema())
							.withProjectName(PROJECT_NAME)
							.withTopicName(TOPIC_NAME)
							// Specific for emulator
							.withHostAndPortForEmulator("unknown-host-to-force-sink-crash:1234")
							.withCredentials(NoCredentials.getInstance())
							.build()).name("PubSub sink");

	// Run
	env.execute();
}
 
Example 5
Source Project: Flink-CEPplus   Source File: ConsumeFromKinesis.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example 6
Source Project: flink   Source File: ConsumeFromKinesis.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example 7
Source Project: pulsar-flink   Source File: FlinkPulsarITest.java    License: Apache License 2.0
@Test
public void testRunFailedOnWrongServiceUrl() {

    try {
        Properties props = MapUtils.toProperties(Collections.singletonMap(TOPIC_SINGLE_OPTION_KEY, "tp"));

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        FlinkPulsarSource<String> source =
                new FlinkPulsarSource<String>("sev", "admin", new SimpleStringSchema(), props).setStartFromEarliest();

        DataStream<String> stream = see.addSource(source);
        stream.print();
        see.execute("wrong service url");
    } catch (Exception e) {
        final Optional<Throwable> optionalThrowable = ExceptionUtils.findThrowableWithMessage(e, "authority component is missing");
        assertTrue(optionalThrowable.isPresent());
        assertTrue(optionalThrowable.get() instanceof PulsarClientException);
    }
}
 
Example 8
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   //this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
 
Example 9
Source Project: flink-learning   Source File: KinesisConsumerMain.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    env.execute();
}
 
Example 10
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;

        //these hard-coded parameters could be moved to a config file and read via parameterTool
        final RMQConnectionConfig connectionConfig = new RMQConnectionConfig
                .Builder().setHost("localhost").setVirtualHost("/")
                .setPort(5672).setUserName("admin").setPassword("admin")
                .build();

        DataStreamSource<String> zhisheng = env.addSource(new RMQSource<>(connectionConfig,
                "zhisheng",
                true,
                new SimpleStringSchema()))
                .setParallelism(1);
        zhisheng.print();

        //to guarantee exactly-once or at-least-once semantics, checkpointing must be enabled
//        env.enableCheckpointing(10000);
        env.execute("flink learning connectors rabbitmq");
    }
 
Example 11
Source Project: flink   Source File: ConsumeFromDynamoDBStreams.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties dynamodbStreamsConsumerConfig = new Properties();
	final String streamName = pt.getRequired(DYNAMODB_STREAM_NAME);
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	dynamodbStreamsConsumerConfig.setProperty(
			ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> dynamodbStreams = see.addSource(new FlinkDynamoDBStreamsConsumer<>(
			streamName,
			new SimpleStringSchema(),
			dynamodbStreamsConsumerConfig));

	dynamodbStreams.print();

	see.execute();
}
 
Example 12
Source Project: flink   Source File: KafkaConsumer08Test.java    License: Apache License 2.0
@Test
public void testCreateSourceWithoutCluster() {
	try {
		Properties props = new Properties();
		props.setProperty("zookeeper.connect", "localhost:56794");
		props.setProperty("bootstrap.servers", "localhost:11111, localhost:22222");
		props.setProperty("group.id", "non-existent-group");
		props.setProperty(FlinkKafkaConsumer08.GET_PARTITIONS_RETRIES_KEY, "1");

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	}
	catch (Exception e) {
		assertTrue(e.getMessage().contains("Unable to retrieve any partitions"));
	}
}
 
Example 13
Source Project: flink   Source File: FlinkKinesisProducerTest.java    License: Apache License 2.0
@Test
public void testConfigureWithNonSerializableCustomPartitionerFails() {
	exception.expect(IllegalArgumentException.class);
	exception.expectMessage("The provided custom partitioner is not serializable");

	new FlinkKinesisProducer<>(new SimpleStringSchema(), TestUtils.getStandardProperties())
		.setCustomPartitioner(new NonSerializableCustomPartitioner());
}
 
Example 14
Source Project: Flink-CEPplus   Source File: KafkaConsumer08Test.java    License: Apache License 2.0
@Test
public void testAllBoostrapServerHostsAreInvalid() {
	try {
		String unknownHost = "foobar:11111";

		URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

		PowerMockito.mockStatic(InetAddress.class);
		when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

		String zookeeperConnect = "localhost:56794";
		String groupId = "non-existent-group";
		Properties props = createKafkaProps(zookeeperConnect, unknownHost, groupId);

		FlinkKafkaConsumer08<String> consumer = new FlinkKafkaConsumer08<>(
			Collections.singletonList("no op topic"), new SimpleStringSchema(), props);
		StreamingRuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
		Mockito.when(mockRuntimeContext.isCheckpointingEnabled()).thenReturn(true);
		consumer.setRuntimeContext(mockRuntimeContext);

		consumer.open(new Configuration());

		fail();
	} catch (Exception expected) {
		assertTrue("Exception should be thrown containing 'all bootstrap servers invalid' message!",
				expected.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
						+ "' config are invalid"));
	}
}
 
Example 15
Source Project: Flink-CEPplus   Source File: KafkaConsumer08Test.java    License: Apache License 2.0
@Test
public void testAtLeastOneBootstrapServerHostIsValid() throws Exception {
	try {
		String zookeeperConnect = "localhost:56794";
		String unknownHost = "foobar:11111";
		// we declare one valid bootstrap server, namely the one with 'localhost'
		String bootstrapServers = unknownHost + ", localhost:22222";

		URL unknownHostURL = NetUtils.getCorrectHostnamePort(unknownHost);

		PowerMockito.mockStatic(InetAddress.class);
		when(InetAddress.getByName(Matchers.eq(unknownHostURL.getHost()))).thenThrow(new UnknownHostException("Test exception"));

		String groupId = "non-existent-group";
		Properties props = createKafkaProps(zookeeperConnect, bootstrapServers, groupId);
		DummyFlinkKafkaConsumer consumer = new DummyFlinkKafkaConsumer(
			"no op topic",
			new SimpleStringSchema(),
			props);
		consumer.open(new Configuration());

		// no exception should be thrown, because we have one valid bootstrap server; test passes if we reach here
	} catch (Exception e) {
		assertFalse("No exception should be thrown containing 'all bootstrap servers invalid' message!",
			e.getMessage().contains("All the servers provided in: '" + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
				+ "' config are invalid"));
	}
}
 
Example 16
Source Project: Flink-CEPplus   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Tests that the constructor eagerly checks bootstrap servers are set in config.
 */
@Test(expected = IllegalArgumentException.class)
public void testInstantiationFailsWhenBootstrapServersMissing() throws Exception {
	// no bootstrap servers set in props
	Properties props = new Properties();
	// should throw IllegalArgumentException
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
}
 
Example 17
Source Project: Flink-CEPplus   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Tests that the constructor defaults the key and value serializers in the config to the byte array serializer if they are not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
	Properties props = new Properties();
	props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
	// should set missing key value deserializers
	new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
	assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
	assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
 
Example 18
Source Project: flink   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Tests that the partition list is determinate and is correctly provided to the custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
	FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);

	RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
	when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
	when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

	// out-of-order list of 4 partitions
	List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
	mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), mockPartitioner);
	producer.setRuntimeContext(mockRuntimeContext);

	final KafkaProducer mockProducer = producer.getMockKafkaProducer();
	when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
	when(mockProducer.metrics()).thenReturn(null);

	producer.open(new Configuration());
	verify(mockPartitioner, times(1)).open(0, 1);

	producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
	verify(mockPartitioner, times(1)).partition(
		"foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
 
Example 19
Source Project: Flink-CEPplus   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example 20
Source Project: Flink-CEPplus   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example 21
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0
@Test
public void testMetricsReporting() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(
			KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard,
			new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	ShardMetricsReporter shardMetricsReporter = new ShardMetricsReporter();
	long millisBehindLatest = 500L;
	new ShardConsumer<>(
		fetcher,
		0,
		subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, millisBehindLatest),
		shardMetricsReporter).run();

	// the millisBehindLatest metric should have been reported
	assertEquals(millisBehindLatest, shardMetricsReporter.getMillisBehindLatest());
}
 
Example 22
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	new ShardConsumer<>(
		fetcher,
		shardIndex,
		subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example 23
Source Project: flink   Source File: FlinkKinesisProducerTest.java    License: Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKinesisProducer<String> producer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	producer.getPendingRecordFutures().get(0).setException(new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(e, "artificial async exception").isPresent());

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example 24
Source Project: flink   Source File: ShardConsumerTest.java    License: Apache License 2.0
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	new ShardConsumer<>(
		fetcher,
		shardIndex,
		subscribedShardsStateUnderTest.get(0).getStreamShardHandle(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example 25
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0
public static void main(String[] args) throws Exception{
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   //this Kafka topic must match the topic used in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); //the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); //sink the data to MySQL

    env.execute("Flink data sink");
}
 
Example 26
Source Project: flink-learning   Source File: Main.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
        Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

        SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
                parameterTool.get(METRICS_TOPIC),   //this Kafka topic must match the topic used in the utility class above
                new SimpleStringSchema(),
                props))
                .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) //deserialize the JSON string
                .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                    @Override
                    public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                        //collect the product id and price fields
                        out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                    }
                });
//        product.print();

        //single Redis instance
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
        product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper()));

        //the Redis host info is usually read from a config file
        //Redis cluster
/*        FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
                .setNodes(new HashSet<InetSocketAddress>(
                        Arrays.asList(new InetSocketAddress("redis1", 6379)))).build();*/

        //Redis sentinels
/*        FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
                .setMasterName("master")
                .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
                .setPassword("")
                .setDatabase(1).build();*/

        env.execute("flink redis connector");
    }
 
Example 27
Source Project: flink   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Test ensuring that if an invoke call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnInvoke() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.processElement(new StreamRecord<>("msg-2"));
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example 28
Source Project: flink   Source File: FlinkKafkaProducerBaseTest.java    License: Apache License 2.0
/**
 * Test ensuring that if a snapshot call happens right after an async exception is caught, it should be rethrown.
 */
@Test
public void testAsyncErrorRethrownOnCheckpoint() throws Throwable {
	final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
		FakeStandardProducerConfig.get(), new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);

	OneInputStreamOperatorTestHarness<String, Object> testHarness =
		new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

	testHarness.open();

	testHarness.processElement(new StreamRecord<>("msg-1"));

	// let the message request return an async exception
	producer.getPendingCallbacks().get(0).onCompletion(null, new Exception("artificial async exception"));

	try {
		testHarness.snapshot(123L, 123L);
	} catch (Exception e) {
		// the next invoke should rethrow the async exception
		Assert.assertTrue(e.getCause().getMessage().contains("artificial async exception"));

		// test succeeded
		return;
	}

	Assert.fail();
}
 
Example 29
public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the event's primary-key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }
 
Example 30
Source Project: flink-learning   Source File: KeyedStateDeduplication.java    License: Apache License 2.0
public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // use RocksDBStateBackend as the state backend and enable incremental checkpoints
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // use the spinning-disk + high-memory profile; an SSD is strongly recommended for RocksDB
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // checkpoint every 10 minutes
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // configure checkpointing
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka consumer configuration
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }