Java Code Examples for org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper

The following examples show how to use org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fetcher for two fake streams on top of the
 * {@code noShardsFoundForRequestedStreamsBehaviour()} fake Kinesis behaviour and runs it;
 * the test passes only if a {@link RuntimeException} propagates out of the test method.
 */
@Test(expected = RuntimeException.class)
public void testIfNoShardsAreFoundShouldThrowException() throws Exception {
	List<String> streamNames = new LinkedList<>();
	streamNames.add("fakeStream1");
	streamNames.add("fakeStream2");

	HashMap<String, String> lastSeenShardIdsPerStream =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streamNames);

	// The fetcher is needed for one call only, so it is constructed and run in a single expression.
	new TestableKinesisDataFetcher<String>(
		streamNames,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastSeenShardIdsPerStream,
		FakeKinesisBehavioursFactory.noShardsFoundForRequestedStreamsBehaviour())
		.runFetcher(); // expected to throw RuntimeException
}
 
Example 2
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a fetcher that subscribes to two fake streams while the fake Kinesis behaviour is
 * {@code noShardsFoundForRequestedStreamsBehaviour()}; JUnit marks the test as passed only
 * when a {@link RuntimeException} escapes the method.
 */
@Test(expected = RuntimeException.class)
public void testIfNoShardsAreFoundShouldThrowException() throws Exception {
	List<String> subscribedStreams = new LinkedList<>();
	subscribedStreams.add("fakeStream1");
	subscribedStreams.add("fakeStream2");

	HashMap<String, String> lastDiscoveredShards =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

	// Construct and run in one expression: nothing else in the test needs the fetcher instance.
	new TestableKinesisDataFetcher<String>(
		subscribedStreams,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastDiscoveredShards,
		FakeKinesisBehavioursFactory.noShardsFoundForRequestedStreamsBehaviour())
		.runFetcher(); // expected to throw RuntimeException
}
 
Example 3
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the failure path of {@code runFetcher()}: the fetcher is wired to the
 * {@code noShardsFoundForRequestedStreamsBehaviour()} fake for two subscribed streams, and the
 * test succeeds only if running it raises a {@link RuntimeException}.
 */
@Test(expected = RuntimeException.class)
public void testIfNoShardsAreFoundShouldThrowException() throws Exception {
	List<String> streams = new LinkedList<>();
	streams.add("fakeStream1");
	streams.add("fakeStream2");

	HashMap<String, String> streamToLastSeenShardId =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streams);

	// No local variable for the fetcher; it is used exactly once.
	new TestableKinesisDataFetcher<String>(
		streams,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		streamToLastSeenShardId,
		FakeKinesisBehavioursFactory.noShardsFoundForRequestedStreamsBehaviour())
		.runFetcher(); // expected to throw RuntimeException
}
 
Example 4
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code ShardConsumer} against a fake Kinesis behaviour configured with a
 * millis-behind-latest value of 500 and checks that the {@code ShardMetricsReporter}
 * handed to the consumer returns that same value afterwards.
 */
@Test
public void testMetricsReporting() {
	final String streamName = "fakeStream";
	final long expectedMillisBehindLatest = 500L;

	StreamShardHandle shardHandle = getMockStreamShard(streamName, 0);
	KinesisStreamShardState startingState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
		shardHandle,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
	shardStates.add(startingState);

	TestSourceContext<String> context = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(streamName),
		context,
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		shardStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
		Mockito.mock(KinesisProxyInterface.class));

	ShardMetricsReporter metricsReporter = new ShardMetricsReporter();
	KinesisStreamShardState stateToConsume = shardStates.get(0);
	new ShardConsumer<>(
		fetcher,
		0,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, expectedMillisBehindLatest),
		metricsReporter).run();

	// the reporter passed to the consumer must now carry the value the fake behaviour was set up with
	assertEquals(expectedMillisBehindLatest, metricsReporter.getMillisBehindLatest());
}
 
Example 5
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Consumes one mock shard through a {@code ShardConsumer} backed by
 * {@code totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L)}, then verifies that 1000
 * outputs were collected and that the shard's last processed sequence number equals
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	final String streamName = "fakeStream";

	StreamShardHandle shardHandle = getMockStreamShard(streamName, 0);
	KinesisStreamShardState startingState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
		shardHandle,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
	shardStates.add(startingState);

	TestSourceContext<String> collectedRecords = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(streamName),
		collectedRecords,
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		shardStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
		Mockito.mock(KinesisProxyInterface.class));

	// the value returned by the registration call is passed on as the consumer's shard index
	int registeredIndex = fetcher.registerNewSubscribedShardState(shardStates.get(0));
	KinesisStreamShardState stateToConsume = shardStates.get(0);
	new ShardConsumer<>(
		fetcher,
		registeredIndex,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, collectedRecords.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		shardStates.get(0).getLastProcessedSequenceNum());
}
 
Example 6
Source Project: flink   Source File: ShardConsumerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Drives a {@code ShardConsumer} with a fake Kinesis behaviour whose millis-behind-latest
 * argument is 500 and asserts that the {@code ShardMetricsReporter} given to the consumer
 * exposes exactly that value once {@code run()} has returned.
 */
@Test
public void testMetricsReporting() {
	final String stream = "fakeStream";
	final long configuredMillisBehind = 500L;

	StreamShardHandle mockShard = getMockStreamShard(stream, 0);
	KinesisStreamShardState initialShardState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(mockShard),
		mockShard,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> trackedStates = new LinkedList<>();
	trackedStates.add(initialShardState);

	TestSourceContext<String> testContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(stream),
		testContext,
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		trackedStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(stream)),
		Mockito.mock(KinesisProxyInterface.class));

	ShardMetricsReporter reporter = new ShardMetricsReporter();
	KinesisStreamShardState firstState = trackedStates.get(0);
	new ShardConsumer<>(
		fetcher,
		0,
		firstState.getStreamShardHandle(),
		firstState.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, configuredMillisBehind),
		reporter).run();

	// the metric read back from the reporter must match what the fake behaviour was configured with
	assertEquals(configuredMillisBehind, reporter.getMillisBehindLatest());
}
 
Example 7
Source Project: flink   Source File: ShardConsumerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers one mock shard with the fetcher, runs a {@code ShardConsumer} on it using
 * {@code totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L)}, and asserts that the source
 * context collected 1000 outputs and that the tracked state ends at
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	final String stream = "fakeStream";

	StreamShardHandle mockShard = getMockStreamShard(stream, 0);
	KinesisStreamShardState initialShardState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(mockShard),
		mockShard,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> trackedStates = new LinkedList<>();
	trackedStates.add(initialShardState);

	TestSourceContext<String> outputs = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(stream),
		outputs,
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		trackedStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(stream)),
		Mockito.mock(KinesisProxyInterface.class));

	// the index returned by registration is the one the consumer is created with
	int indexOfShard = fetcher.registerNewSubscribedShardState(trackedStates.get(0));
	KinesisStreamShardState firstState = trackedStates.get(0);
	new ShardConsumer<>(
		fetcher,
		indexOfShard,
		firstState.getStreamShardHandle(),
		firstState.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, outputs.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		trackedStates.get(0).getLastProcessedSequenceNum());
}
 
Example 8
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Same scenario as a plain 1000-record consumption, but the fake Kinesis behaviour is built with
 * {@code totalNumOfRecordsAfterNumOfGetRecordsCallsWithUnexpectedExpiredIterator(1000, 9, 7, 500L)}.
 * The test still expects 1000 collected outputs and a final sequence number equal to
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedStateWithUnexpectedExpiredIterator() {
	final String streamName = "fakeStream";

	StreamShardHandle shardHandle = getMockStreamShard(streamName, 0);
	KinesisStreamShardState startingState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
		shardHandle,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
	shardStates.add(startingState);

	TestSourceContext<String> collectedRecords = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(streamName),
		collectedRecords,
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		shardStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
		Mockito.mock(KinesisProxyInterface.class));

	int registeredIndex = fetcher.registerNewSubscribedShardState(shardStates.get(0));
	KinesisStreamShardState stateToConsume = shardStates.get(0);
	new ShardConsumer<>(
		fetcher,
		registeredIndex,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		// 1000 records in total over 9 getRecords() calls; the 7th call
		// runs into an unexpected expired shard iterator
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCallsWithUnexpectedExpiredIterator(
			1000, 9, 7, 500L),
		new ShardMetricsReporter()).run();

	assertEquals(1000, collectedRecords.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		shardStates.get(0).getLastProcessedSequenceNum());
}
 
Example 9
Source Project: Flink-CEPplus   Source File: ShardConsumerTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Enables the {@code "flink.shard.adaptivereads"} property, consumes a mock shard through
 * {@code initialNumOfRecordsAfterNumOfGetRecordsCallsWithAdaptiveReads(10, 2, 500L)}, and expects
 * 50 collected outputs plus a final sequence number equal to
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedStateWithAdaptiveReads() {
	final String streamName = "fakeStream";

	Properties adaptiveReadProps = new Properties();
	adaptiveReadProps.put("flink.shard.adaptivereads", "true");

	StreamShardHandle shardHandle = getMockStreamShard(streamName, 0);
	KinesisStreamShardState startingState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
		shardHandle,
		new SequenceNumber("fakeStartingState"));

	LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
	shardStates.add(startingState);

	TestSourceContext<String> collectedRecords = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(streamName),
		collectedRecords,
		adaptiveReadProps,
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		shardStates,
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
		Mockito.mock(KinesisProxyInterface.class));

	int registeredIndex = fetcher.registerNewSubscribedShardState(shardStates.get(0));
	KinesisStreamShardState stateToConsume = shardStates.get(0);
	new ShardConsumer<>(
		fetcher,
		registeredIndex,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		// the first batch fetches 10 records
		FakeKinesisBehavioursFactory.initialNumOfRecordsAfterNumOfGetRecordsCallsWithAdaptiveReads(10, 2, 500L),
		new ShardMetricsReporter()).run();

	// Average record size in the first batch: 10 * 10 Kb / 10 = 10 Kb
	// Records fetched in the second batch: 2 Mb / (10 Kb * 5) = 40 (rounded down)
	// Expected total: 10 + 40 = 50
	assertEquals(50, collectedRecords.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		shardStates.get(0).getLastProcessedSequenceNum());
}
 
Example 10
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Starts a dummy consumer around a fetcher for a 3-shard fake stream, stops it once the fetcher
 * has started running, and then emits records by hand: a regular record per shard must update
 * the shard state and be collected, while a {@code null} record must update the shard state
 * without producing any output.
 */
@Test
public void testSkipCorruptedRecord() throws Exception {
	final String streamName = "fakeStream";
	final int shardCount = 3;

	final LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
	final TestSourceContext<String> collectedOutput = new TestSourceContext<>();

	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(streamName),
		collectedOutput,
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		1,
		0,
		new AtomicReference<>(),
		shardStates,
		new HashMap<>(),
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(Collections.singletonMap(streamName, shardCount)));

	// The fetcher has to be set up by a FlinkKinesisConsumer before it can run, so a dummy
	// consumer is started and kept alive until the fetcher reports that it is running.
	final DummyFlinkKinesisConsumer<String> dummyConsumer =
		new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);

	CheckedThread consumerRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			dummyConsumer.run(new TestSourceContext<>());
		}
	};
	consumerRunner.start();

	fetcher.waitUntilRun();
	dummyConsumer.cancel();
	consumerRunner.sync();

	assertEquals(shardCount, shardStates.size());

	for (int shardIndex = 0; shardIndex < shardCount; shardIndex++) {
		final String payload = "record-" + shardIndex;
		fetcher.emitRecordAndUpdateState(payload, 10L, shardIndex, new SequenceNumber("seq-num-1"));

		assertEquals(
			new SequenceNumber("seq-num-1"),
			shardStates.get(shardIndex).getLastProcessedSequenceNum());
		assertEquals(
			new StreamRecord<>("record-" + shardIndex, 10L),
			collectedOutput.removeLatestOutput());
	}

	// a null record stands for a corrupt one: the shard state still advances, but nothing is emitted
	fetcher.emitRecordAndUpdateState(null, 10L, 1, new SequenceNumber("seq-num-2"));
	assertEquals(new SequenceNumber("seq-num-2"), shardStates.get(1).getLastProcessedSequenceNum());
	assertEquals(null, collectedOutput.removeLatestOutput()); // nothing should have been collected
}
 
Example 11
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Subscribes to four fake streams, each given a random shard count between 1 and 5, runs a dummy
 * consumer until the fetcher has started, and then asserts that the last-seen-shard map tracks
 * exactly the subscribed streams and holds, per stream, the shard id generated from
 * {@code shardCount - 1}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure() throws Exception {
	List<String> streamNames = new LinkedList<>();
	streamNames.add("fakeStream1");
	streamNames.add("fakeStream2");
	streamNames.add("fakeStream3");
	streamNames.add("fakeStream4");

	HashMap<String, String> lastSeenShardIds =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streamNames);

	// every stream gets between 1 and 5 shards
	Map<String, Integer> shardCountPerStream = new HashMap<>();
	Random random = new Random();
	for (String streamName : streamNames) {
		int shardCount = random.nextInt(5) + 1;
		shardCountPerStream.put(streamName, shardCount);
	}

	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		streamNames,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastSeenShardIds,
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

	final DummyFlinkKinesisConsumer<String> dummyConsumer =
		new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);

	CheckedThread consumerRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			dummyConsumer.run(new TestSourceContext<>());
		}
	};
	consumerRunner.start();

	fetcher.waitUntilRun();
	dummyConsumer.cancel();
	consumerRunner.sync();

	// the state must track exactly the subscribed streams
	Set<String> trackedStreams = lastSeenShardIds.keySet();
	assertEquals(streamNames.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(streamNames));

	// the last seen shard of each stream is the one with the highest shard order
	for (Map.Entry<String, String> entry : lastSeenShardIds.entrySet()) {
		int shardCount = shardCountPerStream.get(entry.getKey());
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(shardCount - 1),
			entry.getValue());
	}
}
 
Example 12
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Seeds the fetcher with restored state for 3 shards of fakeStream1 and 2 shards of fakeStream2,
 * runs it against a non-resharded fake behaviour with those same shard counts, and after the
 * initial discovery asserts that the last-seen-shard map tracks exactly the subscribed streams
 * and holds, per stream, the shard id generated from {@code shardCount - 1}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
	List<String> streamNames = new LinkedList<>();
	streamNames.add("fakeStream1");
	streamNames.add("fakeStream2");

	Map<StreamShardHandle, String> restoredShardStates = new HashMap<>();

	// fakeStream1 has 3 shards before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// shard counts are unchanged after restore
	Map<String, Integer> shardCountPerStream = new HashMap<>();
	shardCountPerStream.put("fakeStream1", 3);
	shardCountPerStream.put("fakeStream2", 2);

	HashMap<String, String> lastSeenShardIds =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streamNames);

	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		streamNames,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastSeenShardIds,
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

	// replay the restored state into the fetcher before it runs
	for (Map.Entry<StreamShardHandle, String> restored : restoredShardStates.entrySet()) {
		StreamShardHandle handle = restored.getKey();
		fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
		fetcher.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restored.getKey()),
				restored.getKey(),
				new SequenceNumber(restored.getValue())));
	}

	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcher.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcher.waitUntilInitialDiscovery();
	fetcher.shutdownFetcher();
	fetcherRunner.sync();

	// the state must track exactly the subscribed streams
	Set<String> trackedStreams = lastSeenShardIds.keySet();
	assertEquals(streamNames.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(streamNames));

	// the last seen shard of each stream is the one with the highest shard order
	for (Map.Entry<String, String> entry : lastSeenShardIds.entrySet()) {
		int shardCount = shardCountPerStream.get(entry.getKey());
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(shardCount - 1),
			entry.getValue());
	}
}
 
Example 13
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Seeds the fetcher with restored state for 3 shards of fakeStream1 and 2 shards of fakeStream2,
 * but lets the fake Kinesis behaviour report 4 and 5 shards respectively. After the initial
 * discovery the last-seen-shard map must track exactly the subscribed streams and hold, per
 * stream, the shard id generated from the new {@code shardCount - 1}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint() throws Exception {
	List<String> streamNames = new LinkedList<>();
	streamNames.add("fakeStream1");
	streamNames.add("fakeStream2");

	Map<StreamShardHandle, String> restoredShardStates = new HashMap<>();

	// fakeStream1 has 3 shards before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	Map<String, Integer> shardCountPerStream = new HashMap<>();
	shardCountPerStream.put("fakeStream1", 3 + 1); // 3 shards before restore plus 1 new shard afterwards
	shardCountPerStream.put("fakeStream2", 2 + 3); // 2 shards before restore plus 3 new shards afterwards

	HashMap<String, String> lastSeenShardIds =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streamNames);

	// a non-resharded behaviour is used to express that no resharding happens AFTER the restore
	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		streamNames,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastSeenShardIds,
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

	// replay the restored state into the fetcher before it runs
	for (Map.Entry<StreamShardHandle, String> restored : restoredShardStates.entrySet()) {
		StreamShardHandle handle = restored.getKey();
		fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
		fetcher.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restored.getKey()),
				restored.getKey(),
				new SequenceNumber(restored.getValue())));
	}

	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcher.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcher.waitUntilInitialDiscovery();
	fetcher.shutdownFetcher();
	fetcherRunner.sync();

	// the state must track exactly the subscribed streams
	Set<String> trackedStreams = lastSeenShardIds.keySet();
	assertEquals(streamNames.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(streamNames));

	// the last seen shard of each stream is the one with the highest shard order
	for (Map.Entry<String, String> entry : lastSeenShardIds.entrySet()) {
		int shardCount = shardCountPerStream.get(entry.getKey());
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(shardCount - 1),
			entry.getValue());
	}
}
 
Example 14
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Subscribes to four fake streams of which only fakeStream1 (3 shards) and fakeStream2 (2 shards)
 * have restored state and shards; fakeStream3 and fakeStream4 are configured with 0 shards.
 * After the initial discovery the last-seen-shard map must still track all four streams, hold the
 * shard ids generated from orders 2 and 1 for the first two, and {@code null} for the other two.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
	List<String> streamNames = new LinkedList<>();
	streamNames.add("fakeStream1");
	streamNames.add("fakeStream2");
	streamNames.add("fakeStream3"); // will not have any shards
	streamNames.add("fakeStream4"); // will not have any shards

	Map<StreamShardHandle, String> restoredShardStates = new HashMap<>();

	// fakeStream1 has 3 shards before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredShardStates.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	Map<String, Integer> shardCountPerStream = new HashMap<>();
	shardCountPerStream.put("fakeStream1", 3); // fixed at 3 shards
	shardCountPerStream.put("fakeStream2", 2); // fixed at 2 shards
	shardCountPerStream.put("fakeStream3", 0); // no shards can be found
	shardCountPerStream.put("fakeStream4", 0); // no shards can be found

	HashMap<String, String> lastSeenShardIds =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(streamNames);

	// a non-resharded behaviour is used to express that no resharding happens AFTER the restore
	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		streamNames,
		new TestSourceContext<>(),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		10,
		2,
		new AtomicReference<>(),
		new LinkedList<>(),
		lastSeenShardIds,
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

	// replay the restored state into the fetcher before it runs
	for (Map.Entry<StreamShardHandle, String> restored : restoredShardStates.entrySet()) {
		StreamShardHandle handle = restored.getKey();
		fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
		fetcher.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restored.getKey()),
				restored.getKey(),
				new SequenceNumber(restored.getValue())));
	}

	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcher.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcher.waitUntilInitialDiscovery();
	fetcher.shutdownFetcher();
	fetcherRunner.sync();

	// the state must track exactly the subscribed streams
	Set<String> trackedStreams = lastSeenShardIds.keySet();
	assertEquals(streamNames.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(streamNames));

	// streams with shards end on their highest shard order; streams without shards stay at null
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(2),
		lastSeenShardIds.get("fakeStream1"));
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(1),
		lastSeenShardIds.get("fakeStream2"));
	assertNull(lastSeenShardIds.get("fakeStream3"));
	assertNull(lastSeenShardIds.get("fakeStream4"));
}
 
Example 15
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Registers checkpointed shards for fakeStream1 and fakeStream2 with the fetcher, lets the
 * initial discovery run against a fake Kinesis that exposes additional shards for both streams,
 * and verifies the last seen shard id tracked per stream. fakeStream3 and fakeStream4 are
 * subscribed to but expose no shards, so no last seen shard id is expected for them.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
	final String firstStream = "fakeStream1";
	final String secondStream = "fakeStream2";
	final String firstEmptyStream = "fakeStream3"; // will not have any shards
	final String secondEmptyStream = "fakeStream4"; // will not have any shards

	// number of shards per stream contained in the restored state
	final int restoredShardsOfFirstStream = 3;
	final int restoredShardsOfSecondStream = 2;

	List<String> fakeStreams = new LinkedList<>();
	fakeStreams.add(firstStream);
	fakeStreams.add(secondStream);
	fakeStreams.add(firstEmptyStream);
	fakeStreams.add(secondEmptyStream);

	// restored state: shard handle -> (random) last processed sequence number
	Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
	for (int shardOrder = 0; shardOrder < restoredShardsOfFirstStream; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				firstStream,
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}
	for (int shardOrder = 0; shardOrder < restoredShardsOfSecondStream; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				secondStream,
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// shard counts reported by the fake Kinesis after the restore
	Map<String, Integer> streamToShardCount = new HashMap<>();
	streamToShardCount.put(firstStream, restoredShardsOfFirstStream + 1); // 3 restored shards & 1 new shard
	streamToShardCount.put(secondStream, restoredShardsOfSecondStream + 3); // 2 restored shards & 3 new shards
	streamToShardCount.put(firstEmptyStream, 0); // no shards can be found
	streamToShardCount.put(secondEmptyStream, 0); // no shards can be found

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	// a non-resharded behaviour represents that Kinesis is not resharded AFTER the restore
	final TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	// replay the restored state into the fetcher
	for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
		final StreamShardHandle restoredHandle = restoredEntry.getKey();
		fetcher.advanceLastDiscoveredShardOfStream(
			restoredHandle.getStreamName(),
			restoredHandle.getShard().getShardId());
		fetcher.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restoredHandle),
				restoredHandle,
				new SequenceNumber(restoredEntry.getValue())));
	}

	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcher.runFetcher();
		}
	};
	fetcherRunner.start();

	// shut down as soon as the initial discovery is done, then surface any error of the thread
	fetcher.waitUntilInitialDiscovery();
	fetcher.shutdownFetcher();
	fetcherRunner.sync();

	// the streams tracked in the state must be identical to the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// the last seen shard of each stream must be correctly set
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(3),
		subscribedStreamsToLastSeenShardIdsUnderTest.get(firstStream));
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(4),
		subscribedStreamsToLastSeenShardIdsUnderTest.get(secondStream));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get(firstEmptyStream));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get(secondEmptyStream));
}
 
Example 16
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a custom {@link KinesisShardAssigner} decides which subtask subscribes to the
 * discovered shards: an assigner that maps every shard to one fixed subtask index must make
 * exactly that subtask discover all shards, and every other subtask discover none.
 */
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {

	int totalCountOfSubtasks = 10;
	int shardCount = 3;

	for (int i = 0; i < 2; i++) {

		// the assigner under test sends every shard to the subtask with index "hash"
		final int hash = i;
		final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
		Map<String, Integer> streamToShardCount = new HashMap<>();
		List<String> fakeStreams = new LinkedList<>();
		fakeStreams.add("fakeStream");
		streamToShardCount.put("fakeStream", shardCount);

		for (int j = 0; j < totalCountOfSubtasks; j++) {

			int subtaskIndex = j;
			// the fetcher is created with its default shard assigner, which is replaced right below;
			// it is parameterized (not raw) so that construction and shard discovery are type-checked
			final TestableKinesisDataFetcher<String> fetcher =
				new TestableKinesisDataFetcher<>(
					fakeStreams,
					new TestSourceContext<>(),
					new Properties(),
					new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
					totalCountOfSubtasks,
					subtaskIndex,
					new AtomicReference<>(),
					new LinkedList<>(),
					KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams),
					FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
			Whitebox.setInternalState(fetcher, "shardAssigner", allShardsSingleSubtaskFn); // override hashing
			List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
			fetcher.shutdownFetcher();

			String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
			if (subtaskIndex == hash) {
				// the targeted subtask gets all shards
				assertEquals(msg, shardCount, shards.size());
			} else {
				// every other subtask gets nothing
				assertEquals(msg, 0, shards.size());
			}
		}

	}

}
 
Example 17
Source Project: Flink-CEPplus   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Drives a fetcher with a manually controlled clock and checks periodic watermark emission:
 * no watermark is emitted while the potential watermark does not advance, a watermark is
 * emitted once it does, and the source is only marked idle after the shard idle interval has
 * been configured on the fetcher.
 */
@Test
public void testPeriodicWatermark() {
	final MutableLong clock = new MutableLong();
	final MutableBoolean isTemporaryIdle = new MutableBoolean();
	final List<Watermark> watermarks = new ArrayList<>();

	String fakeStream1 = "fakeStream1";
	StreamShardHandle shardHandle =
		new StreamShardHandle(
			fakeStream1,
			new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0)));

	// source context that records emitted watermarks and the idle marker
	TestSourceContext<String> sourceContext =
		new TestSourceContext<String>() {
			@Override
			public void emitWatermark(Watermark mark) {
				watermarks.add(mark);
			}

			@Override
			public void markAsTemporarilyIdle() {
				isTemporaryIdle.setTrue();
			}
		};

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest = new HashMap<>();

	// fetcher whose notion of "now" is taken from the test clock
	final KinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<String>(
			Collections.singletonList(fakeStream1),
			sourceContext,
			new java.util.Properties(),
			new KinesisDeserializationSchemaWrapper<>(new org.apache.flink.streaming.util.serialization.SimpleStringSchema()),
			1,
			1,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(new HashMap<>())) {

			@Override
			protected long getCurrentTimeMillis() {
				return clock.getValue();
			}
		};
	Whitebox.setInternalState(fetcher, "periodicWatermarkAssigner", watermarkAssigner);

	// register the shard that the records below are emitted for
	SequenceNumber sequenceNumber = new SequenceNumber("fakeSequenceNumber");
	KinesisStreamShardState shardState =
		new KinesisStreamShardState(
			KinesisDataFetcher.convertToStreamShardMetadata(shardHandle), shardHandle, sequenceNumber);
	int shardIndex = fetcher.registerNewSubscribedShardState(shardState);

	// first record: timestamp Long.MIN_VALUE, so the watermark must not advance
	StreamRecord<String> minTimestampRecord =
		new StreamRecord<>(String.valueOf(Long.MIN_VALUE), Long.MIN_VALUE);
	fetcher.emitRecordAndUpdateState(
		minTimestampRecord.getValue(), minTimestampRecord.getTimestamp(), shardIndex, sequenceNumber);
	Assert.assertEquals(minTimestampRecord, sourceContext.getCollectedOutputs().poll());

	fetcher.emitWatermark();
	Assert.assertTrue("potential watermark equals previous watermark", watermarks.isEmpty());

	// second record: timestamp 1, so a watermark is expected
	StreamRecord<String> advancingRecord = new StreamRecord<>(String.valueOf(1), 1);
	fetcher.emitRecordAndUpdateState(
		advancingRecord.getValue(), advancingRecord.getTimestamp(), shardIndex, sequenceNumber);
	Assert.assertEquals(advancingRecord, sourceContext.getCollectedOutputs().poll());

	fetcher.emitWatermark();
	Assert.assertFalse("watermark advanced", watermarks.isEmpty());
	Assert.assertEquals(new Watermark(advancingRecord.getTimestamp()), watermarks.remove(0));
	Assert.assertFalse("not idle", isTemporaryIdle.booleanValue());

	// test idle timeout: move the clock past the timeout while no idle interval is configured yet
	long idleTimeout = 10;
	clock.add(idleTimeout + 1);
	fetcher.emitWatermark();
	Assert.assertFalse("not idle", isTemporaryIdle.booleanValue());
	Assert.assertTrue("not idle, no new watermark", watermarks.isEmpty());

	// activate idle timeout
	Whitebox.setInternalState(fetcher, "shardIdleIntervalMillis", idleTimeout);
	fetcher.emitWatermark();
	Assert.assertTrue("idle", isTemporaryIdle.booleanValue());
	Assert.assertTrue("idle, no watermark", watermarks.isEmpty());
}
 
Example 18
/**
 * Restores the consumer from the "empty snapshot" resource of the tested Flink version and
 * checks that no state is restored, while the shard returned by the initial discovery is still
 * registered with the fetcher and starts from the earliest sequence number sentinel.
 */
@Test
public void testRestoreWithEmptyState() throws Exception {
	// one open shard per entry of TEST_STATE, handed to the test fetcher as its discovery result
	final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
	for (StreamShardMetadata metadata : TEST_STATE.keySet()) {
		Shard openShard = new Shard();
		openShard.setShardId(metadata.getShardId());
		openShard.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber("1"));

		initialDiscoveryShards.add(new StreamShardHandle(metadata.getStreamName(), openShard));
	}

	final TestFetcher<String> fetcher = new TestFetcher<>(
		Collections.singletonList(TEST_STREAM_NAME),
		new TestSourceContext<>(),
		new TestRuntimeContext(true, 1, 0),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		null,
		initialDiscoveryShards);

	final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(
		fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));

	StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
		new StreamSource<>(consumerFunction);

	final AbstractStreamOperatorTestHarness<String> testHarness =
		new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);

	// restore from the empty snapshot written for the Flink version under test
	testHarness.setup();
	final String emptySnapshotResource =
		OperatorSnapshotUtil.getResourceFilename(
			"kinesis-consumer-migration-test-flink" + testMigrateVersion + "-empty-snapshot");
	testHarness.initializeState(emptySnapshotResource);
	testHarness.open();

	consumerFunction.run(new TestSourceContext<>());

	// no state may have been restored
	assertTrue(consumerFunction.getRestoredState().isEmpty());

	// although the restored state is empty, the fetcher should still have registered the initially
	// discovered shard; furthermore, that shard should be treated as one created while the job
	// wasn't running, and therefore be consumed from the earliest sequence number
	KinesisStreamShardState discoveredShardState = fetcher.getSubscribedShardsState().get(0);
	StreamShardHandle discoveredHandle = discoveredShardState.getStreamShardHandle();
	assertEquals(TEST_STREAM_NAME, discoveredHandle.getStreamName());
	assertEquals(TEST_SHARD_ID, discoveredShardState.getStreamShardHandle().getShard().getShardId());
	assertFalse(discoveredShardState.getStreamShardHandle().isClosed());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(),
		discoveredShardState.getLastProcessedSequenceNum());

	consumerOperator.close();
	consumerOperator.cancel();
}
 
Example 19
/**
 * Restores the consumer from the snapshot resource of the tested Flink version and checks that
 * the restored state equals {@code TEST_STATE} and that the fetcher ends up with exactly one
 * open shard positioned at {@code TEST_SEQUENCE_NUMBER}.
 */
@Test
public void testRestore() throws Exception {
	// one open shard per entry of TEST_STATE, handed to the test fetcher as its discovery result
	final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
	for (StreamShardMetadata metadata : TEST_STATE.keySet()) {
		Shard openShard = new Shard();
		openShard.setShardId(metadata.getShardId());
		openShard.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber("1"));

		initialDiscoveryShards.add(new StreamShardHandle(metadata.getStreamName(), openShard));
	}

	final TestFetcher<String> fetcher = new TestFetcher<>(
		Collections.singletonList(TEST_STREAM_NAME),
		new TestSourceContext<>(),
		new TestRuntimeContext(true, 1, 0),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		null,
		initialDiscoveryShards);

	final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(
		fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));

	StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
		new StreamSource<>(consumerFunction);

	final AbstractStreamOperatorTestHarness<String> testHarness =
		new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);

	// restore from the snapshot written for the Flink version under test
	testHarness.setup();
	final String snapshotResource =
		OperatorSnapshotUtil.getResourceFilename(
			"kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot");
	testHarness.initializeState(snapshotResource);
	testHarness.open();

	consumerFunction.run(new TestSourceContext<>());

	// the consumer must have restored exactly the expected state
	assertNotEquals(null, consumerFunction.getRestoredState());
	assertEquals(1, consumerFunction.getRestoredState().size());
	assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));

	// the fetcher must be subscribed to the single restored shard at the restored position
	assertEquals(1, fetcher.getSubscribedShardsState().size());
	assertEquals(
		TEST_SEQUENCE_NUMBER,
		fetcher.getSubscribedShardsState().get(0).getLastProcessedSequenceNum());

	KinesisStreamShardState restoredShardState = fetcher.getSubscribedShardsState().get(0);
	assertEquals(TEST_STREAM_NAME, restoredShardState.getStreamShardHandle().getStreamName());
	assertEquals(TEST_SHARD_ID, restoredShardState.getStreamShardHandle().getShard().getShardId());
	assertFalse(restoredShardState.getStreamShardHandle().isClosed());
	assertEquals(TEST_SEQUENCE_NUMBER, restoredShardState.getLastProcessedSequenceNum());

	consumerOperator.close();
	consumerOperator.cancel();
}
 
Example 20
/**
 * Restores the consumer from the snapshot resource of the tested Flink version while the initial
 * discovery reports the restored shard as closed together with two new shards whose parent is
 * {@code TEST_SHARD_ID}. Checks that the restored shard keeps its restored position and that the
 * two new shards start from the earliest sequence number sentinel.
 */
@Test
public void testRestoreWithReshardedStream() throws Exception {
	final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
	for (StreamShardMetadata metadata : TEST_STATE.keySet()) {
		final String streamName = metadata.getStreamName();

		// the restored shard, now closed: its sequence number range has an ending sequence number
		Shard closedShard = new Shard();
		closedShard.setShardId(metadata.getShardId());
		closedShard.setSequenceNumberRange(
			new SequenceNumberRange()
				.withStartingSequenceNumber("1")
				.withEndingSequenceNumber("1087654321"));

		initialDiscoveryShards.add(new StreamShardHandle(streamName, closedShard));

		// first new shard
		Shard firstChildShard = new Shard();
		firstChildShard.setShardId(KinesisShardIdGenerator.generateFromShardOrder(1));
		firstChildShard.setSequenceNumberRange(
			new SequenceNumberRange().withStartingSequenceNumber("1087654322"));
		firstChildShard.setParentShardId(TEST_SHARD_ID);

		// second new shard
		Shard secondChildShard = new Shard();
		secondChildShard.setShardId(KinesisShardIdGenerator.generateFromShardOrder(2));
		secondChildShard.setSequenceNumberRange(
			new SequenceNumberRange().withStartingSequenceNumber("2087654322"));
		secondChildShard.setParentShardId(TEST_SHARD_ID);

		initialDiscoveryShards.add(new StreamShardHandle(streamName, firstChildShard));
		initialDiscoveryShards.add(new StreamShardHandle(streamName, secondChildShard));
	}

	final TestFetcher<String> fetcher = new TestFetcher<>(
		Collections.singletonList(TEST_STREAM_NAME),
		new TestSourceContext<>(),
		new TestRuntimeContext(true, 1, 0),
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		null,
		initialDiscoveryShards);

	final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(
		fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));

	StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
		new StreamSource<>(consumerFunction);

	final AbstractStreamOperatorTestHarness<String> testHarness =
		new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);

	// restore from the snapshot written for the Flink version under test
	testHarness.setup();
	final String snapshotResource =
		OperatorSnapshotUtil.getResourceFilename(
			"kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot");
	testHarness.initializeState(snapshotResource);
	testHarness.open();

	consumerFunction.run(new TestSourceContext<>());

	// the consumer must have restored exactly the expected state
	assertNotEquals(null, consumerFunction.getRestoredState());
	assertEquals(1, consumerFunction.getRestoredState().size());
	assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));

	// the fetcher must be registered with all shards, including the new ones
	assertEquals(3, fetcher.getSubscribedShardsState().size());

	// index 0: the restored, now closed shard at its restored position
	KinesisStreamShardState closedShardState = fetcher.getSubscribedShardsState().get(0);
	assertEquals(TEST_STREAM_NAME, closedShardState.getStreamShardHandle().getStreamName());
	assertEquals(TEST_SHARD_ID, closedShardState.getStreamShardHandle().getShard().getShardId());
	assertTrue(closedShardState.getStreamShardHandle().isClosed());
	assertEquals(TEST_SEQUENCE_NUMBER, closedShardState.getLastProcessedSequenceNum());

	// index 1: first new shard, to be consumed from the beginning
	KinesisStreamShardState firstChildState = fetcher.getSubscribedShardsState().get(1);
	assertEquals(TEST_STREAM_NAME, firstChildState.getStreamShardHandle().getStreamName());
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(1),
		firstChildState.getStreamShardHandle().getShard().getShardId());
	assertFalse(firstChildState.getStreamShardHandle().isClosed());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(),
		firstChildState.getLastProcessedSequenceNum());

	// index 2: second new shard, to be consumed from the beginning
	KinesisStreamShardState secondChildState = fetcher.getSubscribedShardsState().get(2);
	assertEquals(TEST_STREAM_NAME, secondChildState.getStreamShardHandle().getStreamName());
	assertEquals(
		KinesisShardIdGenerator.generateFromShardOrder(2),
		secondChildState.getStreamShardHandle().getShard().getShardId());
	assertFalse(secondChildState.getStreamShardHandle().isClosed());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(),
		secondChildState.getLastProcessedSequenceNum());

	consumerOperator.close();
	consumerOperator.cancel();
}
 
Example 21
/**
 * Runs a dummy Kinesis consumer on a test harness, snapshots the source operator once the
 * fetcher has started running, and writes that snapshot to {@code path}.
 *
 * @param path  location the operator snapshot is written to
 * @param state state handed to the {@code TestFetcher} (presumably the shard state it reports
 *              when snapshotted; verify against {@code TestFetcher})
 * @throws Exception if the harness, the snapshot or the write fails, or if the thread running
 *                   the consumer terminated with an error
 */
@SuppressWarnings("unchecked")
private void writeSnapshot(String path, HashMap<StreamShardMetadata, SequenceNumber> state) throws Exception {
	final TestFetcher<String> fetcher = new TestFetcher<>(
		Collections.singletonList(TEST_STREAM_NAME),
		new TestSourceContext<>(),
		new TestRuntimeContext(true, 1, 0),
		new Properties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		state,
		null);

	final DummyFlinkKinesisConsumer<String> consumer = new DummyFlinkKinesisConsumer<>(
		fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));

	StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumer);

	final AbstractStreamOperatorTestHarness<String> testHarness =
			new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);

	testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	testHarness.setup();
	testHarness.open();

	// failure of the source thread, if any; inspected after the thread has been joined
	final AtomicReference<Throwable> error = new AtomicReference<>();

	// run the source asynchronously
	Thread runner = new Thread() {
		@Override
		public void run() {
			try {
				consumer.run(new TestSourceContext<>());
			} catch (Throwable t) {
				// printed right away as well: if the source fails before the fetcher runs, the
				// caller may still be blocked in waitUntilRun() and never reach the check below
				t.printStackTrace();
				error.set(t);
			}
		}
	};
	runner.start();

	fetcher.waitUntilRun();

	final OperatorSubtaskState snapshot;
	synchronized (testHarness.getCheckpointLock()) {
		snapshot = testHarness.snapshot(0L, 0L);
	}

	OperatorSnapshotUtil.writeStateHandle(snapshot, path);

	consumerOperator.close();
	runner.join();

	// surface a failure of the source thread instead of silently dropping it
	final Throwable failure = error.get();
	if (failure != null) {
		throw new IllegalStateException(
			"Kinesis consumer thread failed while writing snapshot to " + path, failure);
	}
}
 
Example 22
Source Project: flink   Source File: ShardConsumerTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code ShardConsumer} against a fake Kinesis that serves 1000 records and hits an
 * unexpectedly expired shard iterator along the way; all records must still be collected and
 * the shard state must end at the shard-ending sentinel sequence number.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedStateWithUnexpectedExpiredIterator() {
	final String streamName = "fakeStream";
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard(streamName, 0);

	// state list shared with the fetcher, seeded with the shard to be consumed
	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(
			KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard,
			new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList(streamName),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	KinesisStreamShardState stateToConsume = subscribedShardsStateUnderTest.get(0);

	// Get a total of 1000 records with 9 getRecords() calls,
	// and the 7th getRecords() call will encounter an unexpected expired shard iterator
	ShardConsumer<String> shardConsumer = new ShardConsumer<>(
		fetcher,
		shardIndex,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCallsWithUnexpectedExpiredIterator(
			1000, 9, 7, 500L),
		new ShardMetricsReporter());
	shardConsumer.run();

	// every record was collected and the shard was read to its end
	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example 23
Source Project: flink   Source File: ShardConsumerTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a {@code ShardConsumer} with the {@code flink.shard.adaptivereads} property enabled
 * against a fake Kinesis behaviour for adaptive reads; 50 records must be collected in total and
 * the shard state must end at the shard-ending sentinel sequence number.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedStateWithAdaptiveReads() {
	final String streamName = "fakeStream";

	Properties consumerProperties = new Properties();
	consumerProperties.put("flink.shard.adaptivereads", "true");

	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard(streamName, 0);

	// state list shared with the fetcher, seeded with the shard to be consumed
	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(
		new KinesisStreamShardState(
			KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
			fakeToBeConsumedShard,
			new SequenceNumber("fakeStartingState")));

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList(streamName),
			sourceContext,
			consumerProperties,
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
	KinesisStreamShardState stateToConsume = subscribedShardsStateUnderTest.get(0);

	// Initial number of records to fetch --> 10
	ShardConsumer<String> shardConsumer = new ShardConsumer<>(
		fetcher,
		shardIndex,
		stateToConsume.getStreamShardHandle(),
		stateToConsume.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.initialNumOfRecordsAfterNumOfGetRecordsCallsWithAdaptiveReads(10, 2, 500L),
		new ShardMetricsReporter());
	shardConsumer.run();

	// Avg record size for first batch --> 10 * 10 Kb/10 = 10 Kb
	// Number of records fetched in second batch --> 2 Mb/10Kb * 5 = 40
	// Total number of records = 10 + 40 = 50
	assertEquals(50, sourceContext.getCollectedOutputs().size());
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example 24
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that emitting a {@code null} record (i.e. a corrupted record) produces no output while
 * the last processed sequence number of the shard is still advanced.
 */
@Test
public void testSkipCorruptedRecord() throws Exception {
	final String stream = "fakeStream";
	final int numShards = 3;

	final LinkedList<KinesisStreamShardState> testShardStates = new LinkedList<>();
	final TestSourceContext<String> sourceContext = new TestSourceContext<>();

	final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
		Collections.singletonList(stream),
		sourceContext,
		TestUtils.getStandardProperties(),
		new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
		1,
		0,
		new AtomicReference<>(),
		testShardStates,
		new HashMap<>(),
		FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(Collections.singletonMap(stream, numShards)));

	// FlinkKinesisConsumer is responsible for setting up the fetcher before it can be run;
	// run the consumer until it reaches the point where the fetcher starts to run
	final DummyFlinkKinesisConsumer<String> consumer =
		new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);

	CheckedThread consumerRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			consumer.run(new TestSourceContext<>());
		}
	};
	consumerRunner.start();

	fetcher.waitUntilRun();
	consumer.cancel();
	consumerRunner.sync();

	assertEquals(numShards, testShardStates.size());

	// a regular record is forwarded and advances the state of its shard
	for (int shardIndex = 0; shardIndex < numShards; shardIndex++) {
		fetcher.emitRecordAndUpdateState("record-" + shardIndex, 10L, shardIndex, new SequenceNumber("seq-num-1"));
		assertEquals(
			new SequenceNumber("seq-num-1"),
			testShardStates.get(shardIndex).getLastProcessedSequenceNum());
		assertEquals(
			new StreamRecord<>("record-" + shardIndex, 10L),
			sourceContext.removeLatestOutput());
	}

	// emitting a null (i.e., a corrupt record) should not produce any output, but still have the shard state updated
	fetcher.emitRecordAndUpdateState(null, 10L, 1, new SequenceNumber("seq-num-2"));
	assertEquals(new SequenceNumber("seq-num-2"), testShardStates.get(1).getLastProcessedSequenceNum());
	assertEquals(null, sourceContext.removeLatestOutput()); // no output should have been collected
}
 
Example 25
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Starts a consumer without any restored state against four fake streams with a random number
 * (1 to 5) of shards each, and verifies that the last seen shard id tracked per stream is the
 * id of the stream's last shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure() throws Exception {
	List<String> fakeStreams = new LinkedList<>();
	for (int streamNumber = 1; streamNumber <= 4; streamNumber++) {
		fakeStreams.add("fakeStream" + streamNumber);
	}

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	// every stream gets between 1 and 5 shards
	Map<String, Integer> streamToShardCount = new HashMap<>();
	Random rand = new Random();
	for (String streamName : fakeStreams) {
		int randomShardCount = rand.nextInt(5) + 1;
		streamToShardCount.put(streamName, randomShardCount);
	}

	final TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	final DummyFlinkKinesisConsumer<String> consumer =
		new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);

	CheckedThread consumerRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			consumer.run(new TestSourceContext<>());
		}
	};
	consumerRunner.start();

	// cancel once the fetcher has started running, then surface any error of the thread
	fetcher.waitUntilRun();
	consumer.cancel();
	consumerRunner.sync();

	// the streams tracked in the state must be identical to the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// the last seen shard of every stream must be the stream's last shard
	for (Map.Entry<String, String> tracked : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
		int shardCount = streamToShardCount.get(tracked.getKey());
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(shardCount - 1),
			tracked.getValue());
	}
}
 
Example 26
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Registers checkpointed shards for fakeStream1 and fakeStream2 with the fetcher, runs the
 * initial discovery against a fake Kinesis that reports the same shard counts as the restored
 * state, and verifies that the last seen shard id tracked per stream is the stream's last shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
	final String firstStream = "fakeStream1";
	final String secondStream = "fakeStream2";

	// number of shards per stream contained in the restored state
	final int restoredShardsOfFirstStream = 3;
	final int restoredShardsOfSecondStream = 2;

	List<String> fakeStreams = new LinkedList<>();
	fakeStreams.add(firstStream);
	fakeStreams.add(secondStream);

	// restored state: shard handle -> (random) last processed sequence number
	Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
	for (int shardOrder = 0; shardOrder < restoredShardsOfFirstStream; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				firstStream,
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}
	for (int shardOrder = 0; shardOrder < restoredShardsOfSecondStream; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				secondStream,
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// the fake Kinesis reports exactly the restored shard counts, i.e. no new shards
	Map<String, Integer> streamToShardCount = new HashMap<>();
	streamToShardCount.put(firstStream, 3); // fakeStream1 will still have 3 shards after restore
	streamToShardCount.put(secondStream, 2); // fakeStream2 will still have 2 shards after restore

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	final TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	// replay the restored state into the fetcher
	for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
		final StreamShardHandle restoredHandle = restoredEntry.getKey();
		fetcher.advanceLastDiscoveredShardOfStream(
			restoredHandle.getStreamName(),
			restoredHandle.getShard().getShardId());
		fetcher.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restoredHandle),
				restoredHandle,
				new SequenceNumber(restoredEntry.getValue())));
	}

	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcher.runFetcher();
		}
	};
	fetcherRunner.start();

	// shut down as soon as the initial discovery is done, then surface any error of the thread
	fetcher.waitUntilInitialDiscovery();
	fetcher.shutdownFetcher();
	fetcherRunner.sync();

	// the streams tracked in the state must be identical to the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// the last seen shard of every stream must be the stream's last shard
	for (Map.Entry<String, String> tracked : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
		int shardCount = streamToShardCount.get(tracked.getKey());
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(shardCount - 1),
			tracked.getValue());
	}
}
 
Example 27
Source Project: flink   Source File: KinesisDataFetcherTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks the per-stream "last seen shard" state after the fetcher's initial discovery when the
 * fake Kinesis behaviour reports more shards per stream than were present in the restored state.
 * For every subscribed stream, the last seen shard id is expected to be the id generated for the
 * highest shard order ({@code shardCount - 1}) configured for that stream.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint() throws Exception {
	List<String> fakeStreams = new LinkedList<>();
	fakeStreams.add("fakeStream1");
	fakeStreams.add("fakeStream2");

	// restored state: shard handle -> sequence number (a random UUID string stands in for the value)
	Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();

	// fakeStream1 has 3 shards (orders 0..2) before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards (orders 0..1) before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// total shard counts served by the fake Kinesis: restored shards plus newly found ones
	Map<String, Integer> streamToShardCount = new HashMap<>();
	streamToShardCount.put("fakeStream1", 3 + 1); // 3 shards before restore, 1 new shard after
	streamToShardCount.put("fakeStream2", 2 + 3); // 2 shards before restore, 3 new shards after

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	// a non-resharded streams behaviour is used to represent that Kinesis is not resharded AFTER the restore
	final TestableKinesisDataFetcher<String> fetcherUnderTest =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	// seed the fetcher with every restored shard before it starts running
	for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
		StreamShardHandle restoredShard = restoredEntry.getKey();
		fetcherUnderTest.advanceLastDiscoveredShardOfStream(
			restoredShard.getStreamName(),
			restoredShard.getShard().getShardId());
		fetcherUnderTest.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
				restoredShard,
				new SequenceNumber(restoredEntry.getValue())));
	}

	// run the fetcher on a separate thread; sync() below is awaited after shutdown
	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcherUnderTest.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcherUnderTest.waitUntilInitialDiscovery();
	fetcherUnderTest.shutdownFetcher();
	fetcherRunner.sync();

	// the streams tracked in the state must be exactly the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// the last seen shard of each stream must be the one with the highest shard order
	for (Map.Entry<String, String> lastSeenOfStream : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
		int highestShardOrder = streamToShardCount.get(lastSeenOfStream.getKey()) - 1;
		assertEquals(
			KinesisShardIdGenerator.generateFromShardOrder(highestShardOrder),
			lastSeenOfStream.getValue());
	}
}
 
Example 28
Source Project: flink | Source File: KinesisDataFetcherTest.java | License: Apache License 2.0 | 4 votes
/**
 * Checks the per-stream "last seen shard" state after the fetcher's initial discovery when the
 * fake Kinesis behaviour reports exactly the shards present in the restored state for two
 * streams and zero shards for two further subscribed streams. The streams with shards are
 * expected to keep their highest restored shard id; the shard-less streams are expected to
 * map to {@code null}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
	List<String> fakeStreams = new LinkedList<>();
	fakeStreams.add("fakeStream1");
	fakeStreams.add("fakeStream2");
	fakeStreams.add("fakeStream3"); // no shards will be reported for fakeStream3
	fakeStreams.add("fakeStream4"); // no shards will be reported for fakeStream4

	// restored state: shard handle -> sequence number (a random UUID string stands in for the value)
	Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();

	// fakeStream1 has 3 shards (orders 0..2) before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards (orders 0..1) before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// shard counts served by the fake Kinesis
	Map<String, Integer> streamToShardCount = new HashMap<>();
	streamToShardCount.put("fakeStream1", 3); // fixed 3 shards, same as in the restored state
	streamToShardCount.put("fakeStream2", 2); // fixed 2 shards, same as in the restored state
	streamToShardCount.put("fakeStream3", 0); // no shards can be found for fakeStream3
	streamToShardCount.put("fakeStream4", 0); // no shards can be found for fakeStream4

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	// a non-resharded streams behaviour is used to represent that Kinesis is not resharded AFTER the restore
	final TestableKinesisDataFetcher<String> fetcherUnderTest =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	// seed the fetcher with every restored shard before it starts running
	for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
		StreamShardHandle restoredShard = restoredEntry.getKey();
		fetcherUnderTest.advanceLastDiscoveredShardOfStream(
			restoredShard.getStreamName(),
			restoredShard.getShard().getShardId());
		fetcherUnderTest.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
				restoredShard,
				new SequenceNumber(restoredEntry.getValue())));
	}

	// run the fetcher on a separate thread; sync() below is awaited after shutdown
	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcherUnderTest.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcherUnderTest.waitUntilInitialDiscovery();
	fetcherUnderTest.shutdownFetcher();
	fetcherRunner.sync();

	// the streams tracked in the state must be exactly the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// streams with shards keep their highest shard order; shard-less streams have no last seen shard
	String expectedLastShardOfStream1 = KinesisShardIdGenerator.generateFromShardOrder(2);
	String expectedLastShardOfStream2 = KinesisShardIdGenerator.generateFromShardOrder(1);
	assertEquals(expectedLastShardOfStream1, subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1"));
	assertEquals(expectedLastShardOfStream2, subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2"));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3"));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4"));
}
 
Example 29
Source Project: flink | Source File: KinesisDataFetcherTest.java | License: Apache License 2.0 | 4 votes
/**
 * Checks the per-stream "last seen shard" state after the fetcher's initial discovery when the
 * fake Kinesis behaviour reports more shards than were present in the restored state for two
 * streams and zero shards for two further subscribed streams. The streams with shards are
 * expected to end at their highest reported shard order; the shard-less streams are expected
 * to map to {@code null}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
	List<String> fakeStreams = new LinkedList<>();
	fakeStreams.add("fakeStream1");
	fakeStreams.add("fakeStream2");
	fakeStreams.add("fakeStream3"); // no shards will be reported for fakeStream3
	fakeStreams.add("fakeStream4"); // no shards will be reported for fakeStream4

	// restored state: shard handle -> sequence number (a random UUID string stands in for the value)
	Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();

	// fakeStream1 has 3 shards (orders 0..2) before restore
	for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream1",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// fakeStream2 has 2 shards (orders 0..1) before restore
	for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
		restoredStateUnderTest.put(
			new StreamShardHandle(
				"fakeStream2",
				new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
			UUID.randomUUID().toString());
	}

	// total shard counts served by the fake Kinesis: restored shards plus newly found ones
	Map<String, Integer> streamToShardCount = new HashMap<>();
	streamToShardCount.put("fakeStream1", 3 + 1); // 3 shards before restore, 1 new shard after
	streamToShardCount.put("fakeStream2", 2 + 3); // 2 shards before restore, 3 new shards after
	streamToShardCount.put("fakeStream3", 0); // no shards can be found for fakeStream3
	streamToShardCount.put("fakeStream4", 0); // no shards can be found for fakeStream4

	HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
		KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

	// a non-resharded streams behaviour is used to represent that Kinesis is not resharded AFTER the restore
	final TestableKinesisDataFetcher<String> fetcherUnderTest =
		new TestableKinesisDataFetcher<>(
			fakeStreams,
			new TestSourceContext<>(),
			TestUtils.getStandardProperties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			new LinkedList<>(),
			subscribedStreamsToLastSeenShardIdsUnderTest,
			FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

	// seed the fetcher with every restored shard before it starts running
	for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
		StreamShardHandle restoredShard = restoredEntry.getKey();
		fetcherUnderTest.advanceLastDiscoveredShardOfStream(
			restoredShard.getStreamName(),
			restoredShard.getShard().getShardId());
		fetcherUnderTest.registerNewSubscribedShardState(
			new KinesisStreamShardState(
				KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
				restoredShard,
				new SequenceNumber(restoredEntry.getValue())));
	}

	// run the fetcher on a separate thread; sync() below is awaited after shutdown
	CheckedThread fetcherRunner = new CheckedThread() {
		@Override
		public void go() throws Exception {
			fetcherUnderTest.runFetcher();
		}
	};
	fetcherRunner.start();

	fetcherUnderTest.waitUntilInitialDiscovery();
	fetcherUnderTest.shutdownFetcher();
	fetcherRunner.sync();

	// the streams tracked in the state must be exactly the subscribed streams
	Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
	assertEquals(fakeStreams.size(), trackedStreams.size());
	assertTrue(trackedStreams.containsAll(fakeStreams));

	// streams with shards end at their highest shard order (count - 1); shard-less streams have none
	String expectedLastShardOfStream1 = KinesisShardIdGenerator.generateFromShardOrder(3);
	String expectedLastShardOfStream2 = KinesisShardIdGenerator.generateFromShardOrder(4);
	assertEquals(expectedLastShardOfStream1, subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1"));
	assertEquals(expectedLastShardOfStream2, subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2"));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3"));
	assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4"));
}
 
Example 30
Source Project: flink | Source File: KinesisDataFetcherTest.java | License: Apache License 2.0 | 4 votes
/**
 * Checks shard discovery when the fetcher's shard assigner is replaced by a custom function
 * that returns the same constant value ({@code hash}) for every shard.
 *
 * <p>For each tested constant (0 and 1) a fetcher is created for every subtask index in
 * {@code [0, totalCountOfSubtasks)}. The test asserts that the subtask whose index equals the
 * constant discovers all {@code shardCount} shards and that every other subtask discovers none.
 */
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {

	int totalCountOfSubtasks = 10;
	int shardCount = 3;

	for (int i = 0; i < 2; i++) {

		final int hash = i;
		// custom assigner: ignores its arguments and always answers with the constant
		final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
		Map<String, Integer> streamToShardCount = new HashMap<>();
		List<String> fakeStreams = new LinkedList<>();
		fakeStreams.add("fakeStream");
		streamToShardCount.put("fakeStream", shardCount);

		for (int j = 0; j < totalCountOfSubtasks; j++) {

			int subtaskIndex = j;
			// parameterized type (not raw), consistent with the other tests in this file;
			// the fetcher is created with its own assigner, which is replaced right below
			final TestableKinesisDataFetcher<String> fetcher =
				new TestableKinesisDataFetcher<>(
					fakeStreams,
					new TestSourceContext<>(),
					new Properties(),
					new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
					totalCountOfSubtasks,
					subtaskIndex,
					new AtomicReference<>(),
					new LinkedList<>(),
					KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams),
					FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
			Whitebox.setInternalState(fetcher, "shardAssigner", allShardsSingleSubtaskFn); // override hashing
			List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
			fetcher.shutdownFetcher();

			// only the subtask whose index matches the constant is expected to receive the shards
			int expectedShardCount = (j == i) ? shardCount : 0;
			String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
			assertEquals(msg, expectedShardCount, shards.size());
		}

	}

}