org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the latest processed sequence number for one subscribed shard.
 * Invoked by {@link ShardConsumer}s.
 *
 * <p>The update runs while holding {@code checkpointLock}. If the new value equals
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}, the active shard counter is decremented, and when it
 * reaches zero the source context is marked as temporarily idle.
 *
 * @param shardStateIndex position of the shard in subscribedShardsState; expected to be the value
 *                        returned by
 *                        {@link KinesisDataFetcher#registerNewSubscribedShardState(KinesisStreamShardState)}
 *                        when the shard state was registered.
 * @param lastSequenceNumber the sequence number to record as last processed
 */
protected final void updateState(int shardStateIndex, SequenceNumber lastSequenceNumber) {
	synchronized (checkpointLock) {
		subscribedShardsState.get(shardStateIndex).setLastProcessedSequenceNum(lastSequenceNumber);

		// only the shard-ending sentinel, written by the shard's consumer thread, signals that
		// the shard has been read completely and is no longer active
		if (!lastSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
			return;
		}

		LOG.info("Subtask {} has reached the end of subscribed shard: {}",
			indexOfThisConsumerSubtask, subscribedShardsState.get(shardStateIndex).getStreamShardHandle());

		// decide whether the source has to be marked idle; on resharding, if
		// registerNewSubscribedShardState was invoked for newly discovered shards AFTER the old shards
		// had reached the end, the subtask's status is toggled back to active as soon as records from
		// the new shards are collected
		if (this.numberOfActiveShards.decrementAndGet() != 0) {
			return;
		}

		LOG.info("Subtask {} has reached the end of all currently subscribed shards; marking the subtask as temporarily idle ...",
			indexOfThisConsumerSubtask);

		sourceContext.markAsTemporarilyIdle();
	}
}
 
Example #2
Source File: DynamoDBStreamsDataFetcher.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Create a new DynamoDB streams shard consumer.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param handle stream handle
 * @param lastSeqNum last sequence number
 * @param shardMetricsReporter the reporter to report metrics to
 * @return a shard consumer for the given shard that reads through a proxy created by
 *         {@link DynamoDBStreamsProxy#create} from this fetcher's consumer configuration
 */
@Override
protected ShardConsumer createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle handle,
	SequenceNumber lastSeqNum,
	ShardMetricsReporter shardMetricsReporter) {

	// diamond instead of a raw constructor call: the consumer's type argument is inferred
	// from this fetcher rather than being erased
	return new ShardConsumer<>(
		this,
		subscribedShardStateIndex,
		handle,
		lastSeqNum,
		DynamoDBStreamsProxy.create(getConsumerConfiguration()),
		shardMetricsReporter);
}
 
Example #3
Source File: ShardConsumer.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains the shard iterator to continue from an actual (non-sentinel) sequence number.
 *
 * <p>An aggregated sequence number is delegated to
 * {@link #getShardIteratorForAggregatedSequenceNumber(SequenceNumber)}, which takes care of dangling
 * sub-records of the last aggregated record. Otherwise an {@code AFTER_SEQUENCE_NUMBER} iterator is
 * requested from Kinesis.
 *
 * @param sequenceNumber the sequence number to continue from
 * @return the resulting shard iterator
 * @throws Exception propagated from the delegate or from the Kinesis call
 */
protected String getShardIteratorForRealSequenceNumber(SequenceNumber sequenceNumber)
		throws Exception {

	if (!sequenceNumber.isAggregated()) {
		// non-aggregated record: iteration simply resumes with the record right after it
		String iteratorType = ShardIteratorType.AFTER_SEQUENCE_NUMBER.toString();
		return kinesis.getShardIterator(subscribedShard, iteratorType, sequenceNumber.getSequenceNumber());
	}

	// aggregated record: dangling sub-records of the last aggregated record must be handled first
	return getShardIteratorForAggregatedSequenceNumber(sequenceNumber);
}
 
Example #4
Source File: ShardConsumer.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a sentinel sequence number to the shard iterator to start reading with.
 *
 * @param sentinelSequenceNumber one of the {@link SentinelSequenceNumber} values
 * @return the shard iterator, or {@code null} for the shard-ending sentinel and for the latest
 *         sentinel on a closed shard
 * @throws InterruptedException propagated from the Kinesis call
 * @throws RuntimeException if the given sequence number matches none of the handled sentinels
 */
protected String getShardIteratorForSentinel(SequenceNumber sentinelSequenceNumber) throws InterruptedException {
	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_LATEST_SEQUENCE_NUM.get())) {
		// a closed shard has no latest next record to fetch
		return subscribedShard.isClosed()
			? null
			: kinesis.getShardIterator(subscribedShard, ShardIteratorType.LATEST.toString(), null);
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get())) {
		return kinesis.getShardIterator(subscribedShard, ShardIteratorType.TRIM_HORIZON.toString(), null);
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
		return null;
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_AT_TIMESTAMP_SEQUENCE_NUM.get())) {
		return kinesis.getShardIterator(subscribedShard, ShardIteratorType.AT_TIMESTAMP.toString(), initTimestamp);
	}

	throw new RuntimeException("Unknown sentinel type: " + sentinelSequenceNumber);
}
 
Example #5
Source File: ShardConsumer.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a sentinel sequence number to the shard iterator to start reading with.
 *
 * @param sentinelSequenceNumber one of the {@link SentinelSequenceNumber} values
 * @return the shard iterator, or {@code null} for the shard-ending sentinel and for the latest
 *         sentinel on a closed shard
 * @throws InterruptedException propagated from the Kinesis call
 * @throws RuntimeException if the given sequence number matches none of the handled sentinels
 */
protected String getShardIteratorForSentinel(SequenceNumber sentinelSequenceNumber) throws InterruptedException {
	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_LATEST_SEQUENCE_NUM.get())) {
		// a closed shard has no latest next record to fetch
		return subscribedShard.isClosed()
			? null
			: kinesis.getShardIterator(subscribedShard, ShardIteratorType.LATEST.toString(), null);
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get())) {
		return kinesis.getShardIterator(subscribedShard, ShardIteratorType.TRIM_HORIZON.toString(), null);
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
		return null;
	}

	if (sentinelSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_AT_TIMESTAMP_SEQUENCE_NUM.get())) {
		return kinesis.getShardIterator(subscribedShard, ShardIteratorType.AT_TIMESTAMP.toString(), initTimestamp);
	}

	throw new RuntimeException("Unknown sentinel type: " + sentinelSequenceNumber);
}
 
Example #6
Source File: ShardConsumer.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains the shard iterator to continue from an actual (non-sentinel) sequence number.
 *
 * <p>An aggregated sequence number is delegated to
 * {@link #getShardIteratorForAggregatedSequenceNumber(SequenceNumber)}, which takes care of dangling
 * sub-records of the last aggregated record. Otherwise an {@code AFTER_SEQUENCE_NUMBER} iterator is
 * requested from Kinesis.
 *
 * @param sequenceNumber the sequence number to continue from
 * @return the resulting shard iterator
 * @throws Exception propagated from the delegate or from the Kinesis call
 */
protected String getShardIteratorForRealSequenceNumber(SequenceNumber sequenceNumber)
		throws Exception {

	if (!sequenceNumber.isAggregated()) {
		// non-aggregated record: iteration simply resumes with the record right after it
		String iteratorType = ShardIteratorType.AFTER_SEQUENCE_NUMBER.toString();
		return kinesis.getShardIterator(subscribedShard, iteratorType, sequenceNumber.getSequenceNumber());
	}

	// aggregated record: dangling sub-records of the last aggregated record must be handled first
	return getShardIteratorForAggregatedSequenceNumber(sequenceNumber);
}
 
Example #7
Source File: DynamoDBStreamsDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Create a new DynamoDB streams shard consumer.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param handle stream handle
 * @param lastSeqNum last sequence number
 * @param shardMetricsReporter the reporter to report metrics to
 * @return a shard consumer for the given shard that reads through a proxy created by
 *         {@link DynamoDBStreamsProxy#create} from this fetcher's consumer configuration
 */
@Override
protected ShardConsumer createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle handle,
	SequenceNumber lastSeqNum,
	ShardMetricsReporter shardMetricsReporter) {

	// diamond instead of a raw constructor call: the consumer's type argument is inferred
	// from this fetcher rather than being erased
	return new ShardConsumer<>(
		this,
		subscribedShardStateIndex,
		handle,
		lastSeqNum,
		DynamoDBStreamsProxy.create(getConsumerConfiguration()),
		shardMetricsReporter);
}
 
Example #8
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Queues a record for emission together with the sequence number the shard state should be updated to.
 * This method is called by {@link ShardConsumer}s.
 *
 * <p>The record is wrapped with its timestamp, shard state index, sequence number and (optional)
 * watermark, and put on the emit queue of the shard's watermark state. If a periodic watermark
 * assigner is set for the shard, the passed-in timestamp is replaced by the one the assigner extracts.
 *
 * @param record the record to collect
 * @param recordTimestamp timestamp to attach to the collected record
 * @param shardStateIndex index of the shard to update in subscribedShardsState;
 *                        this index should be the returned value from
 *                        {@link KinesisDataFetcher#registerNewSubscribedShardState(KinesisStreamShardState)}, called
 *                        when the shard state was registered.
 * @param lastSequenceNumber the last sequence number value to update
 * @throws NullPointerException if no watermark state exists for {@code shardStateIndex}
 * @throws RuntimeException wrapping the {@link InterruptedException} if the thread is interrupted while
 *                          putting the record on the emit queue; the interrupt status is restored first
 */
protected void emitRecordAndUpdateState(T record, long recordTimestamp, int shardStateIndex, SequenceNumber lastSequenceNumber) {
	ShardWatermarkState sws = shardWatermarks.get(shardStateIndex);
	Preconditions.checkNotNull(
		sws, "shard watermark state initialized in registerNewSubscribedShardState");
	Watermark watermark = null;
	if (sws.periodicWatermarkAssigner != null) {
		recordTimestamp =
			sws.periodicWatermarkAssigner.extractTimestamp(record, sws.lastRecordTimestamp);
		// track watermark per record since extractTimestamp has side effect
		watermark = sws.periodicWatermarkAssigner.getCurrentWatermark();
	}
	sws.lastRecordTimestamp = recordTimestamp;
	sws.lastUpdated = getCurrentTimeMillis();

	RecordWrapper<T> recordWrapper = new RecordWrapper<>(record, recordTimestamp);
	recordWrapper.shardStateIndex = shardStateIndex;
	recordWrapper.lastSequenceNumber = lastSequenceNumber;
	recordWrapper.watermark = watermark;
	try {
		sws.emitQueue.put(recordWrapper);
	} catch (InterruptedException e) {
		// catching InterruptedException clears the interrupt status; restore it so that code further
		// up the stack can still observe the interruption after the unchecked rethrow
		Thread.currentThread().interrupt();
		throw new RuntimeException(e);
	}
}
 
Example #9
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the latest processed sequence number for one subscribed shard.
 * Invoked by {@link ShardConsumer}s.
 *
 * <p>The update runs while holding {@code checkpointLock}. If the new value equals
 * {@code SENTINEL_SHARD_ENDING_SEQUENCE_NUM}, the active shard counter is decremented, and when it
 * reaches zero the source context is marked as temporarily idle.
 *
 * @param shardStateIndex position of the shard in subscribedShardsState; expected to be the value
 *                        returned by
 *                        {@link KinesisDataFetcher#registerNewSubscribedShardState(KinesisStreamShardState)}
 *                        when the shard state was registered.
 * @param lastSequenceNumber the sequence number to record as last processed
 */
protected final void updateState(int shardStateIndex, SequenceNumber lastSequenceNumber) {
	synchronized (checkpointLock) {
		subscribedShardsState.get(shardStateIndex).setLastProcessedSequenceNum(lastSequenceNumber);

		// only the shard-ending sentinel, written by the shard's consumer thread, signals that
		// the shard has been read completely and is no longer active
		if (!lastSequenceNumber.equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
			return;
		}

		LOG.info("Subtask {} has reached the end of subscribed shard: {}",
			indexOfThisConsumerSubtask, subscribedShardsState.get(shardStateIndex).getStreamShardHandle());

		// decide whether the source has to be marked idle; on resharding, if
		// registerNewSubscribedShardState was invoked for newly discovered shards AFTER the old shards
		// had reached the end, the subtask's status is toggled back to active as soon as records from
		// the new shards are collected
		if (this.numberOfActiveShards.decrementAndGet() != 0) {
			return;
		}

		LOG.info("Subtask {} has reached the end of all currently subscribed shards; marking the subtask as temporarily idle ...",
			indexOfThisConsumerSubtask);

		sourceContext.markAsTemporarilyIdle();
	}
}
 
Example #10
Source File: ShardConsumer.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains the shard iterator to continue from an actual (non-sentinel) sequence number.
 *
 * <p>An aggregated sequence number is delegated to
 * {@link #getShardIteratorForAggregatedSequenceNumber(SequenceNumber)}, which takes care of dangling
 * sub-records of the last aggregated record. Otherwise an {@code AFTER_SEQUENCE_NUMBER} iterator is
 * requested from Kinesis.
 *
 * @param sequenceNumber the sequence number to continue from
 * @return the resulting shard iterator
 * @throws Exception propagated from the delegate or from the Kinesis call
 */
protected String getShardIteratorForRealSequenceNumber(SequenceNumber sequenceNumber)
		throws Exception {

	if (!sequenceNumber.isAggregated()) {
		// non-aggregated record: iteration simply resumes with the record right after it
		String iteratorType = ShardIteratorType.AFTER_SEQUENCE_NUMBER.toString();
		return kinesis.getShardIterator(subscribedShard, iteratorType, sequenceNumber.getSequenceNumber());
	}

	// aggregated record: dangling sub-records of the last aggregated record must be handled first
	return getShardIteratorForAggregatedSequenceNumber(sequenceNumber);
}
 
Example #11
Source File: DynamoDBStreamsDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Create a new DynamoDB streams shard consumer.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param handle stream handle
 * @param lastSeqNum last sequence number
 * @param shardMetricsReporter the reporter to report metrics to
 * @param shardDeserializer the deserialization schema handed to the created consumer
 * @return a shard consumer for the given shard that reads through a proxy created by
 *         {@link DynamoDBStreamsProxy#create} from this fetcher's consumer configuration
 */
@Override
protected ShardConsumer<T> createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle handle,
	SequenceNumber lastSeqNum,
	ShardMetricsReporter shardMetricsReporter,
	KinesisDeserializationSchema<T> shardDeserializer) {

	// diamond instead of a raw constructor call: avoids the unchecked conversion from the raw
	// ShardConsumer to the declared ShardConsumer<T> return type
	return new ShardConsumer<>(
		this,
		subscribedShardStateIndex,
		handle,
		lastSeqNum,
		DynamoDBStreamsProxy.create(getConsumerConfiguration()),
		shardMetricsReporter,
		shardDeserializer);
}
 
Example #12
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the consumer for a single subscribed shard.
 * Subclasses may override this method to customize shard consumer behavior.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param subscribedShard the shard this consumer is subscribed to
 * @param lastSequenceNum the sequence number in the shard to start consuming
 * @param shardMetricsReporter the reporter to report metrics to
 * @param shardDeserializer the deserialization schema handed to the created consumer
 * @return shard consumer using a proxy created by the fetcher's proxy factory from {@code configProps}
 */
protected ShardConsumer<T> createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle subscribedShard,
	SequenceNumber lastSequenceNum,
	ShardMetricsReporter shardMetricsReporter,
	KinesisDeserializationSchema<T> shardDeserializer) {

	return new ShardConsumer<>(
		this, subscribedShardStateIndex, subscribedShard, lastSequenceNum,
		kinesisProxyFactory.create(configProps),
		shardMetricsReporter, shardDeserializer);
}
 
Example #13
Source File: ShardConsumer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Obtains the shard iterator to continue from an aggregated sequence number.
 *
 * <p>The aggregated record at the given sequence number is fetched again and deaggregated; every
 * sub-record whose subsequence number is larger than the given one is collected before the iterator
 * following that record is returned.
 *
 * @param sequenceNumber the aggregated sequence number to continue from
 * @return the next shard iterator reported by the fetch of the aggregated record
 * @throws Exception propagated from the Kinesis calls or from record collection
 */
protected String getShardIteratorForAggregatedSequenceNumber(SequenceNumber sequenceNumber)
		throws Exception {

	String aggregatedRecordItr = kinesis.getShardIterator(
			subscribedShard,
			ShardIteratorType.AT_SEQUENCE_NUMBER.toString(),
			sequenceNumber.getSequenceNumber());

	// a limit of 1 retrieves only the last aggregated record
	GetRecordsResult aggregatedRecordResult = getRecords(aggregatedRecordItr, 1);

	List<UserRecord> subRecords = deaggregateRecords(
			aggregatedRecordResult.getRecords(),
			subscribedShard.getShard().getHashKeyRange().getStartingHashKey(),
			subscribedShard.getShard().getHashKeyRange().getEndingHashKey());

	final long processedSubSequenceNum = sequenceNumber.getSubSequenceNumber();
	for (UserRecord subRecord : subRecords) {
		if (subRecord.getSubSequenceNumber() <= processedSubSequenceNum) {
			// not past the last processed subsequence number, so not a dangling sub-record
			continue;
		}
		// dangling sub-record: collect it and update state
		deserializeRecordForCollectionAndUpdateState(subRecord);
	}

	return aggregatedRecordResult.getNextShardIterator();
}
 
Example #14
Source File: FlinkKinesisConsumerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds fake restored state: three shards for "fakeStream1" and two shards for "fakeStream2",
 * each mapped to a sequence number made from a random UUID.
 *
 * @param streamName "fakeStream1", "fakeStream2", or "all" for both; any other name yields an empty map
 * @return the fake restored state
 */
private HashMap<StreamShardHandle, SequenceNumber> getFakeRestoredStore(String streamName) {
	final boolean allStreams = streamName.equals("all");
	HashMap<StreamShardHandle, SequenceNumber> restored = new HashMap<>();

	if (allStreams || streamName.equals("fakeStream1")) {
		for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream1",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	if (allStreams || streamName.equals("fakeStream2")) {
		for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream2",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	return restored;
}
 
Example #15
Source File: FlinkKinesisConsumerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds fake restored state: three shards for "fakeStream1" and two shards for "fakeStream2",
 * each mapped to a sequence number made from a random UUID.
 *
 * @param streamName "fakeStream1", "fakeStream2", or "all" for both; any other name yields an empty map
 * @return the fake restored state
 */
private HashMap<StreamShardHandle, SequenceNumber> getFakeRestoredStore(String streamName) {
	final boolean allStreams = streamName.equals("all");
	HashMap<StreamShardHandle, SequenceNumber> restored = new HashMap<>();

	if (allStreams || streamName.equals("fakeStream1")) {
		for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream1",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	if (allStreams || streamName.equals("fakeStream2")) {
		for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream2",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	return restored;
}
 
Example #16
Source File: ShardConsumer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Obtains the shard iterator to continue from an aggregated sequence number.
 *
 * <p>The aggregated record at the given sequence number is fetched again and deaggregated; every
 * sub-record whose subsequence number is larger than the given one is collected before the iterator
 * following that record is returned.
 *
 * @param sequenceNumber the aggregated sequence number to continue from
 * @return the next shard iterator reported by the fetch of the aggregated record
 * @throws Exception propagated from the Kinesis calls or from record collection
 */
protected String getShardIteratorForAggregatedSequenceNumber(SequenceNumber sequenceNumber)
		throws Exception {

	String aggregatedRecordItr = kinesis.getShardIterator(
			subscribedShard,
			ShardIteratorType.AT_SEQUENCE_NUMBER.toString(),
			sequenceNumber.getSequenceNumber());

	// a limit of 1 retrieves only the last aggregated record
	GetRecordsResult aggregatedRecordResult = getRecords(aggregatedRecordItr, 1);

	List<UserRecord> subRecords = deaggregateRecords(
			aggregatedRecordResult.getRecords(),
			subscribedShard.getShard().getHashKeyRange().getStartingHashKey(),
			subscribedShard.getShard().getHashKeyRange().getEndingHashKey());

	final long processedSubSequenceNum = sequenceNumber.getSubSequenceNumber();
	for (UserRecord subRecord : subRecords) {
		if (subRecord.getSubSequenceNumber() <= processedSubSequenceNum) {
			// not past the last processed subsequence number, so not a dangling sub-record
			continue;
		}
		// dangling sub-record: collect it and update state
		deserializeRecordForCollectionAndUpdateState(subRecord);
	}

	return aggregatedRecordResult.getNextShardIterator();
}
 
Example #17
Source File: ShardConsumer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes one record, hands it to the fetcher for collection and state update, and remembers its
 * sequence number as the last one collected by this shard consumer, so that
 * {@link ShardConsumer#getRecords(String, int)} may use the correct sequence number to refresh shard
 * iterators if necessary.
 *
 * <p>The server-side Kinesis approximate arrival timestamp is attached to the collected record. When the
 * user program uses {@link TimeCharacteristic#EventTime}, this timestamp will be used by default.
 *
 * @param record record to deserialize and collect
 * @throws IOException if the record cannot be deserialized
 */
private void deserializeRecordForCollectionAndUpdateState(UserRecord record)
	throws IOException {
	// copy the remaining payload bytes out of the record's buffer
	final ByteBuffer payload = record.getData();
	final byte[] payloadBytes = new byte[payload.remaining()];
	payload.get(payloadBytes);

	final long arrivalTimeMillis = record.getApproximateArrivalTimestamp().getTime();

	final T deserialized = deserializer.deserialize(
		payloadBytes,
		record.getPartitionKey(),
		record.getSequenceNumber(),
		arrivalTimeMillis,
		subscribedShard.getStreamName(),
		subscribedShard.getShard().getShardId());

	// aggregated records additionally carry a subsequence number
	final SequenceNumber collected;
	if (record.isAggregated()) {
		collected = new SequenceNumber(record.getSequenceNumber(), record.getSubSequenceNumber());
	} else {
		collected = new SequenceNumber(record.getSequenceNumber());
	}

	fetcherRef.emitRecordAndUpdateState(
		deserialized, arrivalTimeMillis, subscribedShardStateIndex, collected);

	lastSequenceNum = collected;
}
 
Example #18
Source File: KinesisDataFetcher.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Captures, for every subscribed shard, the shard metadata together with its last processed
 * sequence number.
 *
 * <p>The caller is expected to hold {@code checkpointLock}; this is checked with an assertion.
 *
 * @return state snapshot
 */
public HashMap<StreamShardMetadata, SequenceNumber> snapshotState() {
	// the checkpoint lock must already be held by the calling thread
	assert Thread.holdsLock(checkpointLock);

	HashMap<StreamShardMetadata, SequenceNumber> snapshot = new HashMap<>();
	for (KinesisStreamShardState shardState : subscribedShardsState) {
		StreamShardMetadata shardMetadata = shardState.getStreamShardMetadata();
		SequenceNumber lastProcessed = shardState.getLastProcessedSequenceNum();
		snapshot.put(shardMetadata, lastProcessed);
	}
	return snapshot;
}
 
Example #19
Source File: ShardConsumer.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the shard iterator for the given {@link SequenceNumber}, dispatching on whether it is a
 * sentinel value or an actual sequence number.
 *
 * @param sequenceNumber the sequence number to get an iterator for
 * @return shard iterator
 * @throws Exception propagated from the iterator lookup
 */
protected String getShardIterator(SequenceNumber sequenceNumber) throws Exception {
	// a non-sentinel value means we start from an actual sequence number (due to restore from failure)
	return isSentinelSequenceNumber(sequenceNumber)
		? getShardIteratorForSentinel(sequenceNumber)
		: getShardIteratorForRealSequenceNumber(sequenceNumber);
}
 
Example #20
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the consumer for a single subscribed shard.
 * Subclasses may override this method to customize shard consumer behavior.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param subscribedShard the shard this consumer is subscribed to
 * @param lastSequenceNum the sequence number in the shard to start consuming
 * @param shardMetricsReporter the reporter to report metrics to
 * @return shard consumer using a proxy created by the fetcher's proxy factory from {@code configProps}
 */
protected ShardConsumer createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle subscribedShard,
	SequenceNumber lastSequenceNum,
	ShardMetricsReporter shardMetricsReporter) {

	return new ShardConsumer<>(
		this, subscribedShardStateIndex, subscribedShard, lastSequenceNum,
		kinesisProxyFactory.create(configProps),
		shardMetricsReporter);
}
 
Example #21
Source File: KinesisDataFetcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Captures, for every subscribed shard, the shard metadata together with its last processed
 * sequence number.
 *
 * <p>The caller is expected to hold {@code checkpointLock}; this is checked with an assertion.
 *
 * @return state snapshot
 */
public HashMap<StreamShardMetadata, SequenceNumber> snapshotState() {
	// the checkpoint lock must already be held by the calling thread
	assert Thread.holdsLock(checkpointLock);

	HashMap<StreamShardMetadata, SequenceNumber> snapshot = new HashMap<>();
	for (KinesisStreamShardState shardState : subscribedShardsState) {
		StreamShardMetadata shardMetadata = shardState.getStreamShardMetadata();
		SequenceNumber lastProcessed = shardState.getLastProcessedSequenceNum();
		snapshot.put(shardMetadata, lastProcessed);
	}
	return snapshot;
}
 
Example #22
Source File: KinesisDataFetcher.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the consumer for a single subscribed shard.
 * Subclasses may override this method to customize shard consumer behavior.
 *
 * @param subscribedShardStateIndex the state index of the shard this consumer is subscribed to
 * @param subscribedShard the shard this consumer is subscribed to
 * @param lastSequenceNum the sequence number in the shard to start consuming
 * @param shardMetricsReporter the reporter to report metrics to
 * @return shard consumer using a proxy created by the fetcher's proxy factory from {@code configProps}
 */
protected ShardConsumer createShardConsumer(
	Integer subscribedShardStateIndex,
	StreamShardHandle subscribedShard,
	SequenceNumber lastSequenceNum,
	ShardMetricsReporter shardMetricsReporter) {

	return new ShardConsumer<>(
		this, subscribedShardStateIndex, subscribedShard, lastSequenceNum,
		kinesisProxyFactory.create(configProps),
		shardMetricsReporter);
}
 
Example #23
Source File: FlinkKinesisConsumer.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the union list state that holds (shard metadata, sequence number) pairs for checkpoints and,
 * when the context reports a restore, loads the restored pairs into {@code sequenceNumsToRestore}
 * unless that map has already been set.
 *
 * @param context the function initialization context
 * @throws Exception propagated from state store access
 */
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
	TypeInformation<Tuple2<StreamShardMetadata, SequenceNumber>> shardsStateTypeInfo = new TupleTypeInfo<>(
		TypeInformation.of(StreamShardMetadata.class),
		TypeInformation.of(SequenceNumber.class));

	ListStateDescriptor<Tuple2<StreamShardMetadata, SequenceNumber>> stateDescriptor =
		new ListStateDescriptor<>(sequenceNumsStateStoreName, shardsStateTypeInfo);
	sequenceNumsStateForCheckpoint = context.getOperatorStateStore().getUnionListState(stateDescriptor);

	if (!context.isRestored()) {
		LOG.info("No restore state for FlinkKinesisConsumer.");
		return;
	}

	if (sequenceNumsToRestore != null) {
		// restore state has already been set; leave it untouched
		return;
	}

	sequenceNumsToRestore = new HashMap<>();
	for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : sequenceNumsStateForCheckpoint.get()) {
		// the restored metadata is wrapped inside an equivalence wrapper that checks only stream name
		// and shard id, so that if a shard had been closed (due to a Kinesis reshard operation, for
		// example) since the savepoint and has a different metadata than what was last stored, it can
		// still be matched in sequenceNumsToRestore. Please see FLINK-8484 for details.
		sequenceNumsToRestore.put(
			new StreamShardMetadata.EquivalenceWrapper(restoredEntry.f0),
			restoredEntry.f1);
	}

	LOG.info("Setting restore state in the FlinkKinesisConsumer. Using the following offsets: {}",
		sequenceNumsToRestore);
}
 
Example #24
Source File: ShardConsumerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a shard consumer against a fake Kinesis behaviour configured with a fixed millisBehindLatest
 * value and checks that the metrics reporter ends up with that value.
 */
@Test
public void testMetricsReporting() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	KinesisStreamShardState initialShardState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
		fakeToBeConsumedShard,
		new SequenceNumber("fakeStartingState"));
	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(initialShardState);

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	ShardMetricsReporter shardMetricsReporter = new ShardMetricsReporter();
	long millisBehindLatest = 500L;

	KinesisStreamShardState shardStateUnderTest = subscribedShardsStateUnderTest.get(0);
	new ShardConsumer<>(
		fetcher,
		0,
		shardStateUnderTest.getStreamShardHandle(),
		shardStateUnderTest.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, millisBehindLatest),
		shardMetricsReporter).run();

	// the millisBehindLatest metric should have been reported
	assertEquals(millisBehindLatest, shardMetricsReporter.getMillisBehindLatest());
}
 
Example #25
Source File: ShardConsumerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a shard consumer against a fake Kinesis behaviour that serves 1000 records and checks that
 * all of them were collected and that the shard state ends at the shard-ending sentinel.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedState() {
	StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);

	KinesisStreamShardState initialShardState = new KinesisStreamShardState(
		KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
		fakeToBeConsumedShard,
		new SequenceNumber("fakeStartingState"));
	LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
	subscribedShardsStateUnderTest.add(initialShardState);

	TestSourceContext<String> sourceContext = new TestSourceContext<>();

	TestableKinesisDataFetcher<String> fetcher =
		new TestableKinesisDataFetcher<>(
			Collections.singletonList("fakeStream"),
			sourceContext,
			new Properties(),
			new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
			10,
			2,
			new AtomicReference<>(),
			subscribedShardsStateUnderTest,
			KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
			Mockito.mock(KinesisProxyInterface.class));

	int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));

	KinesisStreamShardState shardStateUnderTest = subscribedShardsStateUnderTest.get(0);
	new ShardConsumer<>(
		fetcher,
		shardIndex,
		shardStateUnderTest.getStreamShardHandle(),
		shardStateUnderTest.getLastProcessedSequenceNum(),
		FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCalls(1000, 9, 500L),
		new ShardMetricsReporter()).run();

	// every served record must have been collected ...
	assertEquals(1000, sourceContext.getCollectedOutputs().size());
	// ... and the shard state must end at the shard-ending sentinel
	assertEquals(
		SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get(),
		subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
}
 
Example #26
Source File: FlinkKinesisConsumerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds fake restored state: three shards for "fakeStream1" and two shards for "fakeStream2",
 * each mapped to a sequence number made from a random UUID.
 *
 * @param streamName "fakeStream1", "fakeStream2", or "all" for both; any other name yields an empty map
 * @return the fake restored state
 */
private HashMap<StreamShardHandle, SequenceNumber> getFakeRestoredStore(String streamName) {
	final boolean allStreams = streamName.equals("all");
	HashMap<StreamShardHandle, SequenceNumber> restored = new HashMap<>();

	if (allStreams || streamName.equals("fakeStream1")) {
		for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream1",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	if (allStreams || streamName.equals("fakeStream2")) {
		for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
			restored.put(
				new StreamShardHandle("fakeStream2",
					new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
				new SequenceNumber(UUID.randomUUID().toString()));
		}
	}

	return restored;
}
 
Example #27
Source File: FlinkKinesisConsumerMigrationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the given map into a new map keyed by the shard metadata contained in each
 * equivalence wrapper.
 *
 * @param equivalenceWrappedMap map keyed by equivalence wrappers
 * @return a new map with the same values, keyed by the unwrapped shard metadata
 */
private static Map<StreamShardMetadata, SequenceNumber> removeEquivalenceWrappers(
		Map<StreamShardMetadata.EquivalenceWrapper, SequenceNumber> equivalenceWrappedMap) {

	Map<StreamShardMetadata, SequenceNumber> result = new HashMap<>();
	equivalenceWrappedMap.forEach(
		(wrapper, sequenceNumber) -> result.put(wrapper.getShardMetadata(), sequenceNumber));
	return result;
}
 
Example #28
Source File: FlinkKinesisConsumerMigrationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the given map into a new map keyed by the shard metadata contained in each
 * equivalence wrapper.
 *
 * @param equivalenceWrappedMap map keyed by equivalence wrappers
 * @return a new map with the same values, keyed by the unwrapped shard metadata
 */
private static Map<StreamShardMetadata, SequenceNumber> removeEquivalenceWrappers(
		Map<StreamShardMetadata.EquivalenceWrapper, SequenceNumber> equivalenceWrappedMap) {

	Map<StreamShardMetadata, SequenceNumber> result = new HashMap<>();
	equivalenceWrappedMap.forEach(
		(wrapper, sequenceNumber) -> result.put(wrapper.getShardMetadata(), sequenceNumber));
	return result;
}
 
Example #29
Source File: FlinkKinesisConsumerMigrationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a test fetcher that keeps the given state snapshot and initial discovery shards.
 *
 * <p>The superclass is constructed with {@code DEFAULT_SHARD_ASSIGNER} and {@code null} for its
 * last two arguments.
 *
 * @param streams forwarded to the superclass constructor
 * @param sourceContext forwarded to the superclass constructor
 * @param runtimeContext forwarded to the superclass constructor
 * @param configProps forwarded to the superclass constructor
 * @param deserializationSchema forwarded to the superclass constructor
 * @param testStateSnapshot state snapshot stored on this fetcher
 * @param testInitialDiscoveryShards shards stored on this fetcher as the initial discovery result
 */
public TestFetcher(
		List<String> streams,
		SourceFunction.SourceContext<T> sourceContext,
		RuntimeContext runtimeContext,
		Properties configProps,
		KinesisDeserializationSchema<T> deserializationSchema,
		HashMap<StreamShardMetadata, SequenceNumber> testStateSnapshot,
		List<StreamShardHandle> testInitialDiscoveryShards) {

	super(
		streams,
		sourceContext,
		runtimeContext,
		configProps,
		deserializationSchema,
		DEFAULT_SHARD_ASSIGNER,
		null,
		null);

	this.testInitialDiscoveryShards = testInitialDiscoveryShards;
	this.testStateSnapshot = testStateSnapshot;
}
 
Example #30
Source File: ShardConsumer.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes one record, hands it to the fetcher for collection and state update, and remembers its
 * sequence number as the last one collected by this shard consumer, so that
 * {@link ShardConsumer#getRecords(String, int)} may use the correct sequence number to refresh shard
 * iterators if necessary.
 *
 * <p>The server-side Kinesis approximate arrival timestamp is attached to the collected record. When the
 * user program uses {@link TimeCharacteristic#EventTime}, this timestamp will be used by default.
 *
 * @param record record to deserialize and collect
 * @throws IOException if the record cannot be deserialized
 */
private void deserializeRecordForCollectionAndUpdateState(UserRecord record)
	throws IOException {
	// copy the remaining payload bytes out of the record's buffer
	final ByteBuffer payload = record.getData();
	final byte[] payloadBytes = new byte[payload.remaining()];
	payload.get(payloadBytes);

	final long arrivalTimeMillis = record.getApproximateArrivalTimestamp().getTime();

	final T deserialized = deserializer.deserialize(
		payloadBytes,
		record.getPartitionKey(),
		record.getSequenceNumber(),
		arrivalTimeMillis,
		subscribedShard.getStreamName(),
		subscribedShard.getShard().getShardId());

	// aggregated records additionally carry a subsequence number
	final SequenceNumber collected;
	if (record.isAggregated()) {
		collected = new SequenceNumber(record.getSequenceNumber(), record.getSubSequenceNumber());
	} else {
		collected = new SequenceNumber(record.getSequenceNumber());
	}

	fetcherRef.emitRecordAndUpdateState(
		deserialized, arrivalTimeMillis, subscribedShardStateIndex, collected);

	lastSequenceNum = collected;
}