Java Code Examples for org.apache.flink.streaming.api.functions.source.SourceFunction#SourceContext

The following examples show how to use org.apache.flink.streaming.api.functions.source.SourceFunction#SourceContext. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PulsarRowFetcher.java    From pulsar-flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a row fetcher.
 *
 * <p>This constructor adds no logic of its own: every argument is handed to the superclass
 * constructor unchanged and in declaration order.
 *
 * @throws Exception propagated from the superclass constructor
 */
public PulsarRowFetcher(
        SourceFunction.SourceContext<Row> sourceContext,
        Map<String, MessageId> seedTopicsWithInitialOffsets,
        SerializedValue<AssignerWithPeriodicWatermarks<Row>> watermarksPeriodic,
        SerializedValue<AssignerWithPunctuatedWatermarks<Row>> watermarksPunctuated,
        ProcessingTimeService processingTimeProvider,
        long autoWatermarkInterval,
        ClassLoader userCodeClassLoader,
        StreamingRuntimeContext runtimeContext,
        ClientConfigurationData clientConf,
        Map<String, Object> readerConf,
        int pollTimeoutMs,
        DeserializationSchema<Row> deserializer,
        PulsarMetadataReader metadataReader) throws Exception {

    super(
            sourceContext,
            seedTopicsWithInitialOffsets,
            watermarksPeriodic,
            watermarksPunctuated,
            processingTimeProvider,
            autoWatermarkInterval,
            userCodeClassLoader,
            runtimeContext,
            clientConf,
            readerConf,
            pollTimeoutMs,
            deserializer,
            metadataReader);
}
 
Example 2
Source File: HeapMonitorSource.java    From flink-tutorials with Apache License 2.0 6 votes vote down vote up
/**
 * Repeatedly sleeps for {@code sleepMillis} and then emits one {@code HeapMetrics} record
 * for every memory pool of type {@link MemoryType#HEAP}, until {@code running} is false.
 *
 * <p>Each record is emitted while holding the source context's checkpoint lock.
 *
 * @param sourceContext context used to emit the records
 * @throws Exception if the local host name cannot be resolved or the sleep is interrupted
 */
@Override
public void run(SourceFunction.SourceContext<HeapMetrics> sourceContext) throws Exception {
	LOG.info("starting HeapMonitorSource");

	final int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask();
	final String hostname = InetAddress.getLocalHost().getHostName();

	while (running) {
		Thread.sleep(sleepMillis);

		for (MemoryPoolMXBean pool : ManagementFactory.getMemoryPoolMXBeans()) {
			// Only heap pools are reported.
			if (pool.getType() != MemoryType.HEAP) {
				continue;
			}

			final MemoryUsage usage = pool.getUsage();
			final long usedBytes = usage.getUsed();
			final long maxBytes = usage.getMax();
			// NOTE(review): MemoryUsage.getMax() is documented to return -1 when the maximum
			// is undefined, in which case this ratio is negative.
			final double usedRatio = (double) usedBytes / maxBytes;

			synchronized (sourceContext.getCheckpointLock()) {
				sourceContext.collect(
					new HeapMetrics(pool.getName(), usedBytes, maxBytes, usedRatio, subtaskIndex, hostname));
			}
		}
	}
}
 
Example 3
Source File: DynamoDBStreamsDataFetcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Constructor.
 *
 * <p>Delegates to the superclass constructor and fills in the arguments that callers do not
 * supply: the checkpoint lock obtained from {@code sourceContext}, two {@code null}
 * arguments, a fresh {@code AtomicReference}, an empty {@code ArrayList}, the state built by
 * {@code createInitialSubscribedStreamsToLastDiscoveredShardsState(streams)}, and
 * {@code DynamoDBStreamsProxy::create}.
 *
 * @param streams list of streams to fetch data
 * @param sourceContext source context
 * @param runtimeContext runtime context
 * @param configProps config properties
 * @param deserializationSchema deserialization schema
 * @param shardAssigner shard assigner
 */
public DynamoDBStreamsDataFetcher(List<String> streams,
	SourceFunction.SourceContext<T> sourceContext,
	RuntimeContext runtimeContext,
	Properties configProps,
	KinesisDeserializationSchema<T> deserializationSchema,
	KinesisShardAssigner shardAssigner) {

	super(streams,
		sourceContext,
		sourceContext.getCheckpointLock(),
		runtimeContext,
		configProps,
		deserializationSchema,
		shardAssigner,
		// NOTE(review): the two nulls are presumably the optional watermark-related
		// arguments of the superclass constructor — confirm against its signature.
		null,
		null,
		new AtomicReference<>(),
		new ArrayList<>(),
		createInitialSubscribedStreamsToLastDiscoveredShardsState(streams),
		// use DynamoDBStreamsProxy
		DynamoDBStreamsProxy::create);
}
 
Example 4
Source File: KinesisDataFetcher.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a Kinesis Data Fetcher.
 *
 * <p>Delegates to the full constructor, additionally passing the checkpoint lock obtained
 * from {@code sourceContext}, a fresh {@code AtomicReference}, an empty {@code ArrayList},
 * the state built by {@code createInitialSubscribedStreamsToLastDiscoveredShardsState(streams)},
 * and {@code KinesisProxy::create}.
 *
 * @param streams the streams to subscribe to
 * @param sourceContext context of the source function
 * @param runtimeContext this subtask's runtime context
 * @param configProps the consumer configuration properties
 * @param deserializationSchema deserialization schema
 * @param shardAssigner shard assigner, forwarded unchanged
 * @param periodicWatermarkAssigner periodic watermark assigner, forwarded unchanged
 * @param watermarkTracker watermark tracker, forwarded unchanged
 */
public KinesisDataFetcher(List<String> streams,
						SourceFunction.SourceContext<T> sourceContext,
						RuntimeContext runtimeContext,
						Properties configProps,
						KinesisDeserializationSchema<T> deserializationSchema,
						KinesisShardAssigner shardAssigner,
						AssignerWithPeriodicWatermarks<T> periodicWatermarkAssigner,
						WatermarkTracker watermarkTracker) {
	this(streams,
		sourceContext,
		sourceContext.getCheckpointLock(),
		runtimeContext,
		configProps,
		deserializationSchema,
		shardAssigner,
		periodicWatermarkAssigner,
		watermarkTracker,
		new AtomicReference<>(),
		new ArrayList<>(),
		createInitialSubscribedStreamsToLastDiscoveredShardsState(streams),
		KinesisProxy::create);
}
 
Example 5
Source File: FlinkKinesisConsumerMigrationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a test fetcher and stores the given state snapshot and initial discovery shards
 * in the corresponding fields.
 *
 * <p>The superclass is constructed with {@code DEFAULT_SHARD_ASSIGNER} and with
 * {@code null} for its last two arguments.
 */
public TestFetcher(
		List<String> streams,
		SourceFunction.SourceContext<T> sourceContext,
		RuntimeContext runtimeContext,
		Properties configProps,
		KinesisDeserializationSchema<T> deserializationSchema,
		HashMap<StreamShardMetadata, SequenceNumber> testStateSnapshot,
		List<StreamShardHandle> testInitialDiscoveryShards) {

	super(
		streams,
		sourceContext,
		runtimeContext,
		configProps,
		deserializationSchema,
		DEFAULT_SHARD_ASSIGNER,
		null,
		null);

	this.testInitialDiscoveryShards = testInitialDiscoveryShards;
	this.testStateSnapshot = testStateSnapshot;
}
 
Example 6
Source File: EventTimeWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Emits one event per key in {@code [0, keyUniverseSize)} for the given sequence number,
 * followed by a single watermark.
 *
 * <p>All events share one {@code IntType} payload built from {@code eventSequenceNo} and use
 * {@code eventSequenceNo} as their timestamp. The watermark value is
 * {@code eventSequenceNo - watermarkTrailing}.
 *
 * @param ctx context the events and the watermark are emitted to
 * @param eventSequenceNo sequence number used as payload and timestamp
 */
@Override
public void emitEvent(SourceFunction.SourceContext<Tuple2<Long, IntType>> ctx, int eventSequenceNo) {
	final IntType payload = new IntType(eventSequenceNo);

	long key = 0;
	while (key < keyUniverseSize) {
		final Tuple2<Long, IntType> event = new Tuple2<>(key, payload);
		ctx.collectWithTimestamp(event, eventSequenceNo);
		key++;
	}

	ctx.emitWatermark(new Watermark(eventSequenceNo - watermarkTrailing));
}
 
Example 7
Source File: SourceFunctionUtil.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the given source function against a {@code CollectingSourceContext} and returns the
 * list that was handed to that context.
 *
 * @param sourceFunction the source function to run
 * @return the list passed to the collecting context
 * @throws RuntimeException wrapping any exception thrown while creating the context or
 *         running the source
 */
private static <T extends Serializable> List<T> runNonRichSourceFunction(SourceFunction<T> sourceFunction) {
	final List<T> collected = new ArrayList<>();
	try {
		// A fresh Object is passed as the context's first argument.
		sourceFunction.run(new CollectingSourceContext<T>(new Object(), collected));
	} catch (Exception failure) {
		throw new RuntimeException("Cannot invoke source.", failure);
	}
	return collected;
}
 
Example 8
Source File: TestableKinesisDataFetcher.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a testable fetcher whose collaborators are supplied by the test.
 *
 * <p>The superclass receives the checkpoint lock of {@code sourceContext}, a runtime context
 * obtained from {@code getMockedRuntimeContext(fakeTotalCountOfSubtasks, fakeIndexOfThisSubtask)},
 * {@code DEFAULT_SHARD_ASSIGNER}, two {@code null} arguments, the three state objects passed
 * in by the test, and a factory lambda that ignores its argument and always returns
 * {@code fakeKinesis}. Afterwards three {@code OneShotLatch} fields are initialised and
 * {@code running} is set to {@code true}.
 */
public TestableKinesisDataFetcher(
		List<String> fakeStreams,
		SourceFunction.SourceContext<T> sourceContext,
		Properties fakeConfiguration,
		KinesisDeserializationSchema<T> deserializationSchema,
		int fakeTotalCountOfSubtasks,
		int fakeIndexOfThisSubtask,
		AtomicReference<Throwable> thrownErrorUnderTest,
		LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest,
		HashMap<String, String> subscribedStreamsToLastDiscoveredShardIdsStateUnderTest,
		KinesisProxyInterface fakeKinesis) {
	super(
		fakeStreams,
		sourceContext,
		sourceContext.getCheckpointLock(),
		getMockedRuntimeContext(fakeTotalCountOfSubtasks, fakeIndexOfThisSubtask),
		fakeConfiguration,
		deserializationSchema,
		DEFAULT_SHARD_ASSIGNER,
		// NOTE(review): the two nulls are presumably optional watermark-related
		// arguments of the superclass constructor — confirm against its signature.
		null,
		null,
		thrownErrorUnderTest,
		subscribedShardsStateUnderTest,
		subscribedStreamsToLastDiscoveredShardIdsStateUnderTest,
		// The factory ignores the properties and always yields the injected fake.
		(properties) -> fakeKinesis);

	// One latch per field; each starts out as a fresh OneShotLatch.
	this.runWaiter = new OneShotLatch();
	this.initialDiscoveryWaiter = new OneShotLatch();
	this.shutdownWaiter = new OneShotLatch();

	this.running = true;
}
 
Example 9
Source File: TestableKinesisDataFetcherForShardConsumerException.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the fetcher by forwarding every argument, unchanged and in declaration order, to
 * the superclass constructor.
 */
public TestableKinesisDataFetcherForShardConsumerException(final List<String> fakeStreams,
		final SourceFunction.SourceContext<T> sourceContext,
		final Properties fakeConfiguration,
		final KinesisDeserializationSchema<T> deserializationSchema,
		final int fakeTotalCountOfSubtasks,
		final int fakeIndexOfThisSubtask,
		final AtomicReference<Throwable> thrownErrorUnderTest,
		final LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest,
		final HashMap<String, String> subscribedStreamsToLastDiscoveredShardIdsStateUnderTest,
		final KinesisProxyInterface fakeKinesis) {
	super(
		fakeStreams,
		sourceContext,
		fakeConfiguration,
		deserializationSchema,
		fakeTotalCountOfSubtasks,
		fakeIndexOfThisSubtask,
		thrownErrorUnderTest,
		subscribedShardsStateUnderTest,
		subscribedStreamsToLastDiscoveredShardIdsStateUnderTest,
		fakeKinesis);
}
 
Example 10
Source File: SourceFunctionUtil.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Invokes {@code sourceFunction.run} with a {@code CollectingSourceContext} built around a
 * new list, then returns that list.
 *
 * @param sourceFunction the source function to invoke
 * @return the list given to the collecting context
 * @throws RuntimeException with the original exception as cause if creating the context or
 *         running the source fails
 */
private static <T extends Serializable> List<T> runNonRichSourceFunction(SourceFunction<T> sourceFunction) {
	final List<T> results = new ArrayList<>();
	try {
		final Object lock = new Object();
		final SourceFunction.SourceContext<T> collectingContext = new CollectingSourceContext<T>(lock, results);
		sourceFunction.run(collectingContext);
	} catch (Exception cause) {
		throw new RuntimeException("Cannot invoke source.", cause);
	}
	return results;
}
 
Example 11
Source File: StreamSourceContexts.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext}
 * that matches the given {@link TimeCharacteristic}:
 * <ul>
 *     <li>{@link TimeCharacteristic#EventTime} yields a {@code ManualWatermarkContext}</li>
 *     <li>{@link TimeCharacteristic#IngestionTime} yields an {@code AutomaticWatermarkContext}</li>
 *     <li>{@link TimeCharacteristic#ProcessingTime} yields a {@code NonTimestampContext}</li>
 * </ul>
 *
 * <p>{@code watermarkInterval} is only passed to the ingestion-time context; the
 * processing-time context receives only {@code checkpointLock} and {@code output}.
 *
 * @throws IllegalArgumentException if {@code timeCharacteristic} is none of the three values above
 */
public static <OUT> SourceFunction.SourceContext<OUT> getSourceContext(
		TimeCharacteristic timeCharacteristic,
		ProcessingTimeService processingTimeService,
		Object checkpointLock,
		StreamStatusMaintainer streamStatusMaintainer,
		Output<StreamRecord<OUT>> output,
		long watermarkInterval,
		long idleTimeout) {

	switch (timeCharacteristic) {
		case EventTime:
			return new ManualWatermarkContext<>(
				output,
				processingTimeService,
				checkpointLock,
				streamStatusMaintainer,
				idleTimeout);

		case IngestionTime:
			return new AutomaticWatermarkContext<>(
				output,
				watermarkInterval,
				processingTimeService,
				checkpointLock,
				streamStatusMaintainer,
				idleTimeout);

		case ProcessingTime:
			return new NonTimestampContext<>(checkpointLock, output);

		default:
			throw new IllegalArgumentException(String.valueOf(timeCharacteristic));
	}
}
 
Example 12
Source File: KafkaFetcher.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
public KafkaFetcher(
	SourceFunction.SourceContext<T> sourceContext,
	Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
	SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
	SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
	ProcessingTimeService processingTimeProvider,
	long autoWatermarkInterval,
	ClassLoader userCodeClassLoader,
	String taskNameWithSubtasks,
	KafkaDeserializationSchema<T> deserializer,
	Properties kafkaProperties,
	long pollTimeout,
	MetricGroup subtaskMetricGroup,
	MetricGroup consumerMetricGroup,
	boolean useMetrics) throws Exception {
	super(
		sourceContext,
		assignedPartitionsWithInitialOffsets,
		watermarksPeriodic,
		watermarksPunctuated,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		consumerMetricGroup,
		useMetrics);

	this.deserializer = deserializer;
	this.handover = new Handover();

	this.consumerThread = new KafkaConsumerThread(
		LOG,
		handover,
		kafkaProperties,
		unassignedPartitionsQueue,
		getFetcherName() + " for " + taskNameWithSubtasks,
		pollTimeout,
		useMetrics,
		consumerMetricGroup,
		subtaskMetricGroup);
}
 
Example 13
Source File: ParallelReader.java    From alibaba-flink-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given reader, watermark interval and source context in the corresponding fields.
 *
 * @param provider the reader stored in {@code provider}
 * @param watermarkInterval the value stored in {@code watermarkInterval}
 * @param ctx the source context stored in {@code ctx}
 */
public WatermarkEmitter(
		ParallelReader provider,
		long watermarkInterval,
		SourceFunction.SourceContext<OUT> ctx) {
	this.provider = provider;
	this.watermarkInterval = watermarkInterval;
	this.ctx = ctx;
}
 
Example 14
Source File: ParallelReader.java    From alibaba-flink-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Polls one record from the given runner and, if there is one, emits it while holding the
 * checkpoint lock.
 *
 * <p>The polled tuple's {@code f0} is emitted with {@code f1} as its timestamp, and
 * {@code tpsMetric} is marked. {@code f2} is reported to {@code partitionLatency} only when
 * it is positive.
 *
 * @param ctx context the record is emitted to; also supplies the checkpoint lock
 * @param readerRunner runner the record is polled from
 */
private void processRecord(SourceFunction.SourceContext<OUT> ctx, ReaderRunner<OUT, CURSOR> readerRunner) {
	synchronized (ctx.getCheckpointLock()) {
		final Tuple3<OUT, Long, Long> polled = readerRunner.pollRecord();
		if (polled == null) {
			// Nothing available right now.
			return;
		}

		ctx.collectWithTimestamp(polled.f0, polled.f1);
		tpsMetric.markEvent();

		if (polled.f2 > 0) {
			partitionLatency.update(polled.f2);
		}
	}
}
 
Example 15
Source File: FromElementsFunctionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a {@code FromElementsFunction} restored from a snapshot, combined with the
 * elements collected before that snapshot, yields the complete input.
 *
 * <p>Flow: run the source on a background thread, snapshot the operator while holding the
 * context's checkpoint lock and copy the elements collected so far, cancel the source,
 * restore a second source from the snapshot, let it append to the copied elements, and
 * assert that the combined list equals the original data.
 */
@Test
public void testCheckpointAndRestore() {
	try {
		final int numElements = 10000;

		List<Integer> data = new ArrayList<Integer>(numElements);
		List<Integer> result = new ArrayList<Integer>(numElements);

		// input is simply 0 .. numElements - 1
		for (int i = 0; i < numElements; i++) {
			data.add(i);
		}

		final FromElementsFunction<Integer> source = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
		StreamSource<Integer, FromElementsFunction<Integer>> src = new StreamSource<>(source);
		AbstractStreamOperatorTestHarness<Integer> testHarness =
			new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
		testHarness.open();

		// NOTE(review): the 2L argument is presumably a per-element delay that keeps the
		// source running long enough to be snapshotted mid-stream — confirm against
		// ListSourceContext.
		final SourceFunction.SourceContext<Integer> ctx = new ListSourceContext<Integer>(result, 2L);

		// single-element array so the anonymous Thread below can report a failure
		final Throwable[] error = new Throwable[1];

		// run the source asynchronously
		Thread runner = new Thread() {
			@Override
			public void run() {
				try {
					source.run(ctx);
				}
				catch (Throwable t) {
					error[0] = t;
				}
			}
		};
		runner.start();

		// wait for a bit
		Thread.sleep(1000);

		// make a checkpoint
		// The snapshot and the copy of the collected elements are taken under the same
		// checkpoint lock, so both are made within one critical section.
		List<Integer> checkpointData = new ArrayList<>(numElements);
		OperatorSubtaskState handles = null;
		synchronized (ctx.getCheckpointLock()) {
			// NOTE(review): 566 looks like an arbitrary checkpoint id — confirm.
			handles = testHarness.snapshot(566, System.currentTimeMillis());
			checkpointData.addAll(result);
		}

		// cancel the source
		source.cancel();
		runner.join();

		// check for errors
		if (error[0] != null) {
			System.err.println("Error in asynchronous source runner");
			error[0].printStackTrace();
			fail("Error in asynchronous source runner");
		}

		// build a second source over the same data and restore it from the snapshot
		final FromElementsFunction<Integer> sourceCopy = new FromElementsFunction<>(IntSerializer.INSTANCE, data);
		StreamSource<Integer, FromElementsFunction<Integer>> srcCopy = new StreamSource<>(sourceCopy);
		AbstractStreamOperatorTestHarness<Integer> testHarnessCopy =
			new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
		testHarnessCopy.setup();
		testHarnessCopy.initializeState(handles);
		testHarnessCopy.open();

		// recovery run
		// The restored source appends to the elements that were collected before the snapshot.
		SourceFunction.SourceContext<Integer> newCtx = new ListSourceContext<>(checkpointData);

		sourceCopy.run(newCtx);

		// pre-snapshot elements plus post-restore elements must equal the full input
		assertEquals(data, checkpointData);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 16
Source File: FlinkKafkaConsumerBaseTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the testing fetcher by forwarding every argument, unchanged and in declaration
 * order, to the superclass constructor.
 *
 * @throws Exception propagated from the superclass constructor
 */
protected TestingFetcher(
		SourceFunction.SourceContext<T> sourceContext,
		Map<KafkaTopicPartition, Long> seedPartitionsWithInitialOffsets,
		SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic,
		SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated,
		ProcessingTimeService processingTimeProvider,
		long autoWatermarkInterval,
		ClassLoader userCodeClassLoader,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {
	super(
		sourceContext,
		seedPartitionsWithInitialOffsets,
		watermarksPeriodic,
		watermarksPunctuated,
		processingTimeProvider,
		autoWatermarkInterval,
		userCodeClassLoader,
		consumerMetricGroup,
		useMetrics);
}
 
Example 17
Source File: SourceSinkDataTestKit.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Emits the tuples {@code (i, DATA_PREFIX + i)} for {@code i} counting up from 0, stopping
 * once {@code i} reaches {@code NUM_ELEMENTS} or {@code running} is found to be false.
 *
 * @param ctx context the tuples are emitted to
 */
@Override
public void run(SourceFunction.SourceContext<Tuple2<Integer, String>> ctx) throws Exception {
	int index = 0;
	while (index < NUM_ELEMENTS && running) {
		ctx.collect(Tuple2.of(index, DATA_PREFIX + index));
		index++;
	}
}
 
Example 18
Source File: ParallelReader.java    From alibaba-flink-connectors with Apache License 2.0 4 votes vote down vote up
/**
 * Main read loop: polls all reader runners round-robin until {@code stop} is set or no
 * runner is left, then marks the source as temporarily idle and waits.
 *
 * <p>Per pass over the runners:
 * <ul>
 *     <li>a runner that is stopped with a non-null cause aborts the loop with a
 *         {@link RuntimeException} wrapping that cause;</li>
 *     <li>an exhausted runner has its progress recorded in
 *         {@code exitedReadRunnerSplitCursor} and is removed;</li>
 *     <li>a runner that has a record gets one {@code processRecord} call, which is timed
 *         into {@code processLatency} when tracing is enabled and {@code inputCount} is a
 *         multiple of {@code sampleInterval}.</li>
 * </ul>
 * If no runner had a record during a pass, the thread sleeps for {@code idleInterval}.
 *
 * @param ctx the ctx
 * @throws Exception the exception
 */
protected void runImpl(SourceFunction.SourceContext<OUT> ctx) throws Exception {
	while (!(stop || readerRunners.isEmpty())) {
		final Iterator<ReaderRunner<OUT, CURSOR>> runners = readerRunners.iterator();
		boolean sawRecord = false;

		// Gather record from all splits
		while (runners.hasNext()) {
			final ReaderRunner<OUT, CURSOR> runner = runners.next();

			// A failed SplitReader cannot be recovered here; surface the failure to the caller.
			if (runner.isStopped() && runner.getCause() != null) {
				LOG.error(String.format(
						"SplitReader for split[%d][%s] failed, cause: %s",
						runner.getSplit().getSplitNumber(),
						runner.getSplit().toString(),
						runner.getCause()));
				throw new RuntimeException(runner.getCause());
			}

			// An exhausted SplitReader is recorded and dropped from the rotation.
			if (runner.isExhausted()) {
				LOG.info(String.format(
						"SplitReader for split[%d][%s] finished",
						runner.getSplit().getSplitNumber(),
						runner.getSplit().toString()));
				exitedReadRunnerSplitCursor.put(runner.getSplit(), runner.getProgress());
				runners.remove();
				continue;
			}

			if (!runner.hasRecord()) {
				continue;
			}

			sawRecord = true;
			inputCount++;
			if (tracingMetricEnabled && inputCount % sampleInterval == 0) {
				// Sampled record: measure how long processing takes.
				final long startNanos = System.nanoTime();
				processRecord(ctx, runner);
				processLatency.update(System.nanoTime() - startNanos);
			} else {
				processRecord(ctx, runner);
			}
		}

		// If all pipes have no data, sleep for a while
		if (!sawRecord) {
			Thread.sleep(idleInterval);
		}
	}

	ctx.markAsTemporarilyIdle();
	LOG.info(String.format("This subTask [%d]/[%d] has finished, idle...", context.getIndexOfThisSubtask(),
						context.getNumberOfParallelSubtasks()));

	// Avoid the finish of this subtask causing the cp can not do normally
	while (!(stop || exitAfterReadFinished)) {
		Thread.sleep(1000);
	}
}
 
Example 19
Source File: StreamSourceContextIdleDetectionTests.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Test scenario (idleTimeout = 100):
 * (1) Start from 0 as initial time.
 * (2) As soon as time reaches 100, status should have been toggled to IDLE.
 * (3) After some arbitrary time (until 300), the status should remain IDLE.
 * (4) Emit a record at 310. Status should become ACTIVE. This should fire an idleness detection at 410.
 * (5) Emit another record at 320 (which is before the next check). This should make the idleness check pass.
 * (6) Advance time to 410 and trigger idleness detection.
 *     The status should still be ACTIVE due to step (5). Another idleness detection should be fired at 510.
 * (7) Advance time to 510 and trigger idleness detection. Since no records were collected in-between the two
 *     idleness detections, status should have been toggled back to IDLE.
 *
 * <p>Inline comments will refer to the corresponding tested steps in the scenario.
 *
 * <p>The context call exercised in steps (4) and (5) is selected by the {@code testMethod} field:
 * {@code collect}, {@code collectWithTimestamp} or {@code emitWatermark}.
 */
@Test
public void testManualWatermarkContext() throws Exception {
	long idleTimeout = 100;

	// corresponds to step (1) of scenario: the clock starts at 0
	long initialTime = 0;
	TestProcessingTimeService processingTimeService = new TestProcessingTimeService();
	processingTimeService.setCurrentTime(initialTime);

	final List<StreamElement> output = new ArrayList<>();

	MockStreamStatusMaintainer mockStreamStatusMaintainer = new MockStreamStatusMaintainer();

	// event-time context with watermark interval 0 and the idle timeout under test
	SourceFunction.SourceContext<String> context = StreamSourceContexts.getSourceContext(
		TimeCharacteristic.EventTime,
		processingTimeService,
		new Object(),
		mockStreamStatusMaintainer,
		new CollectorOutput<String>(output),
		0,
		idleTimeout);

	// -------------------------- begin test scenario --------------------------

	// corresponds to step (2) of scenario (please see method-level Javadoc comment)
	// time = 100
	processingTimeService.setCurrentTime(initialTime + idleTimeout);
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isIdle());

	// corresponds to step (3) of scenario (please see method-level Javadoc comment)
	// time = 200, then 300
	processingTimeService.setCurrentTime(initialTime + 2 * idleTimeout);
	processingTimeService.setCurrentTime(initialTime + 3 * idleTimeout);
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isIdle());

	// corresponds to step (4) of scenario (please see method-level Javadoc comment)
	// time = 310
	processingTimeService.setCurrentTime(initialTime + 3 * idleTimeout + idleTimeout / 10);
	switch (testMethod) {
		case COLLECT:
			context.collect("msg");
			break;
		case COLLECT_WITH_TIMESTAMP:
			context.collectWithTimestamp("msg", processingTimeService.getCurrentProcessingTime());
			break;
		case EMIT_WATERMARK:
			context.emitWatermark(new Watermark(processingTimeService.getCurrentProcessingTime()));
			break;
	}
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isActive());

	// corresponds to step (5) of scenario (please see method-level Javadoc comment)
	// time = 320
	processingTimeService.setCurrentTime(initialTime + 3 * idleTimeout + 2 * idleTimeout / 10);
	switch (testMethod) {
		case COLLECT:
			context.collect("msg");
			break;
		case COLLECT_WITH_TIMESTAMP:
			context.collectWithTimestamp("msg", processingTimeService.getCurrentProcessingTime());
			break;
		case EMIT_WATERMARK:
			context.emitWatermark(new Watermark(processingTimeService.getCurrentProcessingTime()));
			break;
	}
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isActive());

	// corresponds to step (6) of scenario (please see method-level Javadoc comment)
	// time = 410
	processingTimeService.setCurrentTime(initialTime + 4 * idleTimeout + idleTimeout / 10);
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isActive());

	// corresponds to step (7) of scenario (please see method-level Javadoc comment)
	// time = 510
	processingTimeService.setCurrentTime(initialTime + 5 * idleTimeout + idleTimeout / 10);
	assertTrue(mockStreamStatusMaintainer.getStreamStatus().isIdle());
}
 
Example 20
Source File: ParallelReader.java    From alibaba-flink-connectors with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code WatermarkEmitter} for this reader and submits it to {@code readerPool},
 * but only when {@code watermarkInterval} is positive; otherwise does nothing.
 *
 * @param ctx source context handed to the emitter
 */
private void runWatermarkEmitter(SourceFunction.SourceContext<OUT> ctx) {
	if (watermarkInterval <= 0) {
		return;
	}

	watermarkEmitter = new WatermarkEmitter<>(this, watermarkInterval, ctx);
	readerPool.submit(watermarkEmitter);
}