Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setMaxParallelism()

The following examples show how to use org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setMaxParallelism(). Each example is taken from an open source project; the originating project, source file, and license are noted above the code.
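Before the project examples, here is a minimal, self-contained sketch of typical setMaxParallelism() usage (this snippet is not from any of the projects below; the class name and values are illustrative). The maximum parallelism determines the number of key groups used to distribute keyed state, and therefore the highest parallelism the job can later be rescaled to, so it must be at least the configured parallelism and at most 32768.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MaxParallelismExample {

	public static void main(String[] args) throws Exception {
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Parallelism used for this execution of the job.
		env.setParallelism(4);

		// Upper bound for later rescaling; also fixes the number of key groups for keyed state.
		// Must be >= the configured parallelism and <= 32768.
		env.setMaxParallelism(128);

		env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2), Tuple2.of("a", 3))
			.keyBy(0)
			.sum(1)
			.print();

		env.execute("setMaxParallelism() example");
	}
}

Several of the examples below also call setMaxParallelism(1) directly on an individual operator, which overrides the environment-wide setting for that operator only.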
Example 1
Source File: ExecutionContext.java    From flink with Apache License 2.0
private StreamExecutionEnvironment createStreamExecutionEnvironment() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(mergedEnv.getExecution().getRestartStrategy());
	env.setParallelism(mergedEnv.getExecution().getParallelism());
	env.setMaxParallelism(mergedEnv.getExecution().getMaxParallelism());
	env.setStreamTimeCharacteristic(mergedEnv.getExecution().getTimeCharacteristic());
	if (env.getStreamTimeCharacteristic() == TimeCharacteristic.EventTime) {
		env.getConfig().setAutoWatermarkInterval(mergedEnv.getExecution().getPeriodicWatermarksInterval());
	}
	return env;
}
 
Example 2
Source File: RegionFailoverITCase.java    From flink with Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions; this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// Another stream graph, completely disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
			.name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example 3
Source File: RegionFailoverITCase.java    From flink with Apache License 2.0
private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();
		env.getConfig().disableSysoutLogging();

		// Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions; this stream graph consists of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// Another stream graph, completely disconnected from the one above.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
			.name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}
 
Example 4
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java    From flink with Apache License 2.0
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (assigning each file to the correct partition) and
 * reinterprets those sources as keyed, because we know they were partitioned by the keyBy in the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

	final int maxParallelism = 8;
	final int numEventsPerInstance = 100;
	final int parallelism = 3;
	final int numTotalEvents = numEventsPerInstance * parallelism;
	final int numUniqueKeys = 100;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	env.setMaxParallelism(maxParallelism);
	env.setParallelism(parallelism);
	env.enableCheckpointing(100);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

	final List<File> partitionFiles = new ArrayList<>(parallelism);
	for (int i = 0; i < parallelism; ++i) {
		File partitionFile = temporaryFolder.newFile();
		partitionFiles.add(i, partitionFile);
	}

	env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
		.keyBy(0)
		.addSink(new ToPartitionFileSink(partitionFiles));

	env.execute();

	DataStreamUtils.reinterpretAsKeyedStream(
		env.addSource(new FromPartitionFileSource(partitionFiles)),
		(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
		TypeInformation.of(Integer.class))
		.timeWindow(Time.seconds(1)) // test that timers and aggregated state also work as expected
		.reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
			new Tuple2<>(value1.f0, value1.f1 + value2.f1))
		.addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

	env.execute();
}
 
Example 5
Source File: ExecutionContext.java    From Flink-CEPplus with Apache License 2.0
private StreamExecutionEnvironment createStreamExecutionEnvironment() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(mergedEnv.getExecution().getRestartStrategy());
	env.setParallelism(mergedEnv.getExecution().getParallelism());
	env.setMaxParallelism(mergedEnv.getExecution().getMaxParallelism());
	env.setStreamTimeCharacteristic(mergedEnv.getExecution().getTimeCharacteristic());
	if (env.getStreamTimeCharacteristic() == TimeCharacteristic.EventTime) {
		env.getConfig().setAutoWatermarkInterval(mergedEnv.getExecution().getPeriodicWatermarksInterval());
	}
	return env;
}
 
Example 6
Source File: ReinterpretDataStreamAsKeyedStreamITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test checks that reinterpreting a data stream as a keyed stream works as expected. The test consists of
 * two jobs. The first job materializes a keyBy into files, one file per partition. The second job opens the
 * files created by the first job as sources (assigning each file to the correct partition) and
 * reinterprets those sources as keyed, because we know they were partitioned by the keyBy in the first job.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

	final int maxParallelism = 8;
	final int numEventsPerInstance = 100;
	final int parallelism = 3;
	final int numTotalEvents = numEventsPerInstance * parallelism;
	final int numUniqueKeys = 100;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	env.setMaxParallelism(maxParallelism);
	env.setParallelism(parallelism);
	env.enableCheckpointing(100);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

	final List<File> partitionFiles = new ArrayList<>(parallelism);
	for (int i = 0; i < parallelism; ++i) {
		File partitionFile = temporaryFolder.newFile();
		partitionFiles.add(i, partitionFile);
	}

	env.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
		.keyBy(0)
		.addSink(new ToPartitionFileSink(partitionFiles));

	env.execute();

	DataStreamUtils.reinterpretAsKeyedStream(
		env.addSource(new FromPartitionFileSource(partitionFiles)),
		(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
		TypeInformation.of(Integer.class))
		.timeWindow(Time.seconds(1)) // test that timers and aggregated state also work as expected
		.reduce((ReduceFunction<Tuple2<Integer, Integer>>) (value1, value2) ->
			new Tuple2<>(value1.f0, value1.f1 + value2.f1))
		.addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

	env.execute();
}
 
Example 7
Source File: EventTimeWindowCheckpointingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setMaxParallelism(2 * PARALLELISM);
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						int sum = 0;
						long key = -1;

						for (Tuple2<Long, IntType> value : values) {
							sum += value.f1.value;
							key = value.f0;
						}
						final Tuple4<Long, Long, Long, IntType> output =
							new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
						out.collect(output);
					}
				})
			.addSink(new ValidatingSink<>(
				new SinkValidatorUpdateFun(numElementsPerKey),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 8
Source File: LegacyStatefulJobSavepointMigrationITCase.java    From flink with Apache License 2.0
/**
 * Manually run this to write binary snapshot data.
 */
@Test
@Ignore
public void writeSavepoint() throws Exception {

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (flinkGenerateSavepointBackendType) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(4);
	env.setMaxParallelism(4);

	env
		.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
		.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
		.keyBy(0)
		.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
		.keyBy(0)
		.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
		.keyBy(0)
		.transform(
			"custom_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new TimelyStatefulOperator()).uid("TimelyStatefulOperator")
		.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());

	executeAndSavepoint(
		env,
		"src/test/resources/" + getSavepointPath(flinkGenerateSavepointVersion, flinkGenerateSavepointBackendType),
		new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
 
Example 9
Source File: AbstractFlinkClient.java    From alchemy with Apache License 2.0
private void setBaseInfo(StreamExecutionEnvironment execEnv, SqlSubmitFlinkRequest request) {
    execEnv.setParallelism(request.getParallelism());
    if (request.getMaxParallelism() != null) {
        execEnv.setMaxParallelism(request.getMaxParallelism());
    }
    if (org.apache.commons.lang3.StringUtils.isNotEmpty(request.getTimeCharacteristic())) {
        execEnv.setStreamTimeCharacteristic(TimeCharacteristic.valueOf(request.getTimeCharacteristic()));
    } else {
        execEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    }
    if (request.getBufferTimeout() != null) {
        execEnv.setBufferTimeout(request.getBufferTimeout());
    }
    if (org.apache.commons.lang3.StringUtils.isNotEmpty(request.getRestartStrategies())) {
        String strategies = request.getRestartStrategies();
        com.dfire.platform.alchemy.common.RestartStrategies restartStrategies
            = com.dfire.platform.alchemy.common.RestartStrategies.valueOf(strategies.toUpperCase());
        Map<String, Object> restartParams = request.getRestartParams();
        switch (restartStrategies) {
            case NO:
                execEnv.setRestartStrategy(RestartStrategies.noRestart());
                break;
            case FIXED:
                int restartAttempts = restartParams == null ? Constants.RESTART_ATTEMPTS
                    : Integer.parseInt(restartParams.get(CONFIG_KEY_RESTART_ATTEMPTS).toString());
                long delayBetweenAttempts = restartParams == null ? Constants.DELAY_BETWEEN_ATTEMPTS
                    : Long.parseLong(restartParams.get(CONFIG_KEY_DELAY_BETWEEN_ATTEMPTS).toString());
                execEnv
                    .setRestartStrategy(RestartStrategies.fixedDelayRestart(restartAttempts, delayBetweenAttempts));
                break;
            case FAILURE:
                int failureRate = restartParams == null ? Constants.FAILURE_RATE
                    : Integer.parseInt(restartParams.get(CONFIG_KEY_FAILURE_RATE).toString());
                long failureInterval = restartParams == null ? Constants.FAILURE_INTERVAL
                    : Long.parseLong(restartParams.get(CONFIG_KEY_FAILURE_INTERVAL).toString());
                long delayInterval = restartParams == null ? Constants.DELAY_INTERVAL
                    : Long.parseLong(restartParams.get(CONFIG_KEY_DELAY_INTERVAL).toString());
                execEnv.setRestartStrategy(RestartStrategies.failureRateRestart(failureRate,
                    Time.of(failureInterval, TimeUnit.MILLISECONDS),
                    Time.of(delayInterval, TimeUnit.MILLISECONDS)));
                break;
            case FALLBACK:
                execEnv.setRestartStrategy(RestartStrategies.fallBackRestart());
                break;
            default:
        }
    }
    if (request.getCheckpointCfg() != null) {
        CheckpointConfig checkpointConfig = execEnv.getCheckpointConfig();
        BeanUtils.copyProperties(request.getCheckpointCfg(), checkpointConfig);
    }

}
 
Example 10
Source File: StatefulJobSavepointMigrationITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSavepoint() throws Exception {

	final int parallelism = 4;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setMaxParallelism(parallelism);

	SourceFunction<Tuple2<Long, Long>> nonParallelSource;
	SourceFunction<Tuple2<Long, Long>> parallelSource;
	RichFlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> flatMap;
	OneInputStreamOperator<Tuple2<Long, Long>, Tuple2<Long, Long>> timelyOperator;

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
		parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
		flatMap = new CheckpointingKeyedStateFlatMap();
		timelyOperator = new CheckpointingTimelyStatefulOperator();
	} else if (executionMode == ExecutionMode.VERIFY_SAVEPOINT) {
		nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
		parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
		flatMap = new CheckingKeyedStateFlatMap();
		timelyOperator = new CheckingTimelyStatefulOperator();
	} else {
		throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
	}

	env
		.addSource(nonParallelSource).uid("CheckpointingSource1")
		.keyBy(0)
		.flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap1")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			timelyOperator).uid("CheckpointingTimelyStatefulOperator1")
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	env
		.addSource(parallelSource).uid("CheckpointingSource2")
		.keyBy(0)
		.flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap2")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			timelyOperator).uid("CheckpointingTimelyStatefulOperator2")
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		executeAndSavepoint(
			env,
			"src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
	} else {
		restoreAndExecute(
			env,
			getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
			new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
			new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, parallelism),
			new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
	}
}
 
Example 11
Source File: TypeSerializerSnapshotMigrationITCase.java    From flink with Apache License 2.0
@Test
public void testSavepoint() throws Exception {
	final int parallelism = 1;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setMaxParallelism(parallelism);

	SourceFunction<Tuple2<Long, Long>> nonParallelSource =
		new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);

	env.addSource(nonParallelSource)
		.keyBy(0)
		.map(new TestMapFunction())
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		executeAndSavepoint(
			env,
			"src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	} else {
		restoreAndExecute(
			env,
			getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	}
}
 
Example 12
Source File: LegacyStatefulJobSavepointMigrationITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSavepointRestore() throws Exception {

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(4);
	env.setMaxParallelism(4);

	env
		.addSource(new CheckingRestoringSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
		.flatMap(new CheckingRestoringFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
		.keyBy(0)
		.flatMap(new CheckingRestoringFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
		.keyBy(0)
		.flatMap(new CheckingKeyedStateFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
		.keyBy(0)
		.transform(
			"custom_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckingRestoringUdfOperator(new CheckingRestoringFlatMapWithKeyedStateInOperator())).uid("LegacyCheckpointedOperator")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckingTimelyStatefulOperator()).uid("TimelyStatefulOperator")
		.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());

	restoreAndExecute(
		env,
		getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
		new Tuple2<>(CheckingRestoringSource.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
		new Tuple2<>(CheckingRestoringFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringFlatMapWithKeyedState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringUdfOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingRestoringFlatMapWithKeyedStateInOperator.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS),
		new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
 
Example 13
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
private static void setupParallelism(final StreamExecutionEnvironment env, final ParameterTool pt) {
	env.setParallelism(pt.getInt(ENVIRONMENT_PARALLELISM.key(), ENVIRONMENT_PARALLELISM.defaultValue()));
	env.setMaxParallelism(pt.getInt(ENVIRONMENT_MAX_PARALLELISM.key(), ENVIRONMENT_MAX_PARALLELISM.defaultValue()));
}
 
Example 14
Source File: EventTimeWindowCheckpointingITCase.java    From flink with Apache License 2.0
@Test
public void testSlidingTimeWindow() {
	final int numElementsPerKey = numElementsPerKey();
	final int windowSize = windowSize();
	final int windowSlide = windowSlide();
	final int numKeys = numKeys();

	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setMaxParallelism(2 * PARALLELISM);
		env.setParallelism(PARALLELISM);
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		env.enableCheckpointing(100);
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		env.getConfig().disableSysoutLogging();
		env.setStateBackend(this.stateBackend);
		env.getConfig().setUseSnapshotCompression(true);

		env
				.addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
				.apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

					private boolean open = false;

					@Override
					public void open(Configuration parameters) {
						assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
						open = true;
					}

					@Override
					public void apply(
							Tuple tuple,
							TimeWindow window,
							Iterable<Tuple2<Long, IntType>> values,
							Collector<Tuple4<Long, Long, Long, IntType>> out) {

						// validate that the function has been opened properly
						assertTrue(open);

						int sum = 0;
						long key = -1;

						for (Tuple2<Long, IntType> value : values) {
							sum += value.f1.value;
							key = value.f0;
						}
						final Tuple4<Long, Long, Long, IntType> output =
							new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
						out.collect(output);
					}
				})
			.addSink(new ValidatingSink<>(
				new SinkValidatorUpdateFun(numElementsPerKey),
				new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

		env.execute("Tumbling Window Test");
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 15
Source File: LegacyStatefulJobSavepointMigrationITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Manually run this to write binary snapshot data.
 */
@Test
@Ignore
public void writeSavepoint() throws Exception {

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (flinkGenerateSavepointBackendType) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(4);
	env.setMaxParallelism(4);

	env
		.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
		.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
		.keyBy(0)
		.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
		.keyBy(0)
		.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
		.keyBy(0)
		.transform(
			"custom_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
		.keyBy(0)
		.transform(
			"timely_stateful_operator",
			new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
			new TimelyStatefulOperator()).uid("TimelyStatefulOperator")
		.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());

	executeAndSavepoint(
		env,
		"src/test/resources/" + getSavepointPath(flinkGenerateSavepointVersion, flinkGenerateSavepointBackendType),
		new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
 
Example 16
Source File: TypeSerializerSnapshotMigrationITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSavepoint() throws Exception {
	final int parallelism = 1;

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	switch (testStateBackend) {
		case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
			env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
			break;
		case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
			env.setStateBackend(new MemoryStateBackend());
			break;
		default:
			throw new UnsupportedOperationException();
	}

	env.enableCheckpointing(500);
	env.setParallelism(parallelism);
	env.setMaxParallelism(parallelism);

	SourceFunction<Tuple2<Long, Long>> nonParallelSource =
		new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);

	env.addSource(nonParallelSource)
		.keyBy(0)
		.map(new TestMapFunction())
		.addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

	if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
		executeAndSavepoint(
			env,
			"src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	} else {
		restoreAndExecute(
			env,
			getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
			new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
	}
}
 
Example 17
Source File: BenchmarkJob.java    From scotty-window-processor with Apache License 2.0
public BenchmarkJob(List<Window> assigner, StreamExecutionEnvironment env, final long runtime,
					final int throughput, final List<Tuple2<Long, Long>> gaps) {


	Map<String, String> configMap = new HashMap<>();
	ParameterTool parameters = ParameterTool.fromMap(configMap);

	env.getConfig().setGlobalJobParameters(parameters);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);
	env.setMaxParallelism(1);


	KeyedScottyWindowOperator<Tuple, Tuple4<String, Integer, Long, Long>, Tuple4<String, Integer, Long, Long>> windowOperator =
			new KeyedScottyWindowOperator<>(new SumAggregation());

	for(Window w: assigner){
		windowOperator.addWindow(w);
	}


	DataStream<Tuple4<String, Integer, Long, Long>> messageStream = env
		.addSource(new de.tub.dima.scotty.flinkBenchmark.LoadGeneratorSource(runtime, throughput,  gaps));

	messageStream.flatMap(new de.tub.dima.scotty.flinkBenchmark.ThroughputLogger<>(200, throughput));



	final SingleOutputStreamOperator<Tuple4<String, Integer, Long, Long>> timestampsAndWatermarks = messageStream
		.assignTimestampsAndWatermarks(new TimestampsAndWatermarks());



	timestampsAndWatermarks
			.keyBy(0)
			.process(windowOperator)
			.addSink(new SinkFunction() {

				@Override
				public void invoke(final Object value) throws Exception {
					//System.out.println(value);
				}
			});

	try {
		env.execute();

	} catch (Exception e) {
		e.printStackTrace();
	}

}