org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory Java Examples

The following examples show how to use org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory. You can vote up the examples you find useful and vote down the ones you don't, and you can open the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: SchedulerNGFactoryFactory.java (from the flink project, Apache License 2.0; 6 votes)
/**
 * Picks the {@link SchedulerNGFactory} named by the {@link JobManagerOptions#SCHEDULER}
 * config option.
 *
 * <p>{@code "legacy"} yields a {@link LegacySchedulerFactory} built with the given restart
 * strategy factory; {@code "ng"} yields a {@link DefaultSchedulerFactory}.
 *
 * @param configuration configuration the scheduler name is read from
 * @param restartStrategyFactory factory handed to the legacy scheduler factory
 * @return the scheduler factory matching the configured name
 * @throws IllegalArgumentException if the configured name is neither {@code "legacy"} nor {@code "ng"}
 */
static SchedulerNGFactory createSchedulerNGFactory(
		final Configuration configuration,
		final RestartStrategyFactory restartStrategyFactory) {

	final String configuredScheduler = configuration.getString(JobManagerOptions.SCHEDULER);

	switch (configuredScheduler) {
		case "ng":
			return new DefaultSchedulerFactory();

		case "legacy":
			return new LegacySchedulerFactory(restartStrategyFactory);

		default:
			// unknown name: fall through to the error below
			break;
	}

	final String message = String.format(
		"Illegal value [%s] for config option [%s]",
		configuredScheduler,
		JobManagerOptions.SCHEDULER.key());
	throw new IllegalArgumentException(message);
}
 
Example #2
Source File: JobManagerSharedServices.java (from the Flink-CEPplus project, Apache License 2.0; 5 votes)
/**
 * Stores the given services in this instance.
 *
 * <p>All arguments are mandatory and are validated with {@code checkNotNull}. This includes
 * {@code blobWriter}, which is declared {@code @Nonnull} and is therefore checked at
 * construction time like the other dependencies rather than failing later on first use.
 *
 * @param scheduledExecutorService scheduled executor to store
 * @param libraryCacheManager library cache manager to store
 * @param restartStrategyFactory restart strategy factory to store
 * @param stackTraceSampleCoordinator stack trace sample coordinator to store
 * @param backPressureStatsTracker back pressure stats tracker to store
 * @param blobWriter blob writer to store
 * @throws NullPointerException if any argument is {@code null}
 */
public JobManagerSharedServices(
		ScheduledExecutorService scheduledExecutorService,
		LibraryCacheManager libraryCacheManager,
		RestartStrategyFactory restartStrategyFactory,
		StackTraceSampleCoordinator stackTraceSampleCoordinator,
		BackPressureStatsTracker backPressureStatsTracker,
		@Nonnull BlobWriter blobWriter) {

	this.scheduledExecutorService = checkNotNull(scheduledExecutorService);
	this.libraryCacheManager = checkNotNull(libraryCacheManager);
	this.restartStrategyFactory = checkNotNull(restartStrategyFactory);
	this.stackTraceSampleCoordinator = checkNotNull(stackTraceSampleCoordinator);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	// @Nonnull is only a static-analysis hint; enforce it at runtime as well
	this.blobWriter = checkNotNull(blobWriter);
}
 
Example #3
Source File: JobMasterTest.java (from the Flink-CEPplus project, Apache License 2.0; 5 votes)
/**
 * Checks the default restart strategy of a checkpointing streaming job: if no
 * other restart strategy has been specified, a fixed delay strategy with
 * Integer.MAX_VALUE retries is expected to be instantiated.
 */
@Test
public void testAutomaticRestartingWhenCheckpointing() throws Exception {
	// savepoint the job graph is set up to restore from
	final long savepointId = 42L;
	final File savepointFile = createSavepoint(savepointId);
	final SavepointRestoreSettings restoreSettings =
		SavepointRestoreSettings.forPath(savepointFile.getAbsolutePath(), true);
	final JobGraph checkpointingJobGraph = createJobGraphWithCheckpointing(restoreSettings);

	// register a standalone checkpoint store and ID counter with the HA services
	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	final TestingCheckpointRecoveryFactory recoveryFactory =
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter());
	haServices.setCheckpointRecoveryFactory(recoveryFactory);

	final JobMaster jobMaster = createJobMaster(
		new Configuration(),
		checkpointingJobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder()
			.setRestartStrategyFactory(RestartStrategyFactory.createRestartStrategyFactory(configuration))
			.build());

	final RestartStrategy resolvedStrategy = jobMaster.getRestartStrategy();

	assertNotNull(resolvedStrategy);
	assertTrue(resolvedStrategy instanceof FixedDelayRestartStrategy);
}
 
Example #4
Source File: JobManagerSharedServices.java (from the flink project, Apache License 2.0; 5 votes)
/**
 * Stores the given services in this instance.
 *
 * <p>All arguments are mandatory and are validated with {@code checkNotNull}. This includes
 * {@code blobWriter}, which is declared {@code @Nonnull} and is therefore checked at
 * construction time like the other dependencies rather than failing later on first use.
 *
 * @param scheduledExecutorService scheduled executor to store
 * @param libraryCacheManager library cache manager to store
 * @param restartStrategyFactory restart strategy factory to store
 * @param stackTraceSampleCoordinator stack trace sample coordinator to store
 * @param backPressureStatsTracker back pressure stats tracker to store
 * @param blobWriter blob writer to store
 * @throws NullPointerException if any argument is {@code null}
 */
public JobManagerSharedServices(
		ScheduledExecutorService scheduledExecutorService,
		LibraryCacheManager libraryCacheManager,
		RestartStrategyFactory restartStrategyFactory,
		StackTraceSampleCoordinator stackTraceSampleCoordinator,
		BackPressureStatsTracker backPressureStatsTracker,
		@Nonnull BlobWriter blobWriter) {

	this.scheduledExecutorService = checkNotNull(scheduledExecutorService);
	this.libraryCacheManager = checkNotNull(libraryCacheManager);
	this.restartStrategyFactory = checkNotNull(restartStrategyFactory);
	this.stackTraceSampleCoordinator = checkNotNull(stackTraceSampleCoordinator);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	// @Nonnull is only a static-analysis hint; enforce it at runtime as well
	this.blobWriter = checkNotNull(blobWriter);
}
 
Example #5
Source File: JobManagerSharedServices.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Returns the restart strategy factory held by this instance.
 *
 * @return the stored {@link RestartStrategyFactory}
 */
public RestartStrategyFactory getRestartStrategyFactory() {
	return this.restartStrategyFactory;
}
 
Example #6
Source File: JobManagerSharedServices.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Builds a {@link JobManagerSharedServices} instance from the given configuration.
 *
 * <p>Creates a library cache manager on top of the blob server, a scheduled thread pool
 * (threads named "jobmanager-future", core size taken from the number of CPU cores), a stack
 * trace sample coordinator and a back pressure stats tracker. It also schedules the periodic
 * clean-up of the tracker's operator stats cache on that thread pool.
 *
 * @param config configuration the class loading, timeout and back pressure settings are read from
 * @param blobServer blob server backing the library cache manager; also passed on as blob writer
 * @return the assembled shared services
 * @throws IllegalConfigurationException if reading the timeout fails with a {@link NumberFormatException}
 * @throws Exception as declared by the signature
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder =
		config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager =
		new BlobLibraryCacheManager(
			blobServer,
			FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
			alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// keep the parsing failure as the cause so the offending value stays visible in the stack trace
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
			Hardware.getNumberCPUCores(),
			new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// the clean-up interval is used both as initial delay and as period, in milliseconds
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
 
Example #7
Source File: TestingJobManagerSharedServicesBuilder.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Sets the restart strategy factory on this builder.
 *
 * @param restartStrategyFactory factory to store; stored as given, without validation
 * @return this builder, to allow call chaining
 */
public TestingJobManagerSharedServicesBuilder setRestartStrategyFactory(
		RestartStrategyFactory restartStrategyFactory) {

	this.restartStrategyFactory = restartStrategyFactory;
	return this;
}
 
Example #8
Source File: JobMasterTest.java (from the Flink-CEPplus project, Apache License 2.0; 4 votes)
/**
 * Requests input splits for a single-vertex job from the job master and checks that the
 * expected splits are returned (in any order), both before and after the execution graph
 * has been failed globally.
 */
@Test
public void testRequestNextInputSplit() throws Exception {
	final List<TestingInputSplit> expectedInputSplits = Arrays.asList(
		new TestingInputSplit(1),
		new TestingInputSplit(42),
		new TestingInputSplit(1337));

	// build one node JobGraph
	InputSplitSource<TestingInputSplit> inputSplitSource = new TestingInputSplitSource(expectedInputSplits);

	JobVertex source = new JobVertex("vertex1");
	source.setParallelism(1);
	source.setInputSplitSource(inputSplitSource);
	source.setInvokableClass(AbstractInvokable.class);

	final JobGraph testJobGraph = new JobGraph(source);
	testJobGraph.setAllowQueuedScheduling(true);

	// fixed-delay restart settings: 1 attempt with a delay of "0 s", so the global failure
	// triggered below can be followed by a restart
	configuration.setLong(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_ATTEMPTS, 1);
	configuration.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");

	// the restart strategy factory is derived from the configuration modified above
	final JobManagerSharedServices jobManagerSharedServices =
		new TestingJobManagerSharedServicesBuilder()
			.setRestartStrategyFactory(RestartStrategyFactory.createRestartStrategyFactory(configuration))
			.build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		testJobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		ExecutionGraph eg = jobMaster.getExecutionGraph();

		// take the first execution vertex returned by the graph (the job has a single vertex with parallelism 1)
		ExecutionVertex ev = eg.getAllExecutionVertices().iterator().next();

		// each call asks the gateway for the next split of the vertex's current execution attempt;
		// the attempt id is looked up on every call
		final SupplierWithException<SerializedInputSplit, Exception> inputSplitSupplier = () -> jobMasterGateway.requestNextInputSplit(
			source.getID(),
			ev.getCurrentExecutionAttempt().getAttemptId()).get();

		// first round: collect as many splits as expected via the supplier
		// (getInputSplits is a helper defined elsewhere in the test class)
		List<InputSplit> actualInputSplits = getInputSplits(
			expectedInputSplits.size(),
			inputSplitSupplier);

		final Matcher<Iterable<? extends InputSplit>> expectedInputSplitsMatcher = containsInAnyOrder(expectedInputSplits.toArray(EMPTY_TESTING_INPUT_SPLITS));
		assertThat(actualInputSplits, expectedInputSplitsMatcher);

		final long maxWaitMillis = 2000L;
		ExecutionGraphTestUtils.waitUntilExecutionVertexState(ev, ExecutionState.SCHEDULED, maxWaitMillis);

		// fail the whole graph on the job master's main thread executor and block until that call has run
		CompletableFuture.runAsync(() -> eg.failGlobal(new Exception("Testing exception")), eg.getJobMasterMainThreadExecutor()).get();

		// wait until the vertex is in SCHEDULED state again after the failure
		ExecutionGraphTestUtils.waitUntilExecutionVertexState(ev, ExecutionState.SCHEDULED, maxWaitMillis);

		// second round: the same set of splits must be handed out again
		actualInputSplits = getInputSplits(
			expectedInputSplits.size(),
			inputSplitSupplier);

		assertThat(actualInputSplits, expectedInputSplitsMatcher);
	} finally {
		// always shut the job master endpoint down, even if an assertion failed
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #9
Source File: JobManagerSharedServices.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Returns the restart strategy factory held by this instance.
 *
 * @return the stored {@link RestartStrategyFactory}
 */
public RestartStrategyFactory getRestartStrategyFactory() {
	return this.restartStrategyFactory;
}
 
Example #10
Source File: JobManagerSharedServices.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Builds a {@link JobManagerSharedServices} instance from the given configuration.
 *
 * <p>Creates a library cache manager on top of the blob server, a scheduled thread pool
 * (threads named "jobmanager-future", core size taken from the number of CPU cores), a stack
 * trace sample coordinator and a back pressure stats tracker. It also schedules the periodic
 * clean-up of the tracker's operator stats cache on that thread pool.
 *
 * @param config configuration the class loading, timeout and back pressure settings are read from
 * @param blobServer blob server backing the library cache manager; also passed on as blob writer
 * @return the assembled shared services
 * @throws IllegalConfigurationException if reading the timeout fails with a {@link NumberFormatException}
 * @throws Exception as declared by the signature
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder =
		config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager =
		new BlobLibraryCacheManager(
			blobServer,
			FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
			alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// keep the parsing failure as the cause so the offending value stays visible in the stack trace
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
			Hardware.getNumberCPUCores(),
			new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// the clean-up interval is used both as initial delay and as period, in milliseconds
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
 
Example #11
Source File: LegacySchedulerFactory.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Creates a factory that keeps the given restart strategy factory.
 *
 * @param restartStrategyFactory restart strategy factory to store; validated with {@code checkNotNull}
 */
public LegacySchedulerFactory(final RestartStrategyFactory restartStrategyFactory) {
	checkNotNull(restartStrategyFactory);
	this.restartStrategyFactory = restartStrategyFactory;
}
 
Example #12
Source File: LegacyScheduler.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Stores the scheduler's collaborators, resolves the restart strategy for the job and
 * creates (and restores) the execution graph.
 *
 * <p>The restart strategy is resolved from the restart strategy configuration found in the
 * job graph's serialized execution config, the given restart strategy factory and whether
 * checkpointing is enabled for the job.
 *
 * @throws Exception as declared by the signature
 */
public LegacyScheduler(
		final Logger log,
		final JobGraph jobGraph,
		final BackPressureStatsTracker backPressureStatsTracker,
		final Executor ioExecutor,
		final Configuration jobMasterConfiguration,
		final SlotProvider slotProvider,
		final ScheduledExecutorService futureExecutor,
		final ClassLoader userCodeLoader,
		final CheckpointRecoveryFactory checkpointRecoveryFactory,
		final Time rpcTimeout,
		final RestartStrategyFactory restartStrategyFactory,
		final BlobWriter blobWriter,
		final JobManagerJobMetricGroup jobManagerJobMetricGroup,
		final Time slotRequestTimeout,
		final ShuffleMaster<?> shuffleMaster,
		final PartitionTracker partitionTracker) throws Exception {

	// mandatory collaborators
	this.log = checkNotNull(log);
	this.jobGraph = checkNotNull(jobGraph);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	this.ioExecutor = checkNotNull(ioExecutor);
	this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
	this.slotProvider = checkNotNull(slotProvider);
	this.futureExecutor = checkNotNull(futureExecutor);
	this.userCodeLoader = checkNotNull(userCodeLoader);
	this.checkpointRecoveryFactory = checkNotNull(checkpointRecoveryFactory);
	this.rpcTimeout = checkNotNull(rpcTimeout);

	// restart strategy configured on the job itself, read from its serialized execution config
	final RestartStrategies.RestartStrategyConfiguration jobRestartStrategyConfiguration = jobGraph
		.getSerializedExecutionConfig()
		.deserializeValue(userCodeLoader)
		.getRestartStrategy();

	this.restartStrategy = RestartStrategyResolving.resolve(
		jobRestartStrategyConfiguration,
		restartStrategyFactory,
		jobGraph.isCheckpointingEnabled());

	this.log.info("Using restart strategy {} for {} ({}).", this.restartStrategy, jobGraph.getName(), jobGraph.getJobID());

	this.blobWriter = checkNotNull(blobWriter);
	this.slotRequestTimeout = checkNotNull(slotRequestTimeout);

	// validated last, directly before the execution graph is built
	final ShuffleMaster<?> checkedShuffleMaster = checkNotNull(shuffleMaster);
	final PartitionTracker checkedPartitionTracker = checkNotNull(partitionTracker);
	this.executionGraph = createAndRestoreExecutionGraph(
		jobManagerJobMetricGroup,
		checkedShuffleMaster,
		checkedPartitionTracker);
}
 
Example #13
Source File: TestingJobManagerSharedServicesBuilder.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Sets the restart strategy factory on this builder.
 *
 * @param restartStrategyFactory factory to store; stored as given, without validation
 * @return this builder, to allow call chaining
 */
public TestingJobManagerSharedServicesBuilder setRestartStrategyFactory(
		RestartStrategyFactory restartStrategyFactory) {

	this.restartStrategyFactory = restartStrategyFactory;
	return this;
}
 
Example #14
Source File: SchedulerBase.java (from the flink project, Apache License 2.0; 4 votes)
/**
 * Stores the scheduler's collaborators, resolves the restart strategy for the job, creates
 * (and restores) the execution graph and sets up the helpers derived from it: scheduling
 * topology, preferred locations retriever and coordinator map.
 *
 * <p>The resolved restart strategy is logged only when {@code legacyScheduling} is set.
 *
 * @throws Exception as declared by the signature
 */
public SchedulerBase(
	final Logger log,
	final JobGraph jobGraph,
	final BackPressureStatsTracker backPressureStatsTracker,
	final Executor ioExecutor,
	final Configuration jobMasterConfiguration,
	final SlotProvider slotProvider,
	final ScheduledExecutorService futureExecutor,
	final ClassLoader userCodeLoader,
	final CheckpointRecoveryFactory checkpointRecoveryFactory,
	final Time rpcTimeout,
	final RestartStrategyFactory restartStrategyFactory,
	final BlobWriter blobWriter,
	final JobManagerJobMetricGroup jobManagerJobMetricGroup,
	final Time slotRequestTimeout,
	final ShuffleMaster<?> shuffleMaster,
	final JobMasterPartitionTracker partitionTracker,
	final ExecutionVertexVersioner executionVertexVersioner,
	final boolean legacyScheduling) throws Exception {

	// mandatory collaborators
	this.log = checkNotNull(log);
	this.jobGraph = checkNotNull(jobGraph);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	this.ioExecutor = checkNotNull(ioExecutor);
	this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
	this.slotProvider = checkNotNull(slotProvider);
	this.futureExecutor = checkNotNull(futureExecutor);
	this.userCodeLoader = checkNotNull(userCodeLoader);
	this.checkpointRecoveryFactory = checkNotNull(checkpointRecoveryFactory);
	this.rpcTimeout = checkNotNull(rpcTimeout);

	// restart strategy configured on the job itself, read from its serialized execution config
	final RestartStrategies.RestartStrategyConfiguration jobRestartStrategyConfiguration = jobGraph
		.getSerializedExecutionConfig()
		.deserializeValue(userCodeLoader)
		.getRestartStrategy();

	this.restartStrategy = RestartStrategyResolving.resolve(
		jobRestartStrategyConfiguration,
		restartStrategyFactory,
		jobGraph.isCheckpointingEnabled());

	if (legacyScheduling) {
		this.log.info("Using restart strategy {} for {} ({}).", this.restartStrategy, jobGraph.getName(), jobGraph.getJobID());
	}

	this.blobWriter = checkNotNull(blobWriter);
	this.jobManagerJobMetricGroup = checkNotNull(jobManagerJobMetricGroup);
	this.slotRequestTimeout = checkNotNull(slotRequestTimeout);
	this.executionVertexVersioner = checkNotNull(executionVertexVersioner);
	this.legacyScheduling = legacyScheduling;

	// validated last, directly before the execution graph is built
	final ShuffleMaster<?> checkedShuffleMaster = checkNotNull(shuffleMaster);
	final JobMasterPartitionTracker checkedPartitionTracker = checkNotNull(partitionTracker);
	this.executionGraph = createAndRestoreExecutionGraph(
		jobManagerJobMetricGroup,
		checkedShuffleMaster,
		checkedPartitionTracker);
	this.schedulingTopology = this.executionGraph.getSchedulingTopology();

	// preferred locations combine state-based locations with the locations of a vertex's inputs
	final StateLocationRetriever stateLocations =
		vertexId -> getExecutionVertex(vertexId).getPreferredLocationBasedOnState();
	final InputsLocationsRetriever inputLocations =
		new ExecutionGraphToInputsLocationsRetrieverAdapter(this.executionGraph);
	this.preferredLocationsRetriever = new DefaultPreferredLocationsRetriever(stateLocations, inputLocations);

	this.coordinatorMap = createCoordinatorMap();
}