org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration Java Examples

The following examples show how to use org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CheckpointCoordinatorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the minimum pause between checkpoints is not applied to savepoints.
 *
 * <p>Two savepoints are triggered directly after each other on a coordinator that is
 * configured with a very long minimum pause; neither returned future may already be done.
 */
@Test
public void testMinDelayBetweenSavepoints() throws Exception {
	// deliberately huge so that it would block the second savepoint if it were enforced
	final long veryLongMinPause = 100000000L;

	CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfigurationBuilder()
		.setMinPauseBetweenCheckpoints(veryLongMinPause)
		.setMaxConcurrentCheckpoints(1)
		.build();

	CheckpointCoordinator coordinator = new CheckpointCoordinatorBuilder()
		.setCheckpointCoordinatorConfiguration(coordinatorConfig)
		.setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
		.setTimer(manuallyTriggeredScheduledExecutor)
		.build();

	String targetDirectory = tmpFolder.newFolder().getAbsolutePath();

	CompletableFuture<CompletedCheckpoint> firstSavepoint = coordinator.triggerSavepoint(targetDirectory);
	assertFalse("Did not trigger savepoint", firstSavepoint.isDone());

	CompletableFuture<CompletedCheckpoint> secondSavepoint = coordinator.triggerSavepoint(targetDirectory);
	assertFalse("Did not trigger savepoint", secondSavepoint.isDone());
}
 
Example #2
Source File: JobGraphTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds checkpointing settings around a coordinator configuration that uses the given
 * checkpoint interval. All three vertex lists are empty and the last settings argument
 * is {@code null}.
 *
 * @param checkpointInterval value passed as the first argument of the coordinator configuration
 * @return the newly created settings
 */
private static JobCheckpointingSettings createCheckpointSettingsWithInterval(final long checkpointInterval) {
	final CheckpointCoordinatorConfiguration coordinatorConfig =
		new CheckpointCoordinatorConfiguration(
			checkpointInterval,
			Long.MAX_VALUE,
			Long.MAX_VALUE,
			Integer.MAX_VALUE,
			CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
			true,
			false,
			false,
			0);

	final JobCheckpointingSettings settings = new JobCheckpointingSettings(
		Collections.emptyList(),
		Collections.emptyList(),
		Collections.emptyList(),
		coordinatorConfig,
		null);
	return settings;
}
 
Example #3
Source File: CheckpointCoordinatorMasterHooksTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a checkpoint coordinator for the given job. The given vertices are passed as the
 * second vertex array; the first and third vertex arrays are empty.
 *
 * @param jid ID of the job the coordinator is created for
 * @param ackVertices vertices handed to the coordinator as its second vertex array
 * @return the newly created coordinator
 */
private static CheckpointCoordinator instantiateCheckpointCoordinator(JobID jid, ExecutionVertex... ackVertices) {
	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		10000000L,
		600000L,
		0L,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		true,
		false,
		0);

	final CheckpointFailureManager failureManager =
		new CheckpointFailureManager(0, NoOpFailJobCall.INSTANCE);

	return new CheckpointCoordinator(
		jid,
		coordinatorConfig,
		new ExecutionVertex[0],
		ackVertices,
		new ExecutionVertex[0],
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(10),
		new MemoryStateBackend(),
		Executors.directExecutor(),
		SharedStateRegistry.DEFAULT_FACTORY,
		failureManager);
}
 
Example #4
Source File: CheckpointConfigHandler.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the checkpoint coordinator configuration of an execution graph into the
 * REST response object.
 *
 * @param executionGraph graph whose checkpoint coordinator configuration is read
 * @return response describing processing mode, interval, timeout, minimum pause,
 *         maximum concurrent checkpoints and externalization info
 * @throws RestHandlerException with status {@code NOT_FOUND} if the graph has no
 *         checkpoint coordinator configuration
 */
private static CheckpointConfigInfo createCheckpointConfigInfo(AccessExecutionGraph executionGraph) throws RestHandlerException {
	final CheckpointCoordinatorConfiguration config = executionGraph.getCheckpointCoordinatorConfiguration();

	// no configuration available => report the job as not checkpointed
	if (config == null) {
		throw new RestHandlerException(
			"Checkpointing is not enabled for this job (" + executionGraph.getJobID() + ").",
			HttpResponseStatus.NOT_FOUND);
	}

	final CheckpointRetentionPolicy retentionPolicy = config.getCheckpointRetentionPolicy();
	final boolean retainedAfterTermination =
		retentionPolicy != CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
	final boolean notRetainedOnCancellation =
		retentionPolicy != CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;

	final CheckpointConfigInfo.ExternalizedCheckpointInfo externalizedInfo =
		new CheckpointConfigInfo.ExternalizedCheckpointInfo(
			retainedAfterTermination,
			notRetainedOnCancellation);

	final CheckpointConfigInfo.ProcessingMode processingMode;
	if (config.isExactlyOnce()) {
		processingMode = CheckpointConfigInfo.ProcessingMode.EXACTLY_ONCE;
	} else {
		processingMode = CheckpointConfigInfo.ProcessingMode.AT_LEAST_ONCE;
	}

	return new CheckpointConfigInfo(
		processingMode,
		config.getCheckpointInterval(),
		config.getCheckpointTimeout(),
		config.getMinPauseBetweenCheckpoints(),
		config.getMaxConcurrentCheckpoints(),
		externalizedInfo);
}
 
Example #5
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Enables checkpointing on the given execution graph, passing all of its job vertices
 * as each of the three vertex lists.
 *
 * @param eg execution graph to enable checkpointing on
 */
private static void enableCheckpointing(ExecutionGraph eg) {
	final ArrayList<ExecutionJobVertex> allJobVertices = new ArrayList<>(eg.getAllVertices().values());

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		1000,
		100,
		0,
		1,
		CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION,
		true,
		false,
		0);

	// the stats tracker is given a mocked configuration, not the one created above
	final CheckpointStatsTracker statsTracker = new CheckpointStatsTracker(
		0,
		allJobVertices,
		mock(CheckpointCoordinatorConfiguration.class),
		new UnregisteredMetricsGroup());

	eg.enableCheckpointing(
		coordinatorConfig,
		allJobVertices,
		allJobVertices,
		allJobVertices,
		Collections.emptyList(),
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(1),
		new MemoryStateBackend(),
		statsTracker);
}
 
Example #6
Source File: CheckpointConfigHandler.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the checkpoint coordinator configuration of an execution graph into the
 * REST response object, including the name of the state backend if the graph reports one.
 *
 * @param executionGraph graph whose checkpoint coordinator configuration and state
 *                       backend name are read
 * @return response describing processing mode, interval, timeout, minimum pause,
 *         maximum concurrent checkpoints, externalization info and state backend name
 *         ({@code null} if the graph does not provide a name)
 * @throws RestHandlerException with status {@code NOT_FOUND} if the graph has no
 *         checkpoint coordinator configuration
 */
private static CheckpointConfigInfo createCheckpointConfigInfo(AccessExecutionGraph executionGraph) throws RestHandlerException {
	final CheckpointCoordinatorConfiguration config = executionGraph.getCheckpointCoordinatorConfiguration();

	// no configuration available => report the job as not checkpointed
	if (config == null) {
		throw new RestHandlerException(
			"Checkpointing is not enabled for this job (" + executionGraph.getJobID() + ").",
			HttpResponseStatus.NOT_FOUND);
	}

	final CheckpointRetentionPolicy retentionPolicy = config.getCheckpointRetentionPolicy();
	final boolean retainedAfterTermination =
		retentionPolicy != CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
	final boolean notRetainedOnCancellation =
		retentionPolicy != CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;

	final CheckpointConfigInfo.ExternalizedCheckpointInfo externalizedInfo =
		new CheckpointConfigInfo.ExternalizedCheckpointInfo(
			retainedAfterTermination,
			notRetainedOnCancellation);

	final String stateBackendName = executionGraph.getStateBackendName().orElse(null);

	final CheckpointConfigInfo.ProcessingMode processingMode;
	if (config.isExactlyOnce()) {
		processingMode = CheckpointConfigInfo.ProcessingMode.EXACTLY_ONCE;
	} else {
		processingMode = CheckpointConfigInfo.ProcessingMode.AT_LEAST_ONCE;
	}

	return new CheckpointConfigInfo(
		processingMode,
		config.getCheckpointInterval(),
		config.getCheckpointTimeout(),
		config.getMinPauseBetweenCheckpoints(),
		config.getMaxConcurrentCheckpoints(),
		externalizedInfo,
		stateBackendName);
}
 
Example #7
Source File: JobMasterTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a job graph from the given vertices with checkpointing settings and the given
 * savepoint restore settings applied.
 *
 * @param savepointRestoreSettings restore settings to set on the job graph
 * @param jobVertices vertices the job graph is built from
 * @return the configured job graph
 */
@Nonnull
private JobGraph createJobGraphFromJobVerticesWithCheckpointing(SavepointRestoreSettings savepointRestoreSettings, JobVertex... jobVertices) {
	// checkpointing has to be enabled in order to resume from a savepoint
	final CheckpointCoordinatorConfiguration coordinatorConfig =
		new CheckpointCoordinatorConfiguration(
			1000L,
			1000L,
			1000L,
			1,
			CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
			true,
			false,
			false,
			0);

	final JobCheckpointingSettings snapshotSettings = new JobCheckpointingSettings(
		Collections.emptyList(),
		Collections.emptyList(),
		Collections.emptyList(),
		coordinatorConfig,
		null);

	final JobGraph result = new JobGraph(jobVertices);
	result.setSnapshotSettings(snapshotSettings);
	result.setSavepointRestoreSettings(savepointRestoreSettings);
	return result;
}
 
Example #8
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a job graph from the given vertices with checkpointing settings and the given
 * savepoint restore settings applied.
 *
 * @param savepointRestoreSettings restore settings to set on the job graph
 * @param jobVertices vertices the job graph is built from
 * @return the configured job graph
 */
@Nonnull
private JobGraph createJobGraphFromJobVerticesWithCheckpointing(SavepointRestoreSettings savepointRestoreSettings, JobVertex... jobVertices) {
	// checkpointing has to be enabled in order to resume from a savepoint
	final CheckpointCoordinatorConfiguration coordinatorConfig =
		new CheckpointCoordinatorConfiguration(
			1000L,
			1000L,
			1000L,
			1,
			CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
			true);

	final JobCheckpointingSettings snapshotSettings = new JobCheckpointingSettings(
		Collections.emptyList(),
		Collections.emptyList(),
		Collections.emptyList(),
		coordinatorConfig,
		null);

	final JobGraph result = new JobGraph(jobVertices);
	result.setSnapshotSettings(snapshotSettings);
	result.setSavepointRestoreSettings(savepointRestoreSettings);
	return result;
}
 
Example #9
Source File: CheckpointStatsTrackerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a "disabled" checkpoint stats tracker for tests: it remembers zero checkpoints
 * and is backed by a single mocked job vertex and a mocked coordinator configuration.
 *
 * @return the tracker for use in tests
 */
static CheckpointStatsTracker createTestTracker() {
	final ExecutionJobVertex mockedVertex = mock(ExecutionJobVertex.class);
	when(mockedVertex.getJobVertexId()).thenReturn(new JobVertexID());
	when(mockedVertex.getParallelism()).thenReturn(1);

	final CheckpointStatsTracker tracker = new CheckpointStatsTracker(
		0,
		Collections.singletonList(mockedVertex),
		mock(CheckpointCoordinatorConfiguration.class),
		new UnregisteredMetricsGroup());
	return tracker;
}
 
Example #10
Source File: ArchivedExecutionGraph.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an archived execution graph from the given values.
 *
 * <p>All arguments except {@code failureCause}, {@code isStoppable},
 * {@code jobCheckpointingConfiguration} and {@code checkpointStatsSnapshot} are
 * null-checked via {@code Preconditions.checkNotNull}.
 *
 * @param jobID ID of the job, must not be null
 * @param jobName name of the job, must not be null
 * @param tasks archived job vertices keyed by their vertex ID, must not be null
 * @param verticesInCreationOrder archived job vertices as a list, must not be null
 * @param stateTimestamps state timestamps of the job, must not be null
 * @param state status of the job, must not be null
 * @param failureCause failure information, may be null
 * @param jsonPlan JSON plan of the job, must not be null
 * @param archivedUserAccumulators stringified accumulator results, must not be null
 * @param serializedUserAccumulators serialized accumulator values keyed by name, must not be null
 * @param executionConfig archived execution config, must not be null
 * @param isStoppable stored as the {@code isStoppable} flag
 * @param jobCheckpointingConfiguration checkpoint coordinator configuration, may be null
 * @param checkpointStatsSnapshot checkpoint statistics snapshot, may be null
 */
public ArchivedExecutionGraph(
		JobID jobID,
		String jobName,
		Map<JobVertexID, ArchivedExecutionJobVertex> tasks,
		List<ArchivedExecutionJobVertex> verticesInCreationOrder,
		long[] stateTimestamps,
		JobStatus state,
		@Nullable ErrorInfo failureCause,
		String jsonPlan,
		StringifiedAccumulatorResult[] archivedUserAccumulators,
		Map<String, SerializedValue<OptionalFailure<Object>>> serializedUserAccumulators,
		ArchivedExecutionConfig executionConfig,
		boolean isStoppable,
		@Nullable CheckpointCoordinatorConfiguration jobCheckpointingConfiguration,
		@Nullable CheckpointStatsSnapshot checkpointStatsSnapshot) {

	// mandatory values: fail fast on null
	this.jobID = Preconditions.checkNotNull(jobID);
	this.jobName = Preconditions.checkNotNull(jobName);
	this.tasks = Preconditions.checkNotNull(tasks);
	this.verticesInCreationOrder = Preconditions.checkNotNull(verticesInCreationOrder);
	this.stateTimestamps = Preconditions.checkNotNull(stateTimestamps);
	this.state = Preconditions.checkNotNull(state);
	// optional: stored without a null check
	this.failureCause = failureCause;
	this.jsonPlan = Preconditions.checkNotNull(jsonPlan);
	this.archivedUserAccumulators = Preconditions.checkNotNull(archivedUserAccumulators);
	this.serializedUserAccumulators = Preconditions.checkNotNull(serializedUserAccumulators);
	this.archivedExecutionConfig = Preconditions.checkNotNull(executionConfig);
	this.isStoppable = isStoppable;
	// optional checkpointing information: stored without a null check
	this.jobCheckpointingConfiguration = jobCheckpointingConfiguration;
	this.checkpointStatsSnapshot = checkpointStatsSnapshot;
}
 
Example #11
Source File: CheckpointStatsTrackerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the tracker returns the coordinator configuration it was created with.
 */
@Test
public void testGetSnapshottingSettings() throws Exception {
	final ExecutionJobVertex mockedVertex = mock(ExecutionJobVertex.class);
	when(mockedVertex.getJobVertexId()).thenReturn(new JobVertexID());
	when(mockedVertex.getParallelism()).thenReturn(1);

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		181238123L,
		19191992L,
		191929L,
		123,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false);

	final JobCheckpointingSettings settings = new JobCheckpointingSettings(
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		coordinatorConfig,
		null);

	final CheckpointStatsTracker statsTracker = new CheckpointStatsTracker(
		0,
		Collections.singletonList(mockedVertex),
		settings.getCheckpointCoordinatorConfiguration(),
		new UnregisteredMetricsGroup());

	assertEquals(settings.getCheckpointCoordinatorConfiguration(), statsTracker.getJobCheckpointingConfiguration());
}
 
Example #12
Source File: CheckpointCoordinatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a checkpoint coordinator for the given job and the two given vertices, using
 * {@code Integer.MAX_VALUE} as the maximum number of concurrent checkpoints and the
 * manually triggered executor as timer.
 *
 * @param jobId ID of the job
 * @param vertex1 first task vertex
 * @param vertex2 second task vertex
 * @return the newly built coordinator
 */
private CheckpointCoordinator getCheckpointCoordinator(
	JobID jobId,
	ExecutionVertex vertex1,
	ExecutionVertex vertex2) {

	final ExecutionVertex[] tasks = new ExecutionVertex[]{ vertex1, vertex2 };

	final CheckpointCoordinatorConfiguration coordinatorConfig = CheckpointCoordinatorConfiguration.builder()
		.setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
		.build();

	return new CheckpointCoordinatorBuilder()
		.setJobId(jobId)
		.setTasks(tasks)
		.setCheckpointCoordinatorConfiguration(coordinatorConfig)
		.setTimer(manuallyTriggeredScheduledExecutor)
		.build();
}
 
Example #13
Source File: ExecutionGraphDeploymentTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an execution graph for an empty job named "test" that has checkpointing
 * settings attached.
 *
 * @param configuration configuration handed to the execution graph builder
 * @return the execution graph created by {@code ExecutionGraphBuilder.buildGraph}
 * @throws Exception if building the graph fails
 */
private ExecutionGraph createExecutionGraph(Configuration configuration) throws Exception {
	final ScheduledExecutorService sharedExecutor = TestingUtils.defaultExecutor();

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		100,
		10 * 60 * 1000,
		0,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false);

	final JobCheckpointingSettings snapshotSettings = new JobCheckpointingSettings(
		Collections.<JobVertexID>emptyList(),
		Collections.<JobVertexID>emptyList(),
		Collections.<JobVertexID>emptyList(),
		coordinatorConfig,
		null);

	final JobGraph graph = new JobGraph(new JobID(), "test");
	graph.setSnapshotSettings(snapshotSettings);

	// the same timeout value is passed for both timeout arguments of buildGraph
	final Time tenSeconds = Time.seconds(10L);

	return ExecutionGraphBuilder.buildGraph(
		null,
		graph,
		configuration,
		sharedExecutor,
		sharedExecutor,
		new ProgrammedSlotProvider(1),
		getClass().getClassLoader(),
		new StandaloneCheckpointRecoveryFactory(),
		tenSeconds,
		new NoRestartStrategy(),
		new UnregisteredMetricsGroup(),
		1,
		blobWriter,
		tenSeconds,
		LoggerFactory.getLogger(getClass()));
}
 
Example #14
Source File: ExecutionGraph.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the checkpoint coordinator configuration held by the checkpoint stats tracker.
 *
 * @return the configuration, or {@code null} if there is no checkpoint stats tracker
 */
@Override
public CheckpointCoordinatorConfiguration getCheckpointCoordinatorConfiguration() {
	// without a stats tracker there is no configuration to report
	if (checkpointStatsTracker == null) {
		return null;
	}
	return checkpointStatsTracker.getJobCheckpointingConfiguration();
}
 
Example #15
Source File: CheckpointCoordinatorTriggeringTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers two checkpoints in a row, waits on the master hook's trigger latch, completes
 * the master hook future and then aborts the pending checkpoints. The future of the
 * second checkpoint request is expected to complete exceptionally with a
 * {@link CheckpointException}.
 *
 * <p>This test only fails eventually.
 */
@Test
public void discardingTriggeringCheckpointWillExecuteNextCheckpointRequest() throws Exception {
	final ExecutionVertex executionVertex = mockExecutionVertex(new ExecutionAttemptID());

	// a real single-threaded scheduler backs the coordinator's timer; it is shut down in the finally block
	final ScheduledExecutorService scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();
	final CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder()
		.setTasks(new ExecutionVertex[]{executionVertex})
		.setTimer(new ScheduledExecutorServiceAdapter(scheduledExecutorService))
		.setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder()
			.build())
		.build();

	// the master hook is handed a future and a latch that this test controls
	final CompletableFuture<String> masterHookCheckpointFuture = new CompletableFuture<>();
	final OneShotLatch triggerCheckpointLatch = new OneShotLatch();
	checkpointCoordinator.addMasterHook(new TestingMasterHook(masterHookCheckpointFuture, triggerCheckpointLatch));

	try {
		// two requests back to back; only the future of the second one is inspected
		checkpointCoordinator.triggerCheckpoint(false);
		final CompletableFuture<CompletedCheckpoint> secondCheckpoint = checkpointCoordinator.triggerCheckpoint(false);

		// block until the latch given to the master hook has been triggered, then complete the hook's future
		triggerCheckpointLatch.await();
		masterHookCheckpointFuture.complete("Completed");

		// discard triggering checkpoint
		checkpointCoordinator.abortPendingCheckpoints(new CheckpointException(CheckpointFailureReason.CHECKPOINT_DECLINED));

		try {
			// verify that the second checkpoint request will be executed and eventually times out
			secondCheckpoint.get();
			fail("Expected the second checkpoint to fail.");
		} catch (ExecutionException ee) {
			// the cause, once unwrapped from the ExecutionException, must be a CheckpointException
			assertThat(ExceptionUtils.stripExecutionException(ee), instanceOf(CheckpointException.class));
		}
	} finally {
		// always release the coordinator and the scheduler thread, even if an assertion failed
		checkpointCoordinator.shutdown(JobStatus.FINISHED);
		ExecutorUtils.gracefulShutdown(10L, TimeUnit.SECONDS, scheduledExecutorService);
	}
}
 
Example #16
Source File: CheckpointStatsTrackerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the tracker returns the coordinator configuration it was created with.
 */
@Test
public void testGetSnapshottingSettings() throws Exception {
	final ExecutionJobVertex mockedVertex = mock(ExecutionJobVertex.class);
	when(mockedVertex.getJobVertexId()).thenReturn(new JobVertexID());
	when(mockedVertex.getParallelism()).thenReturn(1);

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		181238123L,
		19191992L,
		191929L,
		123,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false,
		false,
		false,
		0);

	final JobCheckpointingSettings settings = new JobCheckpointingSettings(
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		coordinatorConfig,
		null);

	final CheckpointStatsTracker statsTracker = new CheckpointStatsTracker(
		0,
		Collections.singletonList(mockedVertex),
		settings.getCheckpointCoordinatorConfiguration(),
		new UnregisteredMetricsGroup());

	assertEquals(settings.getCheckpointCoordinatorConfiguration(), statsTracker.getJobCheckpointingConfiguration());
}
 
Example #17
Source File: ArchivedExecutionGraph.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an archived execution graph from the given values.
 *
 * <p>All arguments except {@code failureCause}, {@code isStoppable},
 * {@code jobCheckpointingConfiguration} and {@code checkpointStatsSnapshot} are
 * null-checked via {@code Preconditions.checkNotNull}.
 *
 * @param jobID ID of the job, must not be null
 * @param jobName name of the job, must not be null
 * @param tasks archived job vertices keyed by their vertex ID, must not be null
 * @param verticesInCreationOrder archived job vertices as a list, must not be null
 * @param stateTimestamps state timestamps of the job, must not be null
 * @param state status of the job, must not be null
 * @param failureCause failure information, may be null
 * @param jsonPlan JSON plan of the job, must not be null
 * @param archivedUserAccumulators stringified accumulator results, must not be null
 * @param serializedUserAccumulators serialized accumulator values keyed by name, must not be null
 * @param executionConfig archived execution config, must not be null
 * @param isStoppable stored as the {@code isStoppable} flag
 * @param jobCheckpointingConfiguration checkpoint coordinator configuration, may be null
 * @param checkpointStatsSnapshot checkpoint statistics snapshot, may be null
 */
public ArchivedExecutionGraph(
		JobID jobID,
		String jobName,
		Map<JobVertexID, ArchivedExecutionJobVertex> tasks,
		List<ArchivedExecutionJobVertex> verticesInCreationOrder,
		long[] stateTimestamps,
		JobStatus state,
		@Nullable ErrorInfo failureCause,
		String jsonPlan,
		StringifiedAccumulatorResult[] archivedUserAccumulators,
		Map<String, SerializedValue<OptionalFailure<Object>>> serializedUserAccumulators,
		ArchivedExecutionConfig executionConfig,
		boolean isStoppable,
		@Nullable CheckpointCoordinatorConfiguration jobCheckpointingConfiguration,
		@Nullable CheckpointStatsSnapshot checkpointStatsSnapshot) {

	// mandatory values: fail fast on null
	this.jobID = Preconditions.checkNotNull(jobID);
	this.jobName = Preconditions.checkNotNull(jobName);
	this.tasks = Preconditions.checkNotNull(tasks);
	this.verticesInCreationOrder = Preconditions.checkNotNull(verticesInCreationOrder);
	this.stateTimestamps = Preconditions.checkNotNull(stateTimestamps);
	this.state = Preconditions.checkNotNull(state);
	// optional: stored without a null check
	this.failureCause = failureCause;
	this.jsonPlan = Preconditions.checkNotNull(jsonPlan);
	this.archivedUserAccumulators = Preconditions.checkNotNull(archivedUserAccumulators);
	this.serializedUserAccumulators = Preconditions.checkNotNull(serializedUserAccumulators);
	this.archivedExecutionConfig = Preconditions.checkNotNull(executionConfig);
	this.isStoppable = isStoppable;
	// optional checkpointing information: stored without a null check
	this.jobCheckpointingConfiguration = jobCheckpointingConfiguration;
	this.checkpointStatsSnapshot = checkpointStatsSnapshot;
}
 
Example #18
Source File: CheckpointCoordinatorMasterHooksTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a checkpoint coordinator for the given job. The given vertices are passed as the
 * second vertex array; the first and third vertex arrays are empty.
 *
 * @param jid ID of the job the coordinator is created for
 * @param testingScheduledExecutor scheduled executor handed to the coordinator
 * @param ackVertices vertices handed to the coordinator as its second vertex array
 * @return the newly created coordinator
 */
private CheckpointCoordinator instantiateCheckpointCoordinator(
	JobID jid,
	ScheduledExecutor testingScheduledExecutor,
	ExecutionVertex... ackVertices) {

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		10000000L,
		600000L,
		0L,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		true,
		false,
		false,
		0);

	final CheckpointFailureManager failureManager =
		new CheckpointFailureManager(0, NoOpFailJobCall.INSTANCE);

	return new CheckpointCoordinator(
		jid,
		coordinatorConfig,
		new ExecutionVertex[0],
		ackVertices,
		new ExecutionVertex[0],
		Collections.emptyList(),
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(10),
		new MemoryStateBackend(),
		Executors.directExecutor(),
		testingScheduledExecutor,
		SharedStateRegistry.DEFAULT_FACTORY,
		failureManager);
}
 
Example #19
Source File: CheckpointStatsTracker.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new checkpoint stats tracker.
 *
 * <p>Besides storing its arguments, the constructor sums up the parallelism of all given
 * job vertices, creates an initial snapshot without a restored checkpoint entry and
 * registers the metrics on the given metric group.
 *
 * @param numRememberedCheckpoints Maximum number of checkpoints to remember, including in progress ones.
 *                                 Must not be negative.
 * @param jobVertices Job vertices involved in the checkpoints. Must not be null.
 * @param jobCheckpointingConfiguration Checkpointing configuration. Must not be null.
 * @param metricGroup Metric group for exposed metrics
 */
public CheckpointStatsTracker(
	int numRememberedCheckpoints,
	List<ExecutionJobVertex> jobVertices,
	CheckpointCoordinatorConfiguration jobCheckpointingConfiguration,
	MetricGroup metricGroup) {

	// validate the arguments before any state is derived from them
	checkArgument(numRememberedCheckpoints >= 0, "Negative number of remembered checkpoints");
	this.history = new CheckpointStatsHistory(numRememberedCheckpoints);
	this.jobVertices = checkNotNull(jobVertices, "JobVertices");
	this.jobCheckpointingConfiguration = checkNotNull(jobCheckpointingConfiguration);

	// Compute the total subtask count. We do this here in order to only
	// do it once.
	int count = 0;
	for (ExecutionJobVertex vertex : jobVertices) {
		count += vertex.getParallelism();
	}
	this.totalSubtaskCount = count;

	// Latest snapshot is empty
	// NOTE(review): counts and summary are not assigned in this constructor — presumably
	// initialized at their declaration; confirm against the field definitions.
	latestSnapshot = new CheckpointStatsSnapshot(
		counts.createSnapshot(),
		summary.createSnapshot(),
		history.createSnapshot(),
		null);

	// Register the metrics
	registerMetrics(metricGroup);
}
 
Example #20
Source File: SchedulerTestingUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches checkpointing settings to the given job graph. Input vertices become the first
 * vertex list of the settings; all vertices are used for the second and third list.
 *
 * @param jobGraph job graph to configure
 * @param stateBackend state backend to serialize into the settings, or {@code null} to
 *                     pass {@code null} as the serialized state backend
 * @throws RuntimeException if the state backend cannot be serialized
 */
public static void enableCheckpointing(final JobGraph jobGraph, @Nullable StateBackend stateBackend) {
	final List<JobVertexID> inputVertexIds = new ArrayList<>();
	final List<JobVertexID> allVertexIds = new ArrayList<>();

	for (JobVertex jobVertex : jobGraph.getVertices()) {
		final JobVertexID vertexId = jobVertex.getID();
		if (jobVertex.isInputVertex()) {
			inputVertexIds.add(vertexId);
		}
		allVertexIds.add(vertexId);
	}

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		Long.MAX_VALUE, // disable periodical checkpointing
		DEFAULT_CHECKPOINT_TIMEOUT_MS,
		0,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false,
		false,
		false,
		0);

	SerializedValue<StateBackend> serializedBackend;
	if (stateBackend == null) {
		serializedBackend = null;
	} else {
		try {
			serializedBackend = new SerializedValue<>(stateBackend);
		} catch (IOException e) {
			throw new RuntimeException("could not serialize state backend", e);
		}
	}

	final JobCheckpointingSettings snapshotSettings = new JobCheckpointingSettings(
		inputVertexIds,
		allVertexIds,
		allVertexIds,
		coordinatorConfig,
		serializedBackend);
	jobGraph.setSnapshotSettings(snapshotSettings);
}
 
Example #21
Source File: ExecutionGraph.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the checkpoint coordinator configuration held by the checkpoint stats tracker.
 *
 * @return the configuration, or {@code null} if there is no checkpoint stats tracker
 */
@Override
public CheckpointCoordinatorConfiguration getCheckpointCoordinatorConfiguration() {
	// without a stats tracker there is no configuration to report
	if (checkpointStatsTracker == null) {
		return null;
	}
	return checkpointStatsTracker.getJobCheckpointingConfiguration();
}
 
Example #22
Source File: CheckpointStatsTracker.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new checkpoint stats tracker.
 *
 * <p>Besides storing its arguments, the constructor sums up the parallelism of all given
 * job vertices, creates an initial snapshot without a restored checkpoint entry and
 * registers the metrics on the given metric group.
 *
 * @param numRememberedCheckpoints Maximum number of checkpoints to remember, including in progress ones.
 *                                 Must not be negative.
 * @param jobVertices Job vertices involved in the checkpoints. Must not be null.
 * @param jobCheckpointingConfiguration Checkpointing configuration. Must not be null.
 * @param metricGroup Metric group for exposed metrics
 */
public CheckpointStatsTracker(
	int numRememberedCheckpoints,
	List<ExecutionJobVertex> jobVertices,
	CheckpointCoordinatorConfiguration jobCheckpointingConfiguration,
	MetricGroup metricGroup) {

	// validate the arguments before any state is derived from them
	checkArgument(numRememberedCheckpoints >= 0, "Negative number of remembered checkpoints");
	this.history = new CheckpointStatsHistory(numRememberedCheckpoints);
	this.jobVertices = checkNotNull(jobVertices, "JobVertices");
	this.jobCheckpointingConfiguration = checkNotNull(jobCheckpointingConfiguration);

	// Compute the total subtask count. We do this here in order to only
	// do it once.
	int count = 0;
	for (ExecutionJobVertex vertex : jobVertices) {
		count += vertex.getParallelism();
	}
	this.totalSubtaskCount = count;

	// Latest snapshot is empty
	// NOTE(review): counts and summary are not assigned in this constructor — presumably
	// initialized at their declaration; confirm against the field definitions.
	latestSnapshot = new CheckpointStatsSnapshot(
		counts.createSnapshot(),
		summary.createSnapshot(),
		history.createSnapshot(),
		null);

	// Register the metrics
	registerMetrics(metricGroup);
}
 
Example #23
Source File: CheckpointCoordinatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the minimum pause between checkpoints is not applied to savepoints.
 *
 * <p>Two savepoints are triggered directly after each other on a coordinator that is
 * configured with a very long minimum pause; neither returned future may already be done.
 */
@Test
public void testMinDelayBetweenSavepoints() throws Exception {
	final JobID jobId = new JobID();
	final ExecutionAttemptID attemptId = new ExecutionAttemptID();
	final ExecutionVertex vertex = mockExecutionVertex(attemptId);

	// deliberately huge so that it would block the second savepoint if it were enforced
	final long veryLongMinPause = 100000000L;

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		100000,
		200000,
		veryLongMinPause,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		true,
		false,
		0);

	final CheckpointCoordinator coordinator = new CheckpointCoordinator(
		jobId,
		coordinatorConfig,
		new ExecutionVertex[] { vertex },
		new ExecutionVertex[] { vertex },
		new ExecutionVertex[] { vertex },
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(2),
		new MemoryStateBackend(),
		Executors.directExecutor(),
		SharedStateRegistry.DEFAULT_FACTORY,
		failureManager);

	final String targetDirectory = tmpFolder.newFolder().getAbsolutePath();

	final CompletableFuture<CompletedCheckpoint> firstSavepoint = coordinator.triggerSavepoint(0, targetDirectory);
	assertFalse("Did not trigger savepoint", firstSavepoint.isDone());

	final CompletableFuture<CompletedCheckpoint> secondSavepoint = coordinator.triggerSavepoint(1, targetDirectory);
	assertFalse("Did not trigger savepoint", secondSavepoint.isDone());
}
 
Example #24
Source File: CheckpointCoordinator.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience constructor that forwards all arguments unchanged to the constructor taking
 * an additional clock argument, passing {@code SystemClock.getInstance()} as the clock.
 *
 * @param job ID of the job
 * @param chkConfig checkpoint coordinator configuration
 * @param tasksToTrigger forwarded as the tasks to trigger
 * @param tasksToWaitFor forwarded as the tasks to wait for
 * @param tasksToCommitTo forwarded as the tasks to commit to
 * @param coordinatorsToCheckpoint forwarded as the operator coordinator contexts to checkpoint
 * @param checkpointIDCounter forwarded checkpoint ID counter
 * @param completedCheckpointStore forwarded completed checkpoint store
 * @param checkpointStateBackend forwarded state backend
 * @param executor forwarded executor
 * @param timer forwarded scheduled executor
 * @param sharedStateRegistryFactory forwarded shared state registry factory
 * @param failureManager forwarded checkpoint failure manager
 */
public CheckpointCoordinator(
	JobID job,
	CheckpointCoordinatorConfiguration chkConfig,
	ExecutionVertex[] tasksToTrigger,
	ExecutionVertex[] tasksToWaitFor,
	ExecutionVertex[] tasksToCommitTo,
	Collection<OperatorCoordinatorCheckpointContext> coordinatorsToCheckpoint,
	CheckpointIDCounter checkpointIDCounter,
	CompletedCheckpointStore completedCheckpointStore,
	StateBackend checkpointStateBackend,
	Executor executor,
	ScheduledExecutor timer,
	SharedStateRegistryFactory sharedStateRegistryFactory,
	CheckpointFailureManager failureManager) {

	this(
		job,
		chkConfig,
		tasksToTrigger,
		tasksToWaitFor,
		tasksToCommitTo,
		coordinatorsToCheckpoint,
		checkpointIDCounter,
		completedCheckpointStore,
		checkpointStateBackend,
		executor,
		timer,
		sharedStateRegistryFactory,
		failureManager,
		// only argument not supplied by the caller
		SystemClock.getInstance());
}
 
Example #25
Source File: CheckpointCoordinatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a checkpoint coordinator for the given job in which both vertices are used for
 * each of the three vertex arrays and failures are reported to the given failure manager.
 *
 * @param jobId ID of the job
 * @param vertex1 first vertex
 * @param vertex2 second vertex
 * @param failureManager failure manager handed to the coordinator
 * @return the newly created coordinator
 */
private CheckpointCoordinator getCheckpointCoordinator(
		final JobID jobId,
		final ExecutionVertex vertex1,
		final ExecutionVertex vertex2,
		final CheckpointFailureManager failureManager) {

	final CheckpointCoordinatorConfiguration coordinatorConfig =
		new CheckpointCoordinatorConfiguration(
			600000,
			600000,
			0,
			Integer.MAX_VALUE,
			CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
			true,
			false,
			0);

	final CheckpointCoordinator coordinator = new CheckpointCoordinator(
		jobId,
		coordinatorConfig,
		new ExecutionVertex[]{vertex1, vertex2},
		new ExecutionVertex[]{vertex1, vertex2},
		new ExecutionVertex[]{vertex1, vertex2},
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(1),
		new MemoryStateBackend(),
		Executors.directExecutor(),
		SharedStateRegistry.DEFAULT_FACTORY,
		failureManager);
	return coordinator;
}
 
Example #26
Source File: CheckpointStatsTrackerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the tracker returns the coordinator configuration it was created with.
 */
@Test
public void testGetSnapshottingSettings() throws Exception {
	final ExecutionJobVertex mockedVertex = mock(ExecutionJobVertex.class);
	when(mockedVertex.getJobVertexId()).thenReturn(new JobVertexID());
	when(mockedVertex.getParallelism()).thenReturn(1);

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		181238123L,
		19191992L,
		191929L,
		123,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false,
		false,
		0);

	final JobCheckpointingSettings settings = new JobCheckpointingSettings(
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		Collections.singletonList(new JobVertexID()),
		coordinatorConfig,
		null);

	final CheckpointStatsTracker statsTracker = new CheckpointStatsTracker(
		0,
		Collections.singletonList(mockedVertex),
		settings.getCheckpointCoordinatorConfiguration(),
		new UnregisteredMetricsGroup());

	assertEquals(settings.getCheckpointCoordinatorConfiguration(), statsTracker.getJobCheckpointingConfiguration());
}
 
Example #27
Source File: AdaptedRestartPipelinedRegionStrategyNGAbortPendingCheckpointsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Enables checkpointing on the given execution graph, passing all of its job vertices
 * as each of the three vertex lists. The stats tracker shares the coordinator
 * configuration created here.
 *
 * @param executionGraph execution graph to enable checkpointing on
 */
private static void enableCheckpointing(final ExecutionGraph executionGraph) {
	final List<ExecutionJobVertex> allJobVertices = new ArrayList<>(executionGraph.getAllVertices().values());

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		Long.MAX_VALUE,
		Long.MAX_VALUE,
		0,
		1,
		CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION,
		true,
		false,
		0);

	final CheckpointStatsTracker statsTracker = new CheckpointStatsTracker(
		0,
		allJobVertices,
		coordinatorConfig,
		new UnregisteredMetricsGroup());

	executionGraph.enableCheckpointing(
		coordinatorConfig,
		allJobVertices,
		allJobVertices,
		allJobVertices,
		Collections.emptyList(),
		new StandaloneCheckpointIDCounter(),
		new StandaloneCompletedCheckpointStore(1),
		new MemoryStateBackend(),
		statsTracker);
}
 
Example #28
Source File: CheckpointStatsTrackerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a "disabled" checkpoint stats tracker for tests: it remembers zero checkpoints
 * and is backed by a single mocked job vertex and a mocked coordinator configuration.
 *
 * @return the tracker for use in tests
 */
static CheckpointStatsTracker createTestTracker() {
	final ExecutionJobVertex mockedVertex = mock(ExecutionJobVertex.class);
	when(mockedVertex.getJobVertexId()).thenReturn(new JobVertexID());
	when(mockedVertex.getParallelism()).thenReturn(1);

	final CheckpointStatsTracker tracker = new CheckpointStatsTracker(
		0,
		Collections.singletonList(mockedVertex),
		mock(CheckpointCoordinatorConfiguration.class),
		new UnregisteredMetricsGroup());
	return tracker;
}
 
Example #29
Source File: ExecutionGraphCheckpointCoordinatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and starts an execution graph with a single vertex named "MockVertex" and enables
 * checkpointing on it with the given ID counter and completed checkpoint store.
 *
 * @param counter checkpoint ID counter handed to the execution graph
 * @param store completed checkpoint store handed to the execution graph
 * @return the started execution graph with checkpointing enabled
 * @throws Exception if building the graph fails
 */
private ExecutionGraph createExecutionGraphAndEnableCheckpointing(
		CheckpointIDCounter counter,
		CompletedCheckpointStore store) throws Exception {
	// the same one-day timeout is used for the RPC and the allocation timeout
	final Time oneDay = Time.days(1L);

	final JobVertex mockVertex = new JobVertex("MockVertex");
	mockVertex.setInvokableClass(AbstractInvokable.class);

	final ExecutionGraph graph = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(mockVertex)
		.setRpcTimeout(oneDay)
		.setAllocationTimeout(oneDay)
		.allowQueuedScheduling()
		.build();

	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		100,
		100,
		100,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		true,
		false,
		0);

	graph.enableCheckpointing(
		coordinatorConfig,
		Collections.emptyList(),
		Collections.emptyList(),
		Collections.emptyList(),
		Collections.emptyList(),
		counter,
		store,
		new MemoryStateBackend(),
		CheckpointStatsTrackerTest.createTestTracker());

	return graph;
}
 
Example #30
Source File: ExecutionGraphDeploymentTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an execution graph for an empty job named "test" that has checkpointing
 * settings attached.
 *
 * @param configuration configuration handed to the execution graph builder
 * @return the execution graph created by {@code ExecutionGraphBuilder.buildGraph}
 * @throws Exception if building the graph fails
 */
private ExecutionGraph createExecutionGraph(Configuration configuration) throws Exception {
	final ScheduledExecutorService sharedExecutor = TestingUtils.defaultExecutor();

	final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(
		100,
		10 * 60 * 1000,
		0,
		1,
		CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
		false,
		false,
		0);

	final JobCheckpointingSettings snapshotSettings = new JobCheckpointingSettings(
		Collections.<JobVertexID>emptyList(),
		Collections.<JobVertexID>emptyList(),
		Collections.<JobVertexID>emptyList(),
		coordinatorConfig,
		null);

	final JobGraph graph = new JobGraph(new JobID(), "test");
	graph.setSnapshotSettings(snapshotSettings);

	// the same timeout value is passed for both timeout arguments of buildGraph
	final Time tenSeconds = Time.seconds(10L);

	return ExecutionGraphBuilder.buildGraph(
		null,
		graph,
		configuration,
		sharedExecutor,
		sharedExecutor,
		new ProgrammedSlotProvider(1),
		getClass().getClassLoader(),
		new StandaloneCheckpointRecoveryFactory(),
		tenSeconds,
		new NoRestartStrategy(),
		new UnregisteredMetricsGroup(),
		blobWriter,
		tenSeconds,
		LoggerFactory.getLogger(getClass()),
		NettyShuffleMaster.INSTANCE,
		NoOpPartitionTracker.INSTANCE);
}