org.apache.flink.runtime.checkpoint.CompletedCheckpoint Java Examples

The following examples show how to use org.apache.flink.runtime.checkpoint.CompletedCheckpoint. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OperatorCoordinatorSchedulerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Triggers a checkpoint, completes the coordinator's pending checkpoint future with the
 * given state, acknowledges the checkpoint and then waits until the coordinator reports
 * a complete checkpoint.
 *
 * @param scheduler scheduler on which the checkpoint is triggered and acknowledged
 * @param testingOperatorCoordinator coordinator whose last triggered checkpoint is completed
 * @param coordinatorState bytes used to complete the coordinator's checkpoint future
 * @return the ID of the completed checkpoint
 * @throws Exception if triggering, acknowledging or waiting for the checkpoint fails
 */
private long takeCompleteCheckpoint(
		DefaultScheduler scheduler,
		TestingOperatorCoordinator testingOperatorCoordinator,
		byte[] coordinatorState) throws Exception {

	final CompletableFuture<CompletedCheckpoint> pendingCompletion = triggerCheckpoint(scheduler);

	testingOperatorCoordinator.getLastTriggeredCheckpoint().complete(coordinatorState);
	acknowledgeCurrentCheckpoint(scheduler);

	// block until the checkpoint future yields the completed checkpoint
	final CompletedCheckpoint completed = pendingCompletion.get();
	final long completedId = completed.getCheckpointID();

	// keep working off executor tasks until the coordinator has seen the completion
	while (!testingOperatorCoordinator.hasCompleteCheckpoint()) {
		executor.triggerAll();
		Thread.sleep(1);
	}

	return completedId;
}
 
Example #2
Source File: ZooKeeperUtils.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for the given job.
 *
 * <p>The ZooKeeper path of the store is the configured checkpoints path followed by the
 * job-specific path segment.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String checkpointsRoot = configuration.getString(
		HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
		createFileSystemStateStorage(configuration, "completedCheckpoint");

	// the job-specific segment is appended only after the storage helper was created
	final String jobCheckpointsPath = checkpointsRoot + ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);

	final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), jobCheckpointsPath);
	return store;
}
 
Example #3
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Empties the store. Unless the job reached a globally terminal state, the checkpoints
 * held so far replace the content of {@code suspended} before being removed.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	// both branches of the decision start from an empty 'suspended' collection
	suspended.clear();

	if (!jobStatus.isGloballyTerminalState()) {
		suspended.addAll(checkpoints);
	}

	checkpoints.clear();
}
 
Example #4
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the checkpoint and, if more than {@code maxRetainedCheckpoints} are now held,
 * removes the oldest one.
 *
 * @param checkpoint the completed checkpoint to append
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	if (checkpoints.size() <= maxRetainedCheckpoints) {
		// still within the retention limit, nothing to drop
		return;
	}

	removeOldestCheckpoint();
}
 
Example #5
Source File: SchedulerTestingUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a checkpoint on the scheduler's checkpoint coordinator, acknowledges the single
 * pending checkpoint and returns the resulting completed checkpoint.
 *
 * @param scheduler scheduler whose checkpoint coordinator is used
 * @return the completed checkpoint; the method asserts that it is available
 * @throws Exception if triggering or acknowledging the checkpoint fails
 */
public static CompletedCheckpoint takeCheckpoint(DefaultScheduler scheduler) throws Exception {
	final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
	coordinator.triggerCheckpoint(false);

	// exactly one checkpoint is expected to be pending at this point
	assertEquals("test setup inconsistent", 1, coordinator.getNumberOfPendingCheckpoints());
	final PendingCheckpoint pending = coordinator.getPendingCheckpoints().values().iterator().next();
	final CompletableFuture<CompletedCheckpoint> completion = pending.getCompletionFuture();

	acknowledgePendingCheckpoint(scheduler, pending.getCheckpointId());

	// non-blocking read: yields null if the future has not completed yet
	final CompletedCheckpoint result = completion.getNow(null);
	assertNotNull("checkpoint not complete", result);
	return result;
}
 
Example #6
Source File: OperatorCoordinatorSchedulerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a checkpoint and waits until either the operator coordinator has seen the
 * trigger or the checkpoint future is already done.
 *
 * @param scheduler scheduler on which the checkpoint is triggered
 * @return the future of the triggered checkpoint
 * @throws Exception if triggering or waiting fails
 */
private CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception {
	final CompletableFuture<CompletedCheckpoint> checkpointFuture = SchedulerTestingUtils.triggerCheckpoint(scheduler);
	final TestingOperatorCoordinator operatorCoordinator = getCoordinator(scheduler);

	// Parts of the Checkpoint Coordinator logic run in its timer thread, other parts are
	// delegated to the scheduler executor. Hence we alternate between working off the
	// scheduler executor's tasks and briefly sleeping to give the timer thread a chance.
	// This can go away once the CheckpointCoordinator also runs in a 'main thread executor'.
	while (!operatorCoordinator.hasTriggeredCheckpoint() && !checkpointFuture.isDone()) {
		executor.triggerAll();
		Thread.sleep(1);
	}

	return checkpointFuture;
}
 
Example #7
Source File: OperatorCoordinatorSchedulerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the bytes used to complete the coordinator's checkpoint end up as the
 * coordinator state of the test operator in the completed checkpoint.
 */
@Test
public void testTakeCheckpoint() throws Exception {
	// random payload acting as the coordinator's checkpoint state
	final byte[] expectedState = new byte[656];
	new Random().nextBytes(expectedState);

	final DefaultScheduler scheduler = createSchedulerAndDeployTasks();
	final TestingOperatorCoordinator operatorCoordinator = getCoordinator(scheduler);

	final CompletableFuture<CompletedCheckpoint> completion = triggerCheckpoint(scheduler);
	operatorCoordinator.getLastTriggeredCheckpoint().complete(expectedState);
	acknowledgeCurrentCheckpoint(scheduler);

	final CompletedCheckpoint completedCheckpoint = completion.get();
	final OperatorState operatorState = completedCheckpoint.getOperatorStates().get(testOperatorId);
	assertArrayEquals(expectedState, getStateHandleContents(operatorState.getCoordinatorState()));
}
 
Example #8
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a JobMaster for a JobGraph with savepoint restore settings puts
 * the savepoint into the completed checkpoint store.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final SavepointRestoreSettings restoreSettings =
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true);
	final JobGraph jobGraph = createJobGraphWithCheckpointing(restoreSettings);

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	final TestingCheckpointRecoveryFactory recoveryFactory =
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter());
	haServices.setCheckpointRecoveryFactory(recoveryFactory);

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// by now the store is expected to contain the savepoint
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint(false);

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #9
Source File: ZooKeeperUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the {@link ZooKeeperCompletedCheckpointStore} of a job.
 *
 * <p>The store lives under the configured checkpoints path extended by the job's own
 * path segment.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String configuredPath = configuration.getString(
		HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> checkpointStorage =
		createFileSystemStateStorage(configuration, HA_STORAGE_COMPLETED_CHECKPOINT);

	// resolve the job's sub-path after the storage helper exists, then join both parts
	final String checkpointsPath = configuredPath + ZooKeeperJobGraphStore.getPathForJob(jobId);

	final ZooKeeperCompletedCheckpointStore checkpointStore = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, checkpointsPath, checkpointStorage),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), checkpointsPath);
	return checkpointStore;
}
 
Example #10
Source File: SchedulerBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a savepoint and optionally cancels the job once the savepoint is stored.
 *
 * <p>When {@code cancelJob} is set, the checkpoint scheduler is stopped before the savepoint
 * is triggered and started again if the savepoint fails.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *                        has a default savepoint location
 * @param cancelJob whether the job is cancelled after a successful savepoint
 * @return future holding the external pointer of the savepoint
 * @throws IllegalStateException if the job has no checkpoint coordinator, or if no target
 *                               directory is given and no default location exists
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
	mainThreadExecutor.assertRunningInMainThread();

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
	if (checkpointCoordinator == null) {
		throw new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
	}

	final boolean savepointLocationMissing = targetDirectory == null
		&& !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation();
	if (savepointLocationMissing) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		throw new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
	}

	log.info("Triggering {}savepoint for job {}.", cancelJob ? "cancel-with-" : "", jobGraph.getJobID());

	if (cancelJob) {
		// no periodic checkpoints while the cancel-with-savepoint is in flight
		checkpointCoordinator.stopCheckpointScheduler();
	}

	final CompletableFuture<String> savepointPointer = checkpointCoordinator
		.triggerSavepoint(targetDirectory)
		.thenApply(CompletedCheckpoint::getExternalPointer);

	return savepointPointer.handleAsync((path, throwable) -> {
		if (throwable != null) {
			// savepoint failed: resume periodic checkpointing if it was stopped above
			if (cancelJob) {
				startCheckpointScheduler(checkpointCoordinator);
			}
			throw new CompletionException(throwable);
		}

		if (cancelJob) {
			log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
			cancel();
		}
		return path;
	}, mainThreadExecutor);
}
 
Example #11
Source File: RegionFailoverITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the checkpoint via the parent store and then records its ID and bumps the counter
 * of completed checkpoints.
 *
 * @param checkpoint the completed checkpoint being added
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	super.addCheckpoint(checkpoint);

	// The bookkeeping happens here, when the completed checkpoint is added, rather than in
	// the task-side 'notifyCheckpointComplete' callback, to avoid a race condition.
	// See FLINK-13601.
	final long completedId = checkpoint.getCheckpointID();
	lastCompletedCheckpointId.set(completedId);
	numCompletedCheckpoints.incrementAndGet();
}
 
Example #12
Source File: NotifyCheckpointAbortedITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent store once the abort latch has been triggered; before that it
 * signals the add latch, waits for the abort latch and fails with an
 * {@code ExpectedTestException}.
 *
 * @param checkpoint the completed checkpoint being added
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	if (abortCheckpointLatch.isTriggered()) {
		super.addCheckpoint(checkpoint);
		return;
	}

	// let the main thread know that all checkpoints on task side have been finished
	addCheckpointLatch.trigger();
	// block until the main thread allows us to fail, so the checkpoint gets notified as aborted
	abortCheckpointLatch.await();
	throw new ExpectedTestException();
}
 
Example #13
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Clears the store on shutdown. For a job status that is not globally terminal, the
 * current checkpoints are first copied into {@code suspended}, replacing what it held.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	if (!jobStatus.isGloballyTerminalState()) {
		// keep the checkpoints around in 'suspended'
		suspended.clear();
		checkpoints.forEach(suspended::add);
		checkpoints.clear();
		return;
	}

	// globally terminal: nothing is kept
	checkpoints.clear();
	suspended.clear();
}
 
Example #14
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the checkpoint as the newest entry; drops the oldest entry when the number of
 * entries exceeds {@code maxRetainedCheckpoints}.
 *
 * @param checkpoint the completed checkpoint to store
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	final boolean retentionLimitExceeded = checkpoints.size() > maxRetainedCheckpoints;
	if (retentionLimitExceeded) {
		removeOldestCheckpoint();
	}
}
 
Example #15
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that, after a JobMaster was created for a JobGraph configured to restore from
 * a savepoint, the completed checkpoint store reports that savepoint as latest checkpoint.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// savepoint fixture
	final long savepointId = 42L;
	final File savepointLocation = createSavepoint(savepointId);

	// job graph restoring from the fixture
	final String savepointPath = savepointLocation.getAbsolutePath();
	final SavepointRestoreSettings settings = SavepointRestoreSettings.forPath(savepointPath, true);
	final JobGraph jobGraph = createJobGraphWithCheckpointing(settings);

	final StandaloneCompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(store, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the savepoint is expected to be in the store at this point
		final CompletedCheckpoint latest = store.getLatestCheckpoint(false);

		assertThat(latest, Matchers.notNullValue());
		assertThat(latest.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #16
Source File: JobMaster.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a savepoint and, if requested, cancels the job after the savepoint was stored.
 *
 * <p>Precondition failures are reported through an exceptionally completed future. With
 * {@code cancelJob} set, the checkpoint scheduler is stopped before triggering and started
 * again if the savepoint fails.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *                        has a default savepoint location
 * @param cancelJob whether the job is cancelled after a successful savepoint
 * @param timeout timeout handed to {@code cancel}
 * @return future holding the external pointer of the savepoint
 */
@Override
public CompletableFuture<String> triggerSavepoint(
		@Nullable final String targetDirectory,
		final boolean cancelJob,
		final Time timeout) {

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
	if (checkpointCoordinator == null) {
		return FutureUtils.completedExceptionally(new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID())));
	}

	final boolean savepointLocationMissing = targetDirectory == null
		&& !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation();
	if (savepointLocationMissing) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		return FutureUtils.completedExceptionally(new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'."));
	}

	if (cancelJob) {
		// no periodic checkpoints while the cancel-with-savepoint is in flight
		checkpointCoordinator.stopCheckpointScheduler();
	}

	final CompletableFuture<String> savepointPointer = checkpointCoordinator
		.triggerSavepoint(System.currentTimeMillis(), targetDirectory)
		.thenApply(CompletedCheckpoint::getExternalPointer);

	return savepointPointer.handleAsync((path, throwable) -> {
		if (throwable != null) {
			// savepoint failed: resume periodic checkpointing if it was stopped above
			if (cancelJob) {
				startCheckpointScheduler(checkpointCoordinator);
			}
			throw new CompletionException(throwable);
		}

		if (cancelJob) {
			log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
			cancel(timeout);
		}
		return path;
	}, getMainThreadExecutor());
}
 
Example #17
Source File: ZooKeeperUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a {@link ZooKeeperCompletedCheckpointStore} for one job.
 *
 * <p>The configured checkpoints path plus the path segment of the job form the ZooKeeper
 * path that the store is initialized in.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String basePath = configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> helper = createFileSystemStateStorage(
		configuration,
		"completedCheckpoint");

	// job path is resolved after the helper was created; then both parts are concatenated
	final String jobPath = ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);
	final String fullPath = basePath + jobPath;

	final ZooKeeperCompletedCheckpointStore result = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, fullPath, helper),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), fullPath);
	return result;
}
 
Example #18
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Ensures that a JobMaster created for a JobGraph with savepoint restore settings leaves
 * the savepoint as the latest entry of the completed checkpoint store.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// prepare the savepoint to restore from
	final long savepointId = 42L;
	final File savepointOnDisk = createSavepoint(savepointId);

	// configure the job graph to restore from it
	final SavepointRestoreSettings restoreFromSavepoint = SavepointRestoreSettings.forPath(
		savepointOnDisk.getAbsolutePath(),
		true);
	final JobGraph jobGraph = createJobGraphWithCheckpointing(restoreFromSavepoint);

	final StandaloneCompletedCheckpointStore checkpoints = new StandaloneCompletedCheckpointStore(1);
	final StandaloneCheckpointIDCounter idCounter = new StandaloneCheckpointIDCounter();
	haServices.setCheckpointRecoveryFactory(new TestingCheckpointRecoveryFactory(checkpoints, idCounter));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store should now hold the savepoint
		final CompletedCheckpoint fromStore = checkpoints.getLatestCheckpoint();

		assertThat(fromStore, Matchers.notNullValue());
		assertThat(fromStore.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #19
Source File: LegacyScheduler.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a savepoint; with {@code cancelJob} set the job is cancelled once the savepoint
 * has been stored.
 *
 * <p>For cancel-with-savepoint the checkpoint scheduler is stopped up front and started
 * again if the savepoint fails.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *                        has a default savepoint location
 * @param cancelJob whether the job is cancelled after a successful savepoint
 * @return future holding the external pointer of the savepoint
 * @throws IllegalStateException if the job has no checkpoint coordinator, or if no target
 *                               directory is given and no default location exists
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
	mainThreadExecutor.assertRunningInMainThread();

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();

	// guard 1: the job must have a checkpoint coordinator
	if (checkpointCoordinator == null) {
		throw new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
	}

	// guard 2: a savepoint location must be resolvable
	if (targetDirectory == null && !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation()) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		throw new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
	}

	if (cancelJob) {
		checkpointCoordinator.stopCheckpointScheduler();
	}

	final CompletableFuture<CompletedCheckpoint> savepoint =
		checkpointCoordinator.triggerSavepoint(System.currentTimeMillis(), targetDirectory);

	return savepoint
		.thenApply(CompletedCheckpoint::getExternalPointer)
		.handleAsync((path, throwable) -> {
			final boolean failed = throwable != null;
			if (failed) {
				// undo the scheduler stop from above before propagating the failure
				if (cancelJob) {
					startCheckpointScheduler(checkpointCoordinator);
				}
				throw new CompletionException(throwable);
			}

			if (cancelJob) {
				log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
				cancel();
			}
			return path;
		}, mainThreadExecutor);
}
 
Example #20
Source File: RecoverableCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Removes all checkpoints from the store. If the job status is not globally terminal,
 * the removed checkpoints become the new content of {@code suspended}; otherwise
 * {@code suspended} ends up empty as well.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	final boolean keepCheckpoints = !jobStatus.isGloballyTerminalState();

	// previous suspended entries are discarded in either case
	suspended.clear();

	if (keepCheckpoints) {
		for (CompletedCheckpoint retained : checkpoints) {
			suspended.add(retained);
		}
	}

	checkpoints.clear();
}
 
Example #21
Source File: RecoverableCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the checkpoint at the end of the store and evicts the oldest checkpoint if the
 * store then holds more than {@code maxRetainedCheckpoints} entries.
 *
 * @param checkpoint the completed checkpoint to add
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	// at most one entry needs to go, since one entry was added
	if (maxRetainedCheckpoints < checkpoints.size()) {
		removeOldestCheckpoint();
	}
}
 
Example #22
Source File: RecoverableCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the most recently added checkpoint.
 *
 * @return the last checkpoint in the store, or {@code null} if the store is empty
 */
@Override
public CompletedCheckpoint getLatestCheckpoint() throws Exception {
	if (checkpoints.isEmpty()) {
		return null;
	}
	return checkpoints.getLast();
}
 
Example #23
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the checkpoints currently held by the store.
 *
 * @return a new list containing the checkpoints; changes to it do not affect the store
 */
@Override
public List<CompletedCheckpoint> getAllCheckpoints() throws Exception {
	final List<CompletedCheckpoint> snapshot = new ArrayList<>(checkpoints);
	return snapshot;
}
 
Example #24
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the first checkpoint from the store and calls {@code discardOnSubsume()} on it.
 *
 * @throws Exception if the store is empty or discarding the checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
	checkpoints.removeFirst().discardOnSubsume();
}
 
Example #25
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that restoring a modified JobGraph from a savepoint only works when non
 * restored state is allowed: JobMaster creation is expected to fail with an
 * {@link IllegalStateException} otherwise.
 */
@Test
public void testRestoringModifiedJobFromSavepoint() throws Exception {

	// savepoint that carries state for an operator unknown to the job graph below
	final long savepointId = 42L;
	final OperatorID operatorID = new OperatorID();
	final File savepoint = createSavepointWithOperatorState(savepointId, operatorID);
	final String savepointPath = savepoint.getAbsolutePath();

	// strict settings: non restored state is not allowed
	final SavepointRestoreSettings strictSettings = SavepointRestoreSettings.forPath(savepointPath, false);

	// job graph consisting of a new operator
	final JobVertex newOperator = new JobVertex("New operator");
	newOperator.setInvokableClass(NoOpInvokable.class);
	final JobGraph jobGraphWithNewOperator =
		createJobGraphFromJobVerticesWithCheckpointing(strictSettings, newOperator);

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	final TestingCheckpointRecoveryFactory recoveryFactory =
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter());
	haServices.setCheckpointRecoveryFactory(recoveryFactory);

	try {
		createJobMaster(
			configuration,
			jobGraphWithNewOperator,
			haServices,
			new TestingJobManagerSharedServicesBuilder().build());
		fail("Should fail because we cannot resume the changed JobGraph from the savepoint.");
	} catch (IllegalStateException expected) {
		// this is the failure the test is looking for
	}

	// lenient settings: non restored state is allowed
	jobGraphWithNewOperator.setSavepointRestoreSettings(
		SavepointRestoreSettings.forPath(savepointPath, true));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraphWithNewOperator,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// this time the savepoint is expected to be in the store
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #26
Source File: SchedulerTestingUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Triggers a checkpoint on the checkpoint coordinator of the given scheduler.
 *
 * @param scheduler scheduler whose checkpoint coordinator is used
 * @return the future returned by the coordinator's {@code triggerCheckpoint(false)} call
 * @throws Exception if obtaining the coordinator or triggering fails
 */
public static CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception {
	return getCheckpointCoordinator(scheduler).triggerCheckpoint(false);
}
 
Example #27
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that an existing checkpoint takes precedence over a savepoint.
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {

	// savepoint the job graph is configured to restore from
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	final SavepointRestoreSettings restoreSettings =
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true);
	final JobGraph jobGraph = createJobGraphWithCheckpointing(restoreSettings);

	// checkpoint that already sits in the store before the JobMaster is created
	final long checkpointId = 1L;
	final CheckpointProperties checkpointProperties =
		CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);

	final CompletedCheckpoint existingCheckpoint = new CompletedCheckpoint(
		jobGraph.getJobID(),
		checkpointId,
		1L,
		1L,
		Collections.emptyMap(),
		null,
		checkpointProperties,
		new DummyCheckpointStorageLocation());

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	checkpointStore.addCheckpoint(existingCheckpoint);
	final TestingCheckpointRecoveryFactory recoveryFactory =
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter());
	haServices.setCheckpointRecoveryFactory(recoveryFactory);

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store must still report the pre-existing checkpoint, not the savepoint
		final CompletedCheckpoint latest = checkpointStore.getLatestCheckpoint();

		assertThat(latest, Matchers.notNullValue());
		assertThat(latest.getCheckpointID(), is(checkpointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #28
Source File: RecoverableCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Takes the first checkpoint out of the store and discards it via
 * {@code discardOnSubsume()}.
 *
 * @throws Exception if the store is empty or discarding the checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
	final CompletedCheckpoint oldest = checkpoints.removeFirst();
	// the removed checkpoint counts as subsumed
	oldest.discardOnSubsume();
}
 
Example #29
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a checkpoint already present in the store wins over a savepoint.
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {

	// create the savepoint and make the job graph restore from it
	final long savepointId = 42L;
	final File savepointLocation = createSavepoint(savepointId);
	final String savepointPath = savepointLocation.getAbsolutePath();

	final SavepointRestoreSettings settings = SavepointRestoreSettings.forPath(savepointPath, true);
	final JobGraph jobGraph = createJobGraphWithCheckpointing(settings);

	// put a checkpoint with a different ID into the store up front
	final long checkpointId = 1L;

	final CompletedCheckpoint preExisting = new CompletedCheckpoint(
		jobGraph.getJobID(),
		checkpointId,
		1L,
		1L,
		Collections.emptyMap(),
		null,
		CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
		new DummyCheckpointStorageLocation());

	final StandaloneCompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);
	store.addCheckpoint(preExisting);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(store, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the latest entry is expected to be the checkpoint rather than the savepoint
		final CompletedCheckpoint latestInStore = store.getLatestCheckpoint(false);

		assertThat(latestInStore, Matchers.notNullValue());
		assertThat(latestInStore.getCheckpointID(), is(checkpointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #30
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a modified JobGraph is only restored from a savepoint if non restored state
 * is allowed; with it disallowed, creating the JobMaster must throw an
 * {@link IllegalStateException}.
 */
@Test
public void testRestoringModifiedJobFromSavepoint() throws Exception {

	// savepoint holding state of an operator that the job graph below does not contain
	final long savepointId = 42L;
	final OperatorID operatorID = new OperatorID();
	final File savepointLocation = createSavepointWithOperatorState(savepointId, operatorID);

	// first attempt: disallow non restored state
	final SavepointRestoreSettings disallowNonRestored = SavepointRestoreSettings.forPath(
		savepointLocation.getAbsolutePath(),
		false);

	// the job graph only has a freshly created operator
	final JobVertex addedVertex = new JobVertex("New operator");
	addedVertex.setInvokableClass(NoOpInvokable.class);
	final JobGraph jobGraphWithNewOperator =
		createJobGraphFromJobVerticesWithCheckpointing(disallowNonRestored, addedVertex);

	final StandaloneCompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(store, new StandaloneCheckpointIDCounter()));

	try {
		createJobMaster(
			configuration,
			jobGraphWithNewOperator,
			haServices,
			new TestingJobManagerSharedServicesBuilder().build());
		fail("Should fail because we cannot resume the changed JobGraph from the savepoint.");
	} catch (IllegalStateException expected) {
		// expected outcome of the first attempt
	}

	// second attempt: allow non restored state
	final SavepointRestoreSettings allowNonRestored = SavepointRestoreSettings.forPath(
		savepointLocation.getAbsolutePath(),
		true);
	jobGraphWithNewOperator.setSavepointRestoreSettings(allowNonRestored);

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraphWithNewOperator,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// now the store is expected to contain the savepoint
		final CompletedCheckpoint latest = store.getLatestCheckpoint(false);

		assertThat(latest, Matchers.notNullValue());
		assertThat(latest.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}