Java Code Examples for org.apache.flink.client.program.ClusterClient#triggerSavepoint()

The following examples show how to use org.apache.flink.client.program.ClusterClient#triggerSavepoint(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: CliFrontend.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the cluster client to trigger a savepoint for the given job and blocks until the
 * resulting savepoint path is available. Progress is reported through {@code logAndSysout}.
 *
 * @param clusterClient client used to issue the savepoint request
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory, handed to the cluster client unchanged
 * @return the path produced by the completed savepoint future
 * @throws FlinkException if waiting for the savepoint fails; the cause is the original
 *         failure after {@code ExceptionUtils.stripExecutionException} has been applied
 */
private String triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
	logAndSysout("Triggering savepoint for job " + jobId + '.');
	CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

	logAndSysout("Waiting for response...");

	final String savepointPath;

	try {
		savepointPath = savepointPathFuture.get();
	}
	catch (Exception e) {
		if (e instanceof InterruptedException) {
			// An InterruptedException leaves the interrupt status cleared; restore it so
			// code further up the stack can still observe that the thread was interrupted.
			Thread.currentThread().interrupt();
		}
		Throwable cause = ExceptionUtils.stripExecutionException(e);
		throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
	}

	logAndSysout("Savepoint completed. Path: " + savepointPath);
	logAndSysout("You can resume your program from this savepoint with the run command.");

	return savepointPath;
}
 
Example 2
Source File: CliFrontend.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Triggers a savepoint for {@code jobId} via the cluster client, waits for it to complete
 * and returns the savepoint path. Status messages are emitted through {@code logAndSysout}.
 *
 * @param clusterClient client used to issue the savepoint request
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory, passed to the cluster client as given
 * @return the path the savepoint future completed with
 * @throws FlinkException if the wait for the savepoint path fails for any reason
 */
private String triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
	logAndSysout("Triggering savepoint for job " + jobId + '.');
	CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

	logAndSysout("Waiting for response...");

	final String savepointPath;

	try {
		savepointPath = savepointPathFuture.get();
	}
	catch (Exception e) {
		if (e instanceof InterruptedException) {
			// Wrapping the exception would otherwise hide the interruption from callers,
			// because the interrupt status is cleared once InterruptedException is thrown.
			Thread.currentThread().interrupt();
		}
		Throwable cause = ExceptionUtils.stripExecutionException(e);
		throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
	}

	logAndSysout("Savepoint completed. Path: " + savepointPath);
	logAndSysout("You can resume your program from this savepoint with the run command.");

	return savepointPath;
}
 
Example 3
Source File: CliFrontend.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Triggers a savepoint for {@code jobId} via the cluster client and waits at most
 * {@code clientTimeout} for it to complete, reporting the resulting path through
 * {@code logAndSysout}.
 *
 * @param clusterClient client used to issue the savepoint request
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory, passed to the cluster client as given
 * @throws FlinkException if the savepoint fails, the wait times out, or the waiting thread
 *         is interrupted
 */
private void triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
	logAndSysout("Triggering savepoint for job " + jobId + '.');

	CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

	logAndSysout("Waiting for response...");

	try {
		final String savepointPath = savepointPathFuture.get(clientTimeout.toMillis(), TimeUnit.MILLISECONDS);

		logAndSysout("Savepoint completed. Path: " + savepointPath);
		logAndSysout("You can resume your program from this savepoint with the run command.");
	} catch (Exception e) {
		if (e instanceof InterruptedException) {
			// Restore the interrupt status so the interruption is not lost when the
			// exception is wrapped into a FlinkException below.
			Thread.currentThread().interrupt();
		}
		Throwable cause = ExceptionUtils.stripExecutionException(e);
		throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
	}
}
 
Example 4
Source File: SavepointReaderKeyedStateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Submits {@code jobGraph} in detached mode, waits until {@code SavepointSource} reports it
 * is finished, then triggers a savepoint into a directory obtained from
 * {@code getTempDirPath}. The job is cancelled in the {@code finally} block regardless of
 * the outcome.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path the cluster client's future completed with
 * @throws Exception if submission, waiting, or the savepoint fails; an
 *         {@link InterruptedException} raised while polling is propagated as-is
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
	SavepointSource.initializeForTest();

	ClusterClient<?> client = miniClusterResource.getClusterClient();
	client.setDetached(true);

	JobID jobId = jobGraph.getJobID();

	// One shared budget for both the polling phase and the savepoint itself.
	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

	String dirPath = getTempDirPath(new AbstractID().toHexString());

	try {
		JobSubmissionResult result = client.submitJob(jobGraph, getClass().getClassLoader());

		boolean finished = false;
		while (deadline.hasTimeLeft()) {
			if (SavepointSource.isFinished()) {
				finished = true;

				break;
			}

			// Back off briefly between polls instead of spinning on a core until the
			// deadline expires.
			Thread.sleep(2L);
		}

		if (!finished) {
			Assert.fail("Failed to initialize state within deadline");
		}

		CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
		return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	} finally {
		client.cancel(jobId);
	}
}
 
Example 5
Source File: AbstractFlinkClient.java    From alchemy with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers a savepoint for the job named in {@code request} and blocks until the savepoint
 * path is available.
 *
 * @param clusterClient client used to issue the savepoint request
 * @param request carries the hex job ID and the savepoint directory
 * @return a message-only response when the request has no job ID; otherwise a response
 *         built from {@code true} and the savepoint path
 * @throws Exception if the job ID cannot be parsed or waiting for the savepoint fails
 */
public SavepointResponse savepoint(ClusterClient clusterClient, SavepointFlinkRequest request) throws Exception {
    // Guard clause: without a job ID there is nothing to take a savepoint of.
    if (StringUtils.isEmpty(request.getJobID())) {
        return new SavepointResponse("the job is not submit yet");
    }

    final JobID targetJob = JobID.fromHexString(request.getJobID());
    final CompletableFuture<String> pendingPath =
        clusterClient.triggerSavepoint(targetJob, request.getSavepointDirectory());

    final String completedPath = pendingPath.get();
    return new SavepointResponse(true, completedPath);
}
 
Example 6
Source File: SavepointReaderITTestBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Submits {@code jobGraph}, polls until {@code SavepointSource} reports it is finished, then
 * triggers a savepoint into a directory obtained from {@code getTempDirPath}. The job is
 * cancelled (and the cancellation awaited) in the {@code finally} block.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path the cluster client's future completed with
 * @throws Exception if submission, waiting, the savepoint, or the cancellation fails; an
 *         {@link InterruptedException} raised while polling is propagated as-is
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
	SavepointSource.initializeForTest();

	ClusterClient<?> client = miniClusterResource.getClusterClient();
	JobID jobId = jobGraph.getJobID();

	// One shared budget for both the polling phase and the savepoint itself.
	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

	String dirPath = getTempDirPath(new AbstractID().toHexString());

	try {
		JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph);

		boolean finished = false;
		while (deadline.hasTimeLeft()) {
			if (SavepointSource.isFinished()) {
				finished = true;

				break;
			}

			// InterruptedException is deliberately not caught here: re-setting the flag and
			// continuing would make every later sleep throw immediately, turning this poll
			// into a hot spin. Propagating ends the wait and still runs the finally block.
			Thread.sleep(2L);
		}

		if (!finished) {
			Assert.fail("Failed to initialize state within deadline");
		}

		CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
		return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	} finally {
		client.cancel(jobId).get();
	}
}
 
Example 7
Source File: SavepointTestBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a savepoint of {@code jobID} into a directory obtained from {@code getTempDirPath}.
 *
 * @param client cluster client used to trigger the savepoint
 * @param jobID ID of the job to take a savepoint of
 * @return the future handed back by the cluster client
 * @throws RuntimeException wrapping any {@link IOException} raised inside the try block
 */
private CompletableFuture<String> triggerSavepoint(ClusterClient<?> client, JobID jobID) throws RuntimeException {
	final String directoryName = new AbstractID().toHexString();

	try {
		final String targetDirectory = getTempDirPath(directoryName);
		return client.triggerSavepoint(jobID, targetDirectory);
	} catch (IOException ioFailure) {
		// Callers do not handle checked exceptions, so surface the failure unchecked.
		throw new RuntimeException(ioFailure);
	}
}
 
Example 8
Source File: RescalingITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that resuming from a savepoint with a changed parallelism is rejected when the
 * rescaled operator holds non-partitioned state.
 *
 * <p>The job is first run at half the available slots, a savepoint is taken and the job is
 * cancelled. It is then resubmitted at full slot count from that savepoint. A
 * {@code JobExecutionException} caused by an {@link IllegalStateException} is tolerated;
 * any other {@code JobExecutionException} is rethrown.
 *
 * @throws Exception if any step fails in a way other than the tolerated one
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
	final int initialParallelism = numSlots / 2;
	final int rescaledParallelism = numSlots;
	final int maxParallelism = 13;

	final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(3));

	final ClusterClient<?> client = cluster.getClusterClient();

	try {
		final JobGraph originalJob = createJobGraphWithOperatorState(initialParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		final JobID jobID = originalJob.getJobID();

		client.setDetached(true);
		client.submitJob(originalJob, RescalingITCase.class.getClassLoader());

		// block on the latch that signals the source has started working
		StateSourceBase.workStartedLatch.await();

		final String savepointPath = client
			.triggerSavepoint(jobID, null)
			.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

		client.cancel(jobID);

		// poll until the cluster no longer reports any running job
		while (!getRunningJobs(client).isEmpty()) {
			Thread.sleep(50);
		}

		final JobGraph rescaledJob = createJobGraphWithOperatorState(rescaledParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		rescaledJob.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

		// attached submission, so a job failure surfaces as an exception here
		client.setDetached(false);
		client.submitJob(rescaledJob, RescalingITCase.class.getClassLoader());
	} catch (JobExecutionException exception) {
		// An IllegalStateException wrapped in a JobExecutionException is the expected
		// outcome of rescaling non-partitioned state; everything else is a real failure.
		if (!(exception.getCause() instanceof IllegalStateException)) {
			throw exception;
		}
	}
}
 
Example 9
Source File: SavepointMigrationTestBase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Submits the job built from {@code env} in detached mode, waits until every expected
 * accumulator has reached its expected value, triggers a savepoint and moves the resulting
 * file or directory to {@code savepointPath}.
 *
 * @param env environment whose stream graph is submitted
 * @param savepointPath destination the savepoint is moved to
 * @param expectedAccumulators pairs of accumulator name and the value it must reach
 * @throws Exception if submission, polling, the savepoint, or the move fails
 */
@SafeVarargs
protected final void executeAndSavepoint(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	final ClusterClient<?> client = miniClusterResource.getClusterClient();
	client.setDetached(true);

	final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	final JobSubmissionResult submission = client.submitJob(jobGraph, SavepointMigrationTestBase.class.getClassLoader());

	LOG.info("Submitted job {} and waiting...", submission.getJobID());

	// Poll the accumulators until all expectations hold or the deadline runs out.
	boolean done = false;
	while (!done && DEADLINE.hasTimeLeft()) {
		Thread.sleep(100);
		final Map<String, OptionalFailure<Object>> current = client.getAccumulators(submission.getJobID());

		boolean allMatch = true;
		for (Tuple2<String, Integer> expected : expectedAccumulators) {
			final OptionalFailure<Object> reported = current.get(expected.f0);
			if (reported == null) {
				// accumulator not registered yet
				allMatch = false;
				break;
			}

			final Integer value = (Integer) reported.get();
			if (value == null || !value.equals(expected.f1)) {
				allMatch = false;
				break;
			}
		}
		done = allMatch;
	}

	if (!done) {
		fail("Did not see the expected accumulator results within time limit.");
	}

	LOG.info("Triggering savepoint.");

	final String jobmanagerSavepointPath = client
		.triggerSavepoint(submission.getJobID(), null)
		.get(DEADLINE.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

	final File source = new File(new URI(jobmanagerSavepointPath).getPath());
	final File target = new File(savepointPath);
	// savepoints were changed to be directories in Flink 1.3
	if (source.isDirectory()) {
		FileUtils.moveDirectory(source, target);
	} else {
		FileUtils.moveFile(source, target);
	}
}
 
Example 10
Source File: RescalingITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Ensures that a job whose rescaled operator has non-partitioned state cannot be restarted
 * from a savepoint with a different parallelism.
 *
 * <p>Flow: run at {@code numSlots / 2}, take a savepoint, cancel, wait for the job to
 * disappear, then resubmit at {@code numSlots} from the savepoint. Only a
 * {@code JobExecutionException} whose cause is an {@link IllegalStateException} is
 * swallowed.
 *
 * @throws Exception on any failure other than the tolerated one
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
	final int parallelismBefore = numSlots / 2;
	final int parallelismAfter = numSlots;
	final int maxParallelism = 13;

	final Duration timeout = Duration.ofMinutes(3);
	final Deadline deadline = Deadline.now().plus(timeout);

	final ClusterClient<?> client = cluster.getClusterClient();
	final ClassLoader userClassLoader = RescalingITCase.class.getClassLoader();

	try {
		final JobGraph firstRun = createJobGraphWithOperatorState(parallelismBefore, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		final JobID jobID = firstRun.getJobID();

		client.setDetached(true);
		client.submitJob(firstRun, userClassLoader);

		// do not trigger the savepoint before the source has counted down its start latch
		StateSourceBase.workStartedLatch.await();

		final CompletableFuture<String> pendingSavepoint = client.triggerSavepoint(jobID, null);
		final String savepointPath = pendingSavepoint.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

		client.cancel(jobID);

		// wait for the cancelled job to be removed from the running jobs
		while (!getRunningJobs(client).isEmpty()) {
			Thread.sleep(50);
		}

		final JobGraph secondRun = createJobGraphWithOperatorState(parallelismAfter, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		secondRun.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

		client.setDetached(false);
		client.submitJob(secondRun, userClassLoader);
	} catch (JobExecutionException exception) {
		final boolean expectedFailure = exception.getCause() instanceof IllegalStateException;
		if (!expectedFailure) {
			throw exception;
		}
		// expected: an IllegalStateException wrapped in a JobExecutionException, because
		// the job containing non-partitioned state is being rescaled
	}
}
 
Example 11
Source File: SavepointMigrationTestBase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the job built from {@code env} in detached mode until all expected accumulator values
 * are observed, then takes a savepoint and relocates it to {@code savepointPath}.
 *
 * @param env environment whose stream graph is submitted
 * @param savepointPath destination the savepoint file or directory is moved to
 * @param expectedAccumulators accumulator names paired with the value each must reach
 * @throws Exception if submission, polling, the savepoint, or the move fails
 */
@SafeVarargs
protected final void executeAndSavepoint(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	ClusterClient<?> clusterClient = miniClusterResource.getClusterClient();
	clusterClient.setDetached(true);

	// Submit the job
	JobSubmissionResult submitted = clusterClient.submitJob(
		env.getStreamGraph().getJobGraph(),
		SavepointMigrationTestBase.class.getClassLoader());

	LOG.info("Submitted job {} and waiting...", submitted.getJobID());

	boolean sawExpectedResults = false;
	while (!sawExpectedResults && DEADLINE.hasTimeLeft()) {
		Thread.sleep(100);
		Map<String, OptionalFailure<Object>> snapshot = clusterClient.getAccumulators(submitted.getJobID());

		// Assume success, then look for the first accumulator that is missing or differs.
		boolean everyAccumulatorMatches = true;
		for (Tuple2<String, Integer> expectation : expectedAccumulators) {
			OptionalFailure<Object> entry = snapshot.get(expectation.f0);
			if (entry == null) {
				everyAccumulatorMatches = false;
				break;
			}

			Integer observed = (Integer) entry.get();
			if (observed == null || !observed.equals(expectation.f1)) {
				everyAccumulatorMatches = false;
				break;
			}
		}

		sawExpectedResults = everyAccumulatorMatches;
	}

	if (!sawExpectedResults) {
		fail("Did not see the expected accumulator results within time limit.");
	}

	LOG.info("Triggering savepoint.");

	CompletableFuture<String> pendingPath = clusterClient.triggerSavepoint(submitted.getJobID(), null);
	String reportedPath = pendingPath.get(DEADLINE.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

	File savepointOnDisk = new File(new URI(reportedPath).getPath());
	File destination = new File(savepointPath);
	// savepoints were changed to be directories in Flink 1.3
	if (savepointOnDisk.isDirectory()) {
		FileUtils.moveDirectory(savepointOnDisk, destination);
	} else {
		FileUtils.moveFile(savepointOnDisk, destination);
	}
}
 
Example 12
Source File: SavepointReaderITTestBase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Submits {@code jobGraph} in detached mode, polls until {@code SavepointSource} reports it
 * is finished, then triggers a savepoint into a directory obtained from
 * {@code getTempDirPath}. The job is cancelled in the {@code finally} block regardless of
 * the outcome.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path the cluster client's future completed with
 * @throws Exception if submission, waiting, or the savepoint fails; an
 *         {@link InterruptedException} raised while polling is propagated as-is
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
	SavepointSource.initializeForTest();

	ClusterClient<?> client = miniClusterResource.getClusterClient();
	// Detached mode is set once here; nothing below touches the client before submission.
	client.setDetached(true);

	JobID jobId = jobGraph.getJobID();

	// One shared budget for both the polling phase and the savepoint itself.
	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

	String dirPath = getTempDirPath(new AbstractID().toHexString());

	try {
		JobSubmissionResult result = client.submitJob(jobGraph, SavepointReaderITCase.class.getClassLoader());

		boolean finished = false;
		while (deadline.hasTimeLeft()) {
			if (SavepointSource.isFinished()) {
				finished = true;

				break;
			}

			// InterruptedException is deliberately not caught here: re-setting the flag and
			// continuing would make every later sleep throw immediately, turning this poll
			// into a hot spin. Propagating ends the wait and still runs the finally block.
			Thread.sleep(2L);
		}

		if (!finished) {
			Assert.fail("Failed to initialize state within deadline");
		}

		CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
		return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	} finally {
		client.cancel(jobId);
	}
}
 
Example 13
Source File: RescalingITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a savepoint taken from a job with non-partitioned operator state cannot be
 * used to restart that job at a different parallelism.
 *
 * <p>The job runs at {@code numSlots / 2}, a savepoint is taken, the job is cancelled and
 * then resubmitted at {@code numSlots} from the savepoint. A {@code JobExecutionException}
 * whose cause is an {@link IllegalStateException} is tolerated; any other
 * {@code JobExecutionException} is rethrown.
 *
 * @throws Exception on any failure other than the tolerated one
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
	final int sourceParallelism = numSlots / 2;
	final int targetParallelism = numSlots;
	final int maxParallelism = 13;

	final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(3));

	final ClusterClient<?> client = cluster.getClusterClient();

	try {
		final JobGraph initialGraph = createJobGraphWithOperatorState(sourceParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		final JobID jobID = initialGraph.getJobID();

		ClientUtils.submitJob(client, initialGraph);

		// the savepoint must not be triggered before the source has started
		StateSourceBase.workStartedLatch.await();

		final String savepointPath = client
			.triggerSavepoint(jobID, null)
			.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

		// wait for the cancel call to complete, then for the job to leave the running set
		client.cancel(jobID).get();
		while (!getRunningJobs(client).isEmpty()) {
			Thread.sleep(50);
		}

		final JobGraph rescaledGraph = createJobGraphWithOperatorState(targetParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		rescaledGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

		ClientUtils.submitJobAndWaitForResult(client, rescaledGraph, RescalingITCase.class.getClassLoader());
	} catch (JobExecutionException exception) {
		// we expect a IllegalStateException wrapped in a JobExecutionException, because
		// the job containing non-partitioned state is being rescaled
		if (!(exception.getCause() instanceof IllegalStateException)) {
			throw exception;
		}
	}
}
 
Example 14
Source File: SavepointMigrationTestBase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Submits the job built from {@code env}, waits up to five minutes for every expected
 * accumulator to reach its expected value, then triggers a savepoint within the remaining
 * time and moves the result to {@code savepointPath}.
 *
 * @param env environment whose stream graph is submitted
 * @param savepointPath destination the savepoint file or directory is moved to
 * @param expectedAccumulators accumulator names paired with the value each must reach
 * @throws Exception if submission, polling, the savepoint, or the move fails
 */
@SafeVarargs
protected final void executeAndSavepoint(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	// Single time budget shared by accumulator polling and the savepoint itself.
	final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));

	final ClusterClient<?> client = miniClusterResource.getClusterClient();

	final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	final JobSubmissionResult submission = ClientUtils.submitJob(client, jobGraph);

	LOG.info("Submitted job {} and waiting...", submission.getJobID());

	boolean done = false;
	while (!done && deadLine.hasTimeLeft()) {
		Thread.sleep(100);
		final Map<String, Object> latest = client.getAccumulators(submission.getJobID()).get();

		boolean allReached = true;
		for (Tuple2<String, Integer> expected : expectedAccumulators) {
			final Object reported = latest.get(expected.f0);
			// Missing accumulator or a value that differs: not there yet.
			if (reported == null || !((Integer) reported).equals(expected.f1)) {
				allReached = false;
				break;
			}
		}
		done = allReached;
	}

	if (!done) {
		fail("Did not see the expected accumulator results within time limit.");
	}

	LOG.info("Triggering savepoint.");

	final String jobmanagerSavepointPath = client
		.triggerSavepoint(submission.getJobID(), null)
		.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

	final File produced = new File(new URI(jobmanagerSavepointPath).getPath());
	final File destination = new File(savepointPath);
	// savepoints were changed to be directories in Flink 1.3
	if (produced.isDirectory()) {
		FileUtils.moveDirectory(produced, destination);
	} else {
		FileUtils.moveFile(produced, destination);
	}
}