Java Code Examples for org.apache.flink.runtime.jobgraph.JobStatus#RUNNING

The following examples show how to use org.apache.flink.runtime.jobgraph.JobStatus#RUNNING. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source Project: Flink-CEPplus   File: JobMaster.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Handles a message stating that a checkpoint was declined.
 *
 * <p>When the execution graph has a checkpoint coordinator, the message is passed to it from a task
 * submitted via {@code getRpcService().execute(...)}; exceptions thrown while processing are logged.
 * When there is no coordinator, the situation is logged at error level if the job is
 * {@code RUNNING} and at debug level otherwise.
 *
 * @param decline the decline message to process
 */
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

	if (coordinator == null) {
		// nobody can process the message; severity depends on whether the job is running
		final String missingCoordinatorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
		final boolean jobIsRunning = executionGraph.getState() == JobStatus.RUNNING;
		if (jobIsRunning) {
			log.error(missingCoordinatorMessage, jobGraph.getJobID());
		} else {
			log.debug(missingCoordinatorMessage, jobGraph.getJobID());
		}
		return;
	}

	getRpcService().execute(() -> {
		try {
			coordinator.receiveDeclineMessage(decline);
		} catch (Exception e) {
			log.error("Error in CheckpointCoordinator while processing {}", decline, e);
		}
	});
}
 
Example 2
Source Project: flink   File: UpTimeGauge.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reports the time the job has spent in its current {@code RUNNING} phase.
 *
 * @return the milliseconds elapsed since the recorded {@code RUNNING} timestamp (never negative)
 *         while the job is running, {@code NO_LONGER_RUNNING} when the status is terminal, and
 *         {@code 0} in every other state
 */
@Override
public Long getValue() {
	final JobStatus currentStatus = eg.getState();

	if (currentStatus == JobStatus.RUNNING) {
		final long runningSince = eg.getStatusTimestamp(JobStatus.RUNNING);
		final long elapsedMillis = System.currentTimeMillis() - runningSince;
		// clamp at zero so that a clock adjustment cannot produce a negative uptime
		return Math.max(elapsedMillis, 0);
	}

	if (currentStatus.isTerminalState()) {
		// the job will not run again from here: finished, or not on the leader
		return NO_LONGER_RUNNING;
	}

	// either not started yet or currently not up
	return 0L;
}
 
Example 3
Source Project: flink   File: DownTimeGauge.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reports the downtime of the job.
 *
 * @return {@code 0} while the job is {@code RUNNING}; {@code NO_LONGER_RUNNING} when the status is
 *         terminal; {@code NOT_YET_RUNNING} when no positive {@code RUNNING} timestamp has been
 *         recorded; otherwise the milliseconds elapsed since the recorded {@code RUNNING}
 *         timestamp (never negative)
 */
@Override
public Long getValue() {
	final JobStatus currentStatus = eg.getState();

	if (currentStatus == JobStatus.RUNNING) {
		// up right now, so there is nothing to report
		return 0L;
	}

	if (currentStatus.isTerminalState()) {
		// the job will not run again from here: finished, or not on the leader
		return NO_LONGER_RUNNING;
	}

	final long lastRunningTimestamp = eg.getStatusTimestamp(JobStatus.RUNNING);
	if (lastRunningTimestamp <= 0) {
		// no RUNNING timestamp recorded: the job has not been scheduled so far
		return NOT_YET_RUNNING;
	}

	// the job ran before and is down now; clamp at zero to survive clock adjustments
	final long elapsedMillis = System.currentTimeMillis() - lastRunningTimestamp;
	return Math.max(elapsedMillis, 0);
}
 
Example 4
Source Project: Flink-CEPplus   File: JobDetailsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a {@link JobDetails} instance is unchanged after being converted to a JSON tree
 * and back with the strict REST object mapper.
 */
@Test
public void testJobDetailsMarshalling() throws JsonProcessingException {
	final int[] sampleCounts = {1, 3, 3, 7, 4, 2, 7, 3, 3};
	final JobDetails original = new JobDetails(
		new JobID(), "foobar", 1L, 10L, 9L, JobStatus.RUNNING, 8L, sampleCounts, 42);

	final ObjectMapper mapper = RestMapperUtils.getStrictObjectMapper();

	// object -> JSON tree -> object
	final JobDetails roundTripped = mapper.treeToValue(mapper.valueToTree(original), JobDetails.class);

	assertEquals(original, roundTripped);
}
 
Example 5
/**
 * Creates the graph for the given job ID by handing fixed placeholder arguments to the superclass
 * constructor, with {@code JobStatus.RUNNING} as the status argument.
 *
 * @param jobId the job ID, forwarded as the first superclass constructor argument
 */
public SuspendableAccessExecutionGraph(JobID jobId) {
	super(
		jobId,
		"ExecutionGraphCacheTest",
		Collections.emptyMap(),
		Collections.emptyList(),
		new long[0],
		JobStatus.RUNNING,
		new ErrorInfo(new FlinkException("Test"), 42L),
		"",
		new StringifiedAccumulatorResult[0],
		Collections.emptyMap(),
		new ArchivedExecutionConfig(new ExecutionConfig()),
		false,
		null,
		null);

	// copy the state reported by the superclass into this class's own jobStatus field
	// NOTE(review): presumably this field backs an overridden getState() - confirm in the enclosing class
	jobStatus = super.getState();
}
 
Example 6
Source Project: flink   File: LegacyScheduler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Handles a message stating that a checkpoint was declined.
 *
 * <p>After asserting that it runs in the main thread, the method looks up the checkpoint
 * coordinator and the task manager location for the declining execution. With a coordinator
 * present, the message and location are passed to it from a task submitted to {@code ioExecutor};
 * exceptions thrown while processing are logged. Without a coordinator, the situation is logged at
 * error level if the job is {@code RUNNING} and at debug level otherwise.
 *
 * @param decline the decline message to process
 */
@Override
public void declineCheckpoint(final DeclineCheckpoint decline) {
	mainThreadExecutor.assertRunningInMainThread();

	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();
	final String declinerLocation = retrieveTaskManagerLocation(decline.getTaskExecutionId());

	if (coordinator == null) {
		// nobody can process the message; severity depends on whether the job is running
		final String missingCoordinatorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
		final boolean jobIsRunning = executionGraph.getState() == JobStatus.RUNNING;
		if (jobIsRunning) {
			log.error(missingCoordinatorMessage, jobGraph.getJobID());
		} else {
			log.debug(missingCoordinatorMessage, jobGraph.getJobID());
		}
		return;
	}

	ioExecutor.execute(() -> {
		try {
			coordinator.receiveDeclineMessage(decline, declinerLocation);
		} catch (Exception e) {
			log.error("Error in CheckpointCoordinator while processing {}", decline, e);
		}
	});
}
 
Example 7
/**
 * Polls the cluster client until the job reports {@code RUNNING}.
 *
 * <p>At most 60 polls are made, with a one second sleep after each unsuccessful one. Every
 * successfully retrieved status is asserted not to be globally terminal. An
 * {@link ExecutionException} from the status lookup is treated as "job not available yet".
 *
 * @throws AssertionError if the job is not running after the last poll
 * @throws Exception any other failure raised while polling or sleeping
 */
private void waitForJob() throws Exception {
	for (int pollsLeft = 60; pollsLeft > 0; pollsLeft--) {
		try {
			final JobStatus polledStatus = clusterClient.getJobStatus(jobGraph.getJobID()).get(60, TimeUnit.SECONDS);
			assertThat(polledStatus.isGloballyTerminalState(), equalTo(false));
			if (polledStatus == JobStatus.RUNNING) {
				return;
			}
		} catch (ExecutionException ignored) {
			// JobManagerRunner is not yet registered in Dispatcher
		}
		Thread.sleep(1000);
	}

	throw new AssertionError("Job did not become running within timeout.");
}
 
Example 8
Source Project: flink   File: ExecutionGraph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Callback used during cancellation/failover once every task has reached a terminal state
 * (cancelled/failed/finished). Moves the job to the state that follows from its current status.
 *
 * <p>The status is re-read on every pass of the loop, so a status change between reading it and
 * acting on it simply leads to another pass.
 *
 * @param expectedGlobalVersionForRestart version passed on to {@code tryRestartOrFail} when the
 *                                        job is in state {@code FAILING}
 */
private void allVerticesInTerminalState(long expectedGlobalVersionForRestart) {

	assertRunningInJobMasterMainThread();

	for (;;) {
		final JobStatus observed = this.state;

		if (observed == JobStatus.RUNNING) {
			// must not happen; fail globally and look at the resulting status on the next pass
			failGlobal(new Exception("ExecutionGraph went into allVerticesInTerminalState() from RUNNING"));
		}
		else if (observed == JobStatus.CANCELLING) {
			if (transitionState(observed, JobStatus.CANCELED)) {
				onTerminalState(JobStatus.CANCELED);
				return;
			}
			// transition was rejected, check the status again
		}
		else if (observed == JobStatus.FAILING) {
			if (tryRestartOrFail(expectedGlobalVersionForRestart)) {
				return;
			}
			// concurrent job status change, check the status again
		}
		else if (observed.isGloballyTerminalState()) {
			LOG.warn("Job has entered globally terminal state without waiting for all " +
				"job vertices to reach final state.");
			return;
		}
		else {
			failGlobal(new Exception("ExecutionGraph went into final state from state " + observed));
			return;
		}
	}
}
 
Example 9
Source Project: flink   File: ExecutionGraph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Moves the job from {@code CREATED} to {@code RUNNING} and starts scheduling all execution
 * vertices.
 *
 * <p>The scheduling future is only kept if, after scheduling was started, the job is still
 * {@code RUNNING} and the global modification version is unchanged; otherwise it is cancelled.
 * A scheduling failure other than a cancellation fails the job globally.
 *
 * @throws IllegalStateException if the transition from {@code CREATED} to {@code RUNNING} is rejected
 * @throws JobException declared by the signature
 */
public void scheduleForExecution() throws JobException {

	assertRunningInJobMasterMainThread();

	final long versionAtStart = globalModVersion;

	if (!transitionState(JobStatus.CREATED, JobStatus.RUNNING)) {
		throw new IllegalStateException("Job may only be scheduled from state " + JobStatus.CREATED);
	}

	final CompletableFuture<Void> pendingScheduling = SchedulingUtils.schedule(
		scheduleMode,
		getAllExecutionVertices(),
		this);

	final boolean stillCurrent = state == JobStatus.RUNNING && versionAtStart == globalModVersion;
	if (!stillCurrent) {
		// status or version changed in the meantime, this scheduling attempt is obsolete
		pendingScheduling.cancel(false);
		return;
	}

	schedulingFuture = pendingScheduling;
	pendingScheduling.whenComplete((ignored, failure) -> {
		if (failure == null) {
			return;
		}

		final Throwable cause = ExceptionUtils.stripCompletionException(failure);
		// only fail if the scheduling future was not canceled
		if (!(cause instanceof CancellationException)) {
			failGlobal(cause);
		}
	});
}
 
Example 10
Source Project: Flink-CEPplus   File: JobMaster.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Handles the acknowledgement of a checkpoint by one execution attempt.
 *
 * <p>The arguments are wrapped into an {@code AcknowledgeCheckpoint} message. When the execution
 * graph has a checkpoint coordinator, the message is passed to it from a task submitted via
 * {@code getRpcService().execute(...)}; any {@code Throwable} raised while processing is logged as
 * a warning. When there is no coordinator, the situation is logged at error level if the job is
 * {@code RUNNING} and at debug level otherwise.
 *
 * @param jobID job ID placed in the message
 * @param executionAttemptID execution attempt ID placed in the message
 * @param checkpointId checkpoint ID placed in the message
 * @param checkpointMetrics checkpoint metrics placed in the message
 * @param checkpointState task state snapshot placed in the message
 */
@Override
public void acknowledgeCheckpoint(
		final JobID jobID,
		final ExecutionAttemptID executionAttemptID,
		final long checkpointId,
		final CheckpointMetrics checkpointMetrics,
		final TaskStateSnapshot checkpointState) {

	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();
	final AcknowledgeCheckpoint acknowledgement = new AcknowledgeCheckpoint(
		jobID,
		executionAttemptID,
		checkpointId,
		checkpointMetrics,
		checkpointState);

	if (coordinator == null) {
		// nobody can process the message; severity depends on whether the job is running
		final String missingCoordinatorMessage = "Received AcknowledgeCheckpoint message for job {} with no CheckpointCoordinator";
		final boolean jobIsRunning = executionGraph.getState() == JobStatus.RUNNING;
		if (jobIsRunning) {
			log.error(missingCoordinatorMessage, jobGraph.getJobID());
		} else {
			log.debug(missingCoordinatorMessage, jobGraph.getJobID());
		}
		return;
	}

	getRpcService().execute(() -> {
		try {
			coordinator.receiveAcknowledgeMessage(acknowledgement);
		} catch (Throwable t) {
			log.warn("Error while processing checkpoint acknowledgement message", t);
		}
	});
}
 
Example 11
Source Project: Flink-CEPplus   File: ExecutionGraph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Called whenever a vertex reaches state FINISHED (completed successfully).
 *
 * <p>Counts the finished vertex. When the count reaches the total number of vertices and the job
 * is still {@code RUNNING}, every job vertex is finalized on the master and the job is then moved
 * to {@code FINISHED}. A failure during finalization fails the job globally instead.
 */
void vertexFinished() {
	assertRunningInJobMasterMainThread();

	final int finishedSoFar = verticesFinished.incrementAndGet();
	if (finishedSoFar != numVerticesTotal) {
		// other vertices are still outstanding
		return;
	}

	if (state != JobStatus.RUNNING) {
		// the job left RUNNING in the meantime, so there is no final cleanup to trigger
		return;
	}

	// final cleanup: finalize every job vertex, in creation order
	try {
		for (ExecutionJobVertex jobVertex : verticesInCreationOrder) {
			jobVertex.getJobVertex().finalizeOnMaster(getUserClassLoader());
		}
	}
	catch (Throwable t) {
		ExceptionUtils.rethrowIfFatalError(t);
		failGlobal(new Exception("Failed to finalize execution on master", t));
		return;
	}

	// if this transition is rejected, a concurrent cancellation or failure happened
	if (transitionState(JobStatus.RUNNING, JobStatus.FINISHED)) {
		onTerminalState(JobStatus.FINISHED);
	}
}
 
Example 12
Source Project: Flink-CEPplus   File: ExecutionGraph.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Callback used during cancellation/failover once every task has reached a terminal state
 * (cancelled/failed/finished). Drives the job into the state that follows from its current status.
 *
 * <p>Each loop pass reads the status afresh; a pass that could not complete its transition is
 * followed by another one.
 *
 * @param expectedGlobalVersionForRestart version passed on to {@code tryRestartOrFail} when the
 *                                        job is in state {@code FAILING}
 */
private void allVerticesInTerminalState(long expectedGlobalVersionForRestart) {

	assertRunningInJobMasterMainThread();

	while (true) {
		final JobStatus snapshot = this.state;

		if (snapshot == JobStatus.RUNNING) {
			// must not happen; fail globally, then re-evaluate the resulting status
			failGlobal(new Exception("ExecutionGraph went into allVerticesInTerminalState() from RUNNING"));
			continue;
		}

		if (snapshot == JobStatus.CANCELLING) {
			if (transitionState(snapshot, JobStatus.CANCELED)) {
				onTerminalState(JobStatus.CANCELED);
				return;
			}
			// transition was rejected, re-evaluate
			continue;
		}

		if (snapshot == JobStatus.FAILING) {
			if (tryRestartOrFail(expectedGlobalVersionForRestart)) {
				return;
			}
			// concurrent job status change, re-evaluate
			continue;
		}

		if (snapshot.isGloballyTerminalState()) {
			LOG.warn("Job has entered globally terminal state without waiting for all " +
				"job vertices to reach final state.");
			return;
		}

		failGlobal(new Exception("ExecutionGraph went into final state from state " + snapshot));
		return;
	}
}
 
Example 13
Source Project: flink   File: CancelingTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Submits the job built from the given plan, waits until it is running, cancels it after a delay
 * and waits until the cancellation has taken effect.
 *
 * <p>Both waits poll the job status with a 50 ms sleep between polls: for up to two minutes until
 * the job is {@code RUNNING}, and for up to {@code maxTimeTillCanceled} milliseconds until it is
 * {@code CANCELED}. The test fails if either status is not observed.
 *
 * @param plan the plan from which the job graph is built
 * @param msecsTillCanceling milliseconds to sleep between seeing the job running and cancelling it
 * @param maxTimeTillCanceled milliseconds allowed for the job to reach {@code CANCELED}
 * @throws Exception any failure raised while submitting, polling or cancelling
 */
protected void runAndCancelJob(Plan plan, final int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
	final JobGraph graph = getJobGraph(plan);

	// submit in detached mode
	ClusterClient<?> clusterClient = CLUSTER.getClusterClient();
	clusterClient.setDetached(true);
	JobSubmissionResult submission = clusterClient.submitJob(graph, CancelingTestBase.class.getClassLoader());

	// phase 1: wait for the job to run
	Deadline runningDeadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
	JobStatus beforeCancel = clusterClient.getJobStatus(submission.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
	for (;
			beforeCancel != JobStatus.RUNNING && runningDeadline.hasTimeLeft();
			beforeCancel = clusterClient.getJobStatus(submission.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS)) {
		Thread.sleep(50);
	}
	if (beforeCancel != JobStatus.RUNNING) {
		Assert.fail("Job not in state RUNNING.");
	}

	Thread.sleep(msecsTillCanceling);

	clusterClient.cancel(submission.getJobID());

	// phase 2: wait for the cancellation to complete
	Deadline canceledDeadline = new FiniteDuration(maxTimeTillCanceled, TimeUnit.MILLISECONDS).fromNow();
	JobStatus afterCancel = clusterClient.getJobStatus(submission.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
	for (;
			afterCancel != JobStatus.CANCELED && canceledDeadline.hasTimeLeft();
			afterCancel = clusterClient.getJobStatus(submission.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS)) {
		Thread.sleep(50);
	}
	if (afterCancel != JobStatus.CANCELED) {
		Assert.fail("Failed to cancel job with ID " + submission.getJobID() + '.');
	}
}
 
Example 14
Source Project: Flink-CEPplus   File: ClassLoaderITCase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests disposal of a savepoint, which contains custom user code KvState.
 *
 * <p>The packaged program is invoked on a separate thread. The test then waits until a job is
 * listed as {@code RUNNING}, triggers a savepoint for it (retrying up to 20 times until one
 * attempt succeeds), disposes that savepoint, cancels the job and finally waits for the invoking
 * thread to terminate. All waits share one 100 second deadline.
 *
 * @throws Exception any failure raised while running the program or talking to the cluster
 */
@Test
public void testDisposeSavepointWithCustomKvState() throws Exception {
	ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster());

	Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();

	File checkpointDir = FOLDER.newFolder();
	File outputDir = FOLDER.newFolder();

	final PackagedProgram program = new PackagedProgram(
			new File(CUSTOM_KV_STATE_JAR_PATH),
			new String[] {
					String.valueOf(parallelism),
					checkpointDir.toURI().toString(),
					"5000",
					outputDir.toURI().toString()
			});

	TestStreamEnvironment.setAsContext(
		miniClusterResource.getMiniCluster(),
		parallelism,
		Collections.singleton(new Path(CUSTOM_KV_STATE_JAR_PATH)),
		Collections.<URL>emptyList()
	);

	// Execute detached
	Thread invokeThread = new Thread(new Runnable() {
		@Override
		public void run() {
			try {
				program.invokeInteractiveModeForExecution();
			} catch (ProgramInvocationException ignored) {
				// a cancellation is the expected way for the program to end; report anything else
				if (ignored.getCause() == null ||
					!(ignored.getCause() instanceof JobCancellationException)) {
					ignored.printStackTrace();
				}
			}
		}
	});

	LOG.info("Starting program invoke thread");
	invokeThread.start();

	// The job ID
	JobID jobId = null;

	LOG.info("Waiting for job status running.");

	// Wait for running job
	while (jobId == null && deadline.hasTimeLeft()) {

		Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
		for (JobStatusMessage job : jobs) {
			if (job.getJobState() == JobStatus.RUNNING) {
				jobId = job.getJobId();
				LOG.info("Job running. ID: " + jobId);
				break;
			}
		}

		// Retry if job is not available yet
		if (jobId == null) {
			Thread.sleep(100L);
		}
	}

	// Fail with a clear message instead of handing a null job ID to the calls below
	assertNotNull("No job reached state RUNNING before the deadline", jobId);

	// Trigger savepoint, retrying until one attempt succeeds
	String savepointPath = null;
	for (int i = 0; i < 20; i++) {
		LOG.info("Triggering savepoint (" + (i + 1) + "/20).");
		try {
			savepointPath = clusterClient.triggerSavepoint(jobId, null)
				.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
			// Stop after the first success: further attempts would create additional
			// savepoints of which only the last one would be disposed below
			break;
		} catch (Exception cause) {
			LOG.info("Failed to trigger savepoint. Retrying...", cause);
			// This can fail if the operators are not opened yet
			Thread.sleep(500);
		}
	}

	assertNotNull("Failed to trigger savepoint", savepointPath);

	clusterClient.disposeSavepoint(savepointPath).get();

	clusterClient.cancel(jobId);

	// make sure, the execution is finished to not influence other test methods
	invokeThread.join(deadline.timeLeft().toMillis());
	assertFalse("Program invoke thread still running", invokeThread.isAlive());
}
 
Example 15
Source Project: flink   File: ExecutionGraph.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Requests cancellation of the job, acting on the job status that is current at call time.
 *
 * <p>The status is re-read on every loop pass; a pass whose state transition is rejected is
 * followed by another pass. Depending on the observed status:
 * <ul>
 *   <li>{@code RUNNING} or {@code CREATED}: move to {@code CANCELLING}, cancel a pending
 *       scheduling future, cancel all vertices asynchronously and react once that completes;</li>
 *   <li>{@code FAILING}: move to {@code CANCELLING};</li>
 *   <li>{@code RESTARTING}: move directly to {@code CANCELED};</li>
 *   <li>any other status: do nothing.</li>
 * </ul>
 */
public void cancel() {

		assertRunningInJobMasterMainThread();

		while (true) {
			// work on a snapshot of the status; it is re-read if a transition below is rejected
			JobStatus current = state;

			if (current == JobStatus.RUNNING || current == JobStatus.CREATED) {
				if (transitionState(current, JobStatus.CANCELLING)) {

					// make sure no concurrent local actions interfere with the cancellation
					final long globalVersionForRestart = incrementGlobalModVersion();

					final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

					// cancel ongoing scheduling action
					if (ongoingSchedulingFuture != null) {
						ongoingSchedulingFuture.cancel(false);
					}

					final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
					allTerminal.whenComplete(
						(Void value, Throwable throwable) -> {
							if (throwable != null) {
								// cancelling the vertices failed: try to move the job from CANCELLING to FAILED
								transitionState(
									JobStatus.CANCELLING,
									JobStatus.FAILED,
									new FlinkException(
										"Could not cancel job " + getJobName() + " because not all execution job vertices could be cancelled.",
										throwable));
							} else {
								// cancellations may currently be overridden by failures which trigger
								// restarts, so we need to pass a proper restart global version here
								allVerticesInTerminalState(globalVersionForRestart);
							}
						});

					return;
				}
			}
			// Executions are being canceled. Go into cancelling and wait for
			// all vertices to be in their final state.
			else if (current == JobStatus.FAILING) {
				if (transitionState(current, JobStatus.CANCELLING)) {
					return;
				}
			}
			// All vertices have been cancelled and it's safe to directly go
			// into the canceled state.
			else if (current == JobStatus.RESTARTING) {
				// the transition out of RESTARTING is performed while holding progressLock
				synchronized (progressLock) {
					if (transitionState(current, JobStatus.CANCELED)) {
						onTerminalState(JobStatus.CANCELED);

						LOG.info("Canceled during restart.");
						return;
					}
				}
			}
			else {
				// no need to treat other states
				return;
			}
		}
	}
 
Example 16
Source Project: Flink-CEPplus   File: ExecutionGraph.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Requests cancellation of the job, acting on the job status that is current at call time.
 *
 * <p>The status is re-read on every loop pass; a pass whose state transition is rejected is
 * followed by another pass. Depending on the observed status:
 * <ul>
 *   <li>{@code RUNNING} or {@code CREATED}: move to {@code CANCELLING}, cancel a pending
 *       scheduling future, cancel every job vertex and react once all cancellations complete;</li>
 *   <li>{@code FAILING}: move to {@code CANCELLING};</li>
 *   <li>{@code RESTARTING}: move directly to {@code CANCELED};</li>
 *   <li>any other status: do nothing.</li>
 * </ul>
 */
public void cancel() {

		assertRunningInJobMasterMainThread();

		while (true) {
			// work on a snapshot of the status; it is re-read if a transition below is rejected
			JobStatus current = state;

			if (current == JobStatus.RUNNING || current == JobStatus.CREATED) {
				if (transitionState(current, JobStatus.CANCELLING)) {

					// make sure no concurrent local actions interfere with the cancellation
					final long globalVersionForRestart = incrementGlobalModVersion();

					final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

					// cancel ongoing scheduling action
					if (ongoingSchedulingFuture != null) {
						ongoingSchedulingFuture.cancel(false);
					}

					// one cancellation future per job vertex
					final ArrayList<CompletableFuture<?>> futures = new ArrayList<>(verticesInCreationOrder.size());

					// cancel all tasks (that still need cancelling)
					for (ExecutionJobVertex ejv : verticesInCreationOrder) {
						futures.add(ejv.cancelWithFuture());
					}

					// we build a future that is complete once all vertices have reached a terminal state
					final ConjunctFuture<Void> allTerminal = FutureUtils.waitForAll(futures);
					allTerminal.whenComplete(
						(Void value, Throwable throwable) -> {
							if (throwable != null) {
								// cancelling the vertices failed: try to move the job from CANCELLING to FAILED
								transitionState(
									JobStatus.CANCELLING,
									JobStatus.FAILED,
									new FlinkException(
										"Could not cancel job " + getJobName() + " because not all execution job vertices could be cancelled.",
										throwable));
							} else {
								// cancellations may currently be overridden by failures which trigger
								// restarts, so we need to pass a proper restart global version here
								allVerticesInTerminalState(globalVersionForRestart);
							}
						});

					return;
				}
			}
			// Executions are being canceled. Go into cancelling and wait for
			// all vertices to be in their final state.
			else if (current == JobStatus.FAILING) {
				if (transitionState(current, JobStatus.CANCELLING)) {
					return;
				}
			}
			// All vertices have been cancelled and it's safe to directly go
			// into the canceled state.
			else if (current == JobStatus.RESTARTING) {
				// the transition out of RESTARTING is performed while holding progressLock
				synchronized (progressLock) {
					if (transitionState(current, JobStatus.CANCELED)) {
						onTerminalState(JobStatus.CANCELED);

						LOG.info("Canceled during restart.");
						return;
					}
				}
			}
			else {
				// no need to treat other states
				return;
			}
		}
	}
 
Example 17
/**
 * Verifies that a {@link MultipleJobsDetails} object holding one running and one finished job is
 * unchanged after being converted to a JSON tree and back with the strict REST object mapper.
 */
@Test
public void testMultipleJobsDetailsMarshalling() throws JsonProcessingException {
	// one slot per execution state, each filled with its own index
	final int[] countsPerState = new int[ExecutionState.values().length];
	Arrays.setAll(countsPerState, index -> index);

	final JobDetails runningJob = new JobDetails(
		new JobID(), "running", 1L, -1L, 9L, JobStatus.RUNNING, 9L, countsPerState, 9);

	final JobDetails finishedJob = new JobDetails(
		new JobID(), "finished", 1L, 5L, 4L, JobStatus.FINISHED, 8L, countsPerState, 4);

	final MultipleJobsDetails original = new MultipleJobsDetails(Arrays.asList(runningJob, finishedJob));

	final ObjectMapper mapper = RestMapperUtils.getStrictObjectMapper();

	// object -> JSON tree -> object
	final MultipleJobsDetails roundTripped =
		mapper.treeToValue(mapper.valueToTree(original), MultipleJobsDetails.class);

	assertEquals(original, roundTripped);
}
 
Example 18
Source Project: flink   File: RestClusterClientTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Answers every request with an already completed future holding two fixed jobs: "job1" in state
 * {@code RUNNING} and "job2" in state {@code FINISHED}. Neither argument is read.
 */
@Override
protected CompletableFuture<MultipleJobsDetails> handleRequest(@Nonnull HandlerRequest<EmptyRequestBody, EmptyMessageParameters> request, @Nonnull DispatcherGateway gateway) throws RestHandlerException {
	final MultipleJobsDetails cannedResponse = new MultipleJobsDetails(Arrays.asList(
		new JobDetails(new JobID(), "job1", 0, 0, 0, JobStatus.RUNNING, 0, new int[9], 0),
		new JobDetails(new JobID(), "job2", 0, 0, 0, JobStatus.FINISHED, 0, new int[9], 0)));

	return CompletableFuture.completedFuture(cannedResponse);
}
 
Example 19
Source Project: flink   File: MultipleJobsDetailsTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@link MultipleJobsDetails} objects can be marshalled and unmarshalled: an instance
 * with one running and one finished job must equal the result of converting it to a JSON tree and
 * back.
 */
@Test
public void testMultipleJobsDetailsMarshalling() throws JsonProcessingException {
	// one slot per execution state, each filled with its own index
	final int[] stateCounts = new int[ExecutionState.values().length];
	int slot = 0;
	while (slot < stateCounts.length) {
		stateCounts[slot] = slot;
		slot++;
	}

	final JobDetails runningDetails = new JobDetails(
		new JobID(), "running", 1L, -1L, 9L, JobStatus.RUNNING, 9L, stateCounts, 9);

	final JobDetails finishedDetails = new JobDetails(
		new JobID(), "finished", 1L, 5L, 4L, JobStatus.FINISHED, 8L, stateCounts, 4);

	final MultipleJobsDetails before = new MultipleJobsDetails(
		Arrays.asList(runningDetails, finishedDetails));

	final ObjectMapper strictMapper = RestMapperUtils.getStrictObjectMapper();

	// marshal to a JSON tree and immediately unmarshal again
	final MultipleJobsDetails after = strictMapper.treeToValue(
		strictMapper.valueToTree(before),
		MultipleJobsDetails.class);

	assertEquals(before, after);
}
 
Example 20
Source Project: flink   File: ClassLoaderITCase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests disposal of a savepoint, which contains custom user code KvState.
 *
 * <p>The packaged program is invoked on a separate thread. The test then waits until a job is
 * listed as {@code RUNNING}, triggers a savepoint for it (retrying up to 20 times until one
 * attempt succeeds), disposes that savepoint, cancels the job and finally waits for the invoking
 * thread to terminate. All waits share one 100 second deadline.
 *
 * @throws Exception any failure raised while running the program or talking to the cluster
 */
@Test
public void testDisposeSavepointWithCustomKvState() throws Exception {
	ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster());

	Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();

	File checkpointDir = FOLDER.newFolder();
	File outputDir = FOLDER.newFolder();

	final PackagedProgram program = new PackagedProgram(
			new File(CUSTOM_KV_STATE_JAR_PATH),
			new String[] {
					String.valueOf(parallelism),
					checkpointDir.toURI().toString(),
					"5000",
					outputDir.toURI().toString()
			});

	TestStreamEnvironment.setAsContext(
		miniClusterResource.getMiniCluster(),
		parallelism,
		Collections.singleton(new Path(CUSTOM_KV_STATE_JAR_PATH)),
		Collections.<URL>emptyList()
	);

	// Execute detached
	Thread invokeThread = new Thread(new Runnable() {
		@Override
		public void run() {
			try {
				program.invokeInteractiveModeForExecution();
			} catch (ProgramInvocationException ignored) {
				// a cancellation is the expected way for the program to end; report anything else
				if (ignored.getCause() == null ||
					!(ignored.getCause() instanceof JobCancellationException)) {
					ignored.printStackTrace();
				}
			}
		}
	});

	LOG.info("Starting program invoke thread");
	invokeThread.start();

	// The job ID
	JobID jobId = null;

	LOG.info("Waiting for job status running.");

	// Wait for running job
	while (jobId == null && deadline.hasTimeLeft()) {

		Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
		for (JobStatusMessage job : jobs) {
			if (job.getJobState() == JobStatus.RUNNING) {
				jobId = job.getJobId();
				LOG.info("Job running. ID: " + jobId);
				break;
			}
		}

		// Retry if job is not available yet
		if (jobId == null) {
			Thread.sleep(100L);
		}
	}

	// Fail with a clear message instead of handing a null job ID to the calls below
	assertNotNull("No job reached state RUNNING before the deadline", jobId);

	// Trigger savepoint, retrying until one attempt succeeds
	String savepointPath = null;
	for (int i = 0; i < 20; i++) {
		LOG.info("Triggering savepoint (" + (i + 1) + "/20).");
		try {
			savepointPath = clusterClient.triggerSavepoint(jobId, null)
				.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
			// Stop after the first success: further attempts would create additional
			// savepoints of which only the last one would be disposed below
			break;
		} catch (Exception cause) {
			LOG.info("Failed to trigger savepoint. Retrying...", cause);
			// This can fail if the operators are not opened yet
			Thread.sleep(500);
		}
	}

	assertNotNull("Failed to trigger savepoint", savepointPath);

	clusterClient.disposeSavepoint(savepointPath).get();

	clusterClient.cancel(jobId);

	// make sure, the execution is finished to not influence other test methods
	invokeThread.join(deadline.timeLeft().toMillis());
	assertFalse("Program invoke thread still running", invokeThread.isAlive());
}