Java Code Examples for org.apache.flink.runtime.jobgraph.JobStatus#isGloballyTerminalState()

The following examples show how to use org.apache.flink.runtime.jobgraph.JobStatus#isGloballyTerminalState(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ZooKeeperCheckpointIDCounter.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Shuts this counter down if it is currently started; otherwise does nothing.
 *
 * <p>Closes the shared count and unregisters the connection state listener. The counter
 * path is additionally deleted from ZooKeeper (children included, in the background) when
 * the given job status is globally terminal.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception propagated from closing the shared count or issuing the delete
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	synchronized (startStopLock) {
		// Nothing to tear down when the counter was never started (or is already stopped).
		if (!isStarted) {
			return;
		}

		LOG.info("Shutting down.");
		sharedCount.close();
		client.getConnectionStateListenable().removeListener(connStateListener);

		final boolean removeCounterPath = jobStatus.isGloballyTerminalState();
		if (removeCounterPath) {
			LOG.info("Removing {} from ZooKeeper", counterPath);
			client.delete()
				.deletingChildrenIfNeeded()
				.inBackground()
				.forPath(counterPath);
		}

		isStarted = false;
	}
}
 
Example 2
Source File: ZooKeeperCheckpointIDCounter.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Shuts this counter down if it is currently started; otherwise does nothing.
 *
 * <p>Closes the shared count and unregisters the connection state listener. The counter
 * path is additionally deleted from ZooKeeper (children included, in the background) when
 * the given job status is globally terminal.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception propagated from closing the shared count or issuing the delete
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	synchronized (startStopLock) {
		// Nothing to tear down when the counter was never started (or is already stopped).
		if (!isStarted) {
			return;
		}

		LOG.info("Shutting down.");
		sharedCount.close();
		client.getConnectionStateListenable().removeListener(connStateListener);

		final boolean removeCounterPath = jobStatus.isGloballyTerminalState();
		if (removeCounterPath) {
			LOG.info("Removing {} from ZooKeeper", counterPath);
			client.delete()
				.deletingChildrenIfNeeded()
				.inBackground()
				.forPath(counterPath);
		}

		isStarted = false;
	}
}
 
Example 3
Source File: ZooKeeperCompletedCheckpointStore.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Shuts the store down or suspends it, depending on the job status.
 *
 * <p>For a globally terminal status every completed checkpoint is passed to
 * {@code tryRemoveCompletedCheckpoint} with {@code discardOnShutdown(jobStatus)} as the
 * discard action, then the local list is cleared and the children in ZooKeeper are deleted.
 * For any other status only the local list is cleared and the ZooKeeper locks are released.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception propagated from the checkpoint removal or the ZooKeeper operations
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	if (!jobStatus.isGloballyTerminalState()) {
		LOG.info("Suspending");

		// Drop only the local handles; no state is removed on this path.
		completedCheckpoints.clear();

		// Release the state handle locks in ZooKeeper so that they can be deleted.
		checkpointsInZooKeeper.releaseAll();
		return;
	}

	LOG.info("Shutting down");

	for (CompletedCheckpoint retained : completedCheckpoints) {
		tryRemoveCompletedCheckpoint(retained, cp -> cp.discardOnShutdown(jobStatus));
	}

	completedCheckpoints.clear();
	checkpointsInZooKeeper.deleteChildren();
}
 
Example 4
Source File: ZooKeeperCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Shuts the store down or suspends it, depending on the job status.
 *
 * <p>For a globally terminal status every completed checkpoint is passed to
 * {@code tryRemoveCompletedCheckpoint} with {@code discardOnShutdown(jobStatus)} as the
 * discard action, then the local list is cleared and the children in ZooKeeper are deleted.
 * For any other status only the local list is cleared and the ZooKeeper locks are released.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception propagated from the checkpoint removal or the ZooKeeper operations
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	if (!jobStatus.isGloballyTerminalState()) {
		LOG.info("Suspending");

		// Drop only the local handles; no state is removed on this path.
		completedCheckpoints.clear();

		// Release the state handle locks in ZooKeeper so that they can be deleted.
		checkpointsInZooKeeper.releaseAll();
		return;
	}

	LOG.info("Shutting down");

	for (CompletedCheckpoint retained : completedCheckpoints) {
		tryRemoveCompletedCheckpoint(retained, cp -> cp.discardOnShutdown(jobStatus));
	}

	completedCheckpoints.clear();
	checkpointsInZooKeeper.deleteChildren();
}
 
Example 5
Source File: JobMaster.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Reacts to a job status change. Work is only done for globally terminal statuses: the
 * execution graph is archived and handed to the job completion actions on the scheduled
 * executor service.
 *
 * @param newJobStatus status the job switched to
 * @param timestamp time of the status change (not used by this method)
 * @param error failure associated with the change, may be {@code null} (not used by this method)
 */
private void jobStatusChanged(
		final JobStatus newJobStatus,
		long timestamp,
		@Nullable final Throwable error) {
	validateRunsInMainThread();

	// Intermediate statuses need no handling here.
	if (!newJobStatus.isGloballyTerminalState()) {
		return;
	}

	final ArchivedExecutionGraph archivedGraph = ArchivedExecutionGraph.createFrom(executionGraph);
	scheduledExecutorService.execute(
		() -> jobCompletionActions.jobReachedGloballyTerminalState(archivedGraph));
}
 
Example 6
Source File: BackPressureStatsTrackerImpl.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Completion callback for a stack trace sample of {@code vertex}.
 *
 * <p>Under {@code lock}: does nothing once the tracker is shut down; ignores the sample when
 * the job is in a globally terminal state; otherwise caches the back pressure stats derived
 * from the sample, or logs the failure when no sample was delivered. The vertex is always
 * removed from {@code pendingStats}, including on the early return and on errors.
 *
 * @param stackTraceSample the gathered sample, or {@code null} if sampling failed
 * @param throwable the sampling failure, logged when {@code stackTraceSample} is {@code null}
 * @return always {@code null}
 */
@Override
public Void apply(StackTraceSample stackTraceSample, Throwable throwable) {
	synchronized (lock) {
		try {
			if (shutDown) {
				return null;
			}

			// Job finished, ignore.
			JobStatus jobState = vertex.getGraph().getState();
			if (jobState.isGloballyTerminalState()) {
				// Parameterized message: nothing is concatenated unless debug logging is enabled.
				LOG.debug("Ignoring sample, because job is in state {}.", jobState);
			} else if (stackTraceSample != null) {
				OperatorBackPressureStats stats = createStatsFromSample(stackTraceSample);
				operatorStatsCache.put(vertex, stats);
			} else {
				LOG.debug("Failed to gather stack trace sample.", throwable);
			}
		} catch (Throwable t) {
			// Errors are logged rather than propagated out of the callback.
			LOG.error("Error during stats completion.", t);
		} finally {
			pendingStats.remove(vertex);
		}

		return null;
	}
}
 
Example 7
Source File: ExecutionGraph.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * This method is a callback during cancellation/failover and called when all tasks
 * have reached a terminal state (cancelled/failed/finished).
 *
 * <p>It loops over the current job status until one of the branches below completes the
 * transition (or decides that nothing is left to do) and breaks out of the loop.
 *
 * @param expectedGlobalVersionForRestart global version that is handed to
 *     {@code tryRestartOrFail} when the job is in state FAILING
 */
private void allVerticesInTerminalState(long expectedGlobalVersionForRestart) {

	assertRunningInJobMasterMainThread();

	// we are done, transition to the final state
	JobStatus current;
	while (true) {
		// re-read the status on every iteration; it may have changed since the last attempt
		current = this.state;

		if (current == JobStatus.RUNNING) {
			// no break here: after failGlobal the loop runs again and re-evaluates the status
			failGlobal(new Exception("ExecutionGraph went into allVerticesInTerminalState() from RUNNING"));
		}
		else if (current == JobStatus.CANCELLING) {
			// only leave the loop if the transition succeeded; otherwise check the status again
			if (transitionState(current, JobStatus.CANCELED)) {
				onTerminalState(JobStatus.CANCELED);
				break;
			}
		}
		else if (current == JobStatus.FAILING) {
			if (tryRestartOrFail(expectedGlobalVersionForRestart)) {
				break;
			}
			// concurrent job status change, let's check again
		}
		else if (current.isGloballyTerminalState()) {
			// already globally terminal: only warn, no further transition
			LOG.warn("Job has entered globally terminal state without waiting for all " +
				"job vertices to reach final state.");
			break;
		}
		else {
			// any other status is unexpected here: trigger a global failure and stop looping
			failGlobal(new Exception("ExecutionGraph went into final state from state " + current));
			break;
		}
	}
	// done transitioning the state
}
 
Example 8
Source File: RecoverableCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Shuts the store down. The previously suspended checkpoints are always dropped. For a
 * globally terminal job status the current checkpoints are dropped as well; for any other
 * status they are moved into {@code suspended} first.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception declared by the overridden method; not thrown by the visible code
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	suspended.clear();

	// Keep the current checkpoints around only when the job did not terminate globally.
	if (!jobStatus.isGloballyTerminalState()) {
		suspended.addAll(checkpoints);
	}

	checkpoints.clear();
}
 
Example 9
Source File: WebMonitorUtils.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link JobDetails} summary from the given execution graph.
 *
 * <p>The end timestamp is only set for jobs in a globally terminal state ({@code -1}
 * otherwise); for other jobs the duration is measured up to the current system time. Task
 * vertices are counted per {@link ExecutionState}, and the last-changed timestamp is the
 * maximum over all vertex state timestamps and the end timestamp.
 *
 * @param job execution graph to summarize
 * @return the details of the job
 */
public static JobDetails createDetailsForJob(AccessExecutionGraph job) {
	final JobStatus status = job.getState();
	final long started = job.getStatusTimestamp(JobStatus.CREATED);

	// Only a globally terminal job has an end timestamp; -1 marks "not finished yet".
	final long finished;
	if (status.isGloballyTerminalState()) {
		finished = job.getStatusTimestamp(status);
	} else {
		finished = -1L;
	}

	final long durationEnd = finished >= 0L ? finished : System.currentTimeMillis();
	final long duration = durationEnd - started;

	// One counter slot per execution state, indexed by the state's ordinal.
	final int[] countsPerStatus = new int[ExecutionState.values().length];
	long lastChanged = 0;
	int numTotalTasks = 0;

	for (AccessExecutionJobVertex jobVertex : job.getVerticesTopologically()) {
		final AccessExecutionVertex[] taskVertices = jobVertex.getTaskVertices();
		numTotalTasks += taskVertices.length;

		for (int i = 0; i < taskVertices.length; i++) {
			final ExecutionState state = taskVertices[i].getExecutionState();
			countsPerStatus[state.ordinal()]++;
			lastChanged = Math.max(lastChanged, taskVertices[i].getStateTimestamp(state));
		}
	}

	if (finished > lastChanged) {
		lastChanged = finished;
	}

	return new JobDetails(
		job.getJobID(),
		job.getJobName(),
		started,
		finished,
		duration,
		status,
		lastChanged,
		countsPerStatus,
		numTotalTasks);
}
 
Example 10
Source File: WebMonitorUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link JobDetails} summary from the given execution graph.
 *
 * <p>The end timestamp is only set for jobs in a globally terminal state ({@code -1}
 * otherwise); for other jobs the duration is measured up to the current system time. Task
 * vertices are counted per {@link ExecutionState}, and the last-changed timestamp is the
 * maximum over all vertex state timestamps and the end timestamp.
 *
 * @param job execution graph to summarize
 * @return the details of the job
 */
public static JobDetails createDetailsForJob(AccessExecutionGraph job) {
	final JobStatus status = job.getState();
	final long started = job.getStatusTimestamp(JobStatus.CREATED);

	// Only a globally terminal job has an end timestamp; -1 marks "not finished yet".
	final long finished;
	if (status.isGloballyTerminalState()) {
		finished = job.getStatusTimestamp(status);
	} else {
		finished = -1L;
	}

	final long durationEnd = finished >= 0L ? finished : System.currentTimeMillis();
	final long duration = durationEnd - started;

	// One counter slot per execution state, indexed by the state's ordinal.
	final int[] countsPerStatus = new int[ExecutionState.values().length];
	long lastChanged = 0;
	int numTotalTasks = 0;

	for (AccessExecutionJobVertex jobVertex : job.getVerticesTopologically()) {
		final AccessExecutionVertex[] taskVertices = jobVertex.getTaskVertices();
		numTotalTasks += taskVertices.length;

		for (int i = 0; i < taskVertices.length; i++) {
			final ExecutionState state = taskVertices[i].getExecutionState();
			countsPerStatus[state.ordinal()]++;
			lastChanged = Math.max(lastChanged, taskVertices[i].getStateTimestamp(state));
		}
	}

	if (finished > lastChanged) {
		lastChanged = finished;
	}

	return new JobDetails(
		job.getJobID(),
		job.getJobName(),
		started,
		finished,
		duration,
		status,
		lastChanged,
		countsPerStatus,
		numTotalTasks);
}
 
Example 11
Source File: JobMaster.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reacts to a job status change. Work is only done for globally terminal statuses: an
 * asynchronous action calls {@code partitionTracker.stopTrackingAndReleasePartitionsFor}
 * for every key of {@code registeredTaskManagers}, and the archived execution graph
 * requested from the scheduler is handed to the job completion actions on the scheduled
 * executor service.
 *
 * @param newJobStatus status the job switched to
 * @param timestamp time of the status change (not used by this method)
 * @param error failure associated with the change, may be {@code null} (not used by this method)
 */
private void jobStatusChanged(
		final JobStatus newJobStatus,
		long timestamp,
		@Nullable final Throwable error) {
	validateRunsInMainThread();

	// Intermediate statuses need no handling here.
	if (!newJobStatus.isGloballyTerminalState()) {
		return;
	}

	runAsync(() ->
		registeredTaskManagers
			.keySet()
			.forEach(partitionTracker::stopTrackingAndReleasePartitionsFor));

	final ArchivedExecutionGraph archivedGraph = schedulerNG.requestJob();
	scheduledExecutorService.execute(
		() -> jobCompletionActions.jobReachedGloballyTerminalState(archivedGraph));
}
 
Example 12
Source File: BackPressureStatsTrackerImpl.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Completion callback for a stack trace sample of {@code vertex}.
 *
 * <p>Under {@code lock}: does nothing once the tracker is shut down; ignores the sample when
 * the job is in a globally terminal state; otherwise caches the back pressure stats derived
 * from the sample, or logs the failure when no sample was delivered. The vertex is always
 * removed from {@code pendingStats}, including on the early return and on errors.
 *
 * @param stackTraceSample the gathered sample, or {@code null} if sampling failed
 * @param throwable the sampling failure, logged when {@code stackTraceSample} is {@code null}
 * @return always {@code null}
 */
@Override
public Void apply(StackTraceSample stackTraceSample, Throwable throwable) {
	synchronized (lock) {
		try {
			if (shutDown) {
				return null;
			}

			// Job finished, ignore.
			JobStatus jobState = vertex.getGraph().getState();
			if (jobState.isGloballyTerminalState()) {
				// Parameterized message: nothing is concatenated unless debug logging is enabled.
				LOG.debug("Ignoring sample, because job is in state {}.", jobState);
			} else if (stackTraceSample != null) {
				OperatorBackPressureStats stats = createStatsFromSample(stackTraceSample);
				operatorStatsCache.put(vertex, stats);
			} else {
				LOG.debug("Failed to gather stack trace sample.", throwable);
			}
		} catch (Throwable t) {
			// Errors are logged rather than propagated out of the callback.
			LOG.error("Error during stats completion.", t);
		} finally {
			pendingStats.remove(vertex);
		}

		return null;
	}
}
 
Example 13
Source File: ExecutionGraph.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * This method is a callback during cancellation/failover and called when all tasks
 * have reached a terminal state (cancelled/failed/finished).
 *
 * <p>It loops over the current job status until one of the branches below completes the
 * transition (or decides that nothing is left to do) and breaks out of the loop.
 *
 * @param expectedGlobalVersionForRestart global version that is handed to
 *     {@code tryRestartOrFail} when the job is in state FAILING
 */
private void allVerticesInTerminalState(long expectedGlobalVersionForRestart) {

	assertRunningInJobMasterMainThread();

	// we are done, transition to the final state
	JobStatus current;
	while (true) {
		// re-read the status on every iteration; it may have changed since the last attempt
		current = this.state;

		if (current == JobStatus.RUNNING) {
			// no break here: after failGlobal the loop runs again and re-evaluates the status
			failGlobal(new Exception("ExecutionGraph went into allVerticesInTerminalState() from RUNNING"));
		}
		else if (current == JobStatus.CANCELLING) {
			// only leave the loop if the transition succeeded; otherwise check the status again
			if (transitionState(current, JobStatus.CANCELED)) {
				onTerminalState(JobStatus.CANCELED);
				break;
			}
		}
		else if (current == JobStatus.FAILING) {
			if (tryRestartOrFail(expectedGlobalVersionForRestart)) {
				break;
			}
			// concurrent job status change, let's check again
		}
		else if (current.isGloballyTerminalState()) {
			// already globally terminal: only warn, no further transition
			LOG.warn("Job has entered globally terminal state without waiting for all " +
				"job vertices to reach final state.");
			break;
		}
		else {
			// any other status is unexpected here: trigger a global failure and stop looping
			failGlobal(new Exception("ExecutionGraph went into final state from state " + current));
			break;
		}
	}
	// done transitioning the state
}
 
Example 14
Source File: RecoverableCompletedCheckpointStore.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Shuts the store down. The previously suspended checkpoints are always dropped. For a
 * globally terminal job status the current checkpoints are dropped as well; for any other
 * status they are moved into {@code suspended} first.
 *
 * @param jobStatus job status at the time of the shutdown
 * @throws Exception declared by the overridden method; not thrown by the visible code
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	suspended.clear();

	// Keep the current checkpoints around only when the job did not terminate globally.
	if (!jobStatus.isGloballyTerminalState()) {
		suspended.addAll(checkpoints);
	}

	checkpoints.clear();
}
 
Example 15
Source File: LocalStreamEnvironmentWithAsyncExecution.java    From flink-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Returns whether the job with the given ID is currently running, i.e. the leader reports a
 * status for it that is not globally terminal.
 *
 * @param jobID ID of the job to look up
 * @return {@code true} if a status was reported and it is not globally terminal;
 *         {@code false} if the status is globally terminal or the job was not found
 * @throws Exception if obtaining the leader gateway or awaiting the reply fails
 * @throws RuntimeException if the reply is neither a job status nor a job-not-found message
 */
public boolean isRunning(JobID jobID) throws Exception {
    ActorGateway leader = _exec.getLeaderGateway(_exec.timeout());
    Future<Object> response = leader.ask(new JobManagerMessages.RequestJobStatus(jobID),
            _exec.timeout());
    Object result = Await.result(response, _exec.timeout());
    if (result instanceof CurrentJobStatus) {
        JobStatus jobStatus = ((CurrentJobStatus) result).status();
        return !jobStatus.isGloballyTerminalState();
    } else if (result instanceof JobNotFound) {
        // Inspect the awaited reply, not the Future: the Future itself is never a JobNotFound,
        // so testing it would send every not-found reply to the exception below.
        return false;
    } else {
        throw new RuntimeException("Unexpected response to job status: " + result);
    }
}
 
Example 16
Source File: ExecutionGraph.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph's state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>The call is a no-op when the job is already FAILING, SUSPENDED, or in a globally
 * terminal state.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {

	assertRunningInJobMasterMainThread();

	// retry loop: repeats until the job is in a state to stay in, or the switch to FAILING succeeds
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// snapshot the field into a local before the null check
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ArrayList<CompletableFuture<?>> futures = new ArrayList<>(verticesInCreationOrder.size());

			// cancel all tasks (that still need cancelling)
			for (ExecutionJobVertex ejv : verticesInCreationOrder) {
				futures.add(ejv.cancelWithFuture());
			}

			final ConjunctFuture<Void> allTerminal = FutureUtils.waitForAll(futures);
			allTerminal.whenComplete(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation itself failed: go from FAILING to FAILED
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// all vertices are terminal: continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
				});

			return;
		}

		// else: concurrent change to execution state, retry
	}
}
 
Example 17
Source File: ExecutionGraph.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph's state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>The call is a no-op when the job is already FAILING, SUSPENDED, or in a globally
 * terminal state.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {

	assertRunningInJobMasterMainThread();

	// retry loop: repeats until the job is in a state to stay in, or the switch to FAILING succeeds
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// snapshot the field into a local before the null check
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
			// NOTE(review): assertNoException presumably surfaces exceptions thrown by the handler itself — confirm
			FutureUtils.assertNoException(allTerminal.handle(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation itself failed: go from FAILING to FAILED
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// all vertices are terminal: continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
					return null;
				}));

			return;
		}

		// else: concurrent change to execution state, retry
	}
}
 
Example 18
Source File: LegacySchedulerBatchSchedulingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Completes {@code globallyTerminalJobStatusFuture} with the new status once the job reaches
 * a globally terminal state; all other status changes are ignored.
 *
 * @param jobId ID of the job whose status changed (not used by this method)
 * @param newJobStatus status the job switched to
 * @param timestamp time of the status change (not used by this method)
 * @param error failure associated with the change (not used by this method)
 */
@Override
public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
	if (!newJobStatus.isGloballyTerminalState()) {
		return;
	}

	globallyTerminalJobStatusFuture.complete(newJobStatus);
}