Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#isTerminal()

The following examples show how to use org.apache.flink.runtime.execution.ExecutionState#isTerminal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SubtaskExecutionAttemptDetailsInfo.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the details record describing one execution attempt of a subtask.
 *
 * <p>The start time is the DEPLOYING timestamp, or -1 when that timestamp is 0. The end time is
 * the timestamp of the current state if that state is terminal, otherwise -1. The duration runs
 * from the start time to the end time (or to the current wall-clock time while no positive end
 * time is known) and is -1 when the start time is not positive.
 *
 * @param execution the execution attempt to describe
 * @param ioMetrics the I/O counters to report for the attempt
 * @return the assembled details record
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, MutableIOMetrics ioMetrics) {
	final ExecutionState status = execution.getState();
	final long now = System.currentTimeMillis();

	final TaskManagerLocation location = execution.getAssignedResourceLocation();
	final String locationString;
	if (location == null) {
		locationString = "(unassigned)";
	} else {
		locationString = location.getHostname();
	}

	// a zero DEPLOYING timestamp is reported as -1
	final long deployingTimestamp = execution.getStateTimestamp(ExecutionState.DEPLOYING);
	final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

	final long endTime;
	if (status.isTerminal()) {
		endTime = execution.getStateTimestamp(status);
	} else {
		endTime = -1;
	}

	final long duration;
	if (startTime > 0) {
		// without a positive end time, measure up to the current time
		final long effectiveEnd = endTime > 0 ? endTime : now;
		duration = effectiveEnd - startTime;
	} else {
		duration = -1;
	}

	// bytes-in is reported as the sum of the local and remote counters
	final long bytesIn = ioMetrics.getNumBytesInLocal() + ioMetrics.getNumBytesInRemote();
	final boolean bytesInComplete = ioMetrics.isNumBytesInLocalComplete() && ioMetrics.isNumBytesInRemoteComplete();

	final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
		bytesIn,
		bytesInComplete,
		ioMetrics.getNumBytesOut(),
		ioMetrics.isNumBytesOutComplete(),
		ioMetrics.getNumRecordsIn(),
		ioMetrics.isNumRecordsInComplete(),
		ioMetrics.getNumRecordsOut(),
		ioMetrics.isNumRecordsOutComplete());

	return new SubtaskExecutionAttemptDetailsInfo(
		execution.getParallelSubtaskIndex(),
		status,
		execution.getAttemptNumber(),
		locationString,
		startTime,
		endTime,
		duration,
		ioMetricsInfo
	);
}
 
Example 2
Source File: SubtasksTimesHandler.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Collects, for each subtask of the given job vertex, its host name, its per-state timestamps
 * and its duration.
 *
 * <p>The duration starts at the SCHEDULED timestamp and ends at the timestamp of the current
 * state if that state is terminal, otherwise at the current wall-clock time. It is -1 when the
 * SCHEDULED timestamp is not positive.
 *
 * @param jobVertex the job vertex whose subtasks are reported
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
	final String id = jobVertex.getJobVertexId().toString();
	final String name = jobVertex.getName();
	final long now = System.currentTimeMillis();
	final List<SubtasksTimesInfo.SubtaskTimeInfo> subtasks = new ArrayList<>();

	int subtaskIndex = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
		final long[] stateTimestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
		final ExecutionState currentState = vertex.getExecutionState();

		final long scheduled = stateTimestamps[ExecutionState.SCHEDULED.ordinal()];
		final long begin = scheduled > 0 ? scheduled : -1;

		final long finish;
		if (currentState.isTerminal()) {
			finish = stateTimestamps[currentState.ordinal()];
		} else {
			finish = now;
		}

		final long duration;
		if (begin >= 0) {
			duration = finish - begin;
		} else {
			duration = -1L;
		}

		final TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		final String locationString;
		if (location == null) {
			locationString = "(unassigned)";
		} else {
			locationString = location.getHostname();
		}

		// expose the raw timestamp array as a state -> timestamp map
		final ExecutionState[] allStates = ExecutionState.values();
		final Map<ExecutionState, Long> timestampsByState = new HashMap<>(allStates.length);
		for (int i = 0; i < allStates.length; i++) {
			final ExecutionState state = allStates[i];
			timestampsByState.put(state, stateTimestamps[state.ordinal()]);
		}

		subtasks.add(new SubtasksTimesInfo.SubtaskTimeInfo(
			subtaskIndex,
			locationString,
			duration,
			timestampsByState));
		subtaskIndex++;
	}
	return new SubtasksTimesInfo(id, name, now, subtasks);
}
 
Example 3
Source File: Execution.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tries to switch this execution from {@code currentState} to {@code targetState}.
 *
 * <p>On a successful switch the timestamp of the target state is marked, the transition is
 * logged, the terminal state future is completed if the target state is terminal, and the
 * vertex is notified. Failures during that notification are logged and not propagated.
 *
 * @param currentState the state the execution is expected to be in
 * @param targetState the state to switch to
 * @param error optional error to attach to the log entry and the notification, may be null
 * @return true if the compare-and-set on the state succeeded, false otherwise
 * @throws IllegalStateException if {@code currentState} is terminal
 */
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) {
	// sanity check: a terminal state must never be left
	if (currentState.isTerminal()) {
		final String message = "Cannot leave terminal state " + currentState + " to transition to " + targetState + '.';
		throw new IllegalStateException(message);
	}

	// the state was not the expected one; report failure to the caller
	if (!STATE_UPDATER.compareAndSet(this, currentState, targetState)) {
		return false;
	}

	markTimestamp(targetState);

	if (error != null) {
		LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState, error);
	} else {
		LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState);
	}

	if (targetState.isTerminal()) {
		// complete the terminal state future
		terminalStateFuture.complete(targetState);
	}

	// the transition itself is already done; errors raised by the
	// notification are only logged so they cannot affect the main logic
	try {
		vertex.notifyStateTransition(this, targetState, error);
	}
	catch (Throwable t) {
		LOG.error("Error while notifying execution graph of execution state transition.", t);
	}
	return true;
}
 
Example 4
Source File: SubtasksTimesHandler.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the per-subtask timing report for the given job vertex.
 *
 * <p>For every subtask the duration is measured from its SCHEDULED timestamp to the timestamp
 * of its current state when that state is terminal, or to the current wall-clock time
 * otherwise. A SCHEDULED timestamp that is not positive yields a duration of -1.
 *
 * @param jobVertex the job vertex whose subtasks are reported
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
	final String id = jobVertex.getJobVertexId().toString();
	final String name = jobVertex.getName();
	final long now = System.currentTimeMillis();
	final List<SubtasksTimesInfo.SubtaskTimeInfo> subtasks = new ArrayList<>();

	int subtaskIndex = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
		final long[] stateTimestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
		final ExecutionState currentState = vertex.getExecutionState();

		final long scheduled = stateTimestamps[ExecutionState.SCHEDULED.ordinal()];
		final long begin = scheduled > 0 ? scheduled : -1;

		final long finish;
		if (currentState.isTerminal()) {
			finish = stateTimestamps[currentState.ordinal()];
		} else {
			finish = now;
		}

		final long duration;
		if (begin >= 0) {
			duration = finish - begin;
		} else {
			duration = -1L;
		}

		final TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		final String locationString;
		if (location == null) {
			locationString = "(unassigned)";
		} else {
			locationString = location.getHostname();
		}

		// expose the raw timestamp array as a state -> timestamp map
		final ExecutionState[] allStates = ExecutionState.values();
		final Map<ExecutionState, Long> timestampsByState = new HashMap<>(allStates.length);
		for (int i = 0; i < allStates.length; i++) {
			final ExecutionState state = allStates[i];
			timestampsByState.put(state, stateTimestamps[state.ordinal()]);
		}

		subtasks.add(new SubtasksTimesInfo.SubtaskTimeInfo(
			subtaskIndex,
			locationString,
			duration,
			timestampsByState));
		subtaskIndex++;
	}
	return new SubtasksTimesInfo(id, name, now, subtasks);
}
 
Example 5
Source File: SubtaskExecutionAttemptDetailsInfo.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the details record describing one execution attempt of a subtask.
 *
 * <p>The start time is the DEPLOYING timestamp, or -1 when that timestamp is 0. The end time is
 * the timestamp of the current state if that state is terminal, otherwise -1. The duration runs
 * from the start time to the end time (or to the current wall-clock time while no positive end
 * time is known) and is -1 when the start time is not positive.
 *
 * @param execution the execution attempt to describe
 * @param ioMetrics the I/O counters to report for the attempt
 * @return the assembled details record
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, MutableIOMetrics ioMetrics) {
	final ExecutionState status = execution.getState();
	final long now = System.currentTimeMillis();

	final TaskManagerLocation location = execution.getAssignedResourceLocation();
	final String locationString;
	if (location == null) {
		locationString = "(unassigned)";
	} else {
		locationString = location.getHostname();
	}

	// a zero DEPLOYING timestamp is reported as -1
	final long deployingTimestamp = execution.getStateTimestamp(ExecutionState.DEPLOYING);
	final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

	final long endTime;
	if (status.isTerminal()) {
		endTime = execution.getStateTimestamp(status);
	} else {
		endTime = -1;
	}

	final long duration;
	if (startTime > 0) {
		// without a positive end time, measure up to the current time
		final long effectiveEnd = endTime > 0 ? endTime : now;
		duration = effectiveEnd - startTime;
	} else {
		duration = -1;
	}

	final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
		ioMetrics.getNumBytesIn(),
		ioMetrics.isNumBytesInComplete(),
		ioMetrics.getNumBytesOut(),
		ioMetrics.isNumBytesOutComplete(),
		ioMetrics.getNumRecordsIn(),
		ioMetrics.isNumRecordsInComplete(),
		ioMetrics.getNumRecordsOut(),
		ioMetrics.isNumRecordsOutComplete());

	return new SubtaskExecutionAttemptDetailsInfo(
		execution.getParallelSubtaskIndex(),
		status,
		execution.getAttemptNumber(),
		locationString,
		startTime,
		endTime,
		duration,
		ioMetricsInfo
	);
}
 
Example 6
Source File: Execution.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Attempts to move this execution from {@code currentState} to {@code targetState}.
 *
 * <p>When the switch succeeds, the target state's timestamp is marked, the transition is
 * logged, the terminal state future is completed for terminal target states, and the vertex is
 * notified. Anything thrown by that notification is logged and swallowed.
 *
 * @param currentState the state the execution is expected to be in
 * @param targetState the state to switch to
 * @param error optional error to attach to the log entry and the notification, may be null
 * @return true if the compare-and-set on the state succeeded, false otherwise
 * @throws IllegalStateException if {@code currentState} is terminal
 */
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) {
	// sanity check: a terminal state must never be left
	if (currentState.isTerminal()) {
		final String message = "Cannot leave terminal state " + currentState + " to transition to " + targetState + '.';
		throw new IllegalStateException(message);
	}

	// the state was not the expected one; report failure to the caller
	if (!STATE_UPDATER.compareAndSet(this, currentState, targetState)) {
		return false;
	}

	markTimestamp(targetState);

	if (error != null) {
		LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState, error);
	} else {
		LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState);
	}

	if (targetState.isTerminal()) {
		// complete the terminal state future
		terminalStateFuture.complete(targetState);
	}

	// the transition itself is already done; errors raised by the
	// notification are only logged so they cannot affect the main logic
	try {
		vertex.notifyStateTransition(this, targetState, error);
	}
	catch (Throwable t) {
		LOG.error("Error while notifying execution graph of execution state transition.", t);
	}
	return true;
}
 
Example 7
Source File: SubtasksTimesHandler.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the timing overview of all subtasks belonging to the given job vertex.
 *
 * <p>A subtask's duration spans from its SCHEDULED timestamp to the timestamp of its current
 * state if that state is terminal, and to the current wall-clock time otherwise. When the
 * SCHEDULED timestamp is not positive the duration is reported as -1.
 *
 * @param jobVertex the job vertex whose subtasks are reported
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
	final String id = jobVertex.getJobVertexId().toString();
	final String name = jobVertex.getName();
	final long now = System.currentTimeMillis();
	final List<SubtasksTimesInfo.SubtaskTimeInfo> subtasks = new ArrayList<>();

	int subtaskIndex = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
		final long[] stateTimestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
		final ExecutionState currentState = vertex.getExecutionState();

		final long scheduled = stateTimestamps[ExecutionState.SCHEDULED.ordinal()];
		final long begin = scheduled > 0 ? scheduled : -1;

		final long finish;
		if (currentState.isTerminal()) {
			finish = stateTimestamps[currentState.ordinal()];
		} else {
			finish = now;
		}

		final long duration;
		if (begin >= 0) {
			duration = finish - begin;
		} else {
			duration = -1L;
		}

		final TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		final String locationString;
		if (location == null) {
			locationString = "(unassigned)";
		} else {
			locationString = location.getHostname();
		}

		// expose the raw timestamp array as a state -> timestamp map
		final ExecutionState[] allStates = ExecutionState.values();
		final Map<ExecutionState, Long> timestampsByState = new HashMap<>(allStates.length);
		for (int i = 0; i < allStates.length; i++) {
			final ExecutionState state = allStates[i];
			timestampsByState.put(state, stateTimestamps[state.ordinal()]);
		}

		subtasks.add(new SubtasksTimesInfo.SubtaskTimeInfo(
			subtaskIndex,
			locationString,
			duration,
			timestampsByState));
		subtaskIndex++;
	}
	return new SubtasksTimesInfo(id, name, now, subtasks);
}
 
Example 8
Source File: Task.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Switches the task to {@code targetState} and, when the task was RUNNING, triggers the
 * cancellation of the invokable from separate threads.
 *
 * <p>The method does nothing if the task is already in a terminal state or in CANCELING.
 * From DEPLOYING or CREATED only the state is switched and the failure cause recorded.
 * From RUNNING, after a successful switch, a canceler thread is started, plus an interrupter
 * thread if the invokable asks for interrupts on cancel, plus a watchdog thread if a
 * cancellation timeout is configured. If a state switch attempt does not succeed, the
 * current state is re-read and the procedure is repeated.
 *
 * @param targetState the state to switch the task to
 * @param cause the cause recorded as failure cause, also passed to the state transition
 * @throws IllegalStateException if the task is in a state not handled by this method
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	// retry loop: each iteration works on a fresh snapshot of the state and is
	// repeated whenever the transition attempt below returns false
	while (true) {
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet lets only one caller start the cancellation threads
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout,
								LOG);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the state switch succeeded, so we are done even if no threads were started
				return;
			}
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}
 
Example 9
Source File: ExecutionVertex.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Archives the current Execution of this vertex and installs a fresh one in its place.
 *
 * <p>The replacement only happens if the ExecutionGraph's global mod. version has not moved
 * past the version the caller observed; the check and the replacement are done while holding
 * the lock on the prior executions. This lets concurrent recovery and reconfiguration actions
 * avoid conflicts in a way similar to "optimistic concurrency control".
 *
 * @param timestamp
 *             The creation timestamp for the new Execution
 * @param originatingGlobalModVersion
 *             The global mod. version observed by the action that requested this reset
 *
 * @return Returns the newly created Execution.
 *
 * @throws GlobalModVersionMismatch Thrown, if the execution graph has a newer global mod
 *                                  version than the one passed to this method.
 * @throws IllegalStateException Thrown, if the current Execution is not in a terminal state.
 */
public Execution resetForNewExecution(final long timestamp, final long originatingGlobalModVersion)
		throws GlobalModVersionMismatch {
	LOG.debug("Resetting execution vertex {} for new execution.", getTaskNameWithSubtaskIndex());

	synchronized (priorExecutions) {
		// reject the action if a global modification was triggered after the
		// event that originally caused this reset/restart
		final long currentModVersion = getExecutionGraph().getGlobalModVersion();
		if (currentModVersion > originatingGlobalModVersion) {
			throw new GlobalModVersionMismatch(originatingGlobalModVersion, currentModVersion);
		}

		final Execution previous = currentExecution;
		final ExecutionState previousState = previous.getState();

		// only an execution that has reached a terminal state may be replaced
		if (!previousState.isTerminal()) {
			throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + previousState);
		}

		priorExecutions.add(previous.archive());

		final Execution replacement = new Execution(
			getExecutionGraph().getFutureExecutor(),
			this,
			previous.getAttemptNumber() + 1,
			originatingGlobalModVersion,
			timestamp,
			timeout);

		currentExecution = replacement;

		final CoLocationGroup coLocationGroup = jobVertex.getCoLocationGroup();
		if (coLocationGroup != null) {
			locationConstraint = coLocationGroup.getLocationConstraint(subTaskIndex);
		}

		// register this execution at the execution graph, to receive call backs
		getExecutionGraph().registerExecution(replacement);

		// a previously 'FINISHED' execution means the ExecutionGraph is now one
		// step further away from reaching global FINISHED
		if (previousState == FINISHED) {
			getExecutionGraph().vertexUnFinished();
		}

		return replacement;
	}
}
 
Example 10
Source File: JobVertexDetailsHandler.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the details response for a job vertex, containing one entry per subtask.
 *
 * <p>For every subtask the start time is the DEPLOYING timestamp (-1 when it is 0), the end
 * time is the timestamp of the current state if that state is terminal (otherwise -1), and the
 * duration runs from start to end, or to the current wall-clock time while no positive end
 * time is known. The duration is -1 when the start time is not positive.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the job the vertex belongs to
 * @param metricFetcher passed on to the I/O metrics collection, may be null
 * @return the details of the vertex and all of its subtasks
 */
private static JobVertexDetailsInfo createJobVertexDetailsInfo(AccessExecutionJobVertex jobVertex, JobID jobID, @Nullable MetricFetcher metricFetcher) {
	final List<JobVertexDetailsInfo.VertexTaskDetail> subtasks = new ArrayList<>();
	final long now = System.currentTimeMillis();

	int subtaskIndex = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
		final ExecutionState status = vertex.getExecutionState();

		final TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		final String locationString;
		if (location == null) {
			locationString = "(unassigned)";
		} else {
			locationString = location.getHostname() + ":" + location.dataPort();
		}

		// a zero DEPLOYING timestamp is reported as -1
		final long deployingTimestamp = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
		final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

		final long endTime;
		if (status.isTerminal()) {
			endTime = vertex.getStateTimestamp(status);
		} else {
			endTime = -1;
		}

		final long duration;
		if (startTime > 0) {
			final long effectiveEnd = endTime > 0 ? endTime : now;
			duration = effectiveEnd - startTime;
		} else {
			duration = -1;
		}

		final MutableIOMetrics counts = new MutableIOMetrics();
		counts.addIOMetrics(
			vertex.getCurrentExecutionAttempt(),
			metricFetcher,
			jobID.toString(),
			jobVertex.getJobVertexId().toString());

		final int attemptNumber = vertex.getCurrentExecutionAttempt().getAttemptNumber();

		// bytes-in is reported as the sum of the local and remote counters
		final long bytesIn = counts.getNumBytesInLocal() + counts.getNumBytesInRemote();
		final boolean bytesInComplete = counts.isNumBytesInLocalComplete() && counts.isNumBytesInRemoteComplete();
		final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
			bytesIn,
			bytesInComplete,
			counts.getNumBytesOut(),
			counts.isNumBytesOutComplete(),
			counts.getNumRecordsIn(),
			counts.isNumRecordsInComplete(),
			counts.getNumRecordsOut(),
			counts.isNumRecordsOutComplete());

		subtasks.add(new JobVertexDetailsInfo.VertexTaskDetail(
			subtaskIndex,
			status,
			attemptNumber,
			locationString,
			startTime,
			endTime,
			duration,
			ioMetricsInfo));

		subtaskIndex++;
	}

	return new JobVertexDetailsInfo(
		jobVertex.getJobVertexId(),
		jobVertex.getName(),
		jobVertex.getParallelism(),
		now,
		subtasks);
}
 
Example 11
Source File: JobVertexDetailsHandler.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the details response for a job vertex, containing one entry per subtask.
 *
 * <p>For every subtask the start time is the DEPLOYING timestamp (-1 when it is 0), the end
 * time is the timestamp of the current state if that state is terminal (otherwise -1), and the
 * duration runs from start to end, or to the current wall-clock time while no positive end
 * time is known. The duration is -1 when the start time is not positive.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the job the vertex belongs to
 * @param metricFetcher passed on to the I/O metrics collection, may be null
 * @return the details of the vertex and all of its subtasks
 */
private static JobVertexDetailsInfo createJobVertexDetailsInfo(AccessExecutionJobVertex jobVertex, JobID jobID, @Nullable MetricFetcher metricFetcher) {
	final List<JobVertexDetailsInfo.VertexTaskDetail> subtasks = new ArrayList<>();
	final long now = System.currentTimeMillis();

	int subtaskIndex = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
		final ExecutionState status = vertex.getExecutionState();

		final TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		final String locationString;
		if (location == null) {
			locationString = "(unassigned)";
		} else {
			locationString = location.getHostname() + ":" + location.dataPort();
		}

		// a zero DEPLOYING timestamp is reported as -1
		final long deployingTimestamp = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
		final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

		final long endTime;
		if (status.isTerminal()) {
			endTime = vertex.getStateTimestamp(status);
		} else {
			endTime = -1;
		}

		final long duration;
		if (startTime > 0) {
			final long effectiveEnd = endTime > 0 ? endTime : now;
			duration = effectiveEnd - startTime;
		} else {
			duration = -1;
		}

		final MutableIOMetrics counts = new MutableIOMetrics();
		counts.addIOMetrics(
			vertex.getCurrentExecutionAttempt(),
			metricFetcher,
			jobID.toString(),
			jobVertex.getJobVertexId().toString());

		final int attemptNumber = vertex.getCurrentExecutionAttempt().getAttemptNumber();

		final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
			counts.getNumBytesIn(),
			counts.isNumBytesInComplete(),
			counts.getNumBytesOut(),
			counts.isNumBytesOutComplete(),
			counts.getNumRecordsIn(),
			counts.isNumRecordsInComplete(),
			counts.getNumRecordsOut(),
			counts.isNumRecordsOutComplete());

		subtasks.add(new JobVertexDetailsInfo.VertexTaskDetail(
			subtaskIndex,
			status,
			attemptNumber,
			locationString,
			startTime,
			endTime,
			duration,
			ioMetricsInfo));

		subtaskIndex++;
	}

	return new JobVertexDetailsInfo(
		jobVertex.getJobVertexId(),
		jobVertex.getName(),
		jobVertex.getParallelism(),
		now,
		subtasks);
}
 
Example 12
Source File: ExecutionVertex.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Archives the current Execution of this vertex and installs a fresh one in its place.
 *
 * <p>The replacement only happens if the ExecutionGraph's global mod. version has not moved
 * past the version the caller observed; the check and the replacement are done while holding
 * the lock on the prior executions. This lets concurrent recovery and reconfiguration actions
 * avoid conflicts in a way similar to "optimistic concurrency control".
 *
 * <p>As part of the reset, the input splits held by this vertex are handed back to the split
 * assigner (if there is one) and the intermediate result partitions are reset.
 *
 * @param timestamp
 *             The creation timestamp for the new Execution
 * @param originatingGlobalModVersion
 *             The global mod. version observed by the action that requested this reset
 *
 * @return Returns the newly created Execution.
 *
 * @throws GlobalModVersionMismatch Thrown, if the execution graph has a newer global mod
 *                                  version than the one passed to this method.
 * @throws IllegalStateException Thrown, if the current Execution is not in a terminal state.
 */
public Execution resetForNewExecution(final long timestamp, final long originatingGlobalModVersion)
		throws GlobalModVersionMismatch {
	LOG.debug("Resetting execution vertex {} for new execution.", getTaskNameWithSubtaskIndex());

	synchronized (priorExecutions) {
		// reject the action if a global modification was triggered after the
		// event that originally caused this reset/restart
		final long currentModVersion = getExecutionGraph().getGlobalModVersion();
		if (currentModVersion > originatingGlobalModVersion) {
			throw new GlobalModVersionMismatch(originatingGlobalModVersion, currentModVersion);
		}

		final Execution previous = currentExecution;
		final ExecutionState previousState = previous.getState();

		// only an execution that has reached a terminal state may be replaced
		if (!previousState.isTerminal()) {
			throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + previousState);
		}

		final boolean wasFinished = previousState == FINISHED;

		if (wasFinished) {
			// pipelined partitions are released in Execution#cancel(), covering both job failures and vertex resets
			// do not release pipelined partitions here to save RPC calls
			previous.handlePartitionCleanup(false, true);
			getExecutionGraph().getPartitionReleaseStrategy().vertexUnfinished(executionVertexId);
		}

		priorExecutions.add(previous.archive());

		final Execution replacement = new Execution(
			getExecutionGraph().getFutureExecutor(),
			this,
			previous.getAttemptNumber() + 1,
			originatingGlobalModVersion,
			timestamp,
			timeout);

		currentExecution = replacement;

		// hand the splits of the old attempt back to the assigner, if any
		synchronized (inputSplits) {
			final InputSplitAssigner splitAssigner = jobVertex.getSplitAssigner();
			if (splitAssigner != null) {
				splitAssigner.returnInputSplit(inputSplits, getParallelSubtaskIndex());
				inputSplits.clear();
			}
		}

		final CoLocationGroup coLocationGroup = jobVertex.getCoLocationGroup();
		if (coLocationGroup != null) {
			locationConstraint = coLocationGroup.getLocationConstraint(subTaskIndex);
		}

		// register this execution at the execution graph, to receive call backs
		getExecutionGraph().registerExecution(replacement);

		// a previously 'FINISHED' execution means the ExecutionGraph is now one
		// step further away from reaching global FINISHED
		if (wasFinished) {
			getExecutionGraph().vertexUnFinished();
		}

		// reset the intermediate results
		for (IntermediateResultPartition partition : resultPartitions.values()) {
			partition.resetForNewExecution();
		}

		return replacement;
	}
}
 
Example 13
Source File: Task.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Switches the task to {@code targetState} and, when the task was RUNNING, triggers the
 * cancellation of the invokable from separate threads.
 *
 * <p>The method does nothing if the task is already in a terminal state or in CANCELING.
 * From DEPLOYING or CREATED only the state is switched and the failure cause recorded.
 * From RUNNING, after a successful switch, a canceler thread is started, plus an interrupter
 * thread if the invokable asks for interrupts on cancel, plus a watchdog thread if a
 * cancellation timeout is configured. If a state switch attempt does not succeed, the
 * current state is re-read and the procedure is repeated.
 *
 * @param targetState the state to switch the task to
 * @param cause the cause recorded as failure cause, also passed to the state transition
 * @throws IllegalStateException if the task is in a state not handled by this method
 */
@VisibleForTesting
void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) {
	// retry loop: each iteration works on a fresh snapshot of the state and is
	// repeated whenever the transition attempt below returns false
	while (true) {
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet lets only one caller start the cancellation threads
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the state switch succeeded, so we are done even if no threads were started
				return;
			}
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}
 
Example 14
Source File: SubtaskExecutionAttemptDetailsInfo.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the details record describing one execution attempt of a subtask, collecting the
 * attempt's I/O metrics along the way.
 *
 * <p>The start time is the DEPLOYING timestamp, or -1 when that timestamp is 0. The end time is
 * the timestamp of the current state if that state is terminal, otherwise -1. The duration runs
 * from the start time to the end time (or to the current wall-clock time while no positive end
 * time is known) and is -1 when the start time is not positive.
 *
 * @param execution the execution attempt to describe
 * @param metricFetcher passed on to the I/O metrics collection, may be null
 * @param jobID the job the attempt belongs to
 * @param jobVertexID the job vertex the attempt belongs to
 * @return the assembled details record
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, @Nullable MetricFetcher metricFetcher, JobID jobID, JobVertexID jobVertexID) {
	final ExecutionState status = execution.getState();
	final long now = System.currentTimeMillis();

	// host name and task manager id both fall back to the same placeholder
	final TaskManagerLocation location = execution.getAssignedResourceLocation();
	final String locationString;
	final String taskmanagerId;
	if (location == null) {
		locationString = "(unassigned)";
		taskmanagerId = "(unassigned)";
	} else {
		locationString = location.getHostname();
		taskmanagerId = location.getResourceID().toString();
	}

	// a zero DEPLOYING timestamp is reported as -1
	final long deployingTimestamp = execution.getStateTimestamp(ExecutionState.DEPLOYING);
	final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

	final long endTime;
	if (status.isTerminal()) {
		endTime = execution.getStateTimestamp(status);
	} else {
		endTime = -1;
	}

	final long duration;
	if (startTime > 0) {
		// without a positive end time, measure up to the current time
		final long effectiveEnd = endTime > 0 ? endTime : now;
		duration = effectiveEnd - startTime;
	} else {
		duration = -1;
	}

	final MutableIOMetrics ioMetrics = new MutableIOMetrics();
	ioMetrics.addIOMetrics(execution, metricFetcher, jobID.toString(), jobVertexID.toString());

	final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
		ioMetrics.getNumBytesIn(),
		ioMetrics.isNumBytesInComplete(),
		ioMetrics.getNumBytesOut(),
		ioMetrics.isNumBytesOutComplete(),
		ioMetrics.getNumRecordsIn(),
		ioMetrics.isNumRecordsInComplete(),
		ioMetrics.getNumRecordsOut(),
		ioMetrics.isNumRecordsOutComplete());

	return new SubtaskExecutionAttemptDetailsInfo(
		execution.getParallelSubtaskIndex(),
		status,
		execution.getAttemptNumber(),
		locationString,
		startTime,
		endTime,
		duration,
		ioMetricsInfo,
		taskmanagerId
	);
}
 
Example 15
Source File: Task.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Switches the task to {@code targetState} and, when the task was RUNNING, triggers the
 * cancellation of the invokable from separate threads.
 *
 * <p>The method does nothing if the task is already in a terminal state or in CANCELING.
 * From DEPLOYING or CREATED only the state is switched and the failure cause recorded.
 * From RUNNING, after a successful switch, a canceler thread is started, plus an interrupter
 * thread if the invokable asks for interrupts on cancel, plus a watchdog thread if a
 * cancellation timeout is configured. If a state switch attempt does not succeed, the
 * current state is re-read and the procedure is repeated.
 *
 * @param targetState the state to switch the task to
 * @param cause the cause recorded as failure cause, also passed to the state transition
 * @throws IllegalStateException if the task is in a state not handled by this method
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	// retry loop: each iteration works on a fresh snapshot of the state and is
	// repeated whenever the transition attempt below returns false
	while (true) {
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet lets only one caller start the cancellation threads
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(
							LOG,
							invokable,
							executingThread,
							taskNameWithSubtask,
							producedPartitions,
							inputGates);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout,
								LOG);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the state switch succeeded, so we are done even if no threads were started
				return;
			}
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}
 
Example 16
Source File: ExecutionVertex.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the current execution of this vertex with a fresh attempt.
 *
 * <p>The previous execution is archived into {@code priorExecutions}, a new {@link Execution}
 * with an attempt number one higher than the previous one becomes {@code currentExecution},
 * previously assigned input splits are handed back to the split assigner (if there is one),
 * the co-location constraint is refreshed, the new execution is registered at the execution
 * graph, and all intermediate result partitions are reset.
 *
 * @param timestamp timestamp handed to the newly created execution
 * @param originatingGlobalModVersion global modification version handed to the newly created execution
 * @return the newly created execution, which is now the current execution of this vertex
 * @throws IllegalStateException if the current execution is not in a terminal state
 */
private Execution resetForNewExecutionInternal(final long timestamp, final long originatingGlobalModVersion) {
	final Execution previousAttempt = currentExecution;
	final ExecutionState previousState = previousAttempt.getState();

	// only executions that reached a terminal state may be replaced
	if (!previousState.isTerminal()) {
		throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + previousState);
	}

	final boolean wasFinished = previousState == FINISHED;

	if (wasFinished) {
		// Pipelined partitions are already released in Execution#cancel(), which covers both
		// job failures and vertex resets; they are not released again here to save RPC calls.
		previousAttempt.handlePartitionCleanup(false, true);
		getExecutionGraph().getPartitionReleaseStrategy().vertexUnfinished(executionVertexId);
	}

	priorExecutions.add(previousAttempt.archive());

	final Execution nextAttempt = new Execution(
		getExecutionGraph().getFutureExecutor(),
		this,
		previousAttempt.getAttemptNumber() + 1,
		originatingGlobalModVersion,
		timestamp,
		timeout);

	currentExecution = nextAttempt;

	// hand the splits of the previous attempt back to the assigner, if the job vertex has one
	synchronized (inputSplits) {
		final InputSplitAssigner splitAssigner = jobVertex.getSplitAssigner();
		if (splitAssigner != null) {
			splitAssigner.returnInputSplit(inputSplits, getParallelSubtaskIndex());
			inputSplits.clear();
		}
	}

	final CoLocationGroup coLocationGroup = jobVertex.getCoLocationGroup();
	if (coLocationGroup != null) {
		locationConstraint = coLocationGroup.getLocationConstraint(subTaskIndex);
	}

	// the execution graph needs to know the new execution in order to deliver call backs to it
	getExecutionGraph().registerExecution(nextAttempt);

	// a vertex that had already been 'FINISHED' moves the ExecutionGraph one step
	// away from reaching global FINISHED again
	if (wasFinished) {
		getExecutionGraph().vertexUnFinished();
	}

	// reset the intermediate results
	resultPartitions.values().forEach(IntermediateResultPartition::resetForNewExecution);

	return nextAttempt;
}
 
Example 17
Source File: Execution.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tries to switch this execution from {@code currentState} to {@code targetState}.
 *
 * <p>The switch only happens if the execution's {@code state} field currently equals
 * {@code currentState}. On success the timestamp of the target state is marked, the
 * transition is logged, the terminal state future is completed when the target state is
 * terminal, and the vertex is notified of the transition.
 *
 * @param currentState the state the execution is expected to be in
 * @param targetState the state to switch to
 * @param error the error associated with the transition, or {@code null} if there is none
 * @return {@code true} if the state was switched, {@code false} if the execution was not in
 *         {@code currentState}
 * @throws IllegalStateException if {@code currentState} is a terminal state
 */
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) {
	// sanity check: a terminal state must never be left again
	if (currentState.isTerminal()) {
		throw new IllegalStateException("Cannot leave terminal state " + currentState + " to transition to " + targetState + '.');
	}

	// the execution is not in the expected state, so nothing is changed
	if (state != currentState) {
		return false;
	}

	state = targetState;
	markTimestamp(targetState);

	if (error == null) {
		LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState);
	} else if (LOG.isInfoEnabled()) {
		// only resolve the location string when the message is actually going to be logged
		final String locationInformation;
		if (getAssignedResource() != null) {
			locationInformation = getAssignedResource().toString();
		} else {
			locationInformation = "not deployed";
		}

		// the error is handed over as the final argument, after the five placeholder values
		LOG.info(
			"{} ({}) switched from {} to {} on {}.",
			getVertex().getTaskNameWithSubtaskIndex(),
			getAttemptId(),
			currentState,
			targetState,
			locationInformation,
			error);
	}

	// complete the terminal state future once a terminal state has been reached
	if (targetState.isTerminal()) {
		terminalStateFuture.complete(targetState);
	}

	// The state transition itself must complete normally: potential errors in listeners
	// are logged here and may not affect the main logic.
	try {
		vertex.notifyStateTransition(this, targetState, error);
	}
	catch (Throwable t) {
		LOG.error("Error while notifying execution graph of execution state transition.", t);
	}

	return true;
}