Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#CANCELING

The following examples show how to use org.apache.flink.runtime.execution.ExecutionState#CANCELING. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ExecutionVertexInputConstraintTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Waits for the given execution graph to leave its current run and reach
 * {@code JobStatus.RUNNING} again.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>wait until every execution is CANCELING, CANCELED, FAILED or FINISHED;</li>
 *   <li>complete the cancellation of every current attempt that is still CANCELING;</li>
 *   <li>wait until the job status is RUNNING.</li>
 * </ol>
 * Both waits are given {@code 2000L} as their timeout argument.
 *
 * @param eg the execution graph to observe
 * @throws Exception propagated from the wait helpers
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
	final long timeout = 2000L;

	// Step 1: no execution may still be in a "live" state.
	waitForAllExecutionsPredicate(
		eg,
		isInExecutionState(ExecutionState.CANCELING)
			.or(isInExecutionState(ExecutionState.CANCELED))
			.or(isInExecutionState(ExecutionState.FAILED))
			.or(isInExecutionState(ExecutionState.FINISHED)),
		timeout);

	// Step 2: manually finish cancellation for attempts that are stuck in CANCELING.
	for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
		final boolean stillCanceling =
			vertex.getCurrentExecutionAttempt().getState() == ExecutionState.CANCELING;
		if (stillCanceling) {
			vertex.getCurrentExecutionAttempt().completeCancelling();
		}
	}

	// Step 3: the job must come back up.
	waitUntilJobStatus(eg, JobStatus.RUNNING, timeout);
}
 
Example 2
Source File: ExecutionVertexInputConstraintTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Waits for the given execution graph to leave its current run and reach
 * {@code JobStatus.RUNNING} again.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>wait until every execution is CANCELING, CANCELED, FAILED or FINISHED;</li>
 *   <li>complete the cancellation of every current attempt that is still CANCELING;</li>
 *   <li>wait until the job status is RUNNING.</li>
 * </ol>
 * Both waits are given {@code 2000L} as their timeout argument.
 *
 * @param eg the execution graph to observe
 * @throws Exception propagated from the wait helpers
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
	final long timeout = 2000L;

	// Step 1: no execution may still be in a "live" state.
	waitForAllExecutionsPredicate(
		eg,
		isInExecutionState(ExecutionState.CANCELING)
			.or(isInExecutionState(ExecutionState.CANCELED))
			.or(isInExecutionState(ExecutionState.FAILED))
			.or(isInExecutionState(ExecutionState.FINISHED)),
		timeout);

	// Step 2: manually finish cancellation for attempts that are stuck in CANCELING.
	for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
		final boolean stillCanceling =
			vertex.getCurrentExecutionAttempt().getState() == ExecutionState.CANCELING;
		if (stillCanceling) {
			vertex.getCurrentExecutionAttempt().completeCancelling();
		}
	}

	// Step 3: the job must come back up.
	waitUntilJobStatus(eg, JobStatus.RUNNING, timeout);
}
 
Example 3
Source File: DefaultScheduler.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tells every operator coordinator of the vertex's job vertex that the subtask failed,
 * unless the vertex is already FAILED, CANCELING or CANCELED.
 *
 * <p>The state check is a best-effort filter against duplicate notifications: a vertex in
 * one of those three states is treated as one whose coordinators were already notified.
 *
 * @param vertex the execution vertex whose coordinators should be notified
 */
private void notifyCoordinatorOfCancellation(ExecutionVertex vertex) {
	final ExecutionState state = vertex.getExecutionState();

	final boolean alreadyHandled =
		state == ExecutionState.FAILED
			|| state == ExecutionState.CANCELING
			|| state == ExecutionState.CANCELED;
	if (alreadyHandled) {
		return;
	}

	// a null cause is passed to the coordinators, exactly as for a plain cancellation
	for (OperatorCoordinator operatorCoordinator : vertex.getJobVertex().getOperatorCoordinators()) {
		operatorCoordinator.subtaskFailed(vertex.getParallelSubtaskIndex(), null);
	}
}
 
Example 4
Source File: ExecutionVertexInputConstraintTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Waits for the given execution graph to leave its current run and reach
 * {@code JobStatus.RUNNING} again.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>wait until every execution is CANCELING, CANCELED, FAILED or FINISHED;</li>
 *   <li>complete the cancellation of every current attempt that is still CANCELING;</li>
 *   <li>wait until the job status is RUNNING.</li>
 * </ol>
 * Both waits are given {@code 2000L} as their timeout argument.
 *
 * @param eg the execution graph to observe
 * @throws Exception propagated from the wait helpers
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
	final long timeout = 2000L;

	// Step 1: no execution may still be in a "live" state.
	waitForAllExecutionsPredicate(
		eg,
		isInExecutionState(ExecutionState.CANCELING)
			.or(isInExecutionState(ExecutionState.CANCELED))
			.or(isInExecutionState(ExecutionState.FAILED))
			.or(isInExecutionState(ExecutionState.FINISHED)),
		timeout);

	// Step 2: manually finish cancellation for attempts that are stuck in CANCELING.
	for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
		final boolean stillCanceling =
			vertex.getCurrentExecutionAttempt().getState() == ExecutionState.CANCELING;
		if (stillCanceling) {
			vertex.getCurrentExecutionAttempt().completeCancelling();
		}
	}

	// Step 3: the job must come back up.
	waitUntilJobStatus(eg, JobStatus.RUNNING, timeout);
}
 
Example 5
Source File: RemoteChannelStateChecker.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Ends consumption through the given response handle, based on the producer state
 * obtained from it.
 *
 * <ul>
 *   <li>Producer CANCELING, CANCELED or FAILED: consumption is cancelled, because a
 *       re-triggered request could not succeed.</li>
 *   <li>Any other producer state: consumption is failed with an
 *       {@link IllegalStateException} describing the unexpected state.</li>
 * </ul>
 *
 * @param responseHandle handle used to read the producer state and to cancel or fail consumption
 */
private void abortConsumptionOrIgnoreCheckResult(ResponseHandle responseHandle) {
	final ExecutionState producerState = getProducerState(responseHandle);

	final boolean producerCanceledOrFailed =
		producerState == ExecutionState.CANCELING
			|| producerState == ExecutionState.CANCELED
			|| producerState == ExecutionState.FAILED;

	if (!producerCanceledOrFailed) {
		// A producer in any other state is not expected here; surface it as a failure.
		// NOTE(review): which states can actually reach this branch depends on the caller — confirm.
		final String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.",
			resultPartitionId.getProducerId(),
			resultPartitionId.getPartitionId(),
			producerState);

		responseHandle.failConsumption(new IllegalStateException(msg));
		return;
	}

	// The producer is gone for good, so there is no point in asking for the partition again.
	if (LOG.isDebugEnabled()) {
		LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.",
			taskNameWithSubtask,
			resultPartitionId.getPartitionId(),
			resultPartitionId.getProducerId(),
			producerState);
	}

	responseHandle.cancelConsumption();
}
 
Example 6
Source File: RemoteChannelStateChecker.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Ends consumption through the given response handle, based on the producer state
 * obtained from it.
 *
 * <ul>
 *   <li>Producer CANCELING, CANCELED or FAILED: consumption is cancelled, because a
 *       re-triggered request could not succeed.</li>
 *   <li>Any other producer state: consumption is failed with an
 *       {@link IllegalStateException} describing the unexpected state.</li>
 * </ul>
 *
 * @param responseHandle handle used to read the producer state and to cancel or fail consumption
 */
private void abortConsumptionOrIgnoreCheckResult(ResponseHandle responseHandle) {
	final ExecutionState producerState = getProducerState(responseHandle);

	final boolean producerCanceledOrFailed =
		producerState == ExecutionState.CANCELING
			|| producerState == ExecutionState.CANCELED
			|| producerState == ExecutionState.FAILED;

	if (!producerCanceledOrFailed) {
		// A producer in any other state is not expected here; surface it as a failure.
		// NOTE(review): which states can actually reach this branch depends on the caller — confirm.
		final String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.",
			resultPartitionId.getProducerId(),
			resultPartitionId.getPartitionId(),
			producerState);

		responseHandle.failConsumption(new IllegalStateException(msg));
		return;
	}

	// The producer is gone for good, so there is no point in asking for the partition again.
	if (LOG.isDebugEnabled()) {
		LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.",
			taskNameWithSubtask,
			resultPartitionId.getPartitionId(),
			resultPartitionId.getProducerId(),
			producerState);
	}

	responseHandle.cancelConsumption();
}
 
Example 7
Source File: ExecutionJobVertex.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Collapses the per-state subtask counts of an ExecutionJobVertex into one summary state.
 *
 * <p>The summary is meant for display (for example in dashboards) and is not used by the
 * coordination logic. The first matching rule wins:
 * <ol>
 *   <li>at least one FAILED subtask: FAILED</li>
 *   <li>at least one CANCELING subtask: CANCELING</li>
 *   <li>at least one CANCELED subtask: CANCELED</li>
 *   <li>at least one RUNNING subtask: RUNNING</li>
 *   <li>at least one FINISHED subtask: FINISHED if the finished count equals
 *       {@code parallelism}, otherwise RUNNING</li>
 *   <li>everything else: CREATED</li>
 * </ol>
 *
 * @param verticesPerState number of subtasks in each state, indexed by the ordinal of the
 *                         corresponding ExecutionState value
 * @param parallelism the parallelism of the ExecutionJobVertex
 *
 * @return the aggregate state of the ExecutionJobVertex
 * @throws IllegalArgumentException if {@code verticesPerState} is null or its length differs
 *                                  from the number of ExecutionState values
 */
public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) {
	final boolean hasOneSlotPerState =
			verticesPerState != null && verticesPerState.length == ExecutionState.values().length;
	if (!hasOneSlotPerState) {
		throw new IllegalArgumentException("Must provide an array as large as there are execution states.");
	}

	if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) {
		return ExecutionState.FAILED;
	}
	if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) {
		return ExecutionState.CANCELING;
	}
	if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) {
		return ExecutionState.CANCELED;
	}
	if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) {
		return ExecutionState.RUNNING;
	}

	final int finishedCount = verticesPerState[ExecutionState.FINISHED.ordinal()];
	if (finishedCount <= 0) {
		// nothing failed, cancelled, running or finished: everything else collapses under created
		return ExecutionState.CREATED;
	}

	// some subtasks finished; only report FINISHED once all of them did
	if (finishedCount == parallelism) {
		return ExecutionState.FINISHED;
	}
	return ExecutionState.RUNNING;
}
 
Example 8
Source File: InterruptSensitiveRestoreTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a task, waits until it signals that it is in "restore", cancels it, and checks
 * that it ends up CANCELED without a failure cause.
 *
 * @param mode one of OPERATOR_MANAGED, OPERATOR_RAW, KEYED_MANAGED or KEYED_RAW
 * @throws IllegalArgumentException if {@code mode} is none of the supported values
 * @throws Exception propagated from task creation, the latch, or the thread join
 */
private void testRestoreWithInterrupt(int mode) throws Exception {

		IN_RESTORE_LATCH.reset();

		final Configuration taskConfig = new Configuration();
		final StreamConfig cfg = new StreamConfig(taskConfig);
		cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

		// every supported mode is configured the same way; anything else is rejected
		final boolean supportedMode =
				mode == OPERATOR_MANAGED
					|| mode == OPERATOR_RAW
					|| mode == KEYED_MANAGED
					|| mode == KEYED_RAW;
		if (!supportedMode) {
			throw new IllegalArgumentException();
		}
		cfg.setStateKeySerializer(IntSerializer.INSTANCE);
		cfg.setStreamOperator(new StreamSource<>(new TestSource(mode)));

		final StreamStateHandle lockingHandle = new InterruptLockingStateHandle();
		final Task task = createTask(cfg, taskConfig, lockingHandle, mode);

		// run the task and block until it reports being inside "restore"
		task.startTaskThread();
		IN_RESTORE_LATCH.await();

		// cancel while restoring, then give the task thread up to 30000 to terminate
		task.cancelExecution();
		task.getExecutingThread().join(30000);

		// a task that is still CANCELING after the join never reacted to the cancellation
		if (task.getExecutionState() == ExecutionState.CANCELING) {
			fail("Task is stuck and not canceling");
		}

		assertEquals(ExecutionState.CANCELED, task.getExecutionState());
		assertNull(task.getFailureCause());
	}
 
Example 9
Source File: ExecutionJobVertex.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Collapses the per-state subtask counts of an ExecutionJobVertex into one summary state.
 *
 * <p>The summary is meant for display (for example in dashboards) and is not used by the
 * coordination logic. The first matching rule wins:
 * <ol>
 *   <li>at least one FAILED subtask: FAILED</li>
 *   <li>at least one CANCELING subtask: CANCELING</li>
 *   <li>at least one CANCELED subtask: CANCELED</li>
 *   <li>at least one RUNNING subtask: RUNNING</li>
 *   <li>at least one FINISHED subtask: FINISHED if the finished count equals
 *       {@code parallelism}, otherwise RUNNING</li>
 *   <li>everything else: CREATED</li>
 * </ol>
 *
 * @param verticesPerState number of subtasks in each state, indexed by the ordinal of the
 *                         corresponding ExecutionState value
 * @param parallelism the parallelism of the ExecutionJobVertex
 *
 * @return the aggregate state of the ExecutionJobVertex
 * @throws IllegalArgumentException if {@code verticesPerState} is null or its length differs
 *                                  from the number of ExecutionState values
 */
public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) {
	final boolean hasOneSlotPerState =
			verticesPerState != null && verticesPerState.length == ExecutionState.values().length;
	if (!hasOneSlotPerState) {
		throw new IllegalArgumentException("Must provide an array as large as there are execution states.");
	}

	if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) {
		return ExecutionState.FAILED;
	}
	if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) {
		return ExecutionState.CANCELING;
	}
	if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) {
		return ExecutionState.CANCELED;
	}
	if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) {
		return ExecutionState.RUNNING;
	}

	final int finishedCount = verticesPerState[ExecutionState.FINISHED.ordinal()];
	if (finishedCount <= 0) {
		// nothing failed, cancelled, running or finished: everything else collapses under created
		return ExecutionState.CREATED;
	}

	// some subtasks finished; only report FINISHED once all of them did
	if (finishedCount == parallelism) {
		return ExecutionState.FINISHED;
	}
	return ExecutionState.RUNNING;
}
 
Example 10
Source File: InterruptSensitiveRestoreTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a task, waits until it signals that it is in "restore", cancels it, and checks
 * that it ends up CANCELED without a failure cause.
 *
 * @param mode one of OPERATOR_MANAGED, OPERATOR_RAW, KEYED_MANAGED or KEYED_RAW
 * @throws IllegalArgumentException if {@code mode} is none of the supported values
 * @throws Exception propagated from task creation, the latch, or the thread join
 */
private void testRestoreWithInterrupt(int mode) throws Exception {

		IN_RESTORE_LATCH.reset();

		final Configuration taskConfig = new Configuration();
		final StreamConfig cfg = new StreamConfig(taskConfig);
		cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

		// every supported mode is configured the same way; anything else is rejected
		final boolean supportedMode =
				mode == OPERATOR_MANAGED
					|| mode == OPERATOR_RAW
					|| mode == KEYED_MANAGED
					|| mode == KEYED_RAW;
		if (!supportedMode) {
			throw new IllegalArgumentException();
		}
		cfg.setStateKeySerializer(IntSerializer.INSTANCE);
		cfg.setStreamOperator(new StreamSource<>(new TestSource(mode)));

		final StreamStateHandle lockingHandle = new InterruptLockingStateHandle();
		final Task task = createTask(cfg, taskConfig, lockingHandle, mode);

		// run the task and block until it reports being inside "restore"
		task.startTaskThread();
		IN_RESTORE_LATCH.await();

		// cancel while restoring, then give the task thread up to 30000 to terminate
		task.cancelExecution();
		task.getExecutingThread().join(30000);

		// a task that is still CANCELING after the join never reacted to the cancellation
		if (task.getExecutionState() == ExecutionState.CANCELING) {
			fail("Task is stuck and not canceling");
		}

		assertEquals(ExecutionState.CANCELED, task.getExecutionState());
		assertNull(task.getFailureCause());
	}
 
Example 11
Source File: InterruptSensitiveRestoreTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a task, waits until it signals that it is in "restore", cancels it, and checks
 * that it ends up CANCELED without a failure cause.
 *
 * @param mode one of OPERATOR_MANAGED, OPERATOR_RAW, KEYED_MANAGED or KEYED_RAW
 * @throws IllegalArgumentException if {@code mode} is none of the supported values
 * @throws Exception propagated from task creation, the latch, or the thread join
 */
private void testRestoreWithInterrupt(int mode) throws Exception {

		IN_RESTORE_LATCH.reset();

		final Configuration taskConfig = new Configuration();
		final StreamConfig cfg = new StreamConfig(taskConfig);
		cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

		// every supported mode is configured the same way; anything else is rejected
		final boolean supportedMode =
				mode == OPERATOR_MANAGED
					|| mode == OPERATOR_RAW
					|| mode == KEYED_MANAGED
					|| mode == KEYED_RAW;
		if (!supportedMode) {
			throw new IllegalArgumentException();
		}
		cfg.setStateKeySerializer(IntSerializer.INSTANCE);
		cfg.setStreamOperator(new StreamSource<>(new TestSource(mode)));

		final StreamStateHandle lockingHandle = new InterruptLockingStateHandle();
		final Task task = createTask(cfg, taskConfig, lockingHandle, mode);

		// run the task and block until it reports being inside "restore"
		task.startTaskThread();
		IN_RESTORE_LATCH.await();

		// cancel while restoring, then give the task thread up to 30000 to terminate
		task.cancelExecution();
		task.getExecutingThread().join(30000);

		// a task that is still CANCELING after the join never reacted to the cancellation
		if (task.getExecutionState() == ExecutionState.CANCELING) {
			fail("Task is stuck and not canceling");
		}

		assertEquals(ExecutionState.CANCELED, task.getExecutionState());
		assertNull(task.getFailureCause());
	}
 
Example 12
Source File: ExecutionJobVertex.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Collapses the per-state subtask counts of an ExecutionJobVertex into one summary state.
 *
 * <p>The summary is meant for display (for example in dashboards) and is not used by the
 * coordination logic. The first matching rule wins:
 * <ol>
 *   <li>at least one FAILED subtask: FAILED</li>
 *   <li>at least one CANCELING subtask: CANCELING</li>
 *   <li>at least one CANCELED subtask: CANCELED</li>
 *   <li>at least one RUNNING subtask: RUNNING</li>
 *   <li>at least one FINISHED subtask: FINISHED if the finished count equals
 *       {@code parallelism}, otherwise RUNNING</li>
 *   <li>everything else: CREATED</li>
 * </ol>
 *
 * @param verticesPerState number of subtasks in each state, indexed by the ordinal of the
 *                         corresponding ExecutionState value
 * @param parallelism the parallelism of the ExecutionJobVertex
 *
 * @return the aggregate state of the ExecutionJobVertex
 * @throws IllegalArgumentException if {@code verticesPerState} is null or its length differs
 *                                  from the number of ExecutionState values
 */
public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) {
	final boolean hasOneSlotPerState =
			verticesPerState != null && verticesPerState.length == ExecutionState.values().length;
	if (!hasOneSlotPerState) {
		throw new IllegalArgumentException("Must provide an array as large as there are execution states.");
	}

	if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) {
		return ExecutionState.FAILED;
	}
	if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) {
		return ExecutionState.CANCELING;
	}
	if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) {
		return ExecutionState.CANCELED;
	}
	if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) {
		return ExecutionState.RUNNING;
	}

	final int finishedCount = verticesPerState[ExecutionState.FINISHED.ordinal()];
	if (finishedCount <= 0) {
		// nothing failed, cancelled, running or finished: everything else collapses under created
		return ExecutionState.CREATED;
	}

	// some subtasks finished; only report FINISHED once all of them did
	if (finishedCount == parallelism) {
		return ExecutionState.FINISHED;
	}
	return ExecutionState.RUNNING;
}
 
Example 13
Source File: Task.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Moves this task into {@code targetState} and, when the task was running, triggers
 * cancellation of its invokable.
 *
 * <p>The method loops: whenever {@code transitionState} returns {@code false}, the current
 * state is read again and the decision is repeated. Per observed state:
 * <ul>
 *   <li>terminal or CANCELING: logs the state and returns without changing anything;</li>
 *   <li>CREATED or DEPLOYING: on a successful transition stores {@code cause} as the failure
 *       cause and returns; the invokable is not cancelled;</li>
 *   <li>RUNNING: on a successful transition, and only if an invokable exists and
 *       {@code invokableHasBeenCanceled.compareAndSet(false, true)} succeeds, stores
 *       {@code cause} and starts daemon threads in the executing thread's thread group:
 *       a canceler, an interrupter (only if {@code invokable.shouldInterruptOnCancel()}),
 *       and a watchdog (only if {@code taskCancellationTimeout > 0});</li>
 *   <li>any other state: throws.</li>
 * </ul>
 *
 * @param targetState the state to transition to (presumably CANCELING or FAILED — confirm with callers)
 * @param cause passed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is in a state not handled above
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	while (true) {
		// snapshot the state once per iteration so all checks below see the same value
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet ensures the cancellation threads are started at most once
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout,
								LOG);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the transition succeeded, so we are done even if no threads had to be started
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}
 
Example 14
Source File: Task.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether this task has failed, has been canceled, or is currently being canceled.
 *
 * @return {@code true} if the task is in state FAILED, CANCELING, or CANCELED;
 *         {@code false} otherwise.
 */
public boolean isCanceledOrFailed() {
	final ExecutionState state = executionState;

	if (state == ExecutionState.CANCELING) {
		return true;
	}
	return state == ExecutionState.CANCELED || state == ExecutionState.FAILED;
}
 
Example 15
Source File: TaskDeploymentDescriptorFactory.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the given producer state is CANCELING, CANCELED or FAILED.
 *
 * @param producerState the execution state of the producer
 * @return {@code true} for CANCELING, CANCELED and FAILED; {@code false} for anything else
 */
private static boolean isProducerFailedOrCanceled(ExecutionState producerState) {
	if (producerState == ExecutionState.CANCELING) {
		return true;
	}
	if (producerState == ExecutionState.CANCELED) {
		return true;
	}
	return producerState == ExecutionState.FAILED;
}
 
Example 16
Source File: TaskDeploymentDescriptorFactory.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the given producer state is CANCELING, CANCELED or FAILED.
 *
 * @param producerState the execution state of the producer
 * @return {@code true} for CANCELING, CANCELED and FAILED; {@code false} for anything else
 */
private static boolean isProducerFailedOrCanceled(ExecutionState producerState) {
	if (producerState == ExecutionState.CANCELING) {
		return true;
	}
	if (producerState == ExecutionState.CANCELED) {
		return true;
	}
	return producerState == ExecutionState.FAILED;
}
 
Example 17
Source File: Task.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether this task has failed, has been canceled, or is currently being canceled.
 *
 * @return {@code true} if the task is in state FAILED, CANCELING, or CANCELED;
 *         {@code false} otherwise.
 */
public boolean isCanceledOrFailed() {
	final ExecutionState state = executionState;

	if (state == ExecutionState.CANCELING) {
		return true;
	}
	return state == ExecutionState.CANCELED || state == ExecutionState.FAILED;
}
 
Example 18
Source File: Task.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Moves this task into {@code targetState} and, when the task was running, triggers
 * cancellation of its invokable.
 *
 * <p>The method loops: whenever {@code transitionState} returns {@code false}, the current
 * state is read again and the decision is repeated. Per observed state:
 * <ul>
 *   <li>terminal or CANCELING: logs the state and returns without changing anything;</li>
 *   <li>CREATED or DEPLOYING: on a successful transition stores {@code cause} as the failure
 *       cause and returns; the invokable is not cancelled;</li>
 *   <li>RUNNING: on a successful transition, and only if an invokable exists and
 *       {@code invokableHasBeenCanceled.compareAndSet(false, true)} succeeds, stores
 *       {@code cause} and starts daemon threads in the executing thread's thread group:
 *       a canceler, an interrupter (only if {@code invokable.shouldInterruptOnCancel()}),
 *       and a watchdog (only if {@code taskCancellationTimeout > 0});</li>
 *   <li>any other state: throws.</li>
 * </ul>
 *
 * @param targetState the state to transition to (presumably CANCELING or FAILED — confirm with callers)
 * @param cause passed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is in a state not handled above
 */
@VisibleForTesting
void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) {
	while (true) {
		// snapshot the state once per iteration so all checks below see the same value
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet ensures the cancellation threads are started at most once
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the transition succeeded, so we are done even if no threads had to be started
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}
 
Example 19
Source File: Task.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Handles the answer to a partition state check that was issued after a partition request
 * failed.
 *
 * <p>The notification is only acted upon while this task is RUNNING; otherwise it is logged
 * at debug level and dropped. For a running task:
 * <ul>
 *   <li>unknown input gate: the task is failed externally;</li>
 *   <li>producer SCHEDULED, DEPLOYING, RUNNING or FINISHED: the partition request is
 *       re-triggered on the input gate;</li>
 *   <li>producer CANCELING, CANCELED or FAILED: this task's execution is cancelled;</li>
 *   <li>any other producer state: the task is failed externally.</li>
 * </ul>
 *
 * @param intermediateDataSetId id used to look up the input gate
 * @param resultPartitionId id of the partition whose producer state was checked
 * @param producerState the reported state of the producer
 */
@VisibleForTesting
void onPartitionStateUpdate(
		IntermediateDataSetID intermediateDataSetId,
		ResultPartitionID resultPartitionId,
		ExecutionState producerState) throws IOException, InterruptedException {

	if (executionState != ExecutionState.RUNNING) {
		LOG.debug("Task {} ignored a partition producer state notification, because it's not running.", taskNameWithSubtask);
		return;
	}

	final SingleInputGate inputGate = inputGatesById.get(intermediateDataSetId);
	if (inputGate == null) {
		failExternally(new IllegalStateException("Received partition producer state for " +
				"unknown input gate " + intermediateDataSetId + "."));
		return;
	}

	final boolean producerCanStillServe =
		producerState == ExecutionState.SCHEDULED
			|| producerState == ExecutionState.DEPLOYING
			|| producerState == ExecutionState.RUNNING
			|| producerState == ExecutionState.FINISHED;

	final boolean producerCanceledOrFailed =
		producerState == ExecutionState.CANCELING
			|| producerState == ExecutionState.CANCELED
			|| producerState == ExecutionState.FAILED;

	if (producerCanStillServe) {
		// Retrigger the partition request
		inputGate.retriggerPartitionRequest(resultPartitionId.getPartitionId());
	} else if (producerCanceledOrFailed) {
		// The producer is gone for good; asking again cannot succeed, so cancel this task.
		if (LOG.isDebugEnabled()) {
			LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.",
				taskNameWithSubtask,
				resultPartitionId.getPartitionId(),
				resultPartitionId.getProducerId(),
				producerState);
		}

		cancelExecution();
	} else {
		// Any other execution state is unexpected. Of the states named in this method,
		// only CREATED is not covered above; a producer in that state suggests that
		// scheduling in topological order went wrong.
		final String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.",
			resultPartitionId.getProducerId(),
			resultPartitionId.getPartitionId(),
			producerState);

		failExternally(new IllegalStateException(msg));
	}
}
 
Example 20
Source File: Task.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Moves this task into {@code targetState} and, when the task was running, triggers
 * cancellation of its invokable.
 *
 * <p>The method loops: whenever {@code transitionState} returns {@code false}, the current
 * state is read again and the decision is repeated. Per observed state:
 * <ul>
 *   <li>terminal or CANCELING: logs the state and returns without changing anything;</li>
 *   <li>CREATED or DEPLOYING: on a successful transition stores {@code cause} as the failure
 *       cause and returns; the invokable is not cancelled;</li>
 *   <li>RUNNING: on a successful transition, and only if an invokable exists and
 *       {@code invokableHasBeenCanceled.compareAndSet(false, true)} succeeds, stores
 *       {@code cause} and starts daemon threads in the executing thread's thread group:
 *       a canceler (which is handed the produced partitions and input gates), an
 *       interrupter (only if {@code invokable.shouldInterruptOnCancel()}), and a watchdog
 *       (only if {@code taskCancellationTimeout > 0});</li>
 *   <li>any other state: throws.</li>
 * </ul>
 *
 * @param targetState the state to transition to (presumably CANCELING or FAILED — confirm with callers)
 * @param cause passed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is in a state not handled above
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	while (true) {
		// snapshot the state once per iteration so all checks below see the same value
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compareAndSet ensures the cancellation threads are started at most once
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(
							LOG,
							invokable,
							executingThread,
							taskNameWithSubtask,
							producedPartitions,
							inputGates);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout,
								LOG);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				// the transition succeeded, so we are done even if no threads had to be started
				return;
			}
			// transition failed: fall through to the next loop iteration and re-read the state
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}