Java Code Examples for org.apache.flink.runtime.concurrent.FutureUtils#assertNoException()

The following examples show how to use org.apache.flink.runtime.concurrent.FutureUtils#assertNoException(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: Task.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the producer state of the given result partition from the
 * {@code partitionProducerStateChecker} and forwards the outcome to the given consumer.
 *
 * @param intermediateDataSetId ID passed through to the producer state checker
 * @param resultPartitionId ID of the result partition passed through to the producer state checker
 * @param responseConsumer receives the response handle; it is invoked through {@code executor}
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<ExecutionState> producerState = partitionProducerStateChecker
		.requestPartitionProducerState(jobId, intermediateDataSetId, resultPartitionId);

	// handle() hands both the state and a potential failure to the response handle constructor
	final CompletableFuture<PartitionProducerStateResponseHandle> responseHandle =
		producerState.handle(PartitionProducerStateResponseHandle::new);

	final CompletableFuture<Void> responseDelivered =
		responseHandle.thenAcceptAsync(responseConsumer, executor);

	FutureUtils.assertNoException(responseDelivered);
}
 
Example 2
Source File: Task.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the producer state of a result partition via the
 * {@code partitionProducerStateChecker} and passes the result, wrapped in a
 * {@code PartitionProducerStateResponseHandle}, to {@code responseConsumer}.
 *
 * @param intermediateDataSetId ID forwarded to the producer state checker
 * @param resultPartitionId ID of the result partition forwarded to the producer state checker
 * @param responseConsumer consumer of the response handle, run asynchronously on {@code executor}
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<ExecutionState> stateFuture =
		partitionProducerStateChecker.requestPartitionProducerState(jobId, intermediateDataSetId, resultPartitionId);

	final CompletableFuture<Void> consumed = stateFuture
		.handle(PartitionProducerStateResponseHandle::new)
		.thenAcceptAsync(responseConsumer, executor);

	FutureUtils.assertNoException(consumed);
}
 
Example 3
Source File: MiniCluster.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the dispatcher/resource-manager components, adds them to
 * {@code dispatcherResourceManagerComponents} and registers the follow-up actions that run
 * when their shut-down futures complete.
 *
 * @param configuration configuration passed to the component factory method
 * @param dispatcherResourceManagreComponentRpcServiceFactory RPC service factory passed to the component factory method
 * @param metricQueryServiceRetriever retriever passed to the component factory method
 * @throws Exception declared by this method; the visible code does not show which call throws
 */
@GuardedBy("lock")
private void setupDispatcherResourceManagerComponents(Configuration configuration, RpcServiceFactory dispatcherResourceManagreComponentRpcServiceFactory, MetricQueryServiceRetriever metricQueryServiceRetriever) throws Exception {
	dispatcherResourceManagerComponents.addAll(
		createDispatcherResourceManagerComponents(
			configuration,
			dispatcherResourceManagreComponentRpcServiceFactory,
			haServices,
			blobServer,
			heartbeatServices,
			metricRegistry,
			metricQueryServiceRetriever,
			new ShutDownFatalErrorHandler()));

	final Collection<CompletableFuture<ApplicationStatus>> componentShutDownFutures =
		new ArrayList<>(dispatcherResourceManagerComponents.size());

	for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
		final CompletableFuture<ApplicationStatus> componentShutDown = component.getShutDownFuture();
		// close the component once its own shut-down future completes normally
		FutureUtils.assertNoException(componentShutDown.thenRun(component::closeAsync));
		componentShutDownFutures.add(componentShutDown);
	}

	// close this instance once the combined future over all components completes
	FutureUtils.assertNoException(
		FutureUtils.completeAll(componentShutDownFutures).thenRun(this::closeAsync));
}
 
Example 4
Source File: ExecutionGraph.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Moves the job to {@code FAILING}, cancels all vertices and, once the cancellation future
 * completes, moves the job to {@code FAILED}.
 *
 * <p>The call is a no-op if the job is already {@code FAILING} or in a globally terminal state.
 *
 * @param cause the failure that is recorded for the state transitions and as the failure cause
 */
public void failJob(Throwable cause) {
	final boolean alreadyFailingOrTerminal =
		state == JobStatus.FAILING || state.isGloballyTerminalState();
	if (alreadyFailingOrTerminal) {
		return;
	}

	transitionState(JobStatus.FAILING, cause);
	initFailureCause(cause);

	// The callback runs regardless of how the cancellation completed; its arguments are unused.
	final CompletableFuture<Void> verticesCancelled = cancelVerticesAsync().whenComplete(
		(ignored, cancellationFailure) -> {
			transitionState(JobStatus.FAILED, cause);
			onTerminalState(JobStatus.FAILED);
		});

	FutureUtils.assertNoException(verticesCancelled);
}
 
Example 5
Source File: Dispatcher.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a handler on the runner's result future and then starts the runner.
 *
 * <p>The handler runs on the main thread executor. It only reacts if the given runner is still
 * the one registered in {@code jobManagerRunnerFutures} for the job.
 *
 * @param jobManagerRunner the runner to start
 * @return the same runner instance that was passed in
 * @throws Exception declared by this method; presumably raised by {@code start()} — verify
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// check if we are still the active JobManagerRunner by checking the identity
				final CompletableFuture<JobManagerRunner> registeredRunnerFuture = jobManagerRunnerFutures.get(jobId);
				JobManagerRunner activeRunner = null;
				if (registeredRunnerFuture != null) {
					activeRunner = registeredRunnerFuture.getNow(null);
				}

				//noinspection ObjectEquality
				if (jobManagerRunner != activeRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result graph: classify the failure after unwrapping the completion exception
				final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
				if (strippedThrowable instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, strippedThrowable);
				}

				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}
 
Example 6
Source File: DefaultScheduler.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the handler that deploys every given deployment handle.
 *
 * <p>The returned function first propagates a non-null throwable. It then checks, for each
 * handle, that the logical slot future is already done and attaches the deploy-or-handle-error
 * callback to it.
 *
 * @param deploymentHandles the handles to deploy
 * @return a handler that always returns {@code null} when it completes normally
 */
private BiFunction<Void, Throwable, Void> deployAll(final List<DeploymentHandle> deploymentHandles) {
	return (ignored, failure) -> {
		propagateIfNonNull(failure);

		for (final DeploymentHandle handle : deploymentHandles) {
			final CompletableFuture<LogicalSlot> slotFuture = handle
				.getSlotExecutionVertexAssignment()
				.getLogicalSlotFuture();

			// the slot future is expected to be complete at this point
			checkState(slotFuture.isDone());

			FutureUtils.assertNoException(slotFuture.handle(deployOrHandleError(handle)));
		}

		return null;
	};
}
 
Example 7
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the partition infos of the task with the given execution attempt ID.
 *
 * <p>Each update is submitted to the RPC service's executor. If no task is found for the
 * attempt ID, the update is discarded. The call is acknowledged immediately in both cases.
 *
 * @param executionAttemptID identifies the task whose input partitions are updated
 * @param partitionInfos the partition infos to apply
 * @param timeout not used by this implementation
 * @return an already completed future holding the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
		return CompletableFuture.completedFuture(Acknowledge.get());
	}

	for (final PartitionInfo info : partitionInfos) {
		// Run asynchronously because it might be blocking
		FutureUtils.assertNoException(
			CompletableFuture.runAsync(
				() -> {
					try {
						final boolean updated = shuffleEnvironment.updatePartitionInfo(executionAttemptID, info);
						if (!updated) {
							log.debug(
								"Discard update for input gate partition {} of result {} in task {}. " +
									"The partition is no longer available.",
								info.getShuffleDescriptor().getResultPartitionID(),
								info.getIntermediateDataSetID(),
								executionAttemptID);
						}
					} catch (IOException | InterruptedException e) {
						// NOTE(review): the interrupt flag is not restored here — confirm this is intended
						log.error(
							"Could not update input data location for task {}. Trying to fail task.",
							task.getTaskInfo().getTaskName(),
							e);
						task.failExternally(e);
					}
				},
				getRpcService().getExecutor()));
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}
 
Example 8
Source File: DefaultDispatcherRunner.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Confirms leadership with the leader election service once the new dispatcher leader process
 * has published its leader address.
 *
 * @param leaderSessionID session ID for which leadership is confirmed
 * @param newDispatcherLeaderProcess process whose leader address future is observed
 */
private void forwardConfirmLeaderSessionFuture(UUID leaderSessionID, DispatcherLeaderProcess newDispatcherLeaderProcess) {
	final CompletableFuture<Void> leadershipConfirmed = newDispatcherLeaderProcess
		.getLeaderAddressFuture()
		.thenAccept(address -> {
			// only confirm if leadership for this session is still held when the address arrives
			if (leaderElectionService.hasLeadership(leaderSessionID)) {
				leaderElectionService.confirmLeadership(leaderSessionID, address);
			}
		});

	FutureUtils.assertNoException(leadershipConfirmed);
}
 
Example 9
Source File: DefaultDispatcherRunner.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Stops the current dispatcher leader process, creates a new one for the given session and
 * starts it after the previous process's termination future has completed.
 *
 * @param leaderSessionID session ID the new dispatcher leader process is created for
 */
private void startNewDispatcherLeaderProcess(UUID leaderSessionID) {
	stopDispatcherLeaderProcess();

	final DispatcherLeaderProcess newProcess = createNewDispatcherLeaderProcess(leaderSessionID);
	dispatcherLeaderProcess = newProcess;

	// bind start() to the local reference so a later reassignment of the field has no effect
	FutureUtils.assertNoException(
		previousDispatcherLeaderProcessTerminationFuture.thenRun(newProcess::start));
}
 
Example 10
Source File: Dispatcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the result handler to the given runner and starts it.
 *
 * <p>The handler is executed on the main thread executor. It ignores the result if another
 * runner is registered in {@code jobManagerRunnerFutures} for the same job.
 *
 * @param jobManagerRunner the runner to start
 * @return the runner that was passed in
 * @throws Exception declared by this method; presumably raised by {@code start()} — verify
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// check if we are still the active JobManagerRunner by checking the identity
				final CompletableFuture<JobManagerRunner> registeredRunnerFuture = jobManagerRunnerFutures.get(jobId);
				final JobManagerRunner currentJobManagerRunner =
					registeredRunnerFuture == null ? null : registeredRunnerFuture.getNow(null);

				//noinspection ObjectEquality
				if (jobManagerRunner != currentJobManagerRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result graph: decide based on the unwrapped failure
				final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
				if (strippedThrowable instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, strippedThrowable);
				}

				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}
 
Example 11
Source File: AdaptedRestartPipelinedRegionStrategyNG.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Cancels the given vertices, then resets and reschedules them on the job master main thread
 * executor. The outcome of that chain is passed to the {@code failGlobalOnError()} handler.
 *
 * @param verticesToRestart IDs of the execution vertices to restart
 */
@VisibleForTesting
protected void restartTasks(final Set<ExecutionVertexID> verticesToRestart) {
	// capture the versions before cancelling; they are handed to the reset-and-reschedule step
	final long modVersionAtRestart = executionGraph.getGlobalModVersion();
	final Set<ExecutionVertexVersion> recordedVersions = new HashSet<>(
		executionVertexVersioner.recordVertexModifications(verticesToRestart).values());

	FutureUtils.assertNoException(
		cancelTasks(verticesToRestart)
			.thenComposeAsync(
				resetAndRescheduleTasks(modVersionAtRestart, recordedVersions),
				executionGraph.getJobMasterMainThreadExecutor())
			.handle(failGlobalOnError()));
}
 
Example 12
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the given partition infos to the task identified by the execution attempt ID.
 *
 * <p>If the task is found, one asynchronous update per partition info is submitted to the RPC
 * service's executor. Otherwise the update is discarded. The acknowledgement is returned
 * immediately either way.
 *
 * @param executionAttemptID identifies the task whose input partitions are updated
 * @param partitionInfos the partition infos to apply
 * @param timeout not used by this implementation
 * @return an already completed future holding the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task != null) {
		for (final PartitionInfo partitionInfo : partitionInfos) {
			final Runnable partitionUpdate = () -> {
				try {
					if (!shuffleEnvironment.updatePartitionInfo(executionAttemptID, partitionInfo)) {
						log.debug(
							"Discard update for input gate partition {} of result {} in task {}. " +
								"The partition is no longer available.",
							partitionInfo.getShuffleDescriptor().getResultPartitionID(),
							partitionInfo.getIntermediateDataSetID(),
							executionAttemptID);
					}
				} catch (IOException | InterruptedException e) {
					// a failed update is reported and the task is failed from the outside
					log.error(
						"Could not update input data location for task {}. Trying to fail task.",
						task.getTaskInfo().getTaskName(),
						e);
					task.failExternally(e);
				}
			};

			// Run asynchronously because it might be blocking
			FutureUtils.assertNoException(
				CompletableFuture.runAsync(partitionUpdate, getRpcService().getExecutor()));
		}
	} else {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}
 
Example 13
Source File: Dispatcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Hooks the result handling onto the runner's result future and then starts the runner.
 *
 * <p>The handler runs on the main thread executor and acts only while the given runner is
 * still the one registered in {@code jobManagerRunnerFutures} for its job.
 *
 * @param jobManagerRunner the runner to start
 * @return the runner that was passed in
 * @throws Exception declared by this method; presumably raised by {@code start()} — verify
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	final CompletableFuture<Void> resultHandled = jobManagerRunner.getResultFuture().handleAsync(
		(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
			// check if we are still the active JobManagerRunner by checking the identity
			final CompletableFuture<JobManagerRunner> runnerFuture = jobManagerRunnerFutures.get(jobId);
			final JobManagerRunner currentRunner;
			if (runnerFuture != null) {
				currentRunner = runnerFuture.getNow(null);
			} else {
				currentRunner = null;
			}

			//noinspection ObjectEquality
			if (jobManagerRunner == currentRunner) {
				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
				} else {
					final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);

					if (strippedThrowable instanceof JobNotFinishedException) {
						jobNotFinished(jobId);
					} else {
						jobMasterFailed(jobId, strippedThrowable);
					}
				}
			} else {
				log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
			}

			return null;
		},
		getMainThreadExecutor());

	FutureUtils.assertNoException(resultHandled);

	jobManagerRunner.start();

	return jobManagerRunner;
}
 
Example 14
Source File: Dispatcher.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a recovered job and routes the outcome of starting it to the recovered-job start error
 * handler created for the job's ID.
 *
 * @param recoveredJob the recovered job graph; checked with {@code checkNotNull}
 */
void runRecoveredJob(final JobGraph recoveredJob) {
	checkNotNull(recoveredJob);

	FutureUtils.assertNoException(
		runJob(recoveredJob).handle(
			handleRecoveredJobStartError(recoveredJob.getJobID())));
}
 
Example 15
Source File: ExecutionGraph.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Try to restart the job. If we cannot restart the job (e.g. no more restarts allowed), then
 * try to fail the job. This operation is only permitted if the current state is FAILING or
 * RESTARTING.
 *
 * <p>A restart is attempted only if the failure cause is not a {@code SuppressRestartsException}
 * and the restart strategy reports that it can restart.
 *
 * @param globalModVersionForRestart version handed to the {@code ExecutionGraphRestartCallback}
 *                                   that is created when a restart is triggered
 * @return true if the operation could be executed; false if the current state is neither FAILING
 *         nor RESTARTING, or if a concurrent job status change occurred
 */
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	// read the state once; the transitions below are attempted from this value
	JobStatus currentState = state;

	if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) {
		final Throwable failureCause = this.failureCause;

		synchronized (progressLock) {
			// the debug variant passes failureCause as an extra trailing argument beyond the two placeholders
			if (LOG.isDebugEnabled()) {
				LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause);
			} else {
				LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
			}

			final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException);
			final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart();
			boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart;

			// short-circuit: the state transition is only attempted in the branch matching isRestartable
			if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) {
				LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

				RestartCallback restarter = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
				// a failure of the restart itself is turned into a global failure
				FutureUtils.assertNoException(
					restartStrategy
						.restart(restarter, getJobMasterMainThreadExecutor())
						.exceptionally((throwable) -> {
							failGlobal(throwable);
							return null;
						}));
				return true;
			}
			else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) {
				// build the human-readable reason(s) why no restart happened; null entries mean "not the reason"
				final String cause1 = isFailureCauseAllowingRestart ? null :
					"a type of SuppressRestartsException was thrown";
				final String cause2 = isRestartStrategyAllowingRestart ? null :
					"the restart strategy prevented it";

				LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
					StringUtils.concatenateWithAnd(cause1, cause2), failureCause);
				onTerminalState(JobStatus.FAILED);

				return true;
			} else {
				// we must have changed the state concurrently, thus we cannot complete this operation
				return false;
			}
		}
	} else {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}
}
 
Example 16
Source File: ExecutionGraph.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph' state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>The call returns without doing anything if the job is already FAILING, SUSPENDED or in a
 * globally terminal state.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {

	assertRunningInJobMasterMainThread();

	// retry until the state is one we leave untouched or the transition to FAILING succeeds
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// copy the field into a local so the null check and the cancel call see the same reference
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
			FutureUtils.assertNoException(allTerminal.handle(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation failed: go from FAILING to FAILED, wrapping the cancellation error
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// cancellation succeeded: continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
					return null;
				}));

			return;
		}

		// else: concurrent change to execution state, retry
	}
}
 
Example 17
Source File: DefaultScheduler.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Assigns resources for all given deployment handles and, once that assignment future
 * completes, hands the result to the deploy-all handler.
 *
 * @param deploymentHandles the handles to assign resources for and deploy
 */
private void waitForAllSlotsAndDeploy(final List<DeploymentHandle> deploymentHandles) {
	final CompletableFuture<Void> allResourcesAssigned = assignAllResources(deploymentHandles);
	final CompletableFuture<Void> allDeployed = allResourcesAssigned.handle(deployAll(deploymentHandles));

	FutureUtils.assertNoException(allDeployed);
}
 
Example 18
Source File: ExecutionGraph.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph' state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>When legacy scheduling is not in use, the failure is only forwarded to the
 * {@code internalTaskFailuresListener} and nothing else in this method runs. Otherwise the call
 * returns without doing anything if the job is already FAILING, SUSPENDED or in a globally
 * terminal state.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {
	// non-legacy scheduling: delegate to the listener and skip the handling below
	if (!isLegacyScheduling()) {
		internalTaskFailuresListener.notifyGlobalFailure(t);
		return;
	}

	assertRunningInJobMasterMainThread();

	// retry until the state is one we leave untouched or the transition to FAILING succeeds
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// copy the field into a local so the null check and the cancel call see the same reference
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
			FutureUtils.assertNoException(allTerminal.handle(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation failed: go from FAILING to FAILED, wrapping the cancellation error
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// cancellation succeeded: continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
					return null;
				}));

			return;
		}

		// else: concurrent change to execution state, retry
	}
}
 
Example 19
Source File: ExecutionGraph.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Try to restart the job. If we cannot restart the job (e.g. no more restarts allowed), then
 * try to fail the job. This operation is only permitted if the current state is FAILING or
 * RESTARTING.
 *
 * <p>When legacy scheduling is not in use, this method does nothing and returns true.
 *
 * <p>A restart is attempted only if the failure cause is not a {@code SuppressRestartsException}
 * and the restart strategy reports that it can restart.
 *
 * @param globalModVersionForRestart version handed to the {@code ExecutionGraphRestartCallback}
 *                                   that is created when a restart is triggered
 * @return true if the operation could be executed (or legacy scheduling is not in use); false if
 *         the current state is neither FAILING nor RESTARTING, or if a concurrent job status
 *         change occurred
 */
@Deprecated
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	// non-legacy scheduling: nothing to do here, report success
	if (!isLegacyScheduling()) {
		return true;
	}

	// read the state once; the transitions below are attempted from this value
	JobStatus currentState = state;

	if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) {
		final Throwable failureCause = this.failureCause;

		// the debug variant passes failureCause as an extra trailing argument beyond the two placeholders
		if (LOG.isDebugEnabled()) {
			LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause);
		} else {
			LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
		}

		final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException);
		final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart();
		boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart;

		// short-circuit: the state transition is only attempted in the branch matching isRestartable
		if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) {
			LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

			RestartCallback restarter = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
			// a failure of the restart itself is turned into a global failure
			FutureUtils.assertNoException(
				restartStrategy
					.restart(restarter, getJobMasterMainThreadExecutor())
					.exceptionally((throwable) -> {
							failGlobal(throwable);
							return null;
						}));
			return true;
		}
		else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) {
			// build the human-readable reason(s) why no restart happened; null entries mean "not the reason"
			final String cause1 = isFailureCauseAllowingRestart ? null :
				"a type of SuppressRestartsException was thrown";
			final String cause2 = isRestartStrategyAllowingRestart ? null :
				"the restart strategy prevented it";

			LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
				StringUtils.concatenateWithAnd(cause1, cause2), failureCause);
			onTerminalState(JobStatus.FAILED);

			return true;
		} else {
			// we must have changed the state concurrently, thus we cannot complete this operation
			return false;
		}
	} else {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}
}