org.apache.flink.runtime.concurrent.FutureUtils#assertNoException

Source File: Task.java From flink with Apache License 2.0

6 votes

/**
 * Requests the producer state of a result partition and forwards the answer to the given consumer.
 *
 * <p>The outcome of the request, whether it is a state or a failure, is wrapped into a
 * {@code PartitionProducerStateResponseHandle} and handed to {@code responseConsumer}
 * asynchronously on {@code executor}.
 *
 * @param intermediateDataSetId ID of the intermediate data set, passed on to the state checker
 * @param resultPartitionId ID of the result partition whose producer state is requested
 * @param responseConsumer callback that receives the response handle
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<ExecutionState> producerStateFuture =
		partitionProducerStateChecker.requestPartitionProducerState(jobId, intermediateDataSetId, resultPartitionId);

	// handle(...) receives both the value and the failure, so the handle is built in either case
	final CompletableFuture<PartitionProducerStateResponseHandle> responseHandleFuture =
		producerStateFuture.handle(PartitionProducerStateResponseHandle::new);

	final CompletableFuture<Void> consumptionFuture =
		responseHandleFuture.thenAcceptAsync(responseConsumer, executor);

	// the consumer itself is not expected to fail
	FutureUtils.assertNoException(consumptionFuture);
}

Source File: Task.java From flink with Apache License 2.0

6 votes

/**
 * Looks up the producer state of the given result partition via the
 * {@code partitionProducerStateChecker} and delivers the result to {@code responseConsumer}.
 *
 * <p>Both a successful state and a failed request end up in a
 * {@code PartitionProducerStateResponseHandle}; the consumer is invoked with that handle
 * on {@code executor}.
 *
 * @param intermediateDataSetId ID of the intermediate data set, forwarded to the checker
 * @param resultPartitionId ID of the result partition to query
 * @param responseConsumer receiver of the response handle
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<ExecutionState> stateRequest =
		partitionProducerStateChecker.requestPartitionProducerState(
			jobId,
			intermediateDataSetId,
			resultPartitionId);

	final CompletableFuture<Void> deliveredToConsumer = stateRequest
		.handle(PartitionProducerStateResponseHandle::new)
		.thenAcceptAsync(responseConsumer, executor);

	FutureUtils.assertNoException(deliveredToConsumer);
}

Source File: MiniCluster.java From flink with Apache License 2.0

6 votes

/**
 * Creates the dispatcher/resource manager components, registers them in
 * {@code dispatcherResourceManagerComponents} and wires up their shut down handling.
 *
 * <p>Each component is closed once its own shut down future completes. This instance is
 * closed via {@code closeAsync} once {@code FutureUtils.completeAll} over all shut down
 * futures completes.
 *
 * @param configuration configuration passed to the component factory method
 * @param dispatcherResourceManagreComponentRpcServiceFactory RPC service factory passed to the component factory method
 * @param metricQueryServiceRetriever metric query service retriever passed to the component factory method
 * @throws Exception if creating the components fails
 */
@GuardedBy("lock")
private void setupDispatcherResourceManagerComponents(
		Configuration configuration,
		RpcServiceFactory dispatcherResourceManagreComponentRpcServiceFactory,
		MetricQueryServiceRetriever metricQueryServiceRetriever) throws Exception {

	dispatcherResourceManagerComponents.addAll(
		createDispatcherResourceManagerComponents(
			configuration,
			dispatcherResourceManagreComponentRpcServiceFactory,
			haServices,
			blobServer,
			heartbeatServices,
			metricRegistry,
			metricQueryServiceRetriever,
			new ShutDownFatalErrorHandler()));

	final Collection<CompletableFuture<ApplicationStatus>> componentShutDownFutures =
		new ArrayList<>(dispatcherResourceManagerComponents.size());

	for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
		final CompletableFuture<ApplicationStatus> componentShutDownFuture = component.getShutDownFuture();

		// close the component as soon as it signals its shut down
		final CompletableFuture<Void> closedAfterShutDown = componentShutDownFuture.thenRun(component::closeAsync);
		FutureUtils.assertNoException(closedAfterShutDown);

		componentShutDownFutures.add(componentShutDownFuture);
	}

	// close this instance once all component shut down futures have completed
	final CompletableFuture<Void> closedAfterAllShutDown =
		FutureUtils.completeAll(componentShutDownFutures).thenRun(this::closeAsync);
	FutureUtils.assertNoException(closedAfterAllShutDown);
}

Source File: ExecutionGraph.java From flink with Apache License 2.0

5 votes

/**
 * Fails the job with the given cause.
 *
 * <p>Does nothing if the job is already {@code FAILING} or in a globally terminal state.
 * Otherwise the job transitions to {@code FAILING}, the failure cause is recorded and all
 * vertices are cancelled. Once the cancellation future completes (normally or exceptionally)
 * the job transitions to {@code FAILED} and the terminal state handling runs.
 *
 * @param cause the reason for failing the job
 */
public void failJob(Throwable cause) {
	final boolean alreadyFailingOrTerminal =
		state == JobStatus.FAILING || state.isGloballyTerminalState();

	if (alreadyFailingOrTerminal) {
		return;
	}

	transitionState(JobStatus.FAILING, cause);
	initFailureCause(cause);

	// whenComplete runs the callback for both outcomes of the cancellation and keeps the
	// original outcome, so a failed cancellation still reaches assertNoException
	final CompletableFuture<Void> failedAfterCancellation =
		cancelVerticesAsync().whenComplete((ignored, cancellationFailure) -> {
			transitionState(JobStatus.FAILED, cause);
			onTerminalState(JobStatus.FAILED);
		});

	FutureUtils.assertNoException(failedAfterCancellation);
}

Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Registers a result handler on the given runner and then starts it.
 *
 * <p>The handler runs on the main thread executor. It only reacts if the given runner is
 * still the one registered for the job in {@code jobManagerRunnerFutures}; otherwise it
 * just logs that a newer runner exists.
 *
 * @param jobManagerRunner the runner to start
 * @return the same runner instance that was passed in
 * @throws Exception if starting the runner fails
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// check if we are still the active JobManagerRunner by checking the identity
				final CompletableFuture<JobManagerRunner> registeredRunnerFuture = jobManagerRunnerFutures.get(jobId);
				final JobManagerRunner registeredRunner;
				if (registeredRunnerFuture == null) {
					registeredRunner = null;
				} else {
					registeredRunner = registeredRunnerFuture.getNow(null);
				}

				//noinspection ObjectEquality
				if (jobManagerRunner != registeredRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result, so the future completed exceptionally
				final Throwable failure = ExceptionUtils.stripCompletionException(throwable);
				if (failure instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, failure);
				}
				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Source File: DefaultScheduler.java From flink with Apache License 2.0

5 votes

/**
 * Builds the callback that deploys all given deployment handles.
 *
 * <p>The returned function first propagates a non-null throwable. For each handle it then
 * checks that the logical slot future is already done and attaches the handler obtained
 * from {@code deployOrHandleError} for that handle.
 *
 * @param deploymentHandles the handles to deploy
 * @return a function suitable for {@code CompletableFuture#handle}; it always returns {@code null}
 */
private BiFunction<Void, Throwable, Void> deployAll(final List<DeploymentHandle> deploymentHandles) {
	return (ignored, throwable) -> {
		propagateIfNonNull(throwable);

		for (final DeploymentHandle handle : deploymentHandles) {
			final CompletableFuture<LogicalSlot> logicalSlotFuture =
				handle.getSlotExecutionVertexAssignment().getLogicalSlotFuture();

			// the slot future must already be complete at this point
			checkState(logicalSlotFuture.isDone());

			FutureUtils.assertNoException(logicalSlotFuture.handle(deployOrHandleError(handle)));
		}

		return null;
	};
}

Source File: TaskExecutor.java From flink with Apache License 2.0

5 votes

/**
 * Updates the input partition information of the task with the given execution attempt ID.
 *
 * <p>If no such task is registered in the task slot table, the update is discarded. Otherwise
 * one asynchronous update per partition info is submitted to the RPC service's executor. The
 * call is acknowledged immediately in both cases, without waiting for the updates.
 *
 * @param executionAttemptID identifies the task whose partitions are updated
 * @param partitionInfos the partition infos to apply
 * @param timeout not used by this implementation
 * @return an already completed future holding the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
		return CompletableFuture.completedFuture(Acknowledge.get());
	}

	for (final PartitionInfo partitionInfo : partitionInfos) {
		final Runnable updateAction = () -> {
			try {
				final boolean updated = shuffleEnvironment.updatePartitionInfo(executionAttemptID, partitionInfo);
				if (!updated) {
					log.debug(
						"Discard update for input gate partition {} of result {} in task {}. " +
							"The partition is no longer available.",
						partitionInfo.getShuffleDescriptor().getResultPartitionID(),
						partitionInfo.getIntermediateDataSetID(),
						executionAttemptID);
				}
			} catch (IOException | InterruptedException e) {
				// a failed update is reported by failing the task, not by failing the future
				log.error(
					"Could not update input data location for task {}. Trying to fail task.",
					task.getTaskInfo().getTaskName(),
					e);
				task.failExternally(e);
			}
		};

		// Run asynchronously because it might be blocking
		FutureUtils.assertNoException(
			CompletableFuture.runAsync(updateAction, getRpcService().getExecutor()));
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}

Source File: DefaultDispatcherRunner.java From flink with Apache License 2.0

5 votes

/**
 * Confirms leadership for the given session once the new leader process has published its
 * leader address.
 *
 * <p>The confirmation is skipped if the leader election service no longer reports leadership
 * for {@code leaderSessionID} at the time the address becomes available.
 *
 * @param leaderSessionID the leader session to confirm
 * @param newDispatcherLeaderProcess the process whose leader address future is observed
 */
private void forwardConfirmLeaderSessionFuture(UUID leaderSessionID, DispatcherLeaderProcess newDispatcherLeaderProcess) {
	final CompletableFuture<Void> leadershipConfirmed = newDispatcherLeaderProcess
		.getLeaderAddressFuture()
		.thenAccept(address -> {
			if (!leaderElectionService.hasLeadership(leaderSessionID)) {
				return;
			}
			leaderElectionService.confirmLeadership(leaderSessionID, address);
		});

	FutureUtils.assertNoException(leadershipConfirmed);
}

Source File: DefaultDispatcherRunner.java From flink with Apache License 2.0

5 votes

/**
 * Stops the current dispatcher leader process, creates a new one for the given leader session
 * and starts it once {@code previousDispatcherLeaderProcessTerminationFuture} has completed.
 *
 * @param leaderSessionID the leader session the new process is created for
 */
private void startNewDispatcherLeaderProcess(UUID leaderSessionID) {
	stopDispatcherLeaderProcess();

	dispatcherLeaderProcess = createNewDispatcherLeaderProcess(leaderSessionID);

	// capture the process in a local so the deferred start is bound to this exact instance
	// rather than to whatever the field refers to later
	final DispatcherLeaderProcess processToStart = dispatcherLeaderProcess;

	final CompletableFuture<Void> startedAfterPreviousTermination =
		previousDispatcherLeaderProcessTerminationFuture.thenRun(processToStart::start);

	FutureUtils.assertNoException(startedAfterPreviousTermination);
}

Source File: Dispatcher.java From flink with Apache License 2.0

5 votes

/**
 * Attaches the result handling to the given runner and starts it afterwards.
 *
 * <p>The result handler is executed on the main thread executor. It acts only when the
 * given runner is identical to the runner currently registered for the job; otherwise a
 * debug message is logged.
 *
 * @param jobManagerRunner the runner to start
 * @return the runner that was passed in
 * @throws Exception if starting the runner fails
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// check if we are still the active JobManagerRunner by checking the identity
				final JobManagerRunner registeredRunner = Optional.ofNullable(jobManagerRunnerFutures.get(jobId))
					.map(runnerFuture -> runnerFuture.getNow(null))
					.orElse(null);

				//noinspection ObjectEquality
				if (jobManagerRunner != registeredRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// without a result the future must have completed exceptionally
				final Throwable failure = ExceptionUtils.stripCompletionException(throwable);
				if (failure instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, failure);
				}
				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Source File: AdaptedRestartPipelinedRegionStrategyNG.java From flink with Apache License 2.0

5 votes

/**
 * Restarts the given vertices.
 *
 * <p>The global modification version and the recorded vertex versions are captured up front.
 * The tasks are then cancelled, and afterwards reset and rescheduled on the job master main
 * thread executor. The outcome of that chain is handed to the {@code failGlobalOnError()} handler.
 *
 * @param verticesToRestart IDs of the execution vertices to restart
 */
@VisibleForTesting
protected void restartTasks(final Set<ExecutionVertexID> verticesToRestart) {
	// capture the versions before cancelling anything
	final long modVersionAtRestart = executionGraph.getGlobalModVersion();
	final Set<ExecutionVertexVersion> recordedVersions =
		new HashSet<>(executionVertexVersioner.recordVertexModifications(verticesToRestart).values());

	FutureUtils.assertNoException(
		cancelTasks(verticesToRestart)
			.thenComposeAsync(
				resetAndRescheduleTasks(modVersionAtRestart, recordedVersions),
				executionGraph.getJobMasterMainThreadExecutor())
			.handle(failGlobalOnError()));
}

Source File: TaskExecutor.java From flink with Apache License 2.0

5 votes

/**
 * Applies the given partition infos to the task identified by {@code executionAttemptID}.
 *
 * <p>When the task is not found in the task slot table the update is dropped. Otherwise each
 * partition info is applied in its own asynchronous action on the RPC service's executor.
 * The returned future is already complete in both cases and does not wait for the updates.
 *
 * @param executionAttemptID identifies the task to update
 * @param partitionInfos partition infos to apply to the task's inputs
 * @param timeout not used by this implementation
 * @return a completed future carrying the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
		return CompletableFuture.completedFuture(Acknowledge.get());
	}

	for (final PartitionInfo info : partitionInfos) {
		final Runnable applyUpdate = () -> {
			try {
				final boolean applied = shuffleEnvironment.updatePartitionInfo(executionAttemptID, info);
				if (applied) {
					return;
				}
				log.debug(
					"Discard update for input gate partition {} of result {} in task {}. " +
						"The partition is no longer available.",
					info.getShuffleDescriptor().getResultPartitionID(),
					info.getIntermediateDataSetID(),
					executionAttemptID);
			} catch (IOException | InterruptedException e) {
				// the task is failed instead of completing the future exceptionally
				log.error(
					"Could not update input data location for task {}. Trying to fail task.",
					task.getTaskInfo().getTaskName(),
					e);
				task.failExternally(e);
			}
		};

		// Run asynchronously because it might be blocking
		FutureUtils.assertNoException(
			CompletableFuture.runAsync(applyUpdate, getRpcService().getExecutor()));
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}

Source File: Dispatcher.java From flink with Apache License 2.0

5 votes

/**
 * Installs the result handler for the given runner and starts the runner.
 *
 * <p>The handler runs on the main thread executor and compares the given runner by identity
 * with the runner currently registered for the job. Only if they match is the result (or the
 * failure) processed; otherwise the handler logs that a newer runner exists.
 *
 * @param jobManagerRunner the runner to start
 * @return the given runner
 * @throws Exception if starting the runner fails
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// check if we are still the active JobManagerRunner by checking the identity
				final CompletableFuture<JobManagerRunner> currentRunnerFuture = jobManagerRunnerFutures.get(jobId);
				final JobManagerRunner currentRunner =
					currentRunnerFuture == null ? null : currentRunnerFuture.getNow(null);

				//noinspection ObjectEquality
				final boolean isStillActiveRunner = jobManagerRunner == currentRunner;

				if (!isStillActiveRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
				} else if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
				} else {
					// no result available, so inspect the unwrapped failure
					final Throwable unwrappedFailure = ExceptionUtils.stripCompletionException(throwable);

					if (unwrappedFailure instanceof JobNotFinishedException) {
						jobNotFinished(jobId);
					} else {
						jobMasterFailed(jobId, unwrappedFailure);
					}
				}

				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Source File: Dispatcher.java From flink with Apache License 2.0

4 votes

/**
 * Runs a recovered job and routes the outcome of starting it to the handler created by
 * {@code handleRecoveredJobStartError} for the job's ID.
 *
 * @param recoveredJob the recovered job graph; must not be {@code null}
 */
void runRecoveredJob(final JobGraph recoveredJob) {
	checkNotNull(recoveredJob);

	FutureUtils.assertNoException(
		runJob(recoveredJob).handle(
			handleRecoveredJobStartError(recoveredJob.getJobID())));
}

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Attempts to restart the job, or fails it if a restart is not possible (for example because
 * the restart strategy does not allow further restarts). The operation is only carried out
 * when the job is currently in state FAILING or RESTARTING.
 *
 * @param globalModVersionForRestart global modification version handed to the restart callback
 * @return true if the operation could be executed; false if the job was in a different state
 *         or a concurrent job status change occurred
 */
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	final JobStatus currentState = state;

	if (currentState != JobStatus.FAILING && currentState != JobStatus.RESTARTING) {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}

	final Throwable failureCause = this.failureCause;

	synchronized (progressLock) {
		if (LOG.isDebugEnabled()) {
			LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause);
		} else {
			LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
		}

		final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException);
		final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart();
		final boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart;

		if (isRestartable) {
			if (!transitionState(currentState, JobStatus.RESTARTING)) {
				// we must have changed the state concurrently, thus we cannot complete this operation
				return false;
			}

			LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

			final RestartCallback restartCallback = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
			FutureUtils.assertNoException(
				restartStrategy
					.restart(restartCallback, getJobMasterMainThreadExecutor())
					.exceptionally((restartFailure) -> {
						// a failing restart is turned into a global failure
						failGlobal(restartFailure);
						return null;
					}));
			return true;
		}

		if (!transitionState(currentState, JobStatus.FAILED, failureCause)) {
			// we must have changed the state concurrently, thus we cannot complete this operation
			return false;
		}

		final String suppressedReason = isFailureCauseAllowingRestart ? null :
			"a type of SuppressRestartsException was thrown";
		final String strategyReason = isRestartStrategyAllowingRestart ? null :
			"the restart strategy prevented it";

		LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
			StringUtils.concatenateWithAnd(suppressedReason, strategyReason), failureCause);
		onTerminalState(JobStatus.FAILED);

		return true;
	}
}

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Fails the execution graph globally. Such a failure is not handled by a specific failover
 * strategy; it leads to a full restart of all tasks.
 *
 * <p>A global failure is intended for situations in which the consistency of the execution
 * graph's state can no longer be guaranteed (for example after catching unexpected exceptions
 * that indicate a bug or an unexpected call race), so that a full restart is the safe way to
 * re-establish consistency.
 *
 * <p>Must be called from the job master main thread.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {

	assertRunningInJobMasterMainThread();

	while (true) {
		final JobStatus current = state;

		// stay in these states
		final boolean keepCurrentState = current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState();
		if (keepCurrentState) {
			return;
		}

		if (!transitionState(current, JobStatus.FAILING, t)) {
			// concurrent change to execution state, retry
			continue;
		}

		initFailureCause(t);

		// make sure no concurrent local or global actions interfere with the failover
		final long globalVersionForRestart = incrementGlobalModVersion();

		// cancel ongoing scheduling action
		final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;
		if (ongoingSchedulingFuture != null) {
			ongoingSchedulingFuture.cancel(false);
		}

		// we build a future that is complete once all vertices have reached a terminal state
		final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
		FutureUtils.assertNoException(allTerminal.handle(
			(Void ignored, Throwable cancellationFailure) -> {
				if (cancellationFailure == null) {
					allVerticesInTerminalState(globalVersionForRestart);
				} else {
					transitionState(
						JobStatus.FAILING,
						JobStatus.FAILED,
						new FlinkException("Could not cancel all execution job vertices properly.", cancellationFailure));
				}
				return null;
			}));

		return;
	}
}

Source File: DefaultScheduler.java From flink with Apache License 2.0

4 votes

/**
 * Assigns resources for all given deployment handles and, once that assignment completes,
 * runs the deployment callback created by {@code deployAll}.
 *
 * @param deploymentHandles the handles to assign resources for and deploy
 */
private void waitForAllSlotsAndDeploy(final List<DeploymentHandle> deploymentHandles) {
	final CompletableFuture<Void> deploymentFuture =
		assignAllResources(deploymentHandles).handle(deployAll(deploymentHandles));

	FutureUtils.assertNoException(deploymentFuture);
}

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Fails the execution graph globally. Such a failure is not handled by a specific failover
 * strategy; it leads to a full restart of all tasks.
 *
 * <p>A global failure is intended for situations in which the consistency of the execution
 * graph's state can no longer be guaranteed (for example after catching unexpected exceptions
 * that indicate a bug or an unexpected call race), so that a full restart is the safe way to
 * re-establish consistency.
 *
 * <p>When legacy scheduling is not in use, the failure is only reported to
 * {@code internalTaskFailuresListener} and nothing else happens here. Otherwise the method
 * must be called from the job master main thread.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {
	if (!isLegacyScheduling()) {
		internalTaskFailuresListener.notifyGlobalFailure(t);
		return;
	}

	assertRunningInJobMasterMainThread();

	while (true) {
		final JobStatus current = state;

		// stay in these states
		final boolean keepCurrentState = current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState();
		if (keepCurrentState) {
			return;
		}

		if (!transitionState(current, JobStatus.FAILING, t)) {
			// concurrent change to execution state, retry
			continue;
		}

		initFailureCause(t);

		// make sure no concurrent local or global actions interfere with the failover
		final long globalVersionForRestart = incrementGlobalModVersion();

		// cancel ongoing scheduling action
		final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;
		if (ongoingSchedulingFuture != null) {
			ongoingSchedulingFuture.cancel(false);
		}

		// we build a future that is complete once all vertices have reached a terminal state
		final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
		FutureUtils.assertNoException(allTerminal.handle(
			(Void ignored, Throwable cancellationFailure) -> {
				if (cancellationFailure == null) {
					allVerticesInTerminalState(globalVersionForRestart);
				} else {
					transitionState(
						JobStatus.FAILING,
						JobStatus.FAILED,
						new FlinkException("Could not cancel all execution job vertices properly.", cancellationFailure));
				}
				return null;
			}));

		return;
	}
}

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Attempts to restart the job, or fails it if a restart is not possible (for example because
 * the restart strategy does not allow further restarts). The operation is only carried out
 * when the job is currently in state FAILING or RESTARTING.
 *
 * <p>When legacy scheduling is not in use, this method does nothing and returns true.
 *
 * @param globalModVersionForRestart global modification version handed to the restart callback
 * @return true if the operation could be executed (or legacy scheduling is not in use); false
 *         if the job was in a different state or a concurrent job status change occurred
 */
@Deprecated
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	if (!isLegacyScheduling()) {
		return true;
	}

	final JobStatus currentState = state;

	if (currentState != JobStatus.FAILING && currentState != JobStatus.RESTARTING) {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}

	final Throwable failureCause = this.failureCause;

	if (LOG.isDebugEnabled()) {
		LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause);
	} else {
		LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
	}

	final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException);
	final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart();
	final boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart;

	if (isRestartable) {
		if (!transitionState(currentState, JobStatus.RESTARTING)) {
			// we must have changed the state concurrently, thus we cannot complete this operation
			return false;
		}

		LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

		final RestartCallback restartCallback = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
		FutureUtils.assertNoException(
			restartStrategy
				.restart(restartCallback, getJobMasterMainThreadExecutor())
				.exceptionally((restartFailure) -> {
					// a failing restart is turned into a global failure
					failGlobal(restartFailure);
					return null;
				}));
		return true;
	}

	if (!transitionState(currentState, JobStatus.FAILED, failureCause)) {
		// we must have changed the state concurrently, thus we cannot complete this operation
		return false;
	}

	final String suppressedReason = isFailureCauseAllowingRestart ? null :
		"a type of SuppressRestartsException was thrown";
	final String strategyReason = isRestartStrategyAllowingRestart ? null :
		"the restart strategy prevented it";

	LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
		StringUtils.concatenateWithAnd(suppressedReason, strategyReason), failureCause);
	onTerminalState(JobStatus.FAILED);

	return true;
}

Java Code Examples for org.apache.flink.runtime.concurrent.FutureUtils#assertNoException()