Java Code Examples for org.apache.flink.runtime.concurrent.FutureUtils#assertNoException()

The following examples show how to use org.apache.flink.runtime.concurrent.FutureUtils#assertNoException(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.

Example 1

Source File: Task.java From flink with Apache License 2.0

6 votes

/**
 * Requests the producer state of the given result partition and hands the outcome to
 * {@code responseConsumer}.
 *
 * <p>The outcome of the request (value or failure) is wrapped into a
 * {@code PartitionProducerStateResponseHandle}; the consumer is then invoked with that
 * handle on {@code executor}. The resulting future is passed to
 * {@code FutureUtils.assertNoException}.
 *
 * @param intermediateDataSetId id of the intermediate data set the partition belongs to
 * @param resultPartitionId id of the result partition whose producer state is requested
 * @param responseConsumer receiver of the response handle
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<Void> responseDelivered =
		partitionProducerStateChecker
			.requestPartitionProducerState(jobId, intermediateDataSetId, resultPartitionId)
			// handle(...) sees both the value and the failure, so the handle covers either outcome
			.handle(PartitionProducerStateResponseHandle::new)
			.thenAcceptAsync(responseConsumer, executor);

	FutureUtils.assertNoException(responseDelivered);
}

Example 2

Source File: Task.java From flink with Apache License 2.0

6 votes

/**
 * Requests the producer state of the given result partition and hands the outcome to
 * {@code responseConsumer}.
 *
 * <p>The outcome of the request (value or failure) is wrapped into a
 * {@code PartitionProducerStateResponseHandle}; the consumer is then invoked with that
 * handle on {@code executor}. The resulting future is passed to
 * {@code FutureUtils.assertNoException}.
 *
 * @param intermediateDataSetId id of the intermediate data set the partition belongs to
 * @param resultPartitionId id of the result partition whose producer state is requested
 * @param responseConsumer receiver of the response handle
 */
@Override
public void requestPartitionProducerState(
		final IntermediateDataSetID intermediateDataSetId,
		final ResultPartitionID resultPartitionId,
		Consumer<? super ResponseHandle> responseConsumer) {

	final CompletableFuture<Void> responseDelivered =
		partitionProducerStateChecker
			.requestPartitionProducerState(jobId, intermediateDataSetId, resultPartitionId)
			// handle(...) sees both the value and the failure, so the handle covers either outcome
			.handle(PartitionProducerStateResponseHandle::new)
			.thenAcceptAsync(responseConsumer, executor);

	FutureUtils.assertNoException(responseDelivered);
}

Example 3

Source File: MiniCluster.java From flink with Apache License 2.0

6 votes

/**
 * Creates the dispatcher/resource-manager components, adds them to
 * {@code dispatcherResourceManagerComponents}, and hooks up their shut-down futures.
 *
 * <p>For every registered component, {@code closeAsync} of that component is run after its
 * shut-down future completes normally. After the future returned by
 * {@code FutureUtils.completeAll} over all shut-down futures completes normally,
 * {@code closeAsync} of this instance is run.
 *
 * @param configuration configuration forwarded to the component factory method
 * @param dispatcherResourceManagreComponentRpcServiceFactory RPC service factory forwarded to the component factory method
 * @param metricQueryServiceRetriever retriever forwarded to the component factory method
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
@GuardedBy("lock")
private void setupDispatcherResourceManagerComponents(
		Configuration configuration,
		RpcServiceFactory dispatcherResourceManagreComponentRpcServiceFactory,
		MetricQueryServiceRetriever metricQueryServiceRetriever) throws Exception {
	dispatcherResourceManagerComponents.addAll(
		createDispatcherResourceManagerComponents(
			configuration,
			dispatcherResourceManagreComponentRpcServiceFactory,
			haServices,
			blobServer,
			heartbeatServices,
			metricRegistry,
			metricQueryServiceRetriever,
			new ShutDownFatalErrorHandler()));

	final int componentCount = dispatcherResourceManagerComponents.size();
	final Collection<CompletableFuture<ApplicationStatus>> componentShutDowns = new ArrayList<>(componentCount);

	for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
		final CompletableFuture<ApplicationStatus> componentShutDown = component.getShutDownFuture();
		// close the individual component after its own shut-down future completes
		FutureUtils.assertNoException(componentShutDown.thenRun(component::closeAsync));
		componentShutDowns.add(componentShutDown);
	}

	// close this instance after the combined shut-down future completes
	final CompletableFuture<Void> closedAfterAllShutDowns =
		FutureUtils.completeAll(componentShutDowns).thenRun(this::closeAsync);
	FutureUtils.assertNoException(closedAfterAllShutDowns);
}

Example 4

Source File: ExecutionGraph.java From flink with Apache License 2.0

5 votes

/**
 * Fails the job: transitions to {@code FAILING}, records the failure cause, cancels the
 * vertices and, once the cancellation future has completed (normally or exceptionally),
 * transitions to {@code FAILED} and runs the terminal-state hook.
 *
 * <p>Does nothing if the job is already {@code FAILING} or in a globally terminal state.
 *
 * @param cause the failure that is recorded and attached to both state transitions
 */
public void failJob(Throwable cause) {
	if (state != JobStatus.FAILING && !state.isGloballyTerminalState()) {
		transitionState(JobStatus.FAILING, cause);
		initFailureCause(cause);

		// whenComplete fires for either outcome of the cancellation; its arguments are not inspected
		final CompletableFuture<?> failedAfterCancellation = cancelVerticesAsync()
			.whenComplete((ignored, cancellationFailure) -> {
				transitionState(JobStatus.FAILED, cause);
				onTerminalState(JobStatus.FAILED);
			});

		FutureUtils.assertNoException(failedAfterCancellation);
	}
}

Example 5

Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Registers a completion handler on the runner's result future and then starts the runner.
 *
 * <p>The handler runs on the main thread executor. It only acts if the given runner is still
 * the one registered for its job id; in that case it reports either the terminal execution
 * graph, a not-finished job, or a job master failure.
 *
 * @param jobManagerRunner the runner to start
 * @return the same runner instance that was passed in
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// identity check: are we still the active JobManagerRunner for this job?
				final CompletableFuture<JobManagerRunner> registeredRunnerFuture = jobManagerRunnerFutures.get(jobId);
				final JobManagerRunner activeRunner =
					registeredRunnerFuture == null ? null : registeredRunnerFuture.getNow(null);

				//noinspection ObjectEquality
				if (jobManagerRunner != activeRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result graph: classify the failure after unwrapping the completion exception
				final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
				if (strippedThrowable instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, strippedThrowable);
				}
				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Example 6

Source File: DefaultScheduler.java From flink with Apache License 2.0

5 votes

/**
 * Builds the completion handler that deploys every given deployment handle.
 *
 * <p>The returned function first passes the incoming throwable to {@code propagateIfNonNull}.
 * It then walks the handles in order, checks that each logical slot future is already done,
 * and attaches the per-handle {@code deployOrHandleError} callback to it.
 *
 * @param deploymentHandles handles to deploy
 * @return a handler suitable for {@code CompletableFuture#handle}; it always returns {@code null}
 */
private BiFunction<Void, Throwable, Void> deployAll(final List<DeploymentHandle> deploymentHandles) {
	return (ignored, throwable) -> {
		propagateIfNonNull(throwable);

		for (final DeploymentHandle handle : deploymentHandles) {
			final CompletableFuture<LogicalSlot> slotFuture =
				handle.getSlotExecutionVertexAssignment().getLogicalSlotFuture();
			// the slot future is required to be complete at this point
			checkState(slotFuture.isDone());

			FutureUtils.assertNoException(slotFuture.handle(deployOrHandleError(handle)));
		}

		return null;
	};
}

Example 7

Source File: TaskExecutor.java From flink with Apache License 2.0

5 votes

/**
 * Applies the given partition infos to the shuffle environment for the task identified by
 * {@code executionAttemptID}.
 *
 * <p>If no task is registered for the attempt id, the update is discarded. Otherwise one
 * asynchronous update per partition info is submitted to the RPC service executor. The
 * acknowledgement is returned immediately in both cases and does not wait for the updates.
 *
 * @param executionAttemptID attempt id of the task whose partitions are updated
 * @param partitionInfos partition infos to apply
 * @param timeout not read by this method
 * @return an already completed future holding the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
		return CompletableFuture.completedFuture(Acknowledge.get());
	}

	for (final PartitionInfo partitionInfo : partitionInfos) {
		final Runnable applyUpdate = () -> {
			try {
				final boolean updated = shuffleEnvironment.updatePartitionInfo(executionAttemptID, partitionInfo);
				if (!updated) {
					log.debug(
						"Discard update for input gate partition {} of result {} in task {}. " +
							"The partition is no longer available.",
						partitionInfo.getShuffleDescriptor().getResultPartitionID(),
						partitionInfo.getIntermediateDataSetID(),
						executionAttemptID);
				}
			} catch (IOException | InterruptedException e) {
				// NOTE(review): the interrupt flag is not restored for InterruptedException — confirm this is intended
				log.error(
					"Could not update input data location for task {}. Trying to fail task.",
					task.getTaskInfo().getTaskName(),
					e);
				task.failExternally(e);
			}
		};

		// Run asynchronously because it might be blocking
		FutureUtils.assertNoException(
			CompletableFuture.runAsync(applyUpdate, getRpcService().getExecutor()));
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}

Example 8

Source File: DefaultDispatcherRunner.java From flink with Apache License 2.0

5 votes

/**
 * Confirms leadership for {@code leaderSessionID} once the leader address of the new
 * dispatcher leader process is available.
 *
 * <p>The confirmation is skipped if the leader election service no longer reports leadership
 * for that session id at the time the address arrives.
 *
 * @param leaderSessionID session id to confirm
 * @param newDispatcherLeaderProcess process that supplies the leader address future
 */
private void forwardConfirmLeaderSessionFuture(UUID leaderSessionID, DispatcherLeaderProcess newDispatcherLeaderProcess) {
	final CompletableFuture<?> leadershipConfirmed = newDispatcherLeaderProcess
		.getLeaderAddressFuture()
		.thenAccept(address -> {
			if (!leaderElectionService.hasLeadership(leaderSessionID)) {
				// leadership is gone for this session; nothing to confirm
				return;
			}
			leaderElectionService.confirmLeadership(leaderSessionID, address);
		});

	FutureUtils.assertNoException(leadershipConfirmed);
}

Example 9

Source File: DefaultDispatcherRunner.java From flink with Apache License 2.0

5 votes

/**
 * Stops the current dispatcher leader process, creates a new one for the given session id,
 * and starts it after the previous process' termination future completes normally.
 *
 * @param leaderSessionID session id for which the new process is created
 */
private void startNewDispatcherLeaderProcess(UUID leaderSessionID) {
	stopDispatcherLeaderProcess();

	dispatcherLeaderProcess = createNewDispatcherLeaderProcess(leaderSessionID);

	// bind the callback to the process created here rather than to the mutable field
	final DispatcherLeaderProcess processToStart = dispatcherLeaderProcess;
	final CompletableFuture<?> startedAfterPreviousTermination =
		previousDispatcherLeaderProcessTerminationFuture.thenRun(processToStart::start);

	FutureUtils.assertNoException(startedAfterPreviousTermination);
}

Example 10

Source File: Dispatcher.java From flink with Apache License 2.0

5 votes

/**
 * Registers a completion handler on the runner's result future and then starts the runner.
 *
 * <p>The handler runs on the main thread executor. It only acts if the given runner is still
 * the one registered for its job id; in that case it reports either the terminal execution
 * graph, a not-finished job, or a job master failure.
 *
 * @param jobManagerRunner the runner to start
 * @return the same runner instance that was passed in
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// identity check: are we still the active JobManagerRunner for this job?
				final JobManagerRunner activeRunner = Optional.ofNullable(jobManagerRunnerFutures.get(jobId))
					.map(registered -> registered.getNow(null))
					.orElse(null);

				//noinspection ObjectEquality
				if (jobManagerRunner != activeRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result graph: classify the failure after unwrapping the completion exception
				final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
				if (strippedThrowable instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, strippedThrowable);
				}
				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Example 11

Source File: AdaptedRestartPipelinedRegionStrategyNG.java From flink with Apache License 2.0

5 votes

/**
 * Restarts the given vertices: records a new version for each of them, cancels their tasks,
 * then resets and reschedules them on the job master main thread executor.
 *
 * <p>The outcome of that chain is passed to the {@code failGlobalOnError()} handler.
 *
 * @param verticesToRestart ids of the execution vertices to restart
 */
@VisibleForTesting
protected void restartTasks(final Set<ExecutionVertexID> verticesToRestart) {
	// both values are captured before cancellation starts and handed to the reschedule step
	final long modVersionAtRestart = executionGraph.getGlobalModVersion();
	final Set<ExecutionVertexVersion> recordedVersions = new HashSet<>(
		executionVertexVersioner.recordVertexModifications(verticesToRestart).values());

	final CompletableFuture<?> restartOutcome = cancelTasks(verticesToRestart)
		.thenComposeAsync(
			resetAndRescheduleTasks(modVersionAtRestart, recordedVersions),
			executionGraph.getJobMasterMainThreadExecutor())
		.handle(failGlobalOnError());

	FutureUtils.assertNoException(restartOutcome);
}

Example 12

Source File: TaskExecutor.java From flink with Apache License 2.0

5 votes

/**
 * Applies the given partition infos to the shuffle environment for the task identified by
 * {@code executionAttemptID}.
 *
 * <p>If no task is registered for the attempt id, the update is discarded. Otherwise one
 * asynchronous update per partition info is submitted to the RPC service executor. The
 * acknowledgement is returned immediately in both cases and does not wait for the updates.
 *
 * @param executionAttemptID attempt id of the task whose partitions are updated
 * @param partitionInfos partition infos to apply
 * @param timeout not read by this method
 * @return an already completed future holding the acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> updatePartitions(
		final ExecutionAttemptID executionAttemptID,
		Iterable<PartitionInfo> partitionInfos,
		Time timeout) {
	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
		return CompletableFuture.completedFuture(Acknowledge.get());
	}

	for (final PartitionInfo partitionInfo : partitionInfos) {
		final Runnable applyUpdate = () -> {
			try {
				final boolean updated = shuffleEnvironment.updatePartitionInfo(executionAttemptID, partitionInfo);
				if (!updated) {
					log.debug(
						"Discard update for input gate partition {} of result {} in task {}. " +
							"The partition is no longer available.",
						partitionInfo.getShuffleDescriptor().getResultPartitionID(),
						partitionInfo.getIntermediateDataSetID(),
						executionAttemptID);
				}
			} catch (IOException | InterruptedException e) {
				// NOTE(review): the interrupt flag is not restored for InterruptedException — confirm this is intended
				log.error(
					"Could not update input data location for task {}. Trying to fail task.",
					task.getTaskInfo().getTaskName(),
					e);
				task.failExternally(e);
			}
		};

		// Run asynchronously because it might be blocking
		FutureUtils.assertNoException(
			CompletableFuture.runAsync(applyUpdate, getRpcService().getExecutor()));
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}

Example 13

Source File: Dispatcher.java From flink with Apache License 2.0

5 votes

/**
 * Registers a completion handler on the runner's result future and then starts the runner.
 *
 * <p>The handler runs on the main thread executor. It only acts if the given runner is still
 * the one registered for its job id; in that case it reports either the terminal execution
 * graph, a not-finished job, or a job master failure.
 *
 * @param jobManagerRunner the runner to start
 * @return the same runner instance that was passed in
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
private JobManagerRunner startJobManagerRunner(JobManagerRunner jobManagerRunner) throws Exception {
	final JobID jobId = jobManagerRunner.getJobGraph().getJobID();

	FutureUtils.assertNoException(
		jobManagerRunner.getResultFuture().handleAsync(
			(ArchivedExecutionGraph archivedExecutionGraph, Throwable throwable) -> {
				// identity check: are we still the active JobManagerRunner for this job?
				final CompletableFuture<JobManagerRunner> registeredRunnerFuture = jobManagerRunnerFutures.get(jobId);
				final JobManagerRunner activeRunner =
					registeredRunnerFuture == null ? null : registeredRunnerFuture.getNow(null);

				//noinspection ObjectEquality
				if (jobManagerRunner != activeRunner) {
					log.debug("There is a newer JobManagerRunner for the job {}.", jobId);
					return null;
				}

				if (archivedExecutionGraph != null) {
					jobReachedGloballyTerminalState(archivedExecutionGraph);
					return null;
				}

				// no result graph: classify the failure after unwrapping the completion exception
				final Throwable strippedThrowable = ExceptionUtils.stripCompletionException(throwable);
				if (strippedThrowable instanceof JobNotFinishedException) {
					jobNotFinished(jobId);
				} else {
					jobMasterFailed(jobId, strippedThrowable);
				}
				return null;
			},
			getMainThreadExecutor()));

	jobManagerRunner.start();

	return jobManagerRunner;
}

Example 14

Source File: Dispatcher.java From flink with Apache License 2.0

4 votes

/**
 * Runs a recovered job and routes the outcome of the start attempt through the
 * {@code handleRecoveredJobStartError} handler for the job's id.
 *
 * @param recoveredJob the recovered job graph; checked with {@code checkNotNull}
 */
void runRecoveredJob(final JobGraph recoveredJob) {
	checkNotNull(recoveredJob);

	final CompletableFuture<?> startOutcome =
		runJob(recoveredJob).handle(handleRecoveredJobStartError(recoveredJob.getJobID()));

	FutureUtils.assertNoException(startOutcome);
}

Example 15

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Attempts to restart the job and, when a restart is not possible (for example because the
 * restart strategy does not allow another restart), attempts to fail it instead. Only
 * permitted while the job is in state FAILING or RESTARTING.
 *
 * @param globalModVersionForRestart global modification version handed to the restart callback
 * @return true if the operation could be executed; false if the job was in another state or
 *         a concurrent job status change occurred
 */
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	final JobStatus currentState = state;

	if (currentState != JobStatus.FAILING && currentState != JobStatus.RESTARTING) {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}

	final Throwable observedFailureCause = this.failureCause;

	synchronized (progressLock) {
		if (LOG.isDebugEnabled()) {
			LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), observedFailureCause);
		} else {
			LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
		}

		final boolean failureCauseAllowsRestart = !(observedFailureCause instanceof SuppressRestartsException);
		final boolean restartStrategyAllowsRestart = restartStrategy.canRestart();

		if (failureCauseAllowsRestart && restartStrategyAllowsRestart) {
			if (!transitionState(currentState, JobStatus.RESTARTING)) {
				// we must have changed the state concurrently, thus we cannot complete this operation
				return false;
			}

			LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

			final RestartCallback restartCallback = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
			FutureUtils.assertNoException(
				restartStrategy
					.restart(restartCallback, getJobMasterMainThreadExecutor())
					.exceptionally(restartFailure -> {
						// a failing restart attempt escalates to a global failure
						failGlobal(restartFailure);
						return null;
					}));
			return true;
		}

		if (!transitionState(currentState, JobStatus.FAILED, observedFailureCause)) {
			// we must have changed the state concurrently, thus we cannot complete this operation
			return false;
		}

		// a null reason is passed for whichever condition did not block the restart
		final String suppressedReason = failureCauseAllowsRestart ? null :
			"a type of SuppressRestartsException was thrown";
		final String strategyReason = restartStrategyAllowsRestart ? null :
			"the restart strategy prevented it";

		LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
			StringUtils.concatenateWithAnd(suppressedReason, strategyReason), observedFailureCause);
		onTerminalState(JobStatus.FAILED);

		return true;
	}
}

Example 16

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph's state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>The call returns without doing anything if the job is already FAILING, SUSPENDED, or in
 * a globally terminal state. Otherwise it moves the job to FAILING, bumps the global
 * modification version, cancels any ongoing scheduling future, and cancels all vertices; the
 * follow-up step runs when the vertex cancellation future completes.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {

	assertRunningInJobMasterMainThread();

	// loop until either the job is in a state we must not leave, or our transition to FAILING wins
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// read the field once so the null check and the cancel call see the same reference
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
			FutureUtils.assertNoException(allTerminal.handle(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation itself failed: try to go FAILING -> FAILED with a wrapping exception
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
					return null;
				}));

			return;
		}

		// else: concurrent change to execution state, retry
	}
}

Example 17

Source File: DefaultScheduler.java From flink with Apache License 2.0

4 votes

/**
 * Assigns resources for all given deployment handles and, when that assignment future
 * completes, runs the {@code deployAll} handler over the same handles.
 *
 * @param deploymentHandles handles to assign resources for and deploy
 */
private void waitForAllSlotsAndDeploy(final List<DeploymentHandle> deploymentHandles) {
	final CompletableFuture<?> deploymentOutcome =
		assignAllResources(deploymentHandles).handle(deployAll(deploymentHandles));

	FutureUtils.assertNoException(deploymentOutcome);
}

Example 18

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Fails the execution graph globally. This failure will not be recovered by a specific
 * failover strategy, but results in a full restart of all tasks.
 *
 * <p>This global failure is meant to be triggered in cases where the consistency of the
 * execution graph's state cannot be guaranteed any more (for example when catching unexpected
 * exceptions that indicate a bug or an unexpected call race), and where a full restart is the
 * safe way to get consistency back.
 *
 * <p>When legacy scheduling is not in use, the failure is only forwarded to the internal task
 * failures listener. Otherwise the call returns without doing anything if the job is already
 * FAILING, SUSPENDED, or in a globally terminal state; else it moves the job to FAILING, bumps
 * the global modification version, cancels any ongoing scheduling future, and cancels all
 * vertices, with the follow-up step running when the vertex cancellation future completes.
 *
 * @param t The exception that caused the failure.
 */
public void failGlobal(Throwable t) {
	// non-legacy scheduling: hand the failure to the listener and do nothing else here
	if (!isLegacyScheduling()) {
		internalTaskFailuresListener.notifyGlobalFailure(t);
		return;
	}

	assertRunningInJobMasterMainThread();

	// loop until either the job is in a state we must not leave, or our transition to FAILING wins
	while (true) {
		JobStatus current = state;
		// stay in these states
		if (current == JobStatus.FAILING ||
			current == JobStatus.SUSPENDED ||
			current.isGloballyTerminalState()) {
			return;
		} else if (transitionState(current, JobStatus.FAILING, t)) {
			initFailureCause(t);

			// make sure no concurrent local or global actions interfere with the failover
			final long globalVersionForRestart = incrementGlobalModVersion();

			// read the field once so the null check and the cancel call see the same reference
			final CompletableFuture<Void> ongoingSchedulingFuture = schedulingFuture;

			// cancel ongoing scheduling action
			if (ongoingSchedulingFuture != null) {
				ongoingSchedulingFuture.cancel(false);
			}

			// we build a future that is complete once all vertices have reached a terminal state
			final ConjunctFuture<Void> allTerminal = cancelVerticesAsync();
			FutureUtils.assertNoException(allTerminal.handle(
				(Void ignored, Throwable throwable) -> {
					if (throwable != null) {
						// cancellation itself failed: try to go FAILING -> FAILED with a wrapping exception
						transitionState(
							JobStatus.FAILING,
							JobStatus.FAILED,
							new FlinkException("Could not cancel all execution job vertices properly.", throwable));
					} else {
						// continue with the version captured above
						allVerticesInTerminalState(globalVersionForRestart);
					}
					return null;
				}));

			return;
		}

		// else: concurrent change to execution state, retry
	}
}

Example 19

Source File: ExecutionGraph.java From flink with Apache License 2.0

4 votes

/**
 * Attempts to restart the job and, when a restart is not possible (for example because the
 * restart strategy does not allow another restart), attempts to fail it instead. Only
 * permitted while the job is in state FAILING or RESTARTING.
 *
 * <p>When legacy scheduling is not in use, the method returns {@code true} immediately
 * without touching the job state.
 *
 * @param globalModVersionForRestart global modification version handed to the restart callback
 * @return true if the operation could be executed; false if the job was in another state or
 *         a concurrent job status change occurred
 */
@Deprecated
private boolean tryRestartOrFail(long globalModVersionForRestart) {
	if (!isLegacyScheduling()) {
		return true;
	}

	final JobStatus currentState = state;

	if (currentState != JobStatus.FAILING && currentState != JobStatus.RESTARTING) {
		// this operation is only allowed in the state FAILING or RESTARTING
		return false;
	}

	final Throwable observedFailureCause = this.failureCause;

	if (LOG.isDebugEnabled()) {
		LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), observedFailureCause);
	} else {
		LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID());
	}

	final boolean failureCauseAllowsRestart = !(observedFailureCause instanceof SuppressRestartsException);
	final boolean restartStrategyAllowsRestart = restartStrategy.canRestart();

	if (failureCauseAllowsRestart && restartStrategyAllowsRestart) {
		if (!transitionState(currentState, JobStatus.RESTARTING)) {
			// we must have changed the state concurrently, thus we cannot complete this operation
			return false;
		}

		LOG.info("Restarting the job {} ({}).", getJobName(), getJobID());

		final RestartCallback restartCallback = new ExecutionGraphRestartCallback(this, globalModVersionForRestart);
		FutureUtils.assertNoException(
			restartStrategy
				.restart(restartCallback, getJobMasterMainThreadExecutor())
				.exceptionally(restartFailure -> {
					// a failing restart attempt escalates to a global failure
					failGlobal(restartFailure);
					return null;
				}));
		return true;
	}

	if (!transitionState(currentState, JobStatus.FAILED, observedFailureCause)) {
		// we must have changed the state concurrently, thus we cannot complete this operation
		return false;
	}

	// a null reason is passed for whichever condition did not block the restart
	final String suppressedReason = failureCauseAllowsRestart ? null :
		"a type of SuppressRestartsException was thrown";
	final String strategyReason = restartStrategyAllowsRestart ? null :
		"the restart strategy prevented it";

	LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
		StringUtils.concatenateWithAnd(suppressedReason, strategyReason), observedFailureCause);
	onTerminalState(JobStatus.FAILED);

	return true;
}