org.apache.flink.runtime.taskmanager.TaskExecutionState Java Examples

The following examples show how to use org.apache.flink.runtime.taskmanager.TaskExecutionState. Each example is an excerpt from an open-source project; the originating source file and license are noted above each snippet.
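Most of the snippets below construct a TaskExecutionState and hand it to an updateTaskExecutionState method on a scheduler, ExecutionGraph, or JobMasterGateway. The following minimal sketch, which is not taken from any of the projects listed below, summarizes the two constructors that appear most often: the three-argument form for a plain state transition and the four-argument form that also carries the failure cause. The helper class, its method names, and the assumption that a JobMasterGateway reference is at hand are illustrative placeholders only.

import java.util.concurrent.CompletableFuture;

import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.jobmaster.JobMasterGateway;
import org.apache.flink.runtime.messages.Acknowledge;
import org.apache.flink.runtime.taskmanager.TaskExecutionState;

// Hypothetical helper class, for illustration only.
public class TaskExecutionStateReportingSketch {

	/** Reports a plain state transition (e.g. RUNNING or FINISHED) for one execution attempt. */
	static CompletableFuture<Acknowledge> reportState(
			JobMasterGateway jobMasterGateway,
			JobID jobId,
			ExecutionAttemptID attemptId,
			ExecutionState newState) {
		// Three-argument constructor: job, execution attempt, new execution state.
		return jobMasterGateway.updateTaskExecutionState(
			new TaskExecutionState(jobId, attemptId, newState));
	}

	/** Reports a FAILED transition together with the failure cause. */
	static CompletableFuture<Acknowledge> reportFailure(
			JobMasterGateway jobMasterGateway,
			JobID jobId,
			ExecutionAttemptID attemptId,
			Throwable cause) {
		// Four-argument constructor additionally carries the error that caused the failure.
		return jobMasterGateway.updateTaskExecutionState(
			new TaskExecutionState(jobId, attemptId, ExecutionState.FAILED, cause));
	}
}

The same constructors appear throughout the test examples below, for instance when driving an execution attempt from RUNNING to FAILED in DefaultSchedulerTest.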
Example #1
Source File: TestTaskManagerActions.java    From flink with Apache License 2.0
@Override
public void updateTaskExecutionState(TaskExecutionState taskExecutionState) {
	Optional<CompletableFuture<Void>> listenerFuture =
		taskManagerActionListeners.getListenerFuture(taskExecutionState.getID(), taskExecutionState.getExecutionState());
	if (listenerFuture.isPresent()) {
		listenerFuture.get().complete(null);
	}
	if (jobMasterGateway != null) {
		CompletableFuture<Acknowledge> futureAcknowledge = jobMasterGateway.updateTaskExecutionState(taskExecutionState);

		futureAcknowledge.whenComplete(
			(ack, throwable) -> {
				if (throwable != null) {
					failTask(taskExecutionState.getID(), throwable);
				}
			}
		);
	}
}
 
Example #2
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void failJobIfCannotRestart() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	testRestartBackoffTimeStrategy.setCanRestart(false);

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));

	taskRestartExecutor.triggerScheduledTasks();

	waitForTermination(scheduler);
	final JobStatus jobStatus = scheduler.requestJobStatus();
	assertThat(jobStatus, is(equalTo(JobStatus.FAILED)));
}
 
Example #3
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void vertexIsResetBeforeRestarted() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();

	final TestSchedulingStrategy.Factory schedulingStrategyFactory = new TestSchedulingStrategy.Factory();
	final DefaultScheduler scheduler = createScheduler(jobGraph, schedulingStrategyFactory);
	final TestSchedulingStrategy schedulingStrategy = schedulingStrategyFactory.getLastCreatedSchedulingStrategy();
	final SchedulingTopology topology = schedulingStrategy.getSchedulingTopology();

	startScheduling(scheduler);

	final SchedulingExecutionVertex onlySchedulingVertex = Iterables.getOnlyElement(topology.getVertices());
	schedulingStrategy.schedule(Collections.singletonList(onlySchedulingVertex.getId()));

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));

	taskRestartExecutor.triggerScheduledTasks();

	assertThat(schedulingStrategy.getReceivedVerticesToRestart(), hasSize(1));
	assertThat(onlySchedulingVertex.getState(), is(equalTo(ExecutionState.CREATED)));
}
 
Example #4
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void handleGlobalFailure() {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	scheduler.handleGlobalFailure(new Exception("forced failure"));

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.CANCELED));

	taskRestartExecutor.triggerScheduledTasks();

	final List<ExecutionVertexID> deployedExecutionVertices = testExecutionVertexOperations.getDeployedVertices();
	final ExecutionVertexID executionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
	assertThat(deployedExecutionVertices, contains(executionVertexId, executionVertexId));
}
 
Example #5
Source File: ExecutionGraph.java    From Flink-CEPplus with Apache License 2.0
/**
 * Deserializes accumulators from a task state update.
 *
 * <p>This method never throws an exception!
 *
 * @param state The task execution state from which to deserialize the accumulators.
 * @return The deserialized accumulators, or null, if there are no accumulators or an error occurred.
 */
private Map<String, Accumulator<?, ?>> deserializeAccumulators(TaskExecutionState state) {
	AccumulatorSnapshot serializedAccumulators = state.getAccumulators();

	if (serializedAccumulators != null) {
		try {
			return serializedAccumulators.deserializeUserAccumulators(userClassLoader);
		}
		catch (Throwable t) {
			// we catch Throwable here to include all forms of linking errors that may
			// occur if user classes are missing in the classpath
			LOG.error("Failed to deserialize final accumulator results.", t);
		}
	}
	return null;
}
 
Example #6
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void abortPendingCheckpointsWhenRestartingTasks() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	enableCheckpointing(jobGraph);

	final CountDownLatch checkpointTriggeredLatch = getCheckpointTriggeredLatch();

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.RUNNING));

	final CheckpointCoordinator checkpointCoordinator = getCheckpointCoordinator(scheduler);

	checkpointCoordinator.triggerCheckpoint(false);
	checkpointTriggeredLatch.await();
	assertThat(checkpointCoordinator.getNumberOfPendingCheckpoints(), is(equalTo(1)));

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));
	taskRestartExecutor.triggerScheduledTasks();
	assertThat(checkpointCoordinator.getNumberOfPendingCheckpoints(), is(equalTo(0)));
}
 
Example #7
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void jobStatusIsRestartingIfOneVertexIsWaitingForRestart() {
	final JobGraph jobGraph = singleJobVertexJobGraph(2);
	final JobID jobId = jobGraph.getJobID();
	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final Iterator<ArchivedExecutionVertex> vertexIterator = scheduler.requestJob().getAllExecutionVertices().iterator();
	final ExecutionAttemptID attemptId1 = vertexIterator.next().getCurrentExecutionAttempt().getAttemptId();
	final ExecutionAttemptID attemptId2 = vertexIterator.next().getCurrentExecutionAttempt().getAttemptId();

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobId, attemptId1, ExecutionState.FAILED, new RuntimeException("expected")));
	final JobStatus jobStatusAfterFirstFailure = scheduler.requestJobStatus();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobId, attemptId2, ExecutionState.FAILED, new RuntimeException("expected")));

	taskRestartExecutor.triggerNonPeriodicScheduledTask();
	final JobStatus jobStatusWithPendingRestarts = scheduler.requestJobStatus();
	taskRestartExecutor.triggerNonPeriodicScheduledTask();
	final JobStatus jobStatusAfterRestarts = scheduler.requestJobStatus();

	assertThat(jobStatusAfterFirstFailure, equalTo(JobStatus.RESTARTING));
	assertThat(jobStatusWithPendingRestarts, equalTo(JobStatus.RESTARTING));
	assertThat(jobStatusAfterRestarts, equalTo(JobStatus.RUNNING));
}
 
Example #8
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void skipDeploymentIfVertexVersionOutdated() {
	testExecutionSlotAllocator.disableAutoCompletePendingRequests();

	final JobGraph jobGraph = nonParallelSourceSinkJobGraph();
	final List<JobVertex> sortedJobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
	final ExecutionVertexID sourceExecutionVertexId = new ExecutionVertexID(sortedJobVertices.get(0).getID(), 0);
	final ExecutionVertexID sinkExecutionVertexId = new ExecutionVertexID(sortedJobVertices.get(1).getID(), 0);

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);
	testExecutionSlotAllocator.completePendingRequest(sourceExecutionVertexId);

	final ArchivedExecutionVertex sourceExecutionVertex = scheduler.requestJob().getAllExecutionVertices().iterator().next();
	final ExecutionAttemptID attemptId = sourceExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));
	testRestartBackoffTimeStrategy.setCanRestart(false);

	testExecutionSlotAllocator.enableAutoCompletePendingRequests();
	taskRestartExecutor.triggerScheduledTasks();

	assertThat(testExecutionVertexOperations.getDeployedVertices(), containsInAnyOrder(sourceExecutionVertexId, sinkExecutionVertexId));
	assertThat(scheduler.requestJob().getState(), is(equalTo(JobStatus.RUNNING)));
}
 
Example #9
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void failureInfoIsSetAfterTaskFailure() {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobID jobId = jobGraph.getJobID();
	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();

	final String exceptionMessage = "expected exception";
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobId, attemptId, ExecutionState.FAILED, new RuntimeException(exceptionMessage)));

	final ErrorInfo failureInfo = scheduler.requestJob().getFailureInfo();
	assertThat(failureInfo, is(notNullValue()));
	assertThat(failureInfo.getExceptionAsString(), containsString(exceptionMessage));
}
 
Example #10
Source File: JobMasterPartitionReleaseTest.java    From flink with Apache License 2.0
private void testPartitionReleaseOrPromotionOnJobTermination(Function<TestSetup, CompletableFuture<ResourceID>> taskExecutorCallSelector, ExecutionState finalExecutionState) throws Exception {
	final CompletableFuture<TaskDeploymentDescriptor> taskDeploymentDescriptorFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setSubmitTaskConsumer((tdd, ignored) -> {
			taskDeploymentDescriptorFuture.complete(tdd);
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.createTestingTaskExecutorGateway();

	try (final TestSetup testSetup = new TestSetup(rpcService, testingFatalErrorHandler, testingTaskExecutorGateway)) {
		final JobMasterGateway jobMasterGateway = testSetup.getJobMasterGateway();

		// update the execution state of the only execution to target state
		// this should trigger the job to finish
		final TaskDeploymentDescriptor taskDeploymentDescriptor = taskDeploymentDescriptorFuture.get();
		jobMasterGateway.updateTaskExecutionState(
			new TaskExecutionState(
				taskDeploymentDescriptor.getJobId(),
				taskDeploymentDescriptor.getExecutionAttemptId(),
				finalExecutionState));

		assertThat(taskExecutorCallSelector.apply(testSetup).get(), equalTo(testSetup.getTaskExecutorResourceID()));
	}
}
 
Example #11
Source File: ExecutionGraph.java    From flink with Apache License 2.0
/**
 * Deserializes accumulators from a task state update.
 *
 * <p>This method never throws an exception!
 *
 * @param state The task execution state from which to deserialize the accumulators.
 * @return The deserialized accumulators, or null, if there are no accumulators or an error occurred.
 */
private Map<String, Accumulator<?, ?>> deserializeAccumulators(TaskExecutionState state) {
	AccumulatorSnapshot serializedAccumulators = state.getAccumulators();

	if (serializedAccumulators != null) {
		try {
			return serializedAccumulators.deserializeUserAccumulators(userClassLoader);
		}
		catch (Throwable t) {
			// we catch Throwable here to include all forms of linking errors that may
			// occur if user classes are missing in the classpath
			LOG.error("Failed to deserialize final accumulator results.", t);
		}
	}
	return null;
}
 
Example #12
Source File: ExecutionGraph.java    From flink with Apache License 2.0
/**
 * Updates the state of one of the ExecutionVertex's Execution attempts.
 * If the new status is "FINISHED", this also updates the accumulators.
 *
 * @param state The state update.
 * @return True, if the task update was properly applied, false, if the execution attempt was not found.
 */
public boolean updateState(TaskExecutionState state) {
	assertRunningInJobMasterMainThread();
	final Execution attempt = currentExecutions.get(state.getID());

	if (attempt != null) {
		try {
			final boolean stateUpdated = updateStateInternal(state, attempt);
			maybeReleasePartitions(attempt);
			return stateUpdated;
		}
		catch (Throwable t) {
			ExceptionUtils.rethrowIfFatalErrorOrOOM(t);

			// failures during updates leave the ExecutionGraph inconsistent
			failGlobal(t);
			return false;
		}
	}
	else {
		return false;
	}
}
 
Example #13
Source File: SchedulerBase.java    From flink with Apache License 2.0
private boolean isNotifiable(
		final ExecutionVertexID executionVertexId,
		final TaskExecutionState taskExecutionState) {

	final ExecutionVertex executionVertex = getExecutionVertex(executionVertexId);

	// only notifies FINISHED and FAILED states which are needed at the moment.
	// can be refined in FLINK-14233 after the legacy scheduler is removed and
	// the actions are factored out from ExecutionGraph.
	switch (taskExecutionState.getExecutionState()) {
		case FINISHED:
		case FAILED:
			// only notifies a state update if it's effective, namely it successfully
			// turns the execution state to the expected value.
			if (executionVertex.getExecutionState() == taskExecutionState.getExecutionState()) {
				return true;
			}
			break;
		default:
			break;
	}

	return false;
}
 
Example #14
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void restartFailedTask() {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex archivedExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = archivedExecutionVertex.getCurrentExecutionAttempt().getAttemptId();

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));

	taskRestartExecutor.triggerScheduledTasks();

	final List<ExecutionVertexID> deployedExecutionVertices = testExecutionVertexOperations.getDeployedVertices();
	final ExecutionVertexID executionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
	assertThat(deployedExecutionVertices, contains(executionVertexId, executionVertexId));
}
 
Example #15
Source File: SchedulerBase.java    From flink with Apache License 2.0
@Override
public final boolean updateTaskExecutionState(final TaskExecutionState taskExecutionState) {
	final Optional<ExecutionVertexID> executionVertexId = getExecutionVertexId(taskExecutionState.getID());

	boolean updateSuccess = executionGraph.updateState(taskExecutionState);

	if (updateSuccess) {
		checkState(executionVertexId.isPresent());

		if (isNotifiable(executionVertexId.get(), taskExecutionState)) {
			updateTaskExecutionStateInternal(executionVertexId.get(), taskExecutionState);
		}
		return true;
	} else {
		return false;
	}
}
 
Example #16
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void cancelWhileRestartingShouldWaitForRunningTasks() {
	final JobGraph jobGraph = singleJobVertexJobGraph(2);
	final JobID jobid = jobGraph.getJobID();
	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);
	final SchedulingTopology topology = scheduler.getSchedulingTopology();

	final Iterator<ArchivedExecutionVertex> vertexIterator = scheduler.requestJob().getAllExecutionVertices().iterator();
	final ExecutionAttemptID attemptId1 = vertexIterator.next().getCurrentExecutionAttempt().getAttemptId();
	final ExecutionAttemptID attemptId2 = vertexIterator.next().getCurrentExecutionAttempt().getAttemptId();
	final ExecutionVertexID executionVertex2 = scheduler.getExecutionVertexIdOrThrow(attemptId2);

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobid, attemptId1, ExecutionState.FAILED, new RuntimeException("expected")));
	scheduler.cancel();
	final ExecutionState vertex2StateAfterCancel = topology.getVertex(executionVertex2).getState();
	final JobStatus statusAfterCancelWhileRestarting = scheduler.requestJobStatus();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobid, attemptId2, ExecutionState.CANCELED, new RuntimeException("expected")));

	assertThat(vertex2StateAfterCancel, is(equalTo(ExecutionState.CANCELING)));
	assertThat(statusAfterCancelWhileRestarting, is(equalTo(JobStatus.CANCELLING)));
	assertThat(scheduler.requestJobStatus(), is(equalTo(JobStatus.CANCELED)));
}
 
Example #17
Source File: UpdateSchedulerNgOnInternalFailuresListener.java    From flink with Apache License 2.0
@Override
public void notifyTaskFailure(final ExecutionAttemptID attemptId, final Throwable t) {
	schedulerNg.updateTaskExecutionState(new TaskExecutionState(
		jobId,
		attemptId,
		ExecutionState.FAILED,
		t));
}
 
Example #18
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void failGlobalWhenRestoringStateFails() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);
	enableCheckpointing(jobGraph);

	final CountDownLatch checkpointTriggeredLatch = getCheckpointTriggeredLatch();

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.RUNNING));

	final CheckpointCoordinator checkpointCoordinator = getCheckpointCoordinator(scheduler);

	// register a master hook to fail state restore
	final TestMasterHook masterHook = TestMasterHook.fromId("testHook");
	masterHook.enableFailOnRestore();
	checkpointCoordinator.addMasterHook(masterHook);

	// complete one checkpoint for state restore
	checkpointCoordinator.triggerCheckpoint(false);
	checkpointTriggeredLatch.await();
	final long checkpointId = checkpointCoordinator.getPendingCheckpoints().keySet().iterator().next();
	acknowledgePendingCheckpoint(scheduler, checkpointId);

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));
	taskRestartExecutor.triggerScheduledTasks();
	final List<ExecutionVertexID> deployedExecutionVertices = testExecutionVertexOperations.getDeployedVertices();

	// the first task failover should be skipped on state restore failure
	final ExecutionVertexID executionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
	assertThat(deployedExecutionVertices, contains(executionVertexId));

	// a global failure should be triggered on state restore failure
	masterHook.disableFailOnRestore();
	taskRestartExecutor.triggerScheduledTasks();
	assertThat(deployedExecutionVertices, contains(executionVertexId, executionVertexId));
}
 
Example #19
Source File: DefaultSchedulerBatchSchedulingTest.java    From flink with Apache License 2.0
private void finishExecution(
		ExecutionAttemptID executionAttemptId,
		SchedulerNG scheduler,
		ComponentMainThreadExecutor mainThreadExecutor) {
	CompletableFuture.runAsync(
		() -> {
			scheduler.updateTaskExecutionState(new TaskExecutionState(jobId, executionAttemptId, ExecutionState.RUNNING));
			scheduler.updateTaskExecutionState(new TaskExecutionState(jobId, executionAttemptId, ExecutionState.FINISHED));
		},
		mainThreadExecutor
	).join();
}
 
Example #20
Source File: ExecutionGraphCheckpointCoordinatorTest.java    From flink with Apache License 2.0
/**
 * Tests that the checkpoint coordinator is shut down if the execution graph
 * is finished.
 */
@Test
public void testShutdownCheckpointCoordinatorOnFinished() throws Exception {
	final CompletableFuture<JobStatus> counterShutdownFuture = new CompletableFuture<>();
	CheckpointIDCounter counter = new TestingCheckpointIDCounter(counterShutdownFuture);

	final CompletableFuture<JobStatus> storeShutdownFuture = new CompletableFuture<>();
	CompletedCheckpointStore store = new TestingCompletedCheckpointStore(storeShutdownFuture);

	ExecutionGraph graph = createExecutionGraphAndEnableCheckpointing(counter, store);
	final CheckpointCoordinator checkpointCoordinator = graph.getCheckpointCoordinator();

	assertThat(checkpointCoordinator, Matchers.notNullValue());
	assertThat(checkpointCoordinator.isShutdown(), is(false));

	graph.scheduleForExecution();

	for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
		final Execution currentExecutionAttempt = executionVertex.getCurrentExecutionAttempt();
		graph.updateState(new TaskExecutionState(graph.getJobID(), currentExecutionAttempt.getAttemptId(), ExecutionState.FINISHED));
	}

	assertThat(graph.getTerminationFuture().get(), is(JobStatus.FINISHED));

	assertThat(checkpointCoordinator.isShutdown(), is(true));
	assertThat(counterShutdownFuture.get(), is(JobStatus.FINISHED));
	assertThat(storeShutdownFuture.get(), is(JobStatus.FINISHED));
}
 
Example #21
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void vertexIsNotAffectedByOutdatedDeployment() {
	final JobGraph jobGraph = singleJobVertexJobGraph(2);

	testExecutionSlotAllocator.disableAutoCompletePendingRequests();
	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final Iterator<ArchivedExecutionVertex> vertexIterator = scheduler.requestJob().getAllExecutionVertices().iterator();
	final ArchivedExecutionVertex v1 = vertexIterator.next();
	final ArchivedExecutionVertex v2 = vertexIterator.next();

	final SchedulingExecutionVertex sv1 = scheduler.getSchedulingTopology().getVertices().iterator().next();

	// fail v1 and let it recover to SCHEDULED
	// the initial deployment of v1 will be outdated
	scheduler.updateTaskExecutionState(new TaskExecutionState(
		jobGraph.getJobID(),
		v1.getCurrentExecutionAttempt().getAttemptId(),
		ExecutionState.FAILED));
	taskRestartExecutor.triggerScheduledTasks();

	// fail v2 to get all pending slot requests in the initial deployments to be done
	// this triggers the outdated deployment of v1
	scheduler.updateTaskExecutionState(new TaskExecutionState(
		jobGraph.getJobID(),
		v2.getCurrentExecutionAttempt().getAttemptId(),
		ExecutionState.FAILED));

	// v1 should not be affected
	assertThat(sv1.getState(), is(equalTo(ExecutionState.SCHEDULED)));
}
 
Example #22
Source File: ExecutionGraph.java    From flink with Apache License 2.0
private boolean updateStateInternal(final TaskExecutionState state, final Execution attempt) {
	Map<String, Accumulator<?, ?>> accumulators;

	switch (state.getExecutionState()) {
		case RUNNING:
			return attempt.switchToRunning();

		case FINISHED:
			// this deserialization is exception-free
			accumulators = deserializeAccumulators(state);
			attempt.markFinished(accumulators, state.getIOMetrics());
			return true;

		case CANCELED:
			// this deserialization is exception-free
			accumulators = deserializeAccumulators(state);
			attempt.completeCancelling(accumulators, state.getIOMetrics(), false);
			return true;

		case FAILED:
			// this deserialization is exception-free
			accumulators = deserializeAccumulators(state);
			attempt.markFailed(state.getError(userClassLoader), accumulators, state.getIOMetrics(), !isLegacyScheduling());
			return true;

		default:
			// we mark as failed and return false, which triggers the TaskManager
			// to remove the task
			attempt.fail(new Exception("TaskManager sent illegal state update: " + state.getExecutionState()));
			return false;
	}
}
 
Example #23
Source File: TaskExecutor.java    From flink with Apache License 2.0
private void unregisterTaskAndNotifyFinalState(
		final JobMasterGateway jobMasterGateway,
		final ExecutionAttemptID executionAttemptID) {

	Task task = taskSlotTable.removeTask(executionAttemptID);
	if (task != null) {
		if (!task.getExecutionState().isTerminal()) {
			try {
				task.failExternally(new IllegalStateException("Task is being removed from TaskManager."));
			} catch (Exception e) {
				log.error("Could not properly fail task.", e);
			}
		}

		log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.",
			task.getExecutionState(), task.getTaskInfo().getTaskNameWithSubtasks(), task.getExecutionId());

		AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot();

		updateTaskExecutionState(
				jobMasterGateway,
				new TaskExecutionState(
					task.getJobID(),
					task.getExecutionId(),
					task.getExecutionState(),
					task.getFailureCause(),
					accumulatorSnapshot,
					task.getMetricGroup().getIOMetricGroup().createSnapshot()));
	} else {
		log.error("Cannot find task with ID {} to unregister.", executionAttemptID);
	}
}
 
Example #24
Source File: StreamTaskTest.java    From flink with Apache License 2.0
/**
 * This test checks that cancel calls that are issued before the operator is
 * instantiated still lead to proper canceling.
 */
@Test
public void testEarlyCanceling() throws Exception {
	final StreamConfig cfg = new StreamConfig(new Configuration());
	cfg.setOperatorID(new OperatorID(4711L, 42L));
	cfg.setStreamOperator(new SlowlyDeserializingOperator());
	cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	final TaskManagerActions taskManagerActions = spy(new NoOpTaskManagerActions());
	final Task task = createTask(SourceStreamTask.class, cfg, new Configuration(), taskManagerActions);

	final TaskExecutionState state = new TaskExecutionState(
		task.getJobID(), task.getExecutionId(), ExecutionState.RUNNING);

	task.startTaskThread();

	verify(taskManagerActions, timeout(2000L)).updateTaskExecutionState(eq(state));

	// send a cancel. because the operator takes a long time to deserialize, this should
	// hit the task before the operator is deserialized
	task.cancelExecution();

	task.getExecutingThread().join();

	assertFalse("Task did not cancel", task.getExecutingThread().isAlive());
	assertEquals(ExecutionState.CANCELED, task.getExecutionState());
}
 
Example #25
Source File: AdaptedRestartPipelinedRegionStrategyNGAbortPendingCheckpointsTest.java    From flink with Apache License 2.0
private void setTasksRunning(final ExecutionGraph executionGraph, final ExecutionVertex... executionVertices) {
	for (ExecutionVertex executionVertex : executionVertices) {
		executionGraph.updateState(
			new TaskExecutionState(executionGraph.getJobID(),
				executionVertex.getCurrentExecutionAttempt().getAttemptId(),
				ExecutionState.RUNNING));
	}
}
 
Example #26
Source File: LegacySchedulerBatchSchedulingTest.java    From flink with Apache License 2.0
private void finishExecution(
		ExecutionAttemptID executionAttemptId,
		LegacyScheduler legacyScheduler,
		ComponentMainThreadExecutor mainThreadExecutor) {
	CompletableFuture.runAsync(
		() -> {
			legacyScheduler.updateTaskExecutionState(new TaskExecutionState(jobId, executionAttemptId, ExecutionState.RUNNING));
			legacyScheduler.updateTaskExecutionState(new TaskExecutionState(jobId, executionAttemptId, ExecutionState.FINISHED));
		},
		mainThreadExecutor
	).join();
}
 
Example #27
Source File: JobMaster.java    From flink with Apache License 2.0
/**
 * Updates the task execution state for a given task.
 *
 * @param taskExecutionState New task execution state for a given task
 * @return Acknowledge the task execution state update
 */
@Override
public CompletableFuture<Acknowledge> updateTaskExecutionState(
		final TaskExecutionState taskExecutionState) {
	checkNotNull(taskExecutionState, "taskExecutionState");

	if (schedulerNG.updateTaskExecutionState(taskExecutionState)) {
		return CompletableFuture.completedFuture(Acknowledge.get());
	} else {
		return FutureUtils.completedExceptionally(
			new ExecutionGraphException("The execution attempt " +
				taskExecutionState.getID() + " was not found."));
	}
}
 
Example #28
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0
@Test
public void restoreStateWhenRestartingTasks() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	enableCheckpointing(jobGraph);

	final CountDownLatch checkpointTriggeredLatch = getCheckpointTriggeredLatch();

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.RUNNING));

	final CheckpointCoordinator checkpointCoordinator = getCheckpointCoordinator(scheduler);

	// register a stateful master hook to help verify state restore
	final TestMasterHook masterHook = TestMasterHook.fromId("testHook");
	checkpointCoordinator.addMasterHook(masterHook);

	// complete one checkpoint for state restore
	checkpointCoordinator.triggerCheckpoint(false);
	checkpointTriggeredLatch.await();
	final long checkpointId = checkpointCoordinator.getPendingCheckpoints().keySet().iterator().next();
	acknowledgePendingCheckpoint(scheduler, checkpointId);

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));
	taskRestartExecutor.triggerScheduledTasks();
	assertThat(masterHook.getRestoreCount(), is(equalTo(1)));
}