Java Code Examples for org.apache.flink.runtime.execution.ExecutionState

The following examples show how to use org.apache.flink.runtime.execution.ExecutionState. They are extracted from open source projects; the source project, source file, and license are noted above each example.
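Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the class and method names are illustrative only) of how ExecutionState is typically consumed: it is an enum modelling a task's lifecycle, with values such as CREATED, SCHEDULED, DEPLOYING, RUNNING, FINISHED, CANCELING, CANCELED and FAILED, and its isTerminal() method reports whether a state can still change.

import org.apache.flink.runtime.execution.ExecutionState;

public class ExecutionStateSketch {

	// Returns true while the given state can still transition further;
	// terminal states such as FINISHED, CANCELED and FAILED never change again.
	public static boolean mayStillTransition(ExecutionState state) {
		return !state.isTerminal();
	}

	public static void main(String[] args) {
		// Print every state of the enum together with its terminal flag.
		for (ExecutionState state : ExecutionState.values()) {
			System.out.println(state + " -> terminal=" + state.isTerminal());
		}
	}
}

With flink-runtime on the classpath this sketch compiles and runs as-is; the examples below show the same enum used in real project code.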
Example 1
Source Project: flink   Source File: SubtaskExecutionAttemptDetailsInfo.java    License: Apache License 2.0
@JsonCreator
public SubtaskExecutionAttemptDetailsInfo(
		@JsonProperty(FIELD_NAME_SUBTASK_INDEX) int subtaskIndex,
		@JsonProperty(FIELD_NAME_STATUS) ExecutionState status,
		@JsonProperty(FIELD_NAME_ATTEMPT) int attempt,
		@JsonProperty(FIELD_NAME_HOST) String host,
		@JsonProperty(FIELD_NAME_START_TIME) long startTime,
		@JsonProperty(FIELD_NAME_END_TIME) long endTime,
		@JsonProperty(FIELD_NAME_DURATION) long duration,
		@JsonProperty(FIELD_NAME_METRICS) IOMetricsInfo ioMetricsInfo) {

	this.subtaskIndex = subtaskIndex;
	this.status = Preconditions.checkNotNull(status);
	this.attempt = attempt;
	this.host = Preconditions.checkNotNull(host);
	this.startTime = startTime;
	this.endTime = endTime;
	this.duration = duration;
	this.ioMetricsInfo = Preconditions.checkNotNull(ioMetricsInfo);
}
 
Example 2
Source Project: flink   Source File: ExecutionGraphTestUtils.java    License: Apache License 2.0
/**
 * Waits until the Execution has reached a certain state.
 *
 * <p>This method is based on polling and might miss very fast state transitions!
 */
public static void waitUntilExecutionState(Execution execution, ExecutionState state, long maxWaitMillis)
		throws TimeoutException {
	checkNotNull(execution);
	checkNotNull(state);
	checkArgument(maxWaitMillis >= 0);

	// this is a poor implementation - we may want to improve it eventually
	final long deadline = maxWaitMillis == 0 ? Long.MAX_VALUE : System.nanoTime() + (maxWaitMillis * 1_000_000);

	while (execution.getState() != state && System.nanoTime() < deadline) {
		try {
			Thread.sleep(2);
		} catch (InterruptedException ignored) {}
	}

	if (System.nanoTime() >= deadline) {
		throw new TimeoutException(
			String.format("The execution did not reach state %s in time. Current state is %s.",
				state, execution.getState()));
	}
}
 
Example 3
Source Project: Flink-CEPplus   Source File: ExecutionGraphTestUtils.java    License: Apache License 2.0
/**
 * Waits until the Execution has reached a certain state.
 *
 * <p>This method is based on polling and might miss very fast state transitions!
 */
public static void waitUntilExecutionState(Execution execution, ExecutionState state, long maxWaitMillis)
		throws TimeoutException {
	checkNotNull(execution);
	checkNotNull(state);
	checkArgument(maxWaitMillis >= 0);

	// this is a poor implementation - we may want to improve it eventually
	final long deadline = maxWaitMillis == 0 ? Long.MAX_VALUE : System.nanoTime() + (maxWaitMillis * 1_000_000);

	while (execution.getState() != state && System.nanoTime() < deadline) {
		try {
			Thread.sleep(2);
		} catch (InterruptedException ignored) {}
	}

	if (System.nanoTime() >= deadline) {
		throw new TimeoutException(
			String.format("The execution did not reach state %s in time. Current state is %s.",
				state, execution.getState()));
	}
}
 
Example 4
Source Project: flink   Source File: ExecutionVertexCancelTest.java    License: Apache License 2.0
@Test
public void testSendCancelAndReceiveFail() throws Exception {
	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph();

	graph.scheduleForExecution();
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);
	assertEquals(JobStatus.RUNNING, graph.getState());

	final ExecutionVertex[] vertices = graph.getVerticesTopologically().iterator().next().getTaskVertices();
	assertEquals(vertices.length, graph.getRegisteredExecutions().size());

	final Execution exec = vertices[3].getCurrentExecutionAttempt();
	exec.cancel();
	assertEquals(ExecutionState.CANCELING, exec.getState());

	exec.markFailed(new Exception("test"));
	assertTrue(exec.getState() == ExecutionState.FAILED || exec.getState() == ExecutionState.CANCELED);

	assertFalse(exec.getAssignedResource().isAlive());
	assertEquals(vertices.length - 1, exec.getVertex().getExecutionGraph().getRegisteredExecutions().size());
}
 
Example 5
@Override
protected JobVertexTaskManagersInfo getTestResponseInstance() throws Exception {
	final Random random = new Random();
	List<TaskManagersInfo> taskManagersInfoList = new ArrayList<>();

	final Map<ExecutionState, Integer> statusCounts = new HashMap<>(ExecutionState.values().length);
	final IOMetricsInfo jobVertexMetrics = new IOMetricsInfo(
		random.nextLong(),
		random.nextBoolean(),
		random.nextLong(),
		random.nextBoolean(),
		random.nextLong(),
		random.nextBoolean(),
		random.nextLong(),
		random.nextBoolean());
	int count = 100;
	for (ExecutionState executionState : ExecutionState.values()) {
		statusCounts.put(executionState, count++);
	}
	taskManagersInfoList.add(new TaskManagersInfo("host1", ExecutionState.CANCELING, 1L, 2L, 3L, jobVertexMetrics, statusCounts));

	return new JobVertexTaskManagersInfo(new JobVertexID(), "test", System.currentTimeMillis(), taskManagersInfoList);
}
 
Example 6
Source Project: flink   Source File: WebMonitorMessagesTest.java    License: Apache License 2.0
private Collection<JobDetails> randomJobDetails(Random rnd) {
	final JobDetails[] details = new JobDetails[rnd.nextInt(10)];
	for (int k = 0; k < details.length; k++) {
		int[] numVerticesPerState = new int[ExecutionState.values().length];
		int numTotal = 0;

		for (int i = 0; i < numVerticesPerState.length; i++) {
			int count = rnd.nextInt(55);
			numVerticesPerState[i] = count;
			numTotal += count;
		}

		long time = rnd.nextLong();
		long endTime = rnd.nextBoolean() ? -1L : time + rnd.nextInt();
		long lastModified = endTime == -1 ? time + rnd.nextInt() : endTime;

		String name = new GenericMessageTester.StringInstantiator().instantiate(rnd);
		JobID jid = new JobID();
		JobStatus status = JobStatus.values()[rnd.nextInt(JobStatus.values().length)];

		details[k] = new JobDetails(jid, name, time, endTime, endTime - time, status, lastModified, numVerticesPerState, numTotal);
	}
	return Arrays.asList(details);
}
 
Example 7
/**
 * Tests that finishing any input dataset starts the available downstream {@link ResultPartitionType#BLOCKING} vertices.
 * vertex#0    vertex#1
 *       \     /
 *        \   /
 *         \ /
 *  (BLOCKING, ANY)
 *     vertex#2
 */
@Test
public void testRestartBlockingANYExecutionStateChange() {
	final TestingSchedulingTopology testingSchedulingTopology = new TestingSchedulingTopology();

	final List<TestingSchedulingExecutionVertex> producers1 = testingSchedulingTopology.addExecutionVertices()
		.withParallelism(2).finish();
	final List<TestingSchedulingExecutionVertex> producers2 = testingSchedulingTopology.addExecutionVertices()
		.withParallelism(2).finish();
	final List<TestingSchedulingExecutionVertex> consumers = testingSchedulingTopology.addExecutionVertices()
		.withParallelism(2).finish();
	testingSchedulingTopology.connectPointwise(producers1, consumers).finish();
	testingSchedulingTopology.connectPointwise(producers2, consumers).finish();

	final LazyFromSourcesSchedulingStrategy schedulingStrategy = startScheduling(testingSchedulingTopology);

	for (TestingSchedulingExecutionVertex producer : producers1) {
		schedulingStrategy.onExecutionStateChange(producer.getId(), ExecutionState.FINISHED);
	}

	Set<ExecutionVertexID> verticesToRestart = consumers.stream().map(TestingSchedulingExecutionVertex::getId)
		.collect(Collectors.toSet());

	schedulingStrategy.restartTasks(verticesToRestart);
	assertLatestScheduledVerticesAreEqualTo(consumers);
}
 
Example 8
Source Project: flink   Source File: TaskTest.java    License: Apache License 2.0
@Test
public void testTerminationFutureCompletesOnNormalExecution() throws Exception {
	final Task task = createTaskBuilder()
		.setInvokable(InvokableBlockingWithTrigger.class)
		.setTaskManagerActions(new NoOpTaskManagerActions())
		.build();

	// run the task asynchronously
	task.startTaskThread();

	// wait until the task has entered invoke()
	awaitLatch.await();

	assertFalse(task.getTerminationFuture().isDone());

	triggerLatch.trigger();

	task.getExecutingThread().join();

	assertEquals(ExecutionState.FINISHED, task.getTerminationFuture().getNow(null));
}
 
Example 9
Source Project: flink   Source File: TaskTest.java    License: Apache License 2.0
@Test
public void testFailExternallyDuringInvoke() throws Exception {
	final QueuedNoOpTaskManagerActions taskManagerActions = new QueuedNoOpTaskManagerActions();
	final Task task = createTaskBuilder()
		.setInvokable(InvokableBlockingInInvoke.class)
		.setTaskManagerActions(taskManagerActions)
		.build();

	// run the task asynchronously
	task.startTaskThread();

	// wait until the task has entered invoke()
	awaitLatch.await();

	task.failExternally(new Exception("test"));

	task.getExecutingThread().join();

	assertEquals(ExecutionState.FAILED, task.getExecutionState());
	assertTrue(task.isCanceledOrFailed());
	assertTrue(task.getFailureCause().getMessage().contains("test"));

	taskManagerActions.validateListenerMessage(ExecutionState.RUNNING, task, null);
	taskManagerActions.validateListenerMessage(ExecutionState.FAILED, task, new Exception("test"));
}
 
Example 10
private ExecutionVertex mockExecutionVertex(
		ExecutionAttemptID executionId,
		ExecutionState state,
		boolean sendSuccess) {

	Execution exec = Mockito.mock(Execution.class);
	CompletableFuture<StackTraceSampleResponse> failedFuture = new CompletableFuture<>();
	failedFuture.completeExceptionally(new Exception("Send failed."));

	Mockito.when(exec.getAttemptId()).thenReturn(executionId);
	Mockito.when(exec.getState()).thenReturn(state);
	Mockito.when(exec.requestStackTraceSample(Matchers.anyInt(), Matchers.anyInt(), Matchers.any(Time.class), Matchers.anyInt(), Matchers.any(Time.class)))
		.thenReturn(
			sendSuccess ?
				CompletableFuture.completedFuture(Mockito.mock(StackTraceSampleResponse.class)) :
				failedFuture);

	ExecutionVertex vertex = Mockito.mock(ExecutionVertex.class);
	Mockito.when(vertex.getJobvertexId()).thenReturn(new JobVertexID());
	Mockito.when(vertex.getCurrentExecutionAttempt()).thenReturn(exec);

	return vertex;
}
 
Example 11
Source Project: Flink-CEPplus   Source File: TaskManagerTest.java    License: Apache License 2.0
@Override
public void handleMessage(Object message) throws Exception {
	if (message instanceof RequestPartitionProducerState) {
		getSender().tell(decorateMessage(ExecutionState.RUNNING), getSelf());
	}
	else if (message instanceof TaskMessages.UpdateTaskExecutionState) {
		final TaskExecutionState msg = ((TaskMessages.UpdateTaskExecutionState) message)
				.taskExecutionState();

		if (msg.getExecutionState().isTerminal()) {
			testActor.tell(msg, self());
		}
	} else {
		super.handleMessage(message);
	}
}
 
Example 12
Source Project: flink   Source File: ExecutionVertexSchedulingTest.java    License: Apache License 2.0
@Test
public void testScheduleToDeploying() {
	try {
		final ExecutionJobVertex ejv = getExecutionJobVertex(new JobVertexID());
		final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0],
				AkkaUtils.getDefaultTimeout());

		final LogicalSlot slot = new TestingLogicalSlotBuilder().createTestingLogicalSlot();

		CompletableFuture<LogicalSlot> future = CompletableFuture.completedFuture(slot);

		assertEquals(ExecutionState.CREATED, vertex.getExecutionState());

		// try to deploy to the slot
		vertex.scheduleForExecution(
			TestingSlotProviderStrategy.from(new TestingSlotProvider(ignore -> future)),
			LocationPreferenceConstraint.ALL,
			Collections.emptySet());
		assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 13
Source Project: flink   Source File: DefaultSchedulerTest.java    License: Apache License 2.0
@Test
public void handleGlobalFailure() {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	scheduler.handleGlobalFailure(new Exception("forced failure"));

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.CANCELED));

	taskRestartExecutor.triggerScheduledTasks();

	final List<ExecutionVertexID> deployedExecutionVertices = testExecutionVertexOperations.getDeployedVertices();
	final ExecutionVertexID executionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
	assertThat(deployedExecutionVertices, contains(executionVertexId, executionVertexId));
}
 
Example 14
Source Project: flink   Source File: TaskAsyncCallTest.java    License: Apache License 2.0
@Test
public void testCheckpointCallsInOrder() throws Exception {

	Task task = createTask(CheckpointsInOrderInvokable.class);
	try (TaskCleaner ignored = new TaskCleaner(task)) {
		task.startTaskThread();

		awaitLatch.await();

		for (int i = 1; i <= numCalls; i++) {
			task.triggerCheckpointBarrier(i, 156865867234L, CheckpointOptions.forCheckpointWithDefaultLocation(), false);
		}

		triggerLatch.await();

		assertFalse(task.isCanceledOrFailed());

		ExecutionState currentState = task.getExecutionState();
		assertThat(currentState, isOneOf(ExecutionState.RUNNING, ExecutionState.FINISHED));
	}
}
 
Example 15
Source Project: flink   Source File: ExecutionGraphSuspendTest.java    License: Apache License 2.0
/**
 * Going from DEPLOYING into SUSPENDED should cancel all vertices exactly once via RPC calls.
 */
@Test
public void testSuspendedOutOfDeploying() throws Exception {
	final int parallelism = 10;
	final InteractionsCountingTaskManagerGateway gateway = new InteractionsCountingTaskManagerGateway(parallelism);
	final ExecutionGraph eg = createExecutionGraph(gateway, parallelism);

	eg.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, eg.getState());
	validateAllVerticesInState(eg, ExecutionState.DEPLOYING);

	// suspend
	eg.suspend(new Exception("suspend"));

	assertEquals(JobStatus.SUSPENDED, eg.getState());
	validateCancelRpcCalls(gateway, parallelism);

	ensureCannotLeaveSuspendedState(eg, gateway);
}
 
Example 16
Source Project: flink   Source File: StackTraceSampleCoordinatorTest.java    License: Apache License 2.0
/** Tests cancelling of a pending sample. */
@Test
public void testCancelStackTraceSample() throws Exception {
	ExecutionVertex[] vertices = new ExecutionVertex[] {
			mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true),
	};

	CompletableFuture<StackTraceSample> sampleFuture = coord.triggerStackTraceSample(
			vertices, 1, Time.milliseconds(100L), 0);

	Assert.assertFalse(sampleFuture.isDone());

	// Cancel
	coord.cancelStackTraceSample(0, null);

	// Verify completed
	Assert.assertTrue(sampleFuture.isDone());

	// Verify no more pending samples
	Assert.assertEquals(0, coord.getNumberOfPendingSamples());
}
 
Example 17
Source Project: flink   Source File: SubtaskExecutionAttemptDetailsInfo.java    License: Apache License 2.0
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, MutableIOMetrics ioMetrics) {
	final ExecutionState status = execution.getState();
	final long now = System.currentTimeMillis();

	final TaskManagerLocation location = execution.getAssignedResourceLocation();
	final String locationString = location == null ? "(unassigned)" : location.getHostname();

	long startTime = execution.getStateTimestamp(ExecutionState.DEPLOYING);
	if (startTime == 0) {
		startTime = -1;
	}
	final long endTime = status.isTerminal() ? execution.getStateTimestamp(status) : -1;
	final long duration = startTime > 0 ? ((endTime > 0 ? endTime : now) - startTime) : -1;

	final IOMetricsInfo ioMetricsInfo = new IOMetricsInfo(
		ioMetrics.getNumBytesIn(),
		ioMetrics.isNumBytesInComplete(),
		ioMetrics.getNumBytesOut(),
		ioMetrics.isNumBytesOutComplete(),
		ioMetrics.getNumRecordsIn(),
		ioMetrics.isNumRecordsInComplete(),
		ioMetrics.getNumRecordsOut(),
		ioMetrics.isNumRecordsOutComplete());

	return new SubtaskExecutionAttemptDetailsInfo(
		execution.getParallelSubtaskIndex(),
		status,
		execution.getAttemptNumber(),
		locationString,
		startTime,
		endTime,
		duration,
		ioMetricsInfo
	);
}
 
Example 18
Source Project: flink   Source File: JobDetails.java    License: Apache License 2.0
@Override
public void serialize(
		JobDetails jobDetails,
		JsonGenerator jsonGenerator,
		SerializerProvider serializerProvider) throws IOException {
	jsonGenerator.writeStartObject();

	jsonGenerator.writeStringField(FIELD_NAME_JOB_ID, jobDetails.getJobId().toString());
	jsonGenerator.writeStringField(FIELD_NAME_JOB_NAME, jobDetails.getJobName());
	jsonGenerator.writeStringField(FIELD_NAME_STATUS, jobDetails.getStatus().name());

	jsonGenerator.writeNumberField(FIELD_NAME_START_TIME, jobDetails.getStartTime());
	jsonGenerator.writeNumberField(FIELD_NAME_END_TIME, jobDetails.getEndTime());
	jsonGenerator.writeNumberField(FIELD_NAME_DURATION, jobDetails.getDuration());
	jsonGenerator.writeNumberField(FIELD_NAME_LAST_MODIFICATION, jobDetails.getLastUpdateTime());

	jsonGenerator.writeObjectFieldStart("tasks");
	jsonGenerator.writeNumberField(FIELD_NAME_TOTAL_NUMBER_TASKS, jobDetails.getNumTasks());

	final int[] perState = jobDetails.getTasksPerState();

	for (ExecutionState executionState : ExecutionState.values()) {
		jsonGenerator.writeNumberField(executionState.name().toLowerCase(), perState[executionState.ordinal()]);
	}

	jsonGenerator.writeEndObject();

	jsonGenerator.writeEndObject();
}
 
Example 19
Source Project: Flink-CEPplus   Source File: TaskTest.java    License: Apache License 2.0
@Test
public void testCancelRightAway() throws Exception {
	final Task task = new TaskBuilder().build();
	task.cancelExecution();

	assertEquals(ExecutionState.CANCELING, task.getExecutionState());

	task.run();

	// verify final state
	assertEquals(ExecutionState.CANCELED, task.getExecutionState());

	assertNull(task.getInvokable());
}
 
Example 20
Source Project: flink   Source File: CheckpointCoordinatorTest.java    License: Apache License 2.0
private CheckpointCoordinator getCheckpointCoordinator() {
	final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID();
	final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID();
	ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1);
	JobVertexID jobVertexID2 = new JobVertexID();
	ExecutionVertex triggerVertex2 = mockExecutionVertex(
		triggerAttemptID2,
		jobVertexID2,
		Collections.singletonList(OperatorID.fromJobVertexID(jobVertexID2)),
		1,
		1,
		ExecutionState.FINISHED);

	// create some mock Execution vertices that need to ack the checkpoint
	final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
	final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
	ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
	ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);

	// set up the coordinator and validate the initial state
	return new CheckpointCoordinatorBuilder()
		.setTasksToTrigger(new ExecutionVertex[] { triggerVertex1, triggerVertex2 })
		.setTasksToWaitFor(new ExecutionVertex[] { ackVertex1, ackVertex2 })
		.setTasksToCommitTo(new ExecutionVertex[] {})
		.setTimer(manuallyTriggeredScheduledExecutor)
		.build();
}
 
Example 21
Source Project: flink   Source File: ExecutionGraphRestartTest.java    License: Apache License 2.0
@Test
public void failGlobalIfExecutionIsStillRunning_failingAnExecutionTwice_ShouldTriggerOnlyOneFailover() throws Exception {
	JobVertex sender = ExecutionGraphTestUtils.createJobVertex("Task1", 1, NoOpInvokable.class);
	JobVertex receiver = ExecutionGraphTestUtils.createJobVertex("Task2", 1, NoOpInvokable.class);
	JobGraph jobGraph = new JobGraph("Pointwise job", sender, receiver);

	try (SlotPool slotPool = createSlotPoolImpl()) {
		ExecutionGraph eg = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new TestRestartStrategy(1, false))
			.setJobGraph(jobGraph)
			.setNumberOfTasks(2)
			.buildAndScheduleForExecution(slotPool);

		Iterator<ExecutionVertex> executionVertices = eg.getAllExecutionVertices().iterator();

		Execution finishedExecution = executionVertices.next().getCurrentExecutionAttempt();
		Execution failedExecution = executionVertices.next().getCurrentExecutionAttempt();

		finishedExecution.markFinished();

		failedExecution.fail(new Exception("Test Exception"));
		failedExecution.completeCancelling();

		assertEquals(JobStatus.RUNNING, eg.getState());

		// At this point all resources have been assigned
		for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
			assertNotNull("No assigned resource (test instability).", vertex.getCurrentAssignedResource());
			vertex.getCurrentExecutionAttempt().switchToRunning();
		}

		// fail global with the old, finished execution; this should not affect the running executions
		eg.failGlobalIfExecutionIsStillRunning(new Exception("This should have no effect"), finishedExecution.getAttemptId());

		assertThat(eg.getState(), is(JobStatus.RUNNING));

		// the state of the finished execution should have not changed since it is terminal
		assertThat(finishedExecution.getState(), is(ExecutionState.FINISHED));
	}
}
 
Example 22
Source Project: Flink-CEPplus   Source File: WebMonitorUtils.java    License: Apache License 2.0
public static JobDetails createDetailsForJob(AccessExecutionGraph job) {
	JobStatus status = job.getState();

	long started = job.getStatusTimestamp(JobStatus.CREATED);
	long finished = status.isGloballyTerminalState() ? job.getStatusTimestamp(status) : -1L;
	long duration = (finished >= 0L ? finished : System.currentTimeMillis()) - started;

	int[] countsPerStatus = new int[ExecutionState.values().length];
	long lastChanged = 0;
	int numTotalTasks = 0;

	for (AccessExecutionJobVertex ejv : job.getVerticesTopologically()) {
		AccessExecutionVertex[] vertices = ejv.getTaskVertices();
		numTotalTasks += vertices.length;

		for (AccessExecutionVertex vertex : vertices) {
			ExecutionState state = vertex.getExecutionState();
			countsPerStatus[state.ordinal()]++;
			lastChanged = Math.max(lastChanged, vertex.getStateTimestamp(state));
		}
	}

	lastChanged = Math.max(lastChanged, finished);

	return new JobDetails(
		job.getJobID(),
		job.getJobName(),
		started,
		finished,
		duration,
		status,
		lastChanged,
		countsPerStatus,
		numTotalTasks);
}
 
Example 23
Source Project: flink   Source File: DefaultSchedulerTest.java    License: Apache License 2.0
@Test
public void failGlobalWhenRestoringStateFails() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	final JobVertex onlyJobVertex = getOnlyJobVertex(jobGraph);
	enableCheckpointing(jobGraph);

	final CountDownLatch checkpointTriggeredLatch = getCheckpointTriggeredLatch();

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	final ArchivedExecutionVertex onlyExecutionVertex = Iterables.getOnlyElement(scheduler.requestJob().getAllExecutionVertices());
	final ExecutionAttemptID attemptId = onlyExecutionVertex.getCurrentExecutionAttempt().getAttemptId();
	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.RUNNING));

	final CheckpointCoordinator checkpointCoordinator = getCheckpointCoordinator(scheduler);

	// register a master hook to fail state restore
	final TestMasterHook masterHook = TestMasterHook.fromId("testHook");
	masterHook.enableFailOnRestore();
	checkpointCoordinator.addMasterHook(masterHook);

	// complete one checkpoint for state restore
	checkpointCoordinator.triggerCheckpoint(false);
	checkpointTriggeredLatch.await();
	final long checkpointId = checkpointCoordinator.getPendingCheckpoints().keySet().iterator().next();
	acknowledgePendingCheckpoint(scheduler, checkpointId);

	scheduler.updateTaskExecutionState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED));
	taskRestartExecutor.triggerScheduledTasks();
	final List<ExecutionVertexID> deployedExecutionVertices = testExecutionVertexOperations.getDeployedVertices();

	// the first task failover should be skipped on state restore failure
	final ExecutionVertexID executionVertexId = new ExecutionVertexID(onlyJobVertex.getID(), 0);
	assertThat(deployedExecutionVertices, contains(executionVertexId));

	// a global failure should be triggered on state restore failure
	masterHook.disableFailOnRestore();
	taskRestartExecutor.triggerScheduledTasks();
	assertThat(deployedExecutionVertices, contains(executionVertexId, executionVertexId));
}
 
Example 24
Source Project: flink   Source File: SchedulerBase.java    License: Apache License 2.0
@Override
public ExecutionState requestPartitionState(
	final IntermediateDataSetID intermediateResultId,
	final ResultPartitionID resultPartitionId) throws PartitionProducerDisposedException {

	mainThreadExecutor.assertRunningInMainThread();

	final Execution execution = executionGraph.getRegisteredExecutions().get(resultPartitionId.getProducerId());
	if (execution != null) {
		return execution.getState();
	}
	else {
		final IntermediateResult intermediateResult =
			executionGraph.getAllIntermediateResults().get(intermediateResultId);

		if (intermediateResult != null) {
			// Try to find the producing execution
			Execution producerExecution = intermediateResult
				.getPartitionById(resultPartitionId.getPartitionId())
				.getProducer()
				.getCurrentExecutionAttempt();

			if (producerExecution.getAttemptId().equals(resultPartitionId.getProducerId())) {
				return producerExecution.getState();
			} else {
				throw new PartitionProducerDisposedException(resultPartitionId);
			}
		} else {
			throw new IllegalArgumentException("Intermediate data set with ID "
				+ intermediateResultId + " not found.");
		}
	}
}
 
Example 25
Source Project: flink   Source File: ExecutionVertexSchedulingTest.java    License: Apache License 2.0
@Test
public void testSlotReleasedWhenScheduledImmediately() {
	try {
		final ExecutionJobVertex ejv = getExecutionVertex(new JobVertexID());
		final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0],
				AkkaUtils.getDefaultTimeout());

		// a slot that cannot be deployed to
		final LogicalSlot slot = new TestingLogicalSlotBuilder().createTestingLogicalSlot();
		slot.releaseSlot(new Exception("Test Exception"));

		assertFalse(slot.isAlive());

		CompletableFuture<LogicalSlot> future = new CompletableFuture<>();
		future.complete(slot);

		assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
		// try to deploy to the slot
		vertex.scheduleForExecution(
			TestingSlotProviderStrategy.from(new TestingSlotProvider((i) -> future), false),
			LocationPreferenceConstraint.ALL,
			Collections.emptySet());

		// will have failed
		assertEquals(ExecutionState.FAILED, vertex.getExecutionState());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 26
Source Project: flink   Source File: LazyFromSourcesSchedulingStrategy.java    License: Apache License 2.0
@Override
public void onExecutionStateChange(ExecutionVertexID executionVertexId, ExecutionState executionState) {
	if (!FINISHED.equals(executionState)) {
		return;
	}

	final Set<SchedulingExecutionVertex> verticesToSchedule = schedulingTopology.getVertexOrThrow(executionVertexId)
		.getProducedResultPartitions()
		.stream()
		.flatMap(partition -> inputConstraintChecker.markSchedulingResultPartitionFinished(partition).stream())
		.flatMap(partition -> partition.getConsumers().stream())
		.collect(Collectors.toSet());

	allocateSlotsAndDeployExecutionVertices(verticesToSchedule);
}
 
Example 27
Source Project: flink   Source File: ExecutionGraph.java    License: Apache License 2.0
void notifyExecutionChange(
		final Execution execution,
		final ExecutionState newExecutionState,
		final Throwable error) {

	if (!isLegacyScheduling()) {
		return;
	}

	// see what this means for us. currently, the first FAILED state means -> FAILED
	if (newExecutionState == ExecutionState.FAILED) {
		final Throwable ex = error != null ? error : new FlinkException("Unknown Error (missing cause)");

		// by filtering out late failure calls, we can save some work in
		// avoiding redundant local failover
		if (execution.getGlobalModVersion() == globalModVersion) {
			try {
				// fail all checkpoints which the failed task has not yet acknowledged
				if (checkpointCoordinator != null) {
					checkpointCoordinator.failUnacknowledgedPendingCheckpointsFor(execution.getAttemptId(), ex);
				}

				failoverStrategy.onTaskFailure(execution, ex);
			}
			catch (Throwable t) {
				// bug in the failover strategy - fall back to global failover
				LOG.warn("Error in failover strategy - falling back to global restart", t);
				failGlobal(ex);
			}
		}
	}
}
 
Example 28
Source Project: Flink-CEPplus   Source File: SubtasksTimesHandler.java    License: Apache License 2.0
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
	final String id = jobVertex.getJobVertexId().toString();
	final String name = jobVertex.getName();
	final long now = System.currentTimeMillis();
	final List<SubtasksTimesInfo.SubtaskTimeInfo> subtasks = new ArrayList<>();

	int num = 0;
	for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {

		long[] timestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
		ExecutionState status = vertex.getExecutionState();

		long scheduledTime = timestamps[ExecutionState.SCHEDULED.ordinal()];

		long start = scheduledTime > 0 ? scheduledTime : -1;
		long end = status.isTerminal() ? timestamps[status.ordinal()] : now;
		long duration = start >= 0 ? end - start : -1L;

		TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
		String locationString = location == null ? "(unassigned)" : location.getHostname();

		Map<ExecutionState, Long> timestampMap = new HashMap<>(ExecutionState.values().length);
		for (ExecutionState state : ExecutionState.values()) {
			timestampMap.put(state, timestamps[state.ordinal()]);
		}

		subtasks.add(new SubtasksTimesInfo.SubtaskTimeInfo(
			num++,
			locationString,
			duration,
			timestampMap));
	}
	return new SubtasksTimesInfo(id, name, now, subtasks);
}
 
Example 29
Source Project: flink   Source File: Task.java    License: Apache License 2.0
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	try {
		cancelOrFailAndCancelInvokableInternal(targetState, cause);
	} catch (Throwable t) {
		if (ExceptionUtils.isJvmFatalOrOutOfMemoryError(t)) {
			String message = String.format("FATAL - exception in cancelling task %s (%s).", taskNameWithSubtask, executionId);
			notifyFatalError(message, t);
		} else {
			throw t;
		}
	}
}