org.apache.flink.runtime.jobgraph.JobStatus Java Examples

The following examples show how to use org.apache.flink.runtime.jobgraph.JobStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionGraphSuspendTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that suspending an execution graph which is already FAILED leaves it in FAILED.
 */
@Test
public void testSuspendedOutOfFailed() throws Exception {
	final int numVertices = 10;
	final InteractionsCountingTaskManagerGateway taskManagerGateway = new InteractionsCountingTaskManagerGateway();
	final ExecutionGraph graph = createExecutionGraph(taskManagerGateway, numVertices);

	graph.scheduleForExecution();
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// a global failure first moves the job into FAILING
	graph.failGlobal(new Exception("fail global"));
	assertEquals(JobStatus.FAILING, graph.getState());
	validateCancelRpcCalls(taskManagerGateway, numVertices);

	// once all cancellations are completed the job is FAILED
	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);
	assertEquals(JobStatus.FAILED, graph.getState());

	// suspending now must not change anything
	graph.suspend(new Exception("suspend"));
	assertEquals(JobStatus.FAILED, graph.getState());
	validateCancelRpcCalls(taskManagerGateway, numVertices);
}
 
Example #2
Source File: ZooKeeperCompletedCheckpointStore.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Shuts this store down according to the given job status.
 *
 * <p>For a globally terminal status, removal is attempted for every completed checkpoint
 * (discarding it on shutdown) and the ZooKeeper children are deleted. For any other status
 * only the local handles are cleared and the ZooKeeper locks are released.
 *
 * @param jobStatus status of the job at the time of the shutdown
 * @throws Exception propagated from the checkpoint removal or the ZooKeeper operations
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	if (!jobStatus.isGloballyTerminalState()) {
		LOG.info("Suspending");

		// Only forget the local handles; no state is removed
		completedCheckpoints.clear();

		// Give up the state handle locks in ZooKeeper so that the handles can be deleted
		checkpointsInZooKeeper.releaseAll();
		return;
	}

	LOG.info("Shutting down");

	for (CompletedCheckpoint completed : completedCheckpoints) {
		tryRemoveCompletedCheckpoint(
			completed,
			toDiscard -> toDiscard.discardOnShutdown(jobStatus));
	}

	completedCheckpoints.clear();
	checkpointsInZooKeeper.deleteChildren();
}
 
Example #3
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that a further failure arriving while the failover region is in CANCELLING
 * keeps the region in CANCELLING and the job in RUNNING.
 *
 * @throws Exception on any unexpected error during the test
 */
@Test
public void testFailWhileCancelling() throws Exception {
	RestartStrategy neverRestarting = new InfiniteDelayRestartStrategy();
	ExecutionGraph graph = createSingleRegionExecutionGraph(neverRestarting);
	RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	Iterator<ExecutionVertex> vertices = graph.getAllExecutionVertices().iterator();

	// the first vertex runs, so its region is RUNNING
	ExecutionVertex firstVertex = vertices.next();
	firstVertex.getCurrentExecutionAttempt().switchToRunning();
	assertEquals(JobStatus.RUNNING, failoverStrategy.getFailoverRegion(firstVertex).getState());

	// the first failure moves the region into CANCELLING
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.CANCELLING, failoverStrategy.getFailoverRegion(firstVertex).getState());

	// a second failure while cancelling changes neither the job nor the region state
	ExecutionVertex secondVertex = vertices.next();
	secondVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.RUNNING, graph.getState());
	assertEquals(JobStatus.CANCELLING, failoverStrategy.getFailoverRegion(firstVertex).getState());
}
 
Example #4
Source File: ExecutionGraph.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tries to atomically switch the job state from {@code current} to {@code newState}.
 * On success the switch is logged, the timestamp of the new state is recorded and the
 * job status change is notified.
 *
 * @param current the state the job is expected to be in
 * @param newState the state to switch to
 * @param error the error passed along to the log statement and the status notification
 * @return {@code true} if the state was switched, {@code false} if the compare-and-set failed
 * @throws IllegalStateException if {@code current} is a terminal state
 */
private boolean transitionState(JobStatus current, JobStatus newState, Throwable error) {
	assertRunningInJobMasterMainThread();

	// a terminal state must never be left again
	if (current.isTerminalState()) {
		final String terminalStateMessage = "Job is trying to leave terminal state " + current;
		LOG.error(terminalStateMessage);
		throw new IllegalStateException(terminalStateMessage);
	}

	// the actual transition only happens if the state still equals 'current'
	final boolean switched = STATE_UPDATER.compareAndSet(this, current, newState);
	if (!switched) {
		return false;
	}

	LOG.info("Job {} ({}) switched from state {} to {}.", getJobName(), getJobID(), current, newState, error);

	stateTimestamps[newState.ordinal()] = System.currentTimeMillis();
	notifyJobStatusChange(newState, error);
	return true;
}
 
Example #5
Source File: ExecutionGraphCheckpointCoordinatorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that suspending the execution graph shuts down the checkpoint coordinator
 * and hands {@link JobStatus#SUSPENDED} to the ID counter and the checkpoint store.
 */
@Test
public void testShutdownCheckpointCoordinatorOnSuspend() throws Exception {
	final CompletableFuture<JobStatus> counterShutdown = new CompletableFuture<>();
	final CompletableFuture<JobStatus> storeShutdown = new CompletableFuture<>();

	CheckpointIDCounter idCounter = new TestingCheckpointIDCounter(counterShutdown);
	CompletedCheckpointStore checkpointStore = new TestingCompletedCheckpointStore(storeShutdown);

	ExecutionGraph executionGraph = createExecutionGraphAndEnableCheckpointing(idCounter, checkpointStore);
	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

	// the coordinator exists and is still alive before the suspension
	assertThat(coordinator, Matchers.notNullValue());
	assertThat(coordinator.isShutdown(), is(false));

	executionGraph.suspend(new Exception("Test Exception"));

	// after the suspension everything is shut down with the SUSPENDED status
	assertThat(coordinator.isShutdown(), is(true));
	assertThat(counterShutdown.get(), is(JobStatus.SUSPENDED));
	assertThat(storeShutdown.get(), is(JobStatus.SUSPENDED));
}
 
Example #6
Source File: UpTimeGauge.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Reports the uptime derived from the execution graph's current state.
 *
 * @return the non-negative time since the RUNNING timestamp while the job is RUNNING,
 *         {@code NO_LONGER_RUNNING} once the job is in a terminal state, and {@code 0} otherwise
 */
@Override
public Long getValue() {
	final JobStatus currentStatus = eg.getState();

	if (currentStatus == JobStatus.RUNNING) {
		// currently running, so the uptime is the time elapsed since the RUNNING timestamp
		final long runningSince = eg.getStatusTimestamp(JobStatus.RUNNING);
		final long upTime = System.currentTimeMillis() - runningSince;
		// 'Math.max' keeps the value non-negative when clocks change
		return Math.max(upTime, 0);
	}

	if (currentStatus.isTerminalState()) {
		// not running any more -> finished or not on leader
		return NO_LONGER_RUNNING;
	}

	// not yet running or not up at the moment
	return 0L;
}
 
Example #7
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that a further failure can arrive while the failover region is restarting.
 *
 * @throws Exception on any unexpected error during the test
 */
@Test
public void testFailWhileRestarting() throws Exception {
	RestartStrategy neverRestarting = new InfiniteDelayRestartStrategy();
	ExecutionGraph graph = createSingleRegionExecutionGraph(neverRestarting);
	RestartPipelinedRegionStrategy failoverStrategy = (RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	Iterator<ExecutionVertex> vertices = graph.getAllExecutionVertices().iterator();
	ExecutionVertex firstVertex = vertices.next();
	assertEquals(JobStatus.RUNNING, failoverStrategy.getFailoverRegion(firstVertex).getState());

	// the first failure moves the region into CANCELLING
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.CANCELLING, failoverStrategy.getFailoverRegion(firstVertex).getState());

	// completing the cancellation of every vertex brings the region back to RUNNING
	for (ExecutionVertex vertex : graph.getAllExecutionVertices()) {
		vertex.getCurrentExecutionAttempt().completeCancelling();
	}
	assertEquals(JobStatus.RUNNING, failoverStrategy.getFailoverRegion(firstVertex).getState());

	// a new failure moves the region into CANCELLING once more
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.CANCELLING, failoverStrategy.getFailoverRegion(firstVertex).getState());
}
 
Example #8
Source File: JobDetailsTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a {@link JobDetails} instance survives a round trip through the
 * strict REST object mapper unchanged.
 */
@Test
public void testJobDetailsMarshalling() throws JsonProcessingException {
	final ObjectMapper mapper = RestMapperUtils.getStrictObjectMapper();

	final JobDetails original = new JobDetails(
		new JobID(),
		"foobar",
		1L,
		10L,
		9L,
		JobStatus.RUNNING,
		8L,
		new int[]{1, 3, 3, 7, 4, 2, 7, 3, 3},
		42);

	// marshal to a JSON tree and read it back
	final JsonNode jsonTree = mapper.valueToTree(original);
	final JobDetails roundTripped = mapper.treeToValue(jsonTree, JobDetails.class);

	assertEquals(original, roundTripped);
}
 
Example #9
Source File: WebMonitorMessagesTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the generic message tester over the web monitor status messages, including a
 * {@link JobIdsWithStatusOverview} holding jobs in several different states.
 *
 * @throws Exception if testing or instantiating one of the messages fails; the exception is
 *         propagated unchanged so that the test report contains its full stack trace
 */
@Test
public void testStatusMessages() throws Exception {
	final Random rnd = new Random();

	GenericMessageTester.testMessageInstance(RequestJobsOverview.getInstance());
	GenericMessageTester.testMessageInstance(RequestJobsWithIDsOverview.getInstance());
	GenericMessageTester.testMessageInstance(RequestStatusOverview.getInstance());
	GenericMessageTester.testMessageInstance(RequestJobsOverview.getInstance());

	GenericMessageTester.testMessageInstance(GenericMessageTester.instantiateGeneric(RequestJobDetails.class, rnd));
	GenericMessageTester.testMessageInstance(GenericMessageTester.instantiateGeneric(ClusterOverview.class, rnd));
	GenericMessageTester.testMessageInstance(GenericMessageTester.instantiateGeneric(JobsOverview.class, rnd));

	GenericMessageTester.testMessageInstance(new JobIdsWithStatusOverview(Arrays.asList(
		new JobIdsWithStatusOverview.JobIdWithStatus(JobID.generate(), JobStatus.RUNNING),
		new JobIdsWithStatusOverview.JobIdWithStatus(JobID.generate(), JobStatus.CANCELED),
		new JobIdsWithStatusOverview.JobIdWithStatus(JobID.generate(), JobStatus.CREATED),
		new JobIdsWithStatusOverview.JobIdWithStatus(JobID.generate(), JobStatus.FAILED),
		new JobIdsWithStatusOverview.JobIdWithStatus(JobID.generate(), JobStatus.RESTARTING))));
}
 
Example #10
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a cancelled graph is not restarted when its executions fail afterwards,
 * e.g. through a call to {@link ExecutionGraph#failGlobal(Throwable)}. Such a situation
 * can occur when a slot is released concurrently with the cancellation.
 */
@Test
public void testFailExecutionAfterCancel() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setJobGraph(createJobGraphToCancel())
			.setNumberOfTasks(2)
			.buildAndScheduleForExecution(pool);

		// cancel first, then fail every execution (as a concurrent slot release would)
		graph.cancel();

		for (ExecutionVertex vertex : graph.getAllExecutionVertices()) {
			vertex.getCurrentExecutionAttempt().fail(new Exception("Test Exception"));
		}

		assertEquals(JobStatus.CANCELED, graph.getTerminationFuture().get());

		// completing the cancellation afterwards must keep the job CANCELED
		Execution firstAttempt = graph.getAllExecutionVertices().iterator().next().getCurrentExecutionAttempt();
		firstAttempt.completeCancelling();

		assertEquals(JobStatus.CANCELED, graph.getState());
	}
}
 
Example #11
Source File: FailoverRegion.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Moves this region from RUNNING to CANCELLING and registers the follow-up action that runs
 * once the termination future over all connected vertices completes. If the region is not
 * RUNNING, this is only logged.
 *
 * @param globalModVersionOfFailover the global mod version handed on to
 *        {@code allVerticesInTerminalState}
 */
private void cancel(final long globalModVersionOfFailover) {
	executionGraph.getJobMasterMainThreadExecutor().assertRunningInMainThread();

	// retry until the transition succeeds or the region is observed in a non-RUNNING state
	while (true) {
		JobStatus observedStatus = this.state;

		if (!observedStatus.equals(JobStatus.RUNNING)) {
			LOG.info("FailoverRegion {} is {} when cancel.", id, state);
			return;
		}

		if (transitionState(observedStatus, JobStatus.CANCELLING)) {
			createTerminationFutureOverAllConnectedVertexes()
				.thenAccept((ignoredResult) -> allVerticesInTerminalState(globalModVersionOfFailover));
			return;
		}
	}
}
 
Example #12
Source File: JobDetailsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a {@link JobDetails} instance survives a round trip through the
 * strict REST object mapper unchanged.
 */
@Test
public void testJobDetailsMarshalling() throws JsonProcessingException {
	final ObjectMapper mapper = RestMapperUtils.getStrictObjectMapper();

	final JobDetails original = new JobDetails(
		new JobID(),
		"foobar",
		1L,
		10L,
		9L,
		JobStatus.RUNNING,
		8L,
		new int[]{1, 3, 3, 7, 4, 2, 7, 3, 3},
		42);

	// marshal to a JSON tree and read it back
	final JsonNode jsonTree = mapper.valueToTree(original);
	final JobDetails roundTripped = mapper.treeToValue(jsonTree, JobDetails.class);

	assertEquals(original, roundTripped);
}
 
Example #13
Source File: ExecutionVertexInputConstraintTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Waits until all executions are CANCELING, CANCELED, FAILED or FINISHED, completes the
 * cancellation of those still CANCELING, and then waits for the job to be RUNNING.
 *
 * @param eg the execution graph to wait for
 * @throws Exception propagated from the waiting helpers
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
	// step 1: every execution has to be cancelling or in one of the listed end states
	waitForAllExecutionsPredicate(eg,
		isInExecutionState(ExecutionState.CANCELING)
			.or(isInExecutionState(ExecutionState.CANCELED))
			.or(isInExecutionState(ExecutionState.FAILED))
			.or(isInExecutionState(ExecutionState.FINISHED)),
		2000L);

	// step 2: finish the cancellation of the executions that are still CANCELING
	for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
		if (ExecutionState.CANCELING == vertex.getCurrentExecutionAttempt().getState()) {
			vertex.getCurrentExecutionAttempt().completeCancelling();
		}
	}

	// step 3: wait for the job status RUNNING
	waitUntilJobStatus(eg, JobStatus.RUNNING, 2000L);
}
 
Example #14
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that a SUSPENDED execution graph stays SUSPENDED, without any gateway interaction,
 * when it is failed globally, cancelled or suspended again, and that every current execution
 * attempt still has attempt number 0.
 *
 * @param eg the suspended execution graph
 * @param gateway the gateway counting the interactions
 */
private static void ensureCannotLeaveSuspendedState(ExecutionGraph eg, InteractionsCountingTaskManagerGateway gateway) {
	gateway.waitUntilAllTasksAreSubmitted();
	assertEquals(JobStatus.SUSPENDED, eg.getState());
	gateway.resetCounts();

	// global failure
	eg.failGlobal(new Exception("fail"));
	assertEquals(JobStatus.SUSPENDED, eg.getState());
	validateNoInteractions(gateway);

	// cancellation
	eg.cancel();
	assertEquals(JobStatus.SUSPENDED, eg.getState());
	validateNoInteractions(gateway);

	// repeated suspension
	eg.suspend(new Exception("suspend again"));
	assertEquals(JobStatus.SUSPENDED, eg.getState());
	validateNoInteractions(gateway);

	eg.getAllExecutionVertices().forEach(
		vertex -> assertEquals(0, vertex.getCurrentExecutionAttempt().getAttemptNumber()));
}
 
Example #15
Source File: DispatcherTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that we wait until the JobMaster has gained leadership before sending requests
 * to it. See FLINK-8887.
 */
@Test
public void testWaitingForJobMasterLeadership() throws Exception {
	dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(TEST_JOB_ID, createdJobManagerRunnerLatch));

	final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

	// grant leadership to the dispatcher only and submit the job
	dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get();
	gateway.submitJob(jobGraph, TIMEOUT).get();

	final CompletableFuture<JobStatus> statusFuture = gateway.requestJobStatus(jobGraph.getJobID(), TIMEOUT);

	// the status request must stay pending for now
	assertThat(statusFuture.isDone(), is(false));

	try {
		statusFuture.get(10, TimeUnit.MILLISECONDS);
		fail("Should not complete.");
	} catch (TimeoutException ignored) {
		// the timeout is the expected outcome
	}

	// after the JobMaster gained leadership the request completes
	jobMasterLeaderElectionService.isLeader(UUID.randomUUID()).get();

	assertThat(statusFuture.get(), notNullValue());
}
 
Example #16
Source File: ZooKeeperCompletedCheckpointStoreTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that shutting the completed checkpoint store down with a globally terminal
 * job status discards the stored checkpoints.
 */
@Test
public void testDiscardingCheckpointsAtShutDown() throws Exception {
	final SharedStateRegistry registry = new SharedStateRegistry();

	final Configuration config = new Configuration();
	config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());

	final CuratorFramework curator = ZooKeeperUtils.startCuratorFramework(config);
	final ZooKeeperCompletedCheckpointStore store = createZooKeeperCheckpointStore(curator);

	try {
		final CompletedCheckpointStoreTest.TestCompletedCheckpoint checkpoint = CompletedCheckpointStoreTest.createCheckpoint(0, registry);

		store.addCheckpoint(checkpoint);
		assertThat(store.getAllCheckpoints(), Matchers.contains(checkpoint));

		store.shutdown(JobStatus.FINISHED);

		// the checkpoint must have been discarded by the shutdown
		CompletedCheckpointStoreTest.verifyCheckpointDiscarded(checkpoint);
	} finally {
		curator.close();
	}
}
 
Example #17
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Suspending a graph whose vertices are DEPLOYING should move it to SUSPENDED and
 * cancel all vertices once with RPC calls.
 */
@Test
public void testSuspendedOutOfDeploying() throws Exception {
	final int numVertices = 10;
	final InteractionsCountingTaskManagerGateway taskManagerGateway = new InteractionsCountingTaskManagerGateway(numVertices);
	final ExecutionGraph graph = createExecutionGraph(taskManagerGateway, numVertices);

	// after scheduling, the job is RUNNING while all vertices are DEPLOYING
	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());
	validateAllVerticesInState(graph, ExecutionState.DEPLOYING);

	graph.suspend(new Exception("suspend"));

	assertEquals(JobStatus.SUSPENDED, graph.getState());
	validateCancelRpcCalls(taskManagerGateway, numVertices);

	ensureCannotLeaveSuspendedState(graph, taskManagerGateway);
}
 
Example #18
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a scheduleOrUpdateConsumers call which fails because of a non-existing
 * execution attempt id does not fail the execution graph.
 */
@Test
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	assertEquals(JobStatus.RUNNING, graph.getState());
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// build a result partition id around a freshly created execution attempt id
	IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
	ExecutionAttemptID unknownProducerId = new ExecutionAttemptID();
	ResultPartitionID unknownPartition = new ResultPartitionID(partitionId, unknownProducerId);

	// The execution attempt id does not exist, so the call is expected to fail
	try {
		graph.scheduleOrUpdateConsumers(unknownPartition);
		fail("Expected ExecutionGraphException.");
	} catch (ExecutionGraphException expected) {
		// this is the exception the test is waiting for
	}

	// the graph itself keeps running
	assertEquals(JobStatus.RUNNING, graph.getState());
}
 
Example #19
Source File: ExecutionGraphCheckpointCoordinatorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that failing the execution graph shuts down the checkpoint coordinator
 * and hands {@link JobStatus#FAILED} to the ID counter and the checkpoint store.
 */
@Test
public void testShutdownCheckpointCoordinatorOnFailure() throws Exception {
	final CompletableFuture<JobStatus> counterShutdown = new CompletableFuture<>();
	final CompletableFuture<JobStatus> storeShutdown = new CompletableFuture<>();

	CheckpointIDCounter idCounter = new TestingCheckpointIDCounter(counterShutdown);
	CompletedCheckpointStore checkpointStore = new TestingCompletedCheckpointStore(storeShutdown);

	ExecutionGraph executionGraph = createExecutionGraphAndEnableCheckpointing(idCounter, checkpointStore);
	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

	// the coordinator exists and is still alive before the failure
	assertThat(coordinator, Matchers.notNullValue());
	assertThat(coordinator.isShutdown(), is(false));

	executionGraph.failGlobal(new Exception("Test Exception"));

	// after the failure everything is shut down with the FAILED status
	assertThat(coordinator.isShutdown(), is(true));
	assertThat(counterShutdown.get(), is(JobStatus.FAILED));
	assertThat(storeShutdown.get(), is(JobStatus.FAILED));
}
 
Example #20
Source File: ExecutionGraphTestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the given failover region until it is in the expected state or the wait time is over.
 *
 * @param region the failover region to observe, must not be null
 * @param status the state to wait for, must not be null
 * @param maxWaitMillis the maximum wait time in milliseconds, must not be negative;
 *        {@code 0} waits without a time limit
 * @throws TimeoutException if the wait time is over and the region has not been observed
 *         in the expected state
 */
public static void waitUntilFailoverRegionState(FailoverRegion region, JobStatus status, long maxWaitMillis)
		throws TimeoutException {
	checkNotNull(region);
	checkNotNull(status);
	checkArgument(maxWaitMillis >= 0);

	// this is a poor implementation - we may want to improve it eventually
	final long deadline = maxWaitMillis == 0 ? Long.MAX_VALUE : System.nanoTime() + (maxWaitMillis * 1_000_000);

	while (region.getState() != status) {
		// Time out only while the state is still not reached. Checking the clock after the
		// loop would also throw when the state was reached just as the deadline passed.
		if (System.nanoTime() >= deadline) {
			throw new TimeoutException();
		}

		try {
			Thread.sleep(2);
		} catch (InterruptedException ignored) {
			// interrupts are deliberately ignored; polling continues until state or deadline
		}
	}
}
 
Example #21
Source File: ExecutionGraphSuspendTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Suspending a graph whose vertices are DEPLOYING should move it to SUSPENDED and
 * cancel all vertices once with RPC calls.
 */
@Test
public void testSuspendedOutOfDeploying() throws Exception {
	final int numVertices = 10;
	final InteractionsCountingTaskManagerGateway taskManagerGateway = new InteractionsCountingTaskManagerGateway(numVertices);
	final ExecutionGraph graph = createExecutionGraph(taskManagerGateway, numVertices);

	// after scheduling, the job is RUNNING while all vertices are DEPLOYING
	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());
	validateAllVerticesInState(graph, ExecutionState.DEPLOYING);

	graph.suspend(new Exception("suspend"));

	assertEquals(JobStatus.SUSPENDED, graph.getState());
	validateCancelRpcCalls(taskManagerGateway, numVertices);

	ensureCannotLeaveSuspendedState(graph, taskManagerGateway);
}
 
Example #22
Source File: DispatcherTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that we wait until the JobMaster has gained leadership before sending requests
 * to it. See FLINK-8887.
 */
@Test
public void testWaitingForJobMasterLeadership() throws Exception {
	dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(TEST_JOB_ID, createdJobManagerRunnerLatch));

	final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

	// grant leadership to the dispatcher only and submit the job
	dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get();
	gateway.submitJob(jobGraph, TIMEOUT).get();

	final CompletableFuture<JobStatus> statusFuture = gateway.requestJobStatus(jobGraph.getJobID(), TIMEOUT);

	// the status request must stay pending for now
	assertThat(statusFuture.isDone(), is(false));

	try {
		statusFuture.get(10, TimeUnit.MILLISECONDS);
		fail("Should not complete.");
	} catch (TimeoutException ignored) {
		// the timeout is the expected outcome
	}

	// after the JobMaster gained leadership the request completes
	jobMasterLeaderElectionService.isLeader(UUID.randomUUID()).get();

	assertThat(statusFuture.get(), notNullValue());
}
 
Example #23
Source File: AbstractOperatorRestoreTestBase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Submits a job that restores from the given savepoint and asserts that it reaches
 * {@link JobStatus#FINISHED}.
 *
 * @param classLoader class loader passed to the job submission
 * @param clusterClient client used to submit the job and to query its status
 * @param deadline deadline bounding the status polling and the final wait
 * @param savepointPath path of the savepoint to restore from
 * @throws Exception propagated from the submission or from waiting on the job status
 */
private void restoreJob(ClassLoader classLoader, ClusterClient<?> clusterClient, Deadline deadline, String savepointPath) throws Exception {
	JobGraph restoringJob = createJobGraph(ExecutionMode.RESTORE);
	restoringJob.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, allowNonRestoredState));

	assertNotNull("Job doesn't have a JobID.", restoringJob.getJobID());

	clusterClient.submitJob(restoringJob, classLoader);

	// query the job status every 50 ms until FINISHED is reported or the deadline is hit
	CompletableFuture<JobStatus> finishedFuture = FutureUtils.retrySuccessfulWithDelay(
		() -> clusterClient.getJobStatus(restoringJob.getJobID()),
		Time.milliseconds(50),
		deadline,
		(polledStatus) -> polledStatus == JobStatus.FINISHED,
		TestingUtils.defaultScheduledExecutor());

	JobStatus finalStatus = finishedFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	assertEquals(JobStatus.FINISHED, finalStatus);
}
 
Example #24
Source File: DispatcherResourceCleanupTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that the {@link RunningJobsRegistry} entries are cleared after the
 * job reached a terminal state.
 */
@Test
public void testRunningJobsRegistryCleanup() throws Exception {
	submitJob();

	// mark the job as running and make sure the registry knows about it
	runningJobsRegistry.setJobRunning(jobId);
	assertThat(runningJobsRegistry.contains(jobId), is(true));

	// complete the job with a FINISHED execution graph and complete the termination future
	resultFuture.complete(new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).setJobID(jobId).build());
	terminationFuture.complete(null);

	// wait for the clearing
	clearedJobLatch.await();

	// the registry entry has to be gone now
	assertThat(runningJobsRegistry.contains(jobId), is(false));
}
 
Example #25
Source File: DispatcherResourceCleanupTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that the {@link RunningJobsRegistry} entries are cleared after the
 * job reached a terminal state.
 */
@Test
public void testRunningJobsRegistryCleanup() throws Exception {
	submitJob();

	// mark the job as running and make sure the registry knows about it
	runningJobsRegistry.setJobRunning(jobId);
	assertThat(runningJobsRegistry.contains(jobId), is(true));

	// complete the job with a FINISHED execution graph and complete the termination future
	resultFuture.complete(new ArchivedExecutionGraphBuilder().setState(JobStatus.FINISHED).setJobID(jobId).build());
	terminationFuture.complete(null);

	// wait for the clearing
	clearedJobLatch.await();

	// the registry entry has to be gone now
	assertThat(runningJobsRegistry.contains(jobId), is(false));
}
 
Example #26
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Suspending a graph whose vertices are RUNNING should move it to SUSPENDED and
 * cancel all vertices once with RPC calls.
 */
@Test
public void testSuspendedOutOfRunning() throws Exception {
	final int numVertices = 10;
	final InteractionsCountingTaskManagerGateway taskManagerGateway = new InteractionsCountingTaskManagerGateway(numVertices);
	final ExecutionGraph graph = createExecutionGraph(taskManagerGateway, numVertices);

	graph.scheduleForExecution();
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// both the job and all of its vertices are RUNNING before the suspension
	assertEquals(JobStatus.RUNNING, graph.getState());
	validateAllVerticesInState(graph, ExecutionState.RUNNING);

	graph.suspend(new Exception("suspend"));

	assertEquals(JobStatus.SUSPENDED, graph.getState());
	validateCancelRpcCalls(taskManagerGateway, numVertices);

	ensureCannotLeaveSuspendedState(graph, taskManagerGateway);
}
 
Example #27
Source File: JobDetails.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the details of a job.
 *
 * @param jobId the ID of the job, must not be null
 * @param jobName the name of the job, must not be null
 * @param startTime the start time of the job
 * @param endTime the end time of the job
 * @param duration the duration of the job
 * @param status the status of the job, must not be null
 * @param lastUpdateTime the time of the last update
 * @param tasksPerState the task counts, must not be null and must hold exactly one entry
 *        per {@link ExecutionState} value
 * @param numTasks the number of tasks
 * @throws NullPointerException if one of the non-null arguments is null
 * @throws IllegalArgumentException if {@code tasksPerState} has the wrong length
 */
public JobDetails(
		JobID jobId,
		String jobName,
		long startTime,
		long endTime,
		long duration,
		JobStatus status,
		long lastUpdateTime,
		int[] tasksPerState,
		int numTasks) {

	this.jobId = checkNotNull(jobId);
	this.jobName = checkNotNull(jobName);
	this.startTime = startTime;
	this.endTime = endTime;
	this.duration = duration;
	this.status = checkNotNull(status);
	this.lastUpdateTime = lastUpdateTime;
	// check for null before the length is read, otherwise the null check can never trigger
	this.tasksPerState = checkNotNull(tasksPerState);
	Preconditions.checkArgument(tasksPerState.length == ExecutionState.values().length,
		"tasksPerState argument must be of size %s.", ExecutionState.values().length);
	this.numTasks = numTasks;
}
 
Example #28
Source File: CheckpointIDCounterTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that consecutive {@code getAndIncrement} calls on a started counter
 * return 1, 2, 3 and 4.
 */
@Test
public void testSerialIncrementAndGet() throws Exception {
	final CheckpointIDCounter idCounter = createCompletedCheckpoints();

	try {
		idCounter.start();

		assertEquals(1, idCounter.getAndIncrement());
		assertEquals(2, idCounter.getAndIncrement());
		assertEquals(3, idCounter.getAndIncrement());
		assertEquals(4, idCounter.getAndIncrement());
	} finally {
		// always shut the counter down, even when an assertion failed
		idCounter.shutdown(JobStatus.FINISHED);
	}
}
 
Example #29
Source File: ExecutionGraphCheckpointCoordinatorTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that failing the execution graph shuts down the checkpoint coordinator
 * and hands {@link JobStatus#FAILED} to the ID counter and the checkpoint store.
 */
@Test
public void testShutdownCheckpointCoordinatorOnFailure() throws Exception {
	final CompletableFuture<JobStatus> counterShutdown = new CompletableFuture<>();
	final CompletableFuture<JobStatus> storeShutdown = new CompletableFuture<>();

	CheckpointIDCounter idCounter = new TestingCheckpointIDCounter(counterShutdown);
	CompletedCheckpointStore checkpointStore = new TestingCompletedCheckpointStore(storeShutdown);

	ExecutionGraph executionGraph = createExecutionGraphAndEnableCheckpointing(idCounter, checkpointStore);
	final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

	// the coordinator exists and is still alive before the failure
	assertThat(coordinator, Matchers.notNullValue());
	assertThat(coordinator.isShutdown(), is(false));

	executionGraph.failGlobal(new Exception("Test Exception"));

	// after the failure everything is shut down with the FAILED status
	assertThat(coordinator.isShutdown(), is(true));
	assertThat(counterShutdown.get(), is(JobStatus.FAILED));
	assertThat(storeShutdown.get(), is(JobStatus.FAILED));
}
 
Example #30
Source File: ExecutionGraphRestartTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a simple execution graph on top of a scheduler with a single instance, schedules
 * it for execution and asserts the CREATED and RUNNING states along the way.
 *
 * @param restartStrategy the restart strategy of the created graph
 * @return the scheduled execution graph together with the instance it was given
 * @throws Exception propagated from the graph creation or scheduling
 */
private Tuple2<ExecutionGraph, Instance> createExecutionGraph(RestartStrategy restartStrategy) throws Exception {
	Instance taskManagerInstance = ExecutionGraphTestUtils.getInstance(
		new ActorTaskManagerGateway(
			new SimpleActorGateway(TestingUtils.directExecutionContext())),
		NUM_TASKS);

	Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
	slotScheduler.newInstanceAvailable(taskManagerInstance);

	ExecutionGraph graph = createSimpleExecutionGraph(restartStrategy, slotScheduler);

	// freshly created graphs are CREATED, scheduling moves them to RUNNING
	assertEquals(JobStatus.CREATED, graph.getState());

	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());

	return new Tuple2<>(graph, taskManagerInstance);
}