org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy Java Examples

The following examples show how to use org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link ExecutionGraph#failGlobal(Throwable)} still takes effect on a graph
 * that is already CANCELLING: the graph switches to FAILING and, once an execution has
 * completed its cancellation, is expected to be RESTARTING.
 */
@Test
public void testFailExecutionGraphAfterCancel() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setJobGraph(createJobGraphToCancel())
			.setNumberOfTasks(2)
			.buildAndScheduleForExecution(pool);

		// Cancel first; the failure below follows immediately (think of a concurrent slot release).
		graph.cancel();
		final JobStatus stateAfterCancel = graph.getState();
		assertEquals(JobStatus.CANCELLING, stateAfterCancel);

		// A global failure on the cancelling graph must move it to FAILING.
		graph.failGlobal(new Exception("Test Exception"));
		final JobStatus stateAfterFailure = graph.getState();
		assertEquals(JobStatus.FAILING, stateAfterFailure);

		// Finish cancellation of the first vertex's current attempt and check the outcome.
		final Execution firstAttempt = graph
			.getAllExecutionVertices()
			.iterator()
			.next()
			.getCurrentExecutionAttempt();
		firstAttempt.completeCancelling();

		assertEquals(JobStatus.RESTARTING, graph.getState());
	}
}
 
Example #2
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that scheduleOrUpdateConsumers rejects a result partition whose producer
 * execution attempt id is unknown to the graph by throwing an
 * {@link ExecutionGraphException}, and that the graph stays RUNNING afterwards.
 */
@Test
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus stateAfterScheduling = graph.getState();
	assertEquals(JobStatus.RUNNING, stateAfterScheduling);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// Both ids are freshly created, so the graph has no execution attempt matching them.
	final ResultPartitionID unknownPartition =
		new ResultPartitionID(new IntermediateResultPartitionID(), new ExecutionAttemptID());

	try {
		graph.scheduleOrUpdateConsumers(unknownPartition);
		fail("Expected ExecutionGraphException.");
	} catch (ExecutionGraphException expected) {
		// this is the outcome the test is looking for
	}

	// The rejected call must not have changed the job status.
	assertEquals(JobStatus.RUNNING, graph.getState());
}
 
Example #3
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a global failure carrying a {@link SuppressRestartsException}, raised while
 * the execution graph is RESTARTING, puts the graph into FAILED.
 */
@Test
public void testSuppressRestartFailureWhileRestarting() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus initialState = graph.getState();
	assertEquals(JobStatus.RUNNING, initialState);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// Drive the graph into RESTARTING: fail globally, then let every vertex finish cancelling.
	graph.failGlobal(new Exception("test"));
	final JobStatus stateAfterFailure = graph.getState();
	assertEquals(JobStatus.FAILING, stateAfterFailure);

	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);
	final JobStatus stateAfterCancellation = graph.getState();
	assertEquals(JobStatus.RESTARTING, stateAfterCancellation);

	// A failure wrapped in SuppressRestartsException is expected to end in FAILED.
	final SuppressRestartsException suppressingFailure =
		new SuppressRestartsException(new Exception("Test"));
	graph.failGlobal(suppressingFailure);

	assertEquals(JobStatus.FAILED, graph.getState());
}
 
Example #4
Source File: GlobalModVersionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds and starts an execution graph consisting of a single {@link NoOpInvokable} job
 * vertex whose parallelism is picked at random from 1 to 10.
 *
 * @param failoverStrategy the failover strategy handed to the graph through a {@code CustomStrategy} factory
 * @return the started execution graph
 * @throws Exception if building the execution graph fails
 */
private ExecutionGraph createSampleGraph(FailoverStrategy failoverStrategy) throws Exception {
	final JobID jobId = new JobID();
	final int parallelism = 1 + new Random().nextInt(10);

	final JobVertex vertex = new JobVertex("test vertex");
	vertex.setInvokableClass(NoOpInvokable.class);
	vertex.setParallelism(parallelism);

	final JobGraph jobGraph = new JobGraph(jobId, "testjob", vertex);

	// the slot provider is created with as many slots as the vertex has subtasks
	final SimpleSlotProvider slots = new SimpleSlotProvider(parallelism);

	// one job vertex, randomly chosen parallelism, restart strategy with an infinite delay
	final ExecutionGraph executionGraph = TestingExecutionGraphBuilder
		.newBuilder()
		.setJobGraph(jobGraph)
		.setRestartStrategy(new InfiniteDelayRestartStrategy())
		.setFailoverStrategyFactory(new CustomStrategy(failoverStrategy))
		.setSlotProvider(slots)
		.build();

	executionGraph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	return executionGraph;
}
 
Example #5
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a graph in state CANCELLING can still be failed through
 * {@link ExecutionGraph#failGlobal(Throwable)}: it moves to FAILING and is expected to be
 * RESTARTING after an execution has completed its cancellation.
 */
@Test
public void testFailExecutionGraphAfterCancel() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph graph = TestingExecutionGraphBuilder
			.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setJobGraph(createJobGraphToCancel())
			.setSlotProvider(createSchedulerWithSlots(pool, new LocalTaskManagerLocation(), 2))
			.build();

		startAndScheduleExecutionGraph(graph);

		// Cancel, then fail straight away (as a concurrent slot release would).
		graph.cancel();
		final JobStatus stateAfterCancel = graph.getState();
		assertEquals(JobStatus.CANCELLING, stateAfterCancel);

		graph.failGlobal(new Exception("Test Exception"));
		final JobStatus stateAfterFailure = graph.getState();
		assertEquals(JobStatus.FAILING, stateAfterFailure);

		// Complete the cancellation of the first vertex's current attempt.
		final Execution firstAttempt = graph
			.getAllExecutionVertices()
			.iterator()
			.next()
			.getCurrentExecutionAttempt();
		firstAttempt.completeCancelling();

		assertEquals(JobStatus.RESTARTING, graph.getState());
	}
}
 
Example #6
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that suspending an execution graph in state RESTARTING moves it to SUSPENDED
 * and records the suspension cause as the graph's failure cause.
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus initialState = graph.getState();
	assertEquals(JobStatus.RUNNING, initialState);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// Reach RESTARTING: global failure first, then all vertices finish cancelling.
	graph.failGlobal(new Exception("test"));
	final JobStatus stateAfterFailure = graph.getState();
	assertEquals(JobStatus.FAILING, stateAfterFailure);

	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);
	final JobStatus stateAfterCancellation = graph.getState();
	assertEquals(JobStatus.RESTARTING, stateAfterCancellation);

	// Suspend while the restart is pending.
	final Exception suspensionCause = new Exception("Suspended");
	graph.suspend(suspensionCause);

	assertEquals(JobStatus.SUSPENDED, graph.getState());
	assertEquals(suspensionCause, graph.getFailureCause());
}
 
Example #7
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a scheduleOrUpdateConsumers call for a producer execution attempt id the
 * graph does not know throws an {@link ExecutionGraphException} while the execution graph
 * itself remains RUNNING.
 */
@Test
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus stateAfterScheduling = graph.getState();
	assertEquals(JobStatus.RUNNING, stateAfterScheduling);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// Newly created ids cannot match any execution attempt of the graph.
	final IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
	final ExecutionAttemptID unknownProducer = new ExecutionAttemptID();
	final ResultPartitionID unknownPartition = new ResultPartitionID(partitionId, unknownProducer);

	try {
		graph.scheduleOrUpdateConsumers(unknownPartition);
		fail("Expected ExecutionGraphException.");
	} catch (ExecutionGraphException expected) {
		// the call is supposed to be rejected
	}

	final JobStatus stateAfterRejectedCall = graph.getState();
	assertEquals(JobStatus.RUNNING, stateAfterRejectedCall);
}
 
Example #8
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that failing the graph with a {@link SuppressRestartsException} while it is in
 * state RESTARTING results in the state FAILED.
 */
@Test
public void testSuppressRestartFailureWhileRestarting() throws Exception {
	final InfiniteDelayRestartStrategy restartStrategy = new InfiniteDelayRestartStrategy(10);
	final ExecutionGraph executionGraph = ExecutionGraphTestUtils.createSimpleTestGraph(restartStrategy);
	executionGraph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	executionGraph.scheduleForExecution();

	assertEquals(JobStatus.RUNNING, executionGraph.getState());
	ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

	// First failure: FAILING until all vertices are done cancelling, then RESTARTING.
	final Exception firstFailure = new Exception("test");
	executionGraph.failGlobal(firstFailure);
	assertEquals(JobStatus.FAILING, executionGraph.getState());

	ExecutionGraphTestUtils.completeCancellingForAllVertices(executionGraph);
	assertEquals(JobStatus.RESTARTING, executionGraph.getState());

	// Second failure asks for restarts to be suppressed.
	final Exception wrappedCause = new Exception("Test");
	executionGraph.failGlobal(new SuppressRestartsException(wrappedCause));

	final JobStatus finalState = executionGraph.getState();
	assertEquals(JobStatus.FAILED, finalState);
}
 
Example #9
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises a task failure that arrives after the failover region has already been through
 * one failover: the region is expected to go RUNNING, CANCELLING, RUNNING and, on the
 * second failure, CANCELLING again.
 *
 * @throws Exception if the single region execution graph cannot be created
 */
@Test
public void testFailWhileRestarting() throws Exception {
	final RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy();
	final ExecutionGraph graph = createSingleRegionExecutionGraph(infiniteDelay);
	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	final Iterator<ExecutionVertex> vertices = graph.getAllExecutionVertices().iterator();
	final ExecutionVertex firstVertex = vertices.next();
	final JobStatus initialRegionState = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.RUNNING, initialRegionState);

	// first failure: the region starts cancelling
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	final JobStatus regionStateAfterFirstFailure = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.CANCELLING, regionStateAfterFirstFailure);

	// let every vertex finish cancelling; the region is expected to be RUNNING again
	final Iterator<ExecutionVertex> cancellingVertices = graph.getAllExecutionVertices().iterator();
	while (cancellingVertices.hasNext()) {
		cancellingVertices.next().getCurrentExecutionAttempt().completeCancelling();
	}
	final JobStatus regionStateAfterCancellation = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.RUNNING, regionStateAfterCancellation);

	// second failure on the vertex's current attempt
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	final JobStatus regionStateAfterSecondFailure = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.CANCELLING, regionStateAfterSecondFailure);
}
 
Example #10
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises a second task failure that arrives while the failover region is still
 * CANCELLING: the job is expected to stay RUNNING and the region to stay CANCELLING.
 *
 * @throws Exception if the single region execution graph cannot be created
 */
@Test
public void testFailWhileCancelling() throws Exception {
	final RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy();
	final ExecutionGraph graph = createSingleRegionExecutionGraph(infiniteDelay);
	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	final Iterator<ExecutionVertex> vertices = graph.getAllExecutionVertices().iterator();
	final ExecutionVertex firstVertex = vertices.next();
	firstVertex.getCurrentExecutionAttempt().switchToRunning();
	final JobStatus initialRegionState = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.RUNNING, initialRegionState);

	// failing the first vertex sends its region into CANCELLING
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	final JobStatus regionStateAfterFirstFailure = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.CANCELLING, regionStateAfterFirstFailure);

	// a second vertex fails while the region is still cancelling
	final ExecutionVertex secondVertex = vertices.next();
	secondVertex.getCurrentExecutionAttempt().fail(new Exception("new fail"));

	final JobStatus jobState = graph.getState();
	assertEquals(JobStatus.RUNNING, jobState);
	final JobStatus regionStateAfterSecondFailure = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.CANCELLING, regionStateAfterSecondFailure);
}
 
Example #11
Source File: ExecutionGraphSuspendTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a job in state RESTARTING can be suspended: the graph ends up SUSPENDED
 * and reports the suspension cause as its failure cause.
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
	final InfiniteDelayRestartStrategy restartStrategy = new InfiniteDelayRestartStrategy(10);
	final ExecutionGraph executionGraph = ExecutionGraphTestUtils.createSimpleTestGraph(restartStrategy);
	executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());
	executionGraph.scheduleForExecution();

	assertEquals(JobStatus.RUNNING, executionGraph.getState());
	ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

	// fail globally and finish cancelling so that the graph waits in RESTARTING
	final Exception failure = new Exception("test");
	executionGraph.failGlobal(failure);
	assertEquals(JobStatus.FAILING, executionGraph.getState());

	ExecutionGraphTestUtils.completeCancellingForAllVertices(executionGraph);
	assertEquals(JobStatus.RESTARTING, executionGraph.getState());

	// suspend during the pending restart
	final Exception suspensionCause = new Exception("Suspended");
	executionGraph.suspend(suspensionCause);

	final JobStatus finalState = executionGraph.getState();
	assertEquals(JobStatus.SUSPENDED, finalState);
	assertEquals(suspensionCause, executionGraph.getFailureCause());
}
 
Example #12
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a graph is not restarted when its executions fail right after the graph
 * has been cancelled (as can happen when a slot is released concurrently with the
 * cancellation): the job is expected to terminate as CANCELED and to stay CANCELED.
 */
@Test
public void testFailExecutionAfterCancel() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setJobGraph(createJobGraphToCancel())
			.setNumberOfTasks(2)
			.buildAndScheduleForExecution(pool);

		// cancel, then immediately fail the current attempt of every vertex
		graph.cancel();

		for (ExecutionVertex vertex : graph.getAllExecutionVertices()) {
			final Execution attempt = vertex.getCurrentExecutionAttempt();
			attempt.fail(new Exception("Test Exception"));
		}

		// the termination future must complete with CANCELED
		assertEquals(JobStatus.CANCELED, graph.getTerminationFuture().get());

		// completing cancellation on an attempt afterwards must not change the state
		final Execution firstAttempt = graph
			.getAllExecutionVertices()
			.iterator()
			.next()
			.getCurrentExecutionAttempt();
		firstAttempt.completeCancelling();

		final JobStatus finalState = graph.getState();
		assertEquals(JobStatus.CANCELED, finalState);
	}
}
 
Example #13
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a task failure reported after a global failure has been triggered does not
 * result in a local failover: the test failover strategy must count zero local failovers.
 */
@Test
public void testTaskFailingWhileGlobalFailing() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph executionGraph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setFailoverStrategyFactory(new TestFailoverStrategy.Factory())
			.buildAndScheduleForExecution(pool);
		final TestFailoverStrategy testStrategy =
			(TestFailoverStrategy) executionGraph.getFailoverStrategy();

		// move every task of the first job vertex (in topological order) to RUNNING
		for (ExecutionVertex taskVertex
				: executionGraph.getVerticesTopologically().iterator().next().getTaskVertices()) {
			taskVertex.getCurrentExecutionAttempt().switchToRunning();
		}

		// global failure first ...
		executionGraph.failGlobal(new Exception("test"));

		// ... then a single task fails on top of it
		final ExecutionVertex firstVertex = executionGraph.getAllExecutionVertices().iterator().next();
		firstVertex.fail(new Exception("Test task failure"));

		// the task failure must not have been handled as a local failover
		final int localFailovers = testStrategy.getLocalFailoverCount();
		assertEquals(0, localFailovers);
	}

}
 
Example #14
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that cancelling a graph in state RESTARTING aborts the restart: the graph
 * becomes CANCELED and an explicit restart call afterwards leaves it CANCELED.
 */
@Test
public void testCancelWhileRestarting() throws Exception {
	// The restart and its delay are driven by hand in this test
	try (SlotPool pool = createSlotPoolImpl()) {
		final TaskManagerLocation location = new LocalTaskManagerLocation();
		final ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setTaskManagerLocation(location)
			.buildAndScheduleForExecution(pool);

		// Releasing the TaskManager is expected to put the job into RESTARTING
		pool.releaseTaskManager(location.getResourceID(), new Exception("Test Exception"));
		final JobStatus stateAfterRelease = graph.getState();
		assertEquals(JobStatus.RESTARTING, stateAfterRelease);

		// Cancelling has to abort the pending restart
		graph.cancel();
		final JobStatus stateAfterCancel = graph.getState();
		assertEquals(JobStatus.CANCELED, stateAfterCancel);

		// Triggering the restart now must have no effect on the state
		graph.restart(graph.getGlobalModVersion());
		final JobStatus stateAfterRestartCall = graph.getState();
		assertEquals(JobStatus.CANCELED, stateAfterRestartCall);
	}

}
 
Example #15
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code suspend} works on a graph that is RESTARTING: the resulting state is
 * SUSPENDED and the exception passed to suspend is reported as the failure cause.
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	assertEquals(JobStatus.RUNNING, graph.getState());
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// global failure -> FAILING
	graph.failGlobal(new Exception("test"));
	assertEquals(JobStatus.FAILING, graph.getState());

	// all vertices finished cancelling -> RESTARTING
	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);
	assertEquals(JobStatus.RESTARTING, graph.getState());

	// suspend -> SUSPENDED, with the given exception as failure cause
	final Exception cause = new Exception("Suspended");
	graph.suspend(cause);

	final JobStatus stateAfterSuspend = graph.getState();
	assertEquals(JobStatus.SUSPENDED, stateAfterSuspend);
	assertEquals(cause, graph.getFailureCause());
}
 
Example #16
Source File: ExecutionGraphVariousFailuesTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a {@link SuppressRestartsException} passed to failGlobal while the graph is
 * RESTARTING moves the execution graph to FAILED.
 */
@Test
public void testSuppressRestartFailureWhileRestarting() throws Exception {
	final ExecutionGraph graph =
		ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10));
	graph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus initialState = graph.getState();
	assertEquals(JobStatus.RUNNING, initialState);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// bring the graph into RESTARTING
	graph.failGlobal(new Exception("test"));
	final JobStatus stateAfterFailure = graph.getState();
	assertEquals(JobStatus.FAILING, stateAfterFailure);

	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);
	final JobStatus stateAfterCancellation = graph.getState();
	assertEquals(JobStatus.RESTARTING, stateAfterCancellation);

	// this failure asks for the restart to be suppressed
	graph.failGlobal(new SuppressRestartsException(new Exception("Test")));

	final JobStatus finalState = graph.getState();
	assertEquals(JobStatus.FAILED, finalState);
}
 
Example #17
Source File: ExecutionGraphRestartTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that cancelling an execution graph in state RESTARTING aborts the restart: the
 * graph goes to CANCELED, and a later call to restart leaves it in CANCELED.
 */
@Test
public void testCancelWhileRestarting() throws Exception {
	// We want to manually control the restart and delay
	RestartStrategy restartStrategy = new InfiniteDelayRestartStrategy();
	Tuple2<ExecutionGraph, Instance> executionGraphInstanceTuple = createExecutionGraph(restartStrategy);
	ExecutionGraph executionGraph = executionGraphInstanceTuple.f0;
	Instance instance = executionGraphInstanceTuple.f1;

	// Kill the instance and wait for the job to restart
	instance.markDead();
	// (the state was previously asserted twice in a row; one check is sufficient)
	assertEquals(JobStatus.RESTARTING, executionGraph.getState());

	// Canceling needs to abort the restart
	executionGraph.cancel();

	assertEquals(JobStatus.CANCELED, executionGraph.getState());

	// The restart has been aborted
	executionGraph.restart(executionGraph.getGlobalModVersion());

	assertEquals(JobStatus.CANCELED, executionGraph.getState());
}
 
Example #18
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the failover cycle of a job with a single failover region: the region is
 * RUNNING, switches to CANCELLING when a task fails, and is RUNNING again once all
 * executions have completed their cancellation.
 *
 * @throws Exception if the single region execution graph cannot be created
 */
@Test
public void testSingleRegionFailover() throws Exception {
	final RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy(10);
	final ExecutionGraph graph = createSingleRegionExecutionGraph(infiniteDelay);
	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	final ExecutionVertex firstVertex = graph.getAllExecutionVertices().iterator().next();

	final JobStatus initialRegionState = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.RUNNING, initialRegionState);

	// a task failure puts the region into CANCELLING
	firstVertex.getCurrentExecutionAttempt().fail(new Exception("Test Exception"));
	final JobStatus regionStateAfterFailure = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.CANCELLING, regionStateAfterFailure);

	// after every vertex finished cancelling the region is expected to run again
	for (ExecutionVertex vertex : graph.getAllExecutionVertices()) {
		vertex.getCurrentExecutionAttempt().completeCancelling();
	}
	final JobStatus regionStateAfterCancellation = regionStrategy.getFailoverRegion(firstVertex).getState();
	assertEquals(JobStatus.RUNNING, regionStateAfterCancellation);
}
 
Example #19
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the handling of an additional task failure while the failover region is already
 * CANCELLING: the job state is expected to remain RUNNING and the region CANCELLING.
 *
 * @throws Exception if the single region execution graph cannot be created
 */
@Test
public void testFailWhileCancelling() throws Exception {
	final RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy();
	final ExecutionGraph executionGraph = createSingleRegionExecutionGraph(infiniteDelay);
	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) executionGraph.getFailoverStrategy();

	final Iterator<ExecutionVertex> vertexIterator = executionGraph.getAllExecutionVertices().iterator();
	final ExecutionVertex failingFirst = vertexIterator.next();
	failingFirst.getCurrentExecutionAttempt().switchToRunning();
	assertEquals(JobStatus.RUNNING, regionStrategy.getFailoverRegion(failingFirst).getState());

	// first failure: region goes CANCELLING
	final Exception firstFailure = new Exception("new fail");
	failingFirst.getCurrentExecutionAttempt().fail(firstFailure);
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(failingFirst).getState());

	// second failure on another vertex while cancellation is still in progress
	final ExecutionVertex failingSecond = vertexIterator.next();
	final Exception secondFailure = new Exception("new fail");
	failingSecond.getCurrentExecutionAttempt().fail(secondFailure);

	assertEquals(JobStatus.RUNNING, executionGraph.getState());
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(failingFirst).getState());
}
 
Example #20
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the handling of a task failure that arrives after the failover region has been
 * through a failover: the region is checked to be RUNNING, CANCELLING after the first
 * failure, RUNNING after all cancellations completed, and CANCELLING after the second failure.
 *
 * @throws Exception if the single region execution graph cannot be created
 */
@Test
public void testFailWhileRestarting() throws Exception {
	final RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy();
	final ExecutionGraph executionGraph = createSingleRegionExecutionGraph(infiniteDelay);
	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) executionGraph.getFailoverStrategy();

	final Iterator<ExecutionVertex> vertexIterator = executionGraph.getAllExecutionVertices().iterator();
	final ExecutionVertex observed = vertexIterator.next();
	assertEquals(JobStatus.RUNNING, regionStrategy.getFailoverRegion(observed).getState());

	// failure number one
	final Exception firstFailure = new Exception("new fail");
	observed.getCurrentExecutionAttempt().fail(firstFailure);
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(observed).getState());

	// complete the cancellation of every vertex
	final Iterator<ExecutionVertex> cancellingVertices = executionGraph.getAllExecutionVertices().iterator();
	while (cancellingVertices.hasNext()) {
		final ExecutionVertex vertex = cancellingVertices.next();
		vertex.getCurrentExecutionAttempt().completeCancelling();
	}
	assertEquals(JobStatus.RUNNING, regionStrategy.getFailoverRegion(observed).getState());

	// failure number two, on the vertex's current attempt
	final Exception secondFailure = new Exception("new fail");
	observed.getCurrentExecutionAttempt().fail(secondFailure);
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(observed).getState());
}
 
Example #21
Source File: ExecutionGraphVariousFailuesTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that scheduleOrUpdateConsumers, invoked with an execution attempt id that does
 * not exist in the graph, throws an {@link ExecutionGraphException} without failing the
 * execution graph (it is still RUNNING afterwards).
 */
@Test
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final InfiniteDelayRestartStrategy restartStrategy = new InfiniteDelayRestartStrategy(10);
	final ExecutionGraph executionGraph = ExecutionGraphTestUtils.createSimpleTestGraph(restartStrategy);
	executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());
	executionGraph.scheduleForExecution();

	assertEquals(JobStatus.RUNNING, executionGraph.getState());
	ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

	// ids created here are new, so no execution attempt in the graph carries them
	final IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
	final ExecutionAttemptID unknownProducer = new ExecutionAttemptID();
	final ResultPartitionID unknownPartition = new ResultPartitionID(partitionId, unknownProducer);

	try {
		executionGraph.scheduleOrUpdateConsumers(unknownPartition);
		fail("Expected ExecutionGraphException.");
	} catch (ExecutionGraphException expected) {
		// rejection is the expected outcome
	}

	// the graph itself must be unaffected
	assertEquals(JobStatus.RUNNING, executionGraph.getState());
}
 
Example #22
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the behaviour of global failures while the graph is RESTARTING: an ordinary
 * failure keeps the graph RESTARTING, bumps the global mod version and updates the failure
 * cause, whereas a {@link SuppressRestartsException} moves the graph to FAILED, where it
 * stays even if restart is called.
 */
@Test
public void testFailWhileRestarting() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final TaskManagerLocation location = new LocalTaskManagerLocation();
		final ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setTaskManagerLocation(location)
			.buildAndScheduleForExecution(pool);

		// Releasing the TaskManager is expected to put the job into RESTARTING
		pool.releaseTaskManager(location.getResourceID(), new Exception("Test Exception"));

		final JobStatus stateAfterRelease = graph.getState();
		assertEquals(JobStatus.RESTARTING, stateAfterRelease);

		// A failure during RESTARTING should trigger another restart attempt
		final long versionBeforeFailure = graph.getGlobalModVersion();
		final Exception restartableFailure = new Exception("Test exception");
		graph.failGlobal(restartableFailure);

		final long versionAfterFailure = graph.getGlobalModVersion();
		assertNotEquals(versionBeforeFailure, versionAfterFailure);
		assertEquals(JobStatus.RESTARTING, graph.getState());
		// the failure cause must reflect the latest failure
		assertEquals(restartableFailure, graph.getFailureCause());

		// A SuppressRestartsException, however, has to fail the job
		graph.failGlobal(new SuppressRestartsException(new Exception("Suppress restart exception")));

		final JobStatus stateAfterSuppressedRestart = graph.getState();
		assertEquals(JobStatus.FAILED, stateAfterSuppressedRestart);

		// Calling restart now must not revive the job
		graph.restart(graph.getGlobalModVersion());

		final JobStatus stateAfterRestartCall = graph.getState();
		assertEquals(JobStatus.FAILED, stateAfterRestartCall);
	}
}
 
Example #23
Source File: ExecutionGraphVariousFailuesTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a failure in state RESTARTING re-runs the restart logic, so the graph only
 * reaches FAILED once the restart strategy (created here with a limit of 2) no longer
 * allows a restart.
 */
@Test
public void testFailureWhileRestarting() throws Exception {
	final InfiniteDelayRestartStrategy restartStrategy = new InfiniteDelayRestartStrategy(2);
	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph(restartStrategy);
	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final JobStatus initialState = graph.getState();
	assertEquals(JobStatus.RUNNING, initialState);
	ExecutionGraphTestUtils.switchAllVerticesToRunning(graph);

	// first failure: FAILING until all vertices finished cancelling
	graph.failGlobal(new Exception("Test 1"));
	final JobStatus stateAfterFirstFailure = graph.getState();
	assertEquals(JobStatus.FAILING, stateAfterFirstFailure);
	ExecutionGraphTestUtils.completeCancellingForAllVertices(graph);

	// restart attempts are still available, so the graph waits in RESTARTING
	final JobStatus stateAfterCancellation = graph.getState();
	assertEquals(JobStatus.RESTARTING, stateAfterCancellation);

	// second failure: one more restart is still permitted
	graph.failGlobal(new Exception("Test 2"));
	final JobStatus stateAfterSecondFailure = graph.getState();
	assertEquals(JobStatus.RESTARTING, stateAfterSecondFailure);

	// third failure: the restart attempts are used up, the job must fail for good
	graph.failGlobal(new Exception("Test 3"));
	final JobStatus stateAfterThirdFailure = graph.getState();
	assertEquals(JobStatus.FAILED, stateAfterThirdFailure);
}
 
Example #24
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that failing the executions of a graph right after it has been cancelled (for
 * instance through a slot released concurrently with the cancellation) does not restart
 * the graph: the job terminates as CANCELED and stays CANCELED.
 */
@Test
public void testFailExecutionAfterCancel() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph graph = TestingExecutionGraphBuilder.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setJobGraph(createJobGraphToCancel())
			.setSlotProvider(createSchedulerWithSlots(pool, new LocalTaskManagerLocation(), 2))
			.build();

		startAndScheduleExecutionGraph(graph);

		// cancel and fail every current attempt straight afterwards
		graph.cancel();

		for (ExecutionVertex vertex : graph.getAllExecutionVertices()) {
			final Execution attempt = vertex.getCurrentExecutionAttempt();
			attempt.fail(new Exception("Test Exception"));
		}

		// the job has to terminate as CANCELED
		assertEquals(JobStatus.CANCELED, graph.getTerminationFuture().get());

		// a late completeCancelling on an attempt must not alter the terminal state
		final Execution firstAttempt = graph
			.getAllExecutionVertices()
			.iterator()
			.next()
			.getCurrentExecutionAttempt();
		firstAttempt.completeCancelling();

		final JobStatus finalState = graph.getState();
		assertEquals(JobStatus.CANCELED, finalState);
	}
}
 
Example #25
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a task failure arriving after a global failure was triggered is not handled
 * as a local failover: the test failover strategy's local failover count must remain zero.
 */
@Test
public void testTaskFailingWhileGlobalFailing() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph executionGraph = TestingExecutionGraphBuilder
			.newBuilder()
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setFailoverStrategyFactory(new TestFailoverStrategy.Factory())
			.setJobGraph(createJobGraph())
			.setSlotProvider(createSchedulerWithSlots(pool))
			.build();

		startAndScheduleExecutionGraph(executionGraph);

		final TestFailoverStrategy testStrategy =
			(TestFailoverStrategy) executionGraph.getFailoverStrategy();

		// move every task of the first job vertex (in topological order) to RUNNING
		for (ExecutionVertex taskVertex
				: executionGraph.getVerticesTopologically().iterator().next().getTaskVertices()) {
			taskVertex.getCurrentExecutionAttempt().switchToRunning();
		}

		// trigger the global failure, then fail one task on top of it
		executionGraph.failGlobal(new Exception("test"));

		final ExecutionVertex firstVertex = executionGraph.getAllExecutionVertices().iterator().next();
		firstVertex.fail(new Exception("Test task failure"));

		// the task failure must not have been counted as a local failover
		final int localFailovers = testStrategy.getLocalFailoverCount();
		assertEquals(0, localFailovers);
	}

}
 
Example #26
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a graph in state FAILING can be cancelled: it switches to CANCELLING and
 * reaches CANCELED once all tasks have finished cancelling.
 */
@Test
public void testCancelWhileFailing() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final ExecutionGraph executionGraph = TestingExecutionGraphBuilder
			.newBuilder()
			.setJobGraph(createJobGraph())
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setSlotProvider(createSchedulerWithSlots(pool))
			.build();

		startAndScheduleExecutionGraph(executionGraph);

		final JobStatus stateAfterScheduling = executionGraph.getState();
		assertEquals(JobStatus.RUNNING, stateAfterScheduling);

		// move every task of the first job vertex (in topological order) to RUNNING
		for (ExecutionVertex taskVertex
				: executionGraph.getVerticesTopologically().iterator().next().getTaskVertices()) {
			taskVertex.getCurrentExecutionAttempt().switchToRunning();
		}

		// global failure -> FAILING
		executionGraph.failGlobal(new Exception("test"));
		final JobStatus stateAfterFailure = executionGraph.getState();
		assertEquals(JobStatus.FAILING, stateAfterFailure);

		// cancel while failing -> CANCELLING
		executionGraph.cancel();
		final JobStatus stateAfterCancel = executionGraph.getState();
		assertEquals(JobStatus.CANCELLING, stateAfterCancel);

		// once all tasks are done cancelling -> CANCELED
		completeCanceling(executionGraph);
		final JobStatus finalState = executionGraph.getState();
		assertEquals(JobStatus.CANCELED, finalState);
	}

}
 
Example #27
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies how the graph reacts to global failures while RESTARTING: a plain failure keeps
 * it RESTARTING (with a new global mod version and an updated failure cause), while a
 * {@link SuppressRestartsException} takes it to FAILED, which a later restart call must
 * not change.
 */
@Test
public void testFailWhileRestarting() throws Exception {
	try (SlotPool pool = createSlotPoolImpl()) {
		final TaskManagerLocation location = new LocalTaskManagerLocation();
		final ExecutionGraph graph = TestingExecutionGraphBuilder
			.newBuilder()
			.setJobGraph(createJobGraph())
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setSlotProvider(createSchedulerWithSlots(pool, location))
			.build();

		startAndScheduleExecutionGraph(graph);

		// Releasing the TaskManager is expected to put the job into RESTARTING
		pool.releaseTaskManager(location.getResourceID(), new Exception("Test Exception"));

		final JobStatus stateAfterRelease = graph.getState();
		assertEquals(JobStatus.RESTARTING, stateAfterRelease);

		// Failing in RESTARTING should lead to yet another restart attempt
		final long versionBeforeFailure = graph.getGlobalModVersion();
		final Exception restartableFailure = new Exception("Test exception");
		graph.failGlobal(restartableFailure);

		final long versionAfterFailure = graph.getGlobalModVersion();
		assertNotEquals(versionBeforeFailure, versionAfterFailure);
		assertEquals(JobStatus.RESTARTING, graph.getState());
		// the most recent failure must be reported as the cause
		assertEquals(restartableFailure, graph.getFailureCause());

		// With a SuppressRestartsException the job has to fail instead
		graph.failGlobal(new SuppressRestartsException(new Exception("Suppress restart exception")));

		final JobStatus stateAfterSuppressedRestart = graph.getState();
		assertEquals(JobStatus.FAILED, stateAfterSuppressedRestart);

		// An explicit restart call must leave the job FAILED
		graph.restart(graph.getGlobalModVersion());

		final JobStatus stateAfterRestartCall = graph.getState();
		assertEquals(JobStatus.FAILED, stateAfterRestartCall);
	}
}
 
Example #28
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that cancelling an execution graph which is in {@code RESTARTING} moves it to
 * {@code CANCELED}, and that a restart call issued afterwards does not change that state.
 */
@Test
public void testCancelWhileRestarting() throws Exception {
	// The restart (and its delay) is driven by hand in this test.
	try (SlotPool pool = createSlotPoolImpl()) {
		final TaskManagerLocation tmLocation = new LocalTaskManagerLocation();
		final ExecutionGraph graph = TestingExecutionGraphBuilder
			.newBuilder()
			.setJobGraph(createJobGraph())
			.setRestartStrategy(new InfiniteDelayRestartStrategy())
			.setSlotProvider(createSchedulerWithSlots(pool, tmLocation))
			.build();

		startAndScheduleExecutionGraph(graph);

		// Dropping the TaskManager fails the job and puts the graph into RESTARTING.
		final Exception taskManagerLoss = new Exception("Test Exception");
		pool.releaseTaskManager(tmLocation.getResourceID(), taskManagerLoss);
		assertEquals(JobStatus.RESTARTING, graph.getState());

		// Cancelling at this point has to abort the pending restart.
		graph.cancel();
		assertEquals(JobStatus.CANCELED, graph.getState());

		// A restart request arriving afterwards must not revive the job.
		final long currentVersion = graph.getGlobalModVersion();
		graph.restart(currentVersion);
		assertEquals(JobStatus.CANCELED, graph.getState());
	}
}
 
Example #29
Source File: ExecutionGraphRestartTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link ExecutionGraph#failGlobal(Throwable)} still takes effect on a
 * graph that is already being cancelled: the job goes from {@code CANCELLING} to
 * {@code FAILING} and, once the execution has finished cancelling, to {@code RESTARTING}.
 */
@Test
public void testFailExecutionGraphAfterCancel() throws Exception {
	// Register a single instance (created with the argument 2) with a fresh scheduler.
	final ActorTaskManagerGateway taskManagerGateway = new ActorTaskManagerGateway(
		new SimpleActorGateway(TestingUtils.directExecutionContext()));
	final Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

	final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
	scheduler.newInstanceAvailable(taskManager);

	// One no-op vertex, wrapped in a job graph carrying its own execution config.
	final JobVertex testVertex =
		ExecutionGraphTestUtils.createJobVertex("Test Vertex", 1, NoOpInvokable.class);

	final ExecutionConfig config = new ExecutionConfig();
	config.setRestartStrategy(
		RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));

	final JobGraph testJob = new JobGraph("Test Job", testVertex);
	testJob.setExecutionConfig(config);

	final ExecutionGraph eg = newExecutionGraph(new InfiniteDelayRestartStrategy(), scheduler);
	eg.attachJobGraph(testJob.getVerticesSortedTopologicallyFromSources());
	assertEquals(JobStatus.CREATED, eg.getState());

	eg.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, eg.getState());

	// Cancel first ...
	eg.cancel();
	assertEquals(JobStatus.CANCELLING, eg.getState());

	// ... then fail globally right away (e.g. because a slot is released concurrently).
	eg.failGlobal(new Exception("Test Exception"));
	assertEquals(JobStatus.FAILING, eg.getState());

	// Once the first execution attempt finishes cancelling, the graph must go to RESTARTING.
	final Execution firstAttempt =
		eg.getAllExecutionVertices().iterator().next().getCurrentExecutionAttempt();
	firstAttempt.completeCancelling();
	assertEquals(JobStatus.RESTARTING, eg.getState());
}
 
Example #30
Source File: GlobalModVersionTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and starts an {@link ExecutionGraph} made of a single {@link NoOpInvokable}
 * job vertex whose parallelism is picked at random from 1 to 10 (inclusive).
 *
 * <p>The graph uses an {@link InfiniteDelayRestartStrategy} and a {@code CustomStrategy}
 * wrapping the given failover strategy; its slot provider is created with the same
 * randomly chosen parallelism.
 *
 * @param failoverStrategy the failover strategy handed to the {@code CustomStrategy}
 * @return the started graph with the job vertex attached
 * @throws Exception if creating the graph or attaching the job graph fails
 */
private ExecutionGraph createSampleGraph(FailoverStrategy failoverStrategy) throws Exception {
	final JobID jobId = new JobID();
	final int parallelism = 1 + new Random().nextInt(10);

	final SimpleSlotProvider slots = new SimpleSlotProvider(jobId, parallelism);

	// assemble the pieces of the execution graph
	final DummyJobInformation jobInformation = new DummyJobInformation(jobId, "test job");
	final Time timeout = Time.seconds(10);
	final InfiniteDelayRestartStrategy restartStrategy = new InfiniteDelayRestartStrategy();
	final CustomStrategy failoverFactory = new CustomStrategy(failoverStrategy);

	final ExecutionGraph executionGraph = new ExecutionGraph(
		jobInformation,
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		timeout,
		restartStrategy,
		failoverFactory,
		slots);

	executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());

	// the only vertex of the job, running with the randomly chosen parallelism
	final JobVertex vertex = new JobVertex("test vertex");
	vertex.setInvokableClass(NoOpInvokable.class);
	vertex.setParallelism(parallelism);

	final JobGraph jobGraph = new JobGraph(jobId, "testjob", vertex);
	executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());

	return executionGraph;
}