org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider Java Examples

The following examples show how to use org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionVertexInputConstraintTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link ExecutionGraph} for the given vertices after applying the same input
 * dependency constraint to each of them.
 *
 * <p>Slots are served by a {@link SimpleSlotProvider} created with 20 slots for a fresh job id.
 *
 * @param orderedVertices job vertices to configure and attach, passed on in the order given
 * @param inputDependencyConstraint constraint set on every vertex before it is attached
 * @return the execution graph with all vertices attached
 * @throws Exception if the graph cannot be created or the vertices cannot be attached
 */
private static ExecutionGraph createExecutionGraph(
		List<JobVertex> orderedVertices,
		InputDependencyConstraint inputDependencyConstraint) throws Exception {

	final JobID id = new JobID();
	final String name = "Test Job Sample Name";
	final SlotProvider slots = new SimpleSlotProvider(id, 20);

	// Configure the constraint under test on each vertex before the graph sees it.
	orderedVertices.forEach(v -> v.setInputDependencyConstraint(inputDependencyConstraint));

	final DummyJobInformation jobInformation = new DummyJobInformation(id, name);
	final ExecutionGraph graph = new ExecutionGraph(
		jobInformation,
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		TestRestartStrategy.directExecuting(),
		new RestartAllStrategy.Factory(),
		slots);

	graph.attachJobGraph(orderedVertices);
	return graph;
}
 
Example #2
Source File: ExecutionGraphSuspendTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates and starts an execution graph consisting of a single no-op vertex.
 *
 * @param gateway task manager gateway handed to the {@link SimpleSlotProvider}
 * @param parallelism parallelism of the vertex, also used as the number of slots
 * @return the started execution graph
 * @throws Exception if the graph cannot be created
 */
private static ExecutionGraph createExecutionGraph(TaskManagerGateway gateway, int parallelism) throws Exception {
	final JobID jid = new JobID();

	final JobVertex noOpVertex = new JobVertex("vertex");
	noOpVertex.setInvokableClass(NoOpInvokable.class);
	noOpVertex.setParallelism(parallelism);

	// One slot per subtask, all backed by the supplied gateway.
	final SlotProvider slots = new SimpleSlotProvider(jid, parallelism, gateway);

	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph(
		jid,
		slots,
		new FixedDelayRestartStrategy(0, 0),
		noOpVertex);

	graph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());

	return graph;
}
 
Example #3
Source File: GlobalModVersionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates and starts an execution graph with a single no-op job vertex whose
 * parallelism is picked at random from 1 to 10 (inclusive).
 *
 * @param failoverStrategy strategy wrapped into the {@code CustomStrategy} factory of the graph
 * @return the started execution graph
 * @throws Exception if the graph cannot be built
 */
private ExecutionGraph createSampleGraph(FailoverStrategy failoverStrategy) throws Exception {
	final JobID jobId = new JobID();
	final int numSubtasks = new Random().nextInt(10) + 1;

	JobVertex vertex = new JobVertex("test vertex");
	vertex.setInvokableClass(NoOpInvokable.class);
	vertex.setParallelism(numSubtasks);

	JobGraph jobGraph = new JobGraph(jobId, "testjob", vertex);

	// exactly one slot per subtask
	final SimpleSlotProvider slots = new SimpleSlotProvider(numSubtasks);

	// single job vertex; its parallelism is the random value chosen above
	final ExecutionGraph executionGraph = TestingExecutionGraphBuilder
		.newBuilder()
		.setJobGraph(jobGraph)
		.setRestartStrategy(new InfiniteDelayRestartStrategy())
		.setFailoverStrategyFactory(new CustomStrategy(failoverStrategy))
		.setSlotProvider(slots)
		.build();

	executionGraph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	return executionGraph;
}
 
Example #4
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that calling {@code restart} on a graph that has reached {@code FAILED}
 * leaves it in {@code FAILED}.
 */
@Test
public void testNoManualRestart() throws Exception {
	final ExecutionGraph graph = TestingExecutionGraphBuilder
		.newBuilder()
		.setSlotProvider(new SimpleSlotProvider(NUM_TASKS))
		.setJobGraph(createJobGraph())
		.build();

	startAndScheduleExecutionGraph(graph);

	// Fail the first vertex, then let cancellation finish.
	graph.getAllExecutionVertices().iterator().next().fail(new Exception("Test Exception"));
	completeCanceling(graph);

	assertEquals(JobStatus.FAILED, graph.getState());

	// A manual restart request must be ignored here.
	graph.restart(graph.getGlobalModVersion());

	assertEquals(JobStatus.FAILED, graph.getState());
}
 
Example #5
Source File: ExecutionGraphTestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an execution graph for the given vertices and restart strategy, backed by a
 * {@link SimpleSlotProvider} that offers one slot per subtask of all vertices combined.
 *
 * @param jid job id used for the slot provider and the graph
 * @param taskManagerGateway gateway handed to the slot provider
 * @param restartStrategy restart strategy of the graph
 * @param vertices job vertices to put into the graph
 * @return the execution graph created by the slot-provider based overload
 * @throws Exception if the graph cannot be created
 */
public static ExecutionGraph createSimpleTestGraph(
		JobID jid,
		TaskManagerGateway taskManagerGateway,
		RestartStrategy restartStrategy,
		JobVertex... vertices) throws Exception {

	// Total number of slots = sum of the parallelism of every vertex.
	final int totalSlots = java.util.Arrays.stream(vertices)
		.mapToInt(JobVertex::getParallelism)
		.sum();

	final SlotProvider slots = new SimpleSlotProvider(jid, totalSlots, taskManagerGateway);

	return createSimpleTestGraph(jid, slots, restartStrategy, vertices);
}
 
Example #6
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that calling {@code restart} on a graph that has reached {@code FAILED}
 * leaves it in {@code FAILED}.
 */
@Test
public void testNoManualRestart() throws Exception {
	final NoRestartStrategy noRestart = new NoRestartStrategy();
	final SimpleSlotProvider slots = new SimpleSlotProvider(TEST_JOB_ID, NUM_TASKS);
	final ExecutionGraph graph = createSimpleExecutionGraph(noRestart, slots, createJobGraph());

	// Fail the first vertex, then let cancellation finish.
	graph.getAllExecutionVertices().iterator().next().fail(new Exception("Test Exception"));
	completeCanceling(graph);

	assertEquals(JobStatus.FAILED, graph.getState());

	// A manual restart request must be ignored here.
	graph.restart(graph.getGlobalModVersion());

	assertEquals(JobStatus.FAILED, graph.getState());
}
 
Example #7
Source File: ExecutionGraphTestUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an execution graph for the given vertices and restart strategy, backed by a
 * {@link SimpleSlotProvider} that offers one slot per subtask of all vertices combined.
 *
 * @param jid job id used for the slot provider and the graph
 * @param taskManagerGateway gateway handed to the slot provider
 * @param restartStrategy restart strategy of the graph
 * @param vertices job vertices to put into the graph
 * @return the execution graph created by the slot-provider based overload
 * @throws Exception if the graph cannot be created
 */
public static ExecutionGraph createSimpleTestGraph(
		JobID jid,
		TaskManagerGateway taskManagerGateway,
		RestartStrategy restartStrategy,
		JobVertex... vertices) throws Exception {

	// Add up the parallelism of all vertices to size the slot provider.
	int requiredSlots = 0;
	for (int i = 0; i < vertices.length; i++) {
		requiredSlots += vertices[i].getParallelism();
	}

	final SlotProvider provider = new SimpleSlotProvider(jid, requiredSlots, taskManagerGateway);
	return createSimpleTestGraph(jid, provider, restartStrategy, vertices);
}
 
Example #8
Source File: ExecutionVertexInputConstraintTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link ExecutionGraph} for the given vertices after applying the same input
 * dependency constraint to each of them.
 *
 * @param orderedVertices job vertices to configure and put into the job graph
 * @param inputDependencyConstraint constraint set on every vertex
 * @param numSlots number of slots offered by the {@link SimpleSlotProvider}
 * @return the execution graph produced by the testing builder
 * @throws Exception if the graph cannot be built
 */
private static ExecutionGraph createExecutionGraph(
		List<JobVertex> orderedVertices,
		InputDependencyConstraint inputDependencyConstraint,
		int numSlots) throws Exception {

	// Configure the constraint under test on each vertex first.
	orderedVertices.forEach(v -> v.setInputDependencyConstraint(inputDependencyConstraint));

	final JobVertex[] vertexArray = orderedVertices.toArray(new JobVertex[0]);
	final JobGraph graphDefinition = new JobGraph(vertexArray);
	final SlotProvider slots = new SimpleSlotProvider(numSlots);

	return TestingExecutionGraphBuilder
		.newBuilder()
		.setJobGraph(graphDefinition)
		.setRestartStrategy(TestRestartStrategy.directExecuting())
		.setSlotProvider(slots)
		.build();
}
 
Example #9
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates and starts an execution graph consisting of a single no-op vertex.
 *
 * @param gateway task manager gateway handed to the {@link SimpleSlotProvider}
 * @param parallelism parallelism of the vertex, also used as the number of slots
 * @return the started execution graph
 * @throws Exception if the graph cannot be created
 */
private static ExecutionGraph createExecutionGraph(TaskManagerGateway gateway, int parallelism) throws Exception {
	final JobID jid = new JobID();

	final JobVertex noOpVertex = new JobVertex("vertex");
	noOpVertex.setInvokableClass(NoOpInvokable.class);
	noOpVertex.setParallelism(parallelism);

	// One slot per subtask, all backed by the supplied gateway.
	final SlotProvider slots = new SimpleSlotProvider(jid, parallelism, gateway);

	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph(
		jid,
		slots,
		new FixedDelayRestartStrategy(0, 0),
		noOpVertex);

	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	return graph;
}
 
Example #10
Source File: ExecutionVertexInputConstraintTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link ExecutionGraph} for the given vertices after applying the same input
 * dependency constraint to each of them.
 *
 * <p>Slots are served by a {@link SimpleSlotProvider} created with 20 slots for a fresh job id.
 *
 * @param orderedVertices job vertices to configure and attach, passed on in the order given
 * @param inputDependencyConstraint constraint set on every vertex before it is attached
 * @return the execution graph with all vertices attached
 * @throws Exception if the graph cannot be created or the vertices cannot be attached
 */
private static ExecutionGraph createExecutionGraph(
		List<JobVertex> orderedVertices,
		InputDependencyConstraint inputDependencyConstraint) throws Exception {

	final JobID id = new JobID();
	final String name = "Test Job Sample Name";
	final SlotProvider slots = new SimpleSlotProvider(id, 20);

	// Configure the constraint under test on each vertex before the graph sees it.
	orderedVertices.forEach(v -> v.setInputDependencyConstraint(inputDependencyConstraint));

	final DummyJobInformation jobInformation = new DummyJobInformation(id, name);
	final ExecutionGraph graph = new ExecutionGraph(
		jobInformation,
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		TestRestartStrategy.directExecuting(),
		new RestartAllStrategy.Factory(),
		slots);

	graph.attachJobGraph(orderedVertices);
	return graph;
}
 
Example #11
Source File: AdaptedRestartPipelinedRegionStrategyNGConcurrentFailoverTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and starts a sample ExecutionGraph of two no-op vertices joined by a
 * blocking, all-to-all edge:
 * <pre>
 *     (v11) -+-> (v21)
 *            x
 *     (v12) -+-> (v22)
 *
 *            ^
 *            |
 *       (blocking)
 * </pre>
 * As drawn (two subtasks per vertex) this yields 4 regions, each consisting of one
 * individual execution vertex; the actual subtask count is {@code DEFAULT_PARALLELISM}.
 *
 * @return the started execution graph
 * @throws Exception if the graph cannot be built
 */
private ExecutionGraph createExecutionGraph() throws Exception {

	final JobVertex producer = new JobVertex("vertex1");
	producer.setInvokableClass(NoOpInvokable.class);
	producer.setParallelism(DEFAULT_PARALLELISM);

	final JobVertex consumer = new JobVertex("vertex2");
	consumer.setInvokableClass(NoOpInvokable.class);
	consumer.setParallelism(DEFAULT_PARALLELISM);

	// the blocking edge between the two vertices
	consumer.connectNewDataSetAsInput(producer, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

	final JobGraph jobGraph = new JobGraph(TEST_JOB_ID, "testjob", producer, consumer);

	final SimpleSlotProvider slots = new SimpleSlotProvider(TEST_JOB_ID, DEFAULT_PARALLELISM);

	final PartitionTracker tracker = new PartitionTrackerImpl(
		jobGraph.getJobID(),
		NettyShuffleMaster.INSTANCE,
		ignored -> Optional.empty());

	final ExecutionGraph executionGraph = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(jobGraph)
		.setRestartStrategy(manuallyTriggeredRestartStrategy)
		.setFailoverStrategyFactory(TestAdaptedRestartPipelinedRegionStrategyNG::new)
		.setSlotProvider(slots)
		.setPartitionTracker(tracker)
		.build();

	executionGraph.start(componentMainThreadExecutor);

	return executionGraph;
}
 
Example #12
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that if a task reports that the result of its preceding task has failed,
 * the preceding task is considered failed as well and starts to fail over.
 *
 * <p>TODO: as the report part is not finished yet, this case is ignored temporarily.
 *
 * @throws Exception if fail to create dummy job information or fail to schedule for execution.
 */
@Ignore
@Test
public void testSucceedingNoticePreceding() throws Exception {
	final JobID jobId = new JobID();
	final String jobName = "Test Job Sample Name";

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 14);

	JobVertex v1 = new JobVertex("vertex1");
	JobVertex v2 = new JobVertex("vertex2");

	v1.setParallelism(1);
	v2.setParallelism(1);

	v1.setInvokableClass(AbstractInvokable.class);
	v2.setInvokableClass(AbstractInvokable.class);

	// v1 produces a blocking result that v2 consumes
	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

	ExecutionGraph eg = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(jobId, jobName, v1, v2)
		.setRestartStrategy(new InfiniteDelayRestartStrategy(10))
		.setFailoverStrategyFactory(new FailoverPipelinedRegionWithDirectExecutor())
		.setSlotProvider(slotProvider)
		.setScheduleMode(ScheduleMode.EAGER)
		.build();

	eg.scheduleForExecution();
	RestartPipelinedRegionStrategy strategy = (RestartPipelinedRegionStrategy)eg.getFailoverStrategy();

	// ev11 is the preceding (producer) task from v1, ev21 the succeeding (consumer) task from v2.
	// Looking both up from v2 would make the two assertions below check the same region.
	ExecutionVertex ev11 = eg.getJobVertex(v1.getID()).getTaskVertices()[0];
	ExecutionVertex ev21 = eg.getJobVertex(v2.getID()).getTaskVertices()[0];
	ev21.getCurrentExecutionAttempt().fail(new Exception("Fail with v1"));

	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev21).getState());
	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev11).getState());
}
 
Example #13
Source File: GlobalModVersionTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and starts an execution graph, then attaches a single no-op job vertex whose
 * parallelism is picked at random from 1 to 10 (inclusive).
 *
 * @param failoverStrategy strategy wrapped into the {@code CustomStrategy} factory of the graph
 * @return the started execution graph with the vertex attached
 * @throws Exception if the graph cannot be created or the vertex cannot be attached
 */
private ExecutionGraph createSampleGraph(FailoverStrategy failoverStrategy) throws Exception {

	final JobID jobId = new JobID();
	final int numSubtasks = new Random().nextInt(10) + 1;

	// exactly one slot per subtask
	final SimpleSlotProvider slots = new SimpleSlotProvider(jobId, numSubtasks);

	// the graph starts out empty; the single job vertex is attached further below
	final DummyJobInformation jobInformation = new DummyJobInformation(jobId, "test job");
	final ExecutionGraph executionGraph = new ExecutionGraph(
		jobInformation,
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		Time.seconds(10),
		new InfiniteDelayRestartStrategy(),
		new CustomStrategy(failoverStrategy),
		slots);

	executionGraph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	JobVertex vertex = new JobVertex("test vertex");
	vertex.setInvokableClass(NoOpInvokable.class);
	vertex.setParallelism(numSubtasks);

	JobGraph jobGraph = new JobGraph(jobId, "testjob", vertex);
	executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());

	return executionGraph;
}
 
Example #14
Source File: AdaptedRestartPipelinedRegionStrategyNGAbortPendingCheckpointsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an execution graph for the given job graph, enables checkpointing on it,
 * starts and schedules it, and then runs everything queued on the manual main thread executor.
 *
 * @param jobGraph job graph to build the execution graph from
 * @return the scheduled execution graph
 * @throws Exception if building or scheduling the graph fails
 */
private ExecutionGraph createExecutionGraph(final JobGraph jobGraph) throws Exception {
	final ExecutionGraph graph = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(jobGraph)
		.setRestartStrategy(new FixedDelayRestartStrategy(10, 0))
		.setFailoverStrategyFactory(AdaptedRestartPipelinedRegionStrategyNG::new)
		.setSlotProvider(new SimpleSlotProvider(jobGraph.getJobID(), 2))
		.build();

	enableCheckpointing(graph);

	graph.start(componentMainThreadExecutor);
	graph.scheduleForExecution();

	// run the actions that were queued on the manual executor
	manualMainThreadExecutor.triggerAll();

	return graph;
}
 
Example #15
Source File: GlobalModVersionTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and starts an execution graph, then attaches a single no-op job vertex whose
 * parallelism is picked at random from 1 to 10 (inclusive).
 *
 * @param failoverStrategy strategy wrapped into the {@code CustomStrategy} factory of the graph
 * @return the started execution graph with the vertex attached
 * @throws Exception if the graph cannot be created or the vertex cannot be attached
 */
private ExecutionGraph createSampleGraph(FailoverStrategy failoverStrategy) throws Exception {

	final JobID jobId = new JobID();
	final int numSubtasks = new Random().nextInt(10) + 1;

	// exactly one slot per subtask
	final SimpleSlotProvider slots = new SimpleSlotProvider(jobId, numSubtasks);

	// the graph starts out empty; the single job vertex is attached further below
	final DummyJobInformation jobInformation = new DummyJobInformation(jobId, "test job");
	final ExecutionGraph executionGraph = new ExecutionGraph(
		jobInformation,
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		Time.seconds(10),
		new InfiniteDelayRestartStrategy(),
		new CustomStrategy(failoverStrategy),
		slots);

	executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());

	JobVertex vertex = new JobVertex("test vertex");
	vertex.setInvokableClass(NoOpInvokable.class);
	vertex.setParallelism(numSubtasks);

	JobGraph jobGraph = new JobGraph(jobId, "testjob", vertex);
	executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());

	return executionGraph;
}
 
Example #16
Source File: ExecutionGraphSuspendTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and starts an execution graph consisting of a single no-op vertex.
 *
 * @param gateway task manager gateway handed to the {@link SimpleSlotProvider}
 * @param parallelism parallelism of the vertex, also used as the number of slots
 * @return the started execution graph
 * @throws Exception if the graph cannot be created
 */
private static ExecutionGraph createExecutionGraph(TaskManagerGateway gateway, int parallelism) throws Exception {
	final JobVertex noOpVertex = new JobVertex("vertex");
	noOpVertex.setInvokableClass(NoOpInvokable.class);
	noOpVertex.setParallelism(parallelism);

	// One slot per subtask, all backed by the supplied gateway.
	final SlotProvider slots = new SimpleSlotProvider(parallelism, gateway);

	final ExecutionGraph graph = ExecutionGraphTestUtils.createSimpleTestGraph(
		slots,
		new FixedDelayRestartStrategy(0, 0),
		noOpVertex);

	graph.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	return graph;
}
 
Example #17
Source File: ExecutionGraphTestUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an execution graph for the given vertices and restart strategy, backed by a
 * {@link SimpleSlotProvider} that offers one slot per subtask of all vertices combined.
 *
 * @param taskManagerGateway gateway handed to the slot provider
 * @param restartStrategy restart strategy of the graph
 * @param vertices job vertices to put into the graph
 * @return the execution graph created by the slot-provider based overload
 * @throws Exception if the graph cannot be created
 */
public static ExecutionGraph createSimpleTestGraph(
		TaskManagerGateway taskManagerGateway,
		RestartStrategy restartStrategy,
		JobVertex... vertices) throws Exception {

	// Add up the parallelism of all vertices to size the slot provider.
	int requiredSlots = 0;
	for (int i = 0; i < vertices.length; i++) {
		requiredSlots += vertices[i].getParallelism();
	}

	final SlotProvider provider = new SimpleSlotProvider(requiredSlots, taskManagerGateway);
	return createSimpleTestGraph(provider, restartStrategy, vertices);
}
 
Example #18
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the graph globally twice (once while running, once while restarting) and checks
 * that it still restarts, finishes, and reports at most two restarts.
 */
@Test
public void testGlobalFailAndRestarts() throws Exception {
	final int numSubtasks = 10;
	final JobVertex noOpVertex = createNoOpVertex(numSubtasks);
	final NotCancelAckingTaskGateway gateway = new NotCancelAckingTaskGateway();
	final SlotProvider slotProvider = new SimpleSlotProvider(numSubtasks, gateway);
	final TestRestartStrategy manualRestarts = TestRestartStrategy.manuallyTriggered();

	final JobGraph job = new JobGraph(TEST_JOB_ID, "Test Job", noOpVertex);
	job.setScheduleMode(ScheduleMode.EAGER);

	final ExecutionGraph graph = TestingExecutionGraphBuilder
		.newBuilder()
		.setJobGraph(job)
		.setSlotProvider(slotProvider)
		.setRestartStrategy(manualRestarts)
		.build();

	startAndScheduleExecutionGraph(graph);

	switchToRunning(graph);

	// first global failure: FAILING until all vertices are cancelled, then RESTARTING
	graph.failGlobal(new Exception("intended test failure 1"));
	assertEquals(JobStatus.FAILING, graph.getState());

	completeCancellingForAllVertices(graph);

	assertEquals(JobStatus.RESTARTING, graph.getState());

	// second global failure while already restarting: state stays RESTARTING
	graph.failGlobal(new Exception("intended test failure 2"));
	assertEquals(JobStatus.RESTARTING, graph.getState());

	// release the manually triggered restart(s) and wait for them
	manualRestarts.triggerAll().join();

	assertEquals(JobStatus.RUNNING, graph.getState());

	switchToRunning(graph);
	finishAllVertices(graph);

	graph.waitUntilTerminal();
	assertEquals(JobStatus.FINISHED, graph.getState());

	assertThat("Too many restarts", graph.getNumberOfRestarts(), is(lessThanOrEqualTo(2L)));
}
 
Example #19
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the graph globally twice (once while running, once while restarting) and checks
 * that it still restarts, finishes, and reports at most two full restarts.
 */
@Test
public void testGlobalFailAndRestarts() throws Exception {
	final int numSubtasks = 10;
	final JobVertex noOpVertex = createNoOpVertex(numSubtasks);
	final NotCancelAckingTaskGateway gateway = new NotCancelAckingTaskGateway();
	final SlotProvider slotProvider = new SimpleSlotProvider(TEST_JOB_ID, numSubtasks, gateway);
	final TestRestartStrategy manualRestarts = TestRestartStrategy.manuallyTriggered();

	final ExecutionGraph graph = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(TEST_JOB_ID, noOpVertex)
		.setSlotProvider(slotProvider)
		.setRestartStrategy(manualRestarts)
		.setScheduleMode(ScheduleMode.EAGER)
		.build();

	graph.start(mainThreadExecutor);

	graph.scheduleForExecution();

	switchToRunning(graph);

	// first global failure: FAILING until all vertices are cancelled, then RESTARTING
	graph.failGlobal(new Exception("intended test failure 1"));
	assertEquals(JobStatus.FAILING, graph.getState());

	completeCancellingForAllVertices(graph);

	assertEquals(JobStatus.RESTARTING, graph.getState());

	// second global failure while already restarting: state stays RESTARTING
	graph.failGlobal(new Exception("intended test failure 2"));
	assertEquals(JobStatus.RESTARTING, graph.getState());

	// release the manually triggered restart(s) and wait for them
	manualRestarts.triggerAll().join();

	assertEquals(JobStatus.RUNNING, graph.getState());

	switchToRunning(graph);
	finishAllVertices(graph);

	graph.waitUntilTerminal();
	assertEquals(JobStatus.FINISHED, graph.getState());

	// more than two full restarts means a failure triggered an extra restart
	if (graph.getNumberOfFullRestarts() > 2) {
		fail("Too many restarts: " + graph.getNumberOfFullRestarts());
	}
}
 
Example #20
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates, starts and schedules an execution graph of three vertices (parallelism 3, 2
 * and 2) that are all connected to each other through PIPELINED, ALL_TO_ALL edges.
 *
 * <p>Checkpointing is enabled before the graph is started, and pending checkpoints are
 * attached after it has been scheduled.
 *
 * @param restartStrategy restart strategy of the graph
 * @return the scheduled execution graph
 * @throws Exception if creating, starting or scheduling the graph fails
 * @throws AssertionError if the job vertices cannot be attached; the original
 *         {@code JobException} is kept as the cause
 */
private ExecutionGraph createSingleRegionExecutionGraph(RestartStrategy restartStrategy) throws Exception {
	final JobID jobId = new JobID();
	final String jobName = "Test Job Sample Name";

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 14);

	JobVertex v1 = new JobVertex("vertex1");
	JobVertex v2 = new JobVertex("vertex2");
	JobVertex v3 = new JobVertex("vertex3");

	v1.setParallelism(3);
	v2.setParallelism(2);
	v3.setParallelism(2);

	v1.setInvokableClass(AbstractInvokable.class);
	v2.setInvokableClass(AbstractInvokable.class);
	v3.setInvokableClass(AbstractInvokable.class);

	// only pipelined edges: v1 -> v2, v1 -> v3, v2 -> v3
	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	v3.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

	List<JobVertex> ordered = new ArrayList<>(Arrays.asList(v1, v2, v3));

	ExecutionGraph eg = new ExecutionGraph(
		new DummyJobInformation(
			jobId,
			jobName),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		restartStrategy,
		new FailoverPipelinedRegionWithDirectExecutor(),
		slotProvider);
	try {
		eg.attachJobGraph(ordered);
	}
	catch (JobException e) {
		// Fail with the same error type and message as before, but keep the cause attached
		// instead of printing the stack trace to stderr.
		throw new AssertionError("Job failed with exception: " + e.getMessage(), e);
	}

	enableCheckpointing(eg);

	eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	eg.scheduleForExecution();

	attachPendingCheckpoints(eg);
	return eg;
}
 
Example #21
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the "all partitions finished" / "consumable" status of a result is reset when
 * the failover strategy is told that its producer failed, and is set again once the
 * producer finishes anew.
 *
 * <p>The job has two vertices of parallelism 2 connected ALL_TO_ALL through a BLOCKING result.
 *
 * @throws Exception if creating, starting or scheduling the execution graph fails
 */
@Test
public void testStatusResettingOnRegionFailover() throws Exception {
	final JobID jobId = new JobID();
	final String jobName = "Test Job Sample Name";

	final SlotProvider slotProvider = new SimpleSlotProvider(jobId, 20);

	JobVertex v1 = new JobVertex("vertex1");
	JobVertex v2 = new JobVertex("vertex2");

	v1.setParallelism(2);
	v2.setParallelism(2);

	v1.setInvokableClass(AbstractInvokable.class);
	v2.setInvokableClass(AbstractInvokable.class);

	// v1 produces the blocking result that v2 consumes
	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

	List<JobVertex> ordered = Arrays.asList(v1, v2);

	ExecutionGraph eg = new ExecutionGraph(
		new DummyJobInformation(
			jobId,
			jobName),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		new InfiniteDelayRestartStrategy(10),
		new FailoverPipelinedRegionWithDirectExecutor(),
		slotProvider);

	eg.attachJobGraph(ordered);
	eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());

	RestartPipelinedRegionStrategy strategy = (RestartPipelinedRegionStrategy)eg.getFailoverStrategy();

	// evXY = subtask Y of vertex X
	ExecutionVertex ev11 = eg.getJobVertex(v1.getID()).getTaskVertices()[0];
	ExecutionVertex ev12 = eg.getJobVertex(v1.getID()).getTaskVertices()[1];
	ExecutionVertex ev21 = eg.getJobVertex(v2.getID()).getTaskVertices()[0];
	ExecutionVertex ev22 = eg.getJobVertex(v2.getID()).getTaskVertices()[1];

	eg.scheduleForExecution();

	// initial state: producers are deploying, consumers not yet started, nothing consumable
	assertEquals(ExecutionState.DEPLOYING, ev11.getExecutionState());
	assertEquals(ExecutionState.DEPLOYING, ev12.getExecutionState());
	assertEquals(ExecutionState.CREATED, ev21.getExecutionState());
	assertEquals(ExecutionState.CREATED, ev22.getExecutionState());
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].areAllPartitionsFinished());
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[0].isConsumable());
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[1].isConsumable());

	// partitions all finished: both producers finish, consumers move to deploying
	ev11.getCurrentExecutionAttempt().markFinished();
	ev12.getCurrentExecutionAttempt().markFinished();
	assertEquals(ExecutionState.FINISHED, ev11.getExecutionState());
	assertEquals(ExecutionState.FINISHED, ev12.getExecutionState());
	assertEquals(ExecutionState.DEPLOYING, ev21.getExecutionState());
	assertEquals(ExecutionState.DEPLOYING, ev22.getExecutionState());
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].areAllPartitionsFinished());
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[0].isConsumable());
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[1].isConsumable());

	// force the partition producer to restart; the result must no longer count as finished
	strategy.onTaskFailure(ev11.getCurrentExecutionAttempt(), new FlinkException("Fail for testing"));
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].areAllPartitionsFinished());
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[0].isConsumable());
	assertFalse(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[1].isConsumable());

	// failed partition finishes again: the whole result is finished and consumable once more
	ev11.getCurrentExecutionAttempt().markFinished();
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].areAllPartitionsFinished());
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[0].isConsumable());
	assertTrue(eg.getJobVertex(v1.getID()).getProducedDataSets()[0].getPartitions()[1].isConsumable());
}
 
Example #22
Source File: FailoverRegionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that two regions failover at the same time, they will not influence each other.
 *
 * <p>The job has four vertices of parallelism 2: v1 feeds v2 and v3 feeds v4 through
 * PIPELINED edges, while v2 feeds v4 through a BLOCKING edge.
 *
 * @throws Exception if fail to create dummy job information, fail to schedule for execution
 * or timeout before region switches to expected status.
 * @throws AssertionError if the job vertices cannot be attached; the original
 *         {@code JobException} is kept as the cause
 */
@Test
public void testMultiRegionFailoverAtSameTime() throws Exception {
	final JobID jobId = new JobID();
	final String jobName = "Test Job Sample Name";

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 16);

	JobVertex v1 = new JobVertex("vertex1");
	JobVertex v2 = new JobVertex("vertex2");
	JobVertex v3 = new JobVertex("vertex3");
	JobVertex v4 = new JobVertex("vertex4");

	v1.setParallelism(2);
	v2.setParallelism(2);
	v3.setParallelism(2);
	v4.setParallelism(2);

	v1.setInvokableClass(AbstractInvokable.class);
	v2.setInvokableClass(AbstractInvokable.class);
	v3.setInvokableClass(AbstractInvokable.class);
	v4.setInvokableClass(AbstractInvokable.class);

	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
	v4.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

	List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);

	ExecutionGraph eg = new ExecutionGraph(
		new DummyJobInformation(
			jobId,
			jobName),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		new InfiniteDelayRestartStrategy(10),
		new RestartPipelinedRegionStrategy.Factory(),
		slotProvider);
	try {
		eg.attachJobGraph(ordered);
	}
	catch (JobException e) {
		// Fail with the same error type and message as before, but keep the cause attached
		// instead of printing the stack trace to stderr.
		throw new AssertionError("Job failed with exception: " + e.getMessage(), e);
	}
	eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
	eg.scheduleForExecution();
	RestartPipelinedRegionStrategy strategy = (RestartPipelinedRegionStrategy)eg.getFailoverStrategy();

	// subtasks of v1 and v3, the two vertices whose regions are failed below
	ExecutionVertex ev11 = eg.getJobVertex(v1.getID()).getTaskVertices()[0];
	ExecutionVertex ev12 = eg.getJobVertex(v1.getID()).getTaskVertices()[1];
	ExecutionVertex ev31 = eg.getJobVertex(v3.getID()).getTaskVertices()[0];
	ExecutionVertex ev32 = eg.getJobVertex(v3.getID()).getTaskVertices()[1];
	assertEquals(JobStatus.RUNNING, strategy.getFailoverRegion(ev11).getState());
	assertEquals(JobStatus.RUNNING, strategy.getFailoverRegion(ev31).getState());

	// fail one subtask in each region; both regions start cancelling
	ev11.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	ev31.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev11).getState());
	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev31).getState());

	// finish cancelling in the v3 region first, then in the v1 region; each must get back to RUNNING
	ev32.getCurrentExecutionAttempt().completeCancelling();
	waitUntilFailoverRegionState(strategy.getFailoverRegion(ev31), JobStatus.RUNNING, 1000);

	ev12.getCurrentExecutionAttempt().completeCancelling();
	waitUntilFailoverRegionState(strategy.getFailoverRegion(ev11), JobStatus.RUNNING, 1000);
}
 
Example #23
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a cancellation concurrent to a local failover leads to a properly
 * cancelled state.
 *
 * @throws Exception if creating, starting or scheduling the execution graph fails
 */
@Test
public void testCancelWhileInLocalFailover() throws Exception {

	// the logic in this test is as follows:
	//  - start a job
	//  - cause a task failure and delay the local recovery action via the blocker future
	//  - cancel the job to go into cancelling
	//  - resume in local recovery action
	//  - validate that this does in fact not start a new task, because the graph as a
	//    whole should now be cancelled already

	final JobID jid = new JobID();
	final int parallelism = 2;

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism);

	final ExecutionGraph graph = createSampleGraph(
		jid,
		TestRestartPipelinedRegionStrategy::new,
		TestRestartStrategy.directExecuting(),
		slotProvider,
		parallelism);

	graph.start(mainThreadExecutor);
	TestRestartPipelinedRegionStrategy strategy = (TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	// This future is used to block the failover strategy execution until we complete it
	final CompletableFuture<?> blocker = new CompletableFuture<>();
	strategy.setBlockerFuture(blocker);

	// the two subtasks of the first job vertex in topological order
	final ExecutionJobVertex ejv = graph.getVerticesTopologically().iterator().next();
	final ExecutionVertex vertex1 = ejv.getTaskVertices()[0];
	final ExecutionVertex vertex2 = ejv.getTaskVertices()[1];

	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());

	// let one of the vertices fail - that triggers a local recovery action
	vertex1.getCurrentExecutionAttempt().fail(new Exception("test failure"));
	assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState());

	// graph should still be running and the failover recovery action should be queued
	assertEquals(JobStatus.RUNNING, graph.getState());

	// now cancel the job
	graph.cancel();

	// the failed vertex stays FAILED, the other one is being cancelled
	assertEquals(JobStatus.CANCELLING, graph.getState());
	assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState());
	assertEquals(ExecutionState.CANCELING, vertex2.getCurrentExecutionAttempt().getState());

	// let the recovery action continue
	blocker.complete(null);

	// now report that cancelling is complete for the other vertex
	vertex2.getCurrentExecutionAttempt().completeCancelling();

	assertEquals(JobStatus.CANCELED, graph.getTerminationFuture().get());
	assertTrue(vertex1.getCurrentExecutionAttempt().getState().isTerminal());
	assertTrue(vertex2.getCurrentExecutionAttempt().getState().isTerminal());

	// make sure all slots are recycled
	assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots());
}
 
Example #24
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a terminal global failure concurrent to a local failover
 * leads to a properly failed state.
 *
 * @throws Exception if creating, starting or scheduling the execution graph fails
 */
@Test
public void testGlobalFailureConcurrentToLocalFailover() throws Exception {

	// the logic in this test is as follows:
	//  - start a job
	//  - cause a task failure and delay the local recovery action via the blocker future
	//  - cause a global failure
	//  - resume in local recovery action
	//  - validate that this does in fact not start a new task, because the graph as a
	//    whole should now be terminally failed already

	final JobID jid = new JobID();
	final int parallelism = 2;

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism);

	final ExecutionGraph graph = createSampleGraph(
		jid,
		TestRestartPipelinedRegionStrategy::new,
		TestRestartStrategy.directExecuting(),
		slotProvider,
		parallelism);

	graph.start(mainThreadExecutor);
	TestRestartPipelinedRegionStrategy strategy = (TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	// This future is used to block the failover strategy execution until we complete it
	final CompletableFuture<?> blocker = new CompletableFuture<>();
	strategy.setBlockerFuture(blocker);

	// the two subtasks of the first job vertex in topological order
	final ExecutionJobVertex ejv = graph.getVerticesTopologically().iterator().next();
	final ExecutionVertex vertex1 = ejv.getTaskVertices()[0];
	final ExecutionVertex vertex2 = ejv.getTaskVertices()[1];

	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());

	// let one of the vertices fail - that triggers a local recovery action
	vertex1.getCurrentExecutionAttempt().fail(new Exception("test failure"));
	assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState());

	// graph should still be running and the failover recovery action should be queued
	assertEquals(JobStatus.RUNNING, graph.getState());

	// now fail the job globally, with an exception that suppresses restarts
	graph.failGlobal(new SuppressRestartsException(new Exception("test exception")));

	// the failed vertex stays FAILED, the other one is being cancelled
	assertEquals(JobStatus.FAILING, graph.getState());
	assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState());
	assertEquals(ExecutionState.CANCELING, vertex2.getCurrentExecutionAttempt().getState());

	// let the recovery action continue
	blocker.complete(null);

	// now report that cancelling is complete for the other vertex
	vertex2.getCurrentExecutionAttempt().completeCancelling();

	assertEquals(JobStatus.FAILED, graph.getState());
	assertTrue(vertex1.getCurrentExecutionAttempt().getState().isTerminal());
	assertTrue(vertex2.getCurrentExecutionAttempt().getState().isTerminal());

	// make sure all slots are recycled
	assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots());
}
 
Example #25
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that cancelling the job while a local failover is still pending
 * leaves the execution graph in a properly cancelled state.
 */
@Test
public void testCancelWhileInLocalFailover() throws Exception {

	// Scenario exercised here:
	//  1. start a job with parallelism 2
	//  2. fail one task; the failover strategy is held back by a blocker future
	//  3. cancel the job, which moves the graph to CANCELLING
	//  4. release the blocked failover strategy
	//  5. check that the job terminates as CANCELED, both attempts are terminal
	//     and every slot is available again

	final int parallelism = 2;
	final JobID jid = new JobID();
	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism);

	final ExecutionGraph graph = createSampleGraph(
		jid,
		TestRestartPipelinedRegionStrategy::new,
		TestRestartStrategy.directExecuting(),
		slotProvider,
		parallelism);
	graph.start(mainThreadExecutor);

	// The failover strategy does not proceed until this future is completed
	final TestRestartPipelinedRegionStrategy failoverStrategy =
		(TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy();
	final CompletableFuture<?> failoverGate = new CompletableFuture<>();
	failoverStrategy.setBlockerFuture(failoverGate);

	// Pick the two subtasks of the first job vertex in topological order
	final ExecutionVertex[] subtasks = graph.getVerticesTopologically().iterator().next().getTaskVertices();
	final ExecutionVertex failingVertex = subtasks[0];
	final ExecutionVertex survivingVertex = subtasks[1];

	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());

	// Failing a single attempt kicks off the (currently blocked) local recovery
	failingVertex.getCurrentExecutionAttempt().fail(new Exception("test failure"));
	assertEquals(ExecutionState.FAILED, failingVertex.getCurrentExecutionAttempt().getState());

	// The job as a whole keeps running while the recovery action is pending
	assertEquals(JobStatus.RUNNING, graph.getState());

	// Cancel the job while the recovery is still blocked
	graph.cancel();

	assertEquals(JobStatus.CANCELLING, graph.getState());
	assertEquals(ExecutionState.FAILED, failingVertex.getCurrentExecutionAttempt().getState());
	assertEquals(ExecutionState.CANCELING, survivingVertex.getCurrentExecutionAttempt().getState());

	// Unblock the recovery action
	failoverGate.complete(null);

	// Acknowledge the cancellation of the vertex that had not failed
	survivingVertex.getCurrentExecutionAttempt().completeCancelling();

	assertEquals(JobStatus.CANCELED, graph.getTerminationFuture().get());
	assertTrue(failingVertex.getCurrentExecutionAttempt().getState().isTerminal());
	assertTrue(survivingVertex.getCurrentExecutionAttempt().getState().isTerminal());

	// All slots must be back in the provider
	assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots());
}
 
Example #26
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds, starts and schedules an execution graph made of three job vertices
 * (parallelism 3, 2 and 2) that are wired together exclusively through
 * ALL_TO_ALL pipelined edges.
 *
 * @param restartStrategy restart strategy handed to the execution graph
 * @return the execution graph after {@code scheduleForExecution()} has been called
 * @throws Exception if building, starting or scheduling the graph fails
 */
private static ExecutionGraph createSingleRegionExecutionGraph(RestartStrategy restartStrategy) throws Exception {
	final JobID jobId = new JobID();
	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 14);

	// Configure each vertex completely before moving on to the next one
	final JobVertex source = new JobVertex("vertex1");
	source.setParallelism(3);
	source.setInvokableClass(AbstractInvokable.class);

	final JobVertex middle = new JobVertex("vertex2");
	middle.setParallelism(2);
	middle.setInvokableClass(AbstractInvokable.class);

	final JobVertex sink = new JobVertex("vertex3");
	sink.setParallelism(2);
	sink.setInvokableClass(AbstractInvokable.class);

	// Pipelined edges only: vertex1 -> vertex2, vertex1 -> vertex3, vertex2 -> vertex3
	middle.connectNewDataSetAsInput(source, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	sink.connectNewDataSetAsInput(source, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	sink.connectNewDataSetAsInput(middle, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

	final List<JobVertex> topologicalOrder = new ArrayList<>();
	topologicalOrder.add(source);
	topologicalOrder.add(middle);
	topologicalOrder.add(sink);

	final ExecutionGraph executionGraph = new ExecutionGraph(
		new DummyJobInformation(jobId, "Test Job Sample Name"),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		restartStrategy,
		new FailoverPipelinedRegionWithDirectExecutor(),
		slotProvider);

	try {
		executionGraph.attachJobGraph(topologicalOrder);
	} catch (JobException attachFailure) {
		// Report a failed attach as a test failure
		attachFailure.printStackTrace();
		fail("Job failed with exception: " + attachFailure.getMessage());
	}

	executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());
	executionGraph.scheduleForExecution();
	return executionGraph;
}
 
Example #27
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that if a task reports the result of its preceding task is failed,
 * its preceding task will be considered as failed, and start to failover.
 *
 * <p>The graph consists of two vertices (parallelism 1 each) joined by a BLOCKING
 * edge. The downstream task is failed and the failover regions of both the
 * downstream and the upstream task are expected to be CANCELLING.
 *
 * <p>TODO: as the report part is not finished yet, this case is ignored temporarily
 *
 * @throws Exception if building or scheduling the execution graph fails
 */
@Ignore
@Test
public void testSucceedingNoticePreceding() throws Exception {
	final JobID jobId = new JobID();
	final String jobName = "Test Job Sample Name";

	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 14);

	JobVertex v1 = new JobVertex("vertex1");
	JobVertex v2 = new JobVertex("vertex2");

	v1.setParallelism(1);
	v2.setParallelism(1);

	v1.setInvokableClass(AbstractInvokable.class);
	v2.setInvokableClass(AbstractInvokable.class);

	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);

	List<JobVertex> ordered = new ArrayList<>(Arrays.asList(v1, v2));

	ExecutionGraph eg = new ExecutionGraph(
		new DummyJobInformation(
			jobId,
			jobName),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		new InfiniteDelayRestartStrategy(10),
		new FailoverPipelinedRegionWithDirectExecutor(),
		slotProvider);
	try {
		eg.attachJobGraph(ordered);
	}
	catch (JobException e) {
		e.printStackTrace();
		fail("Job failed with exception: " + e.getMessage());
	}
	// NOTE(review): other tests shown for this file call eg.start(...) before scheduling;
	// this ignored test does not - confirm whether that is needed when re-enabling it.
	eg.setScheduleMode(ScheduleMode.EAGER);
	eg.scheduleForExecution();
	RestartPipelinedRegionStrategy strategy = (RestartPipelinedRegionStrategy)eg.getFailoverStrategy();

	// ev11 must be the upstream task (v1). It was previously looked up via v2, which made
	// ev11 and ev21 the same vertex, so the upstream region was never actually checked.
	ExecutionVertex ev11 = eg.getJobVertex(v1.getID()).getTaskVertices()[0];
	ExecutionVertex ev21 = eg.getJobVertex(v2.getID()).getTaskVertices()[0];
	ev21.getCurrentExecutionAttempt().fail(new Exception("Fail with v1"));

	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev21).getState());
	assertEquals(JobStatus.CANCELLING, strategy.getFailoverRegion(ev11).getState());
}
 
Example #28
Source File: FailoverRegionTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that two failover regions which fail at the same time recover
 * independently, without influencing one another.
 *
 * @throws Exception if building, scheduling or waiting on the graph fails
 */
@Test
public void testMultiRegionFailoverAtSameTime() throws Exception {
	final JobID jobId = new JobID();
	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jobId, 16);

	// Four vertices with parallelism 2 each, configured one at a time
	final JobVertex v1 = new JobVertex("vertex1");
	v1.setParallelism(2);
	v1.setInvokableClass(AbstractInvokable.class);

	final JobVertex v2 = new JobVertex("vertex2");
	v2.setParallelism(2);
	v2.setInvokableClass(AbstractInvokable.class);

	final JobVertex v3 = new JobVertex("vertex3");
	v3.setParallelism(2);
	v3.setInvokableClass(AbstractInvokable.class);

	final JobVertex v4 = new JobVertex("vertex4");
	v4.setParallelism(2);
	v4.setInvokableClass(AbstractInvokable.class);

	// v1 -> v2 and v3 -> v4 are pipelined, v2 -> v4 is blocking
	v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
	v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.BLOCKING);
	v4.connectNewDataSetAsInput(v3, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

	final List<JobVertex> topologicalOrder = Arrays.asList(v1, v2, v3, v4);

	final ExecutionGraph graph = new ExecutionGraph(
		new DummyJobInformation(jobId, "Test Job Sample Name"),
		TestingUtils.defaultExecutor(),
		TestingUtils.defaultExecutor(),
		AkkaUtils.getDefaultTimeout(),
		new InfiniteDelayRestartStrategy(10),
		new RestartPipelinedRegionStrategy.Factory(),
		slotProvider);

	try {
		graph.attachJobGraph(topologicalOrder);
	} catch (JobException attachFailure) {
		attachFailure.printStackTrace();
		fail("Job failed with exception: " + attachFailure.getMessage());
	}

	graph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread());
	graph.scheduleForExecution();

	final RestartPipelinedRegionStrategy regionStrategy =
		(RestartPipelinedRegionStrategy) graph.getFailoverStrategy();

	final ExecutionVertex[] v1Tasks = graph.getJobVertex(v1.getID()).getTaskVertices();
	final ExecutionVertex[] v3Tasks = graph.getJobVertex(v3.getID()).getTaskVertices();
	final ExecutionVertex ev11 = v1Tasks[0];
	final ExecutionVertex ev12 = v1Tasks[1];
	final ExecutionVertex ev31 = v3Tasks[0];
	final ExecutionVertex ev32 = v3Tasks[1];

	// Both regions start out healthy
	assertEquals(JobStatus.RUNNING, regionStrategy.getFailoverRegion(ev11).getState());
	assertEquals(JobStatus.RUNNING, regionStrategy.getFailoverRegion(ev31).getState());

	// Fail one task in each of the two regions back to back
	ev11.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	ev31.getCurrentExecutionAttempt().fail(new Exception("new fail"));
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(ev11).getState());
	assertEquals(JobStatus.CANCELLING, regionStrategy.getFailoverRegion(ev31).getState());

	// Acknowledge cancellation of the second v3 task and wait for ev31's region to run again
	ev32.getCurrentExecutionAttempt().completeCancelling();
	waitUntilFailoverRegionState(regionStrategy.getFailoverRegion(ev31), JobStatus.RUNNING, 1000);

	// Same for the second v1 task and ev11's region
	ev12.getCurrentExecutionAttempt().completeCancelling();
	waitUntilFailoverRegionState(regionStrategy.getFailoverRegion(ev11), JobStatus.RUNNING, 1000);
}
 
Example #29
Source File: ExecutionGraphRestartTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Triggers a global failure, then a second one while the graph is already
 * RESTARTING, and checks that after the manually triggered restart the job
 * runs to FINISHED without exceeding two full restarts.
 */
@Test
public void testGlobalFailAndRestarts() throws Exception {
	final JobID jid = new JobID();
	final int parallelism = 10;

	final TestRestartStrategy restartStrategy = TestRestartStrategy.manuallyTriggered();
	final NotCancelAckingTaskGateway taskManagerGateway = new NotCancelAckingTaskGateway();
	final JobVertex vertex = createNoOpVertex(parallelism);
	final SlotProvider slots = new SimpleSlotProvider(jid, parallelism, taskManagerGateway);

	final ExecutionGraph graph = createSimpleTestGraph(jid, slots, restartStrategy, vertex);
	graph.start(mainThreadExecutor);

	graph.setScheduleMode(ScheduleMode.EAGER);
	graph.scheduleForExecution();

	switchToRunning(graph);

	// First global failure: the graph is FAILING until all cancellations are completed
	graph.failGlobal(new Exception("intended test failure 1"));
	assertEquals(JobStatus.FAILING, graph.getState());

	completeCancellingForAllVertices(graph);

	assertEquals(JobStatus.RESTARTING, graph.getState());

	// Second global failure while RESTARTING: the state is expected to stay RESTARTING
	graph.failGlobal(new Exception("intended test failure 2"));
	assertEquals(JobStatus.RESTARTING, graph.getState());

	// Run the restarts that the manually triggered strategy has collected
	restartStrategy.triggerAll().join();

	assertEquals(JobStatus.RUNNING, graph.getState());

	switchToRunning(graph);
	finishAllVertices(graph);

	graph.waitUntilTerminal();
	assertEquals(JobStatus.FINISHED, graph.getState());

	final boolean tooManyRestarts = graph.getNumberOfFullRestarts() > 2;
	if (tooManyRestarts) {
		fail("Too many restarts: " + graph.getNumberOfFullRestarts());
	}
}
 
Example #30
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a terminal global failure arriving while a local failover is
 * still pending leaves the execution graph in a properly failed state.
 */
@Test
public void testGlobalFailureConcurrentToLocalFailover() throws Exception {

	// Scenario exercised here:
	//  1. start a job with parallelism 2
	//  2. fail one task; the failover strategy is held back by a blocker future
	//  3. trigger a global failure that suppresses restarts
	//  4. release the blocked failover strategy
	//  5. check that the job is FAILED, both attempts are terminal and every
	//     slot is available again

	final int parallelism = 2;
	final JobID jid = new JobID();
	final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism);

	final ExecutionGraph graph = createSampleGraph(
		jid,
		TestRestartPipelinedRegionStrategy::new,
		TestRestartStrategy.directExecuting(),
		slotProvider,
		parallelism);
	graph.start(mainThreadExecutor);

	// The failover strategy does not proceed until this future is completed
	final TestRestartPipelinedRegionStrategy failoverStrategy =
		(TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy();
	final CompletableFuture<?> failoverGate = new CompletableFuture<>();
	failoverStrategy.setBlockerFuture(failoverGate);

	// Pick the two subtasks of the first job vertex in topological order
	final ExecutionVertex[] subtasks = graph.getVerticesTopologically().iterator().next().getTaskVertices();
	final ExecutionVertex failingVertex = subtasks[0];
	final ExecutionVertex survivingVertex = subtasks[1];

	graph.scheduleForExecution();
	assertEquals(JobStatus.RUNNING, graph.getState());

	// Failing a single attempt kicks off the (currently blocked) local recovery
	failingVertex.getCurrentExecutionAttempt().fail(new Exception("test failure"));
	assertEquals(ExecutionState.FAILED, failingVertex.getCurrentExecutionAttempt().getState());

	// The job as a whole keeps running while the recovery action is pending
	assertEquals(JobStatus.RUNNING, graph.getState());

	// Fail the whole job with an exception that suppresses restarts
	graph.failGlobal(new SuppressRestartsException(new Exception("test exception")));

	assertEquals(JobStatus.FAILING, graph.getState());
	assertEquals(ExecutionState.FAILED, failingVertex.getCurrentExecutionAttempt().getState());
	assertEquals(ExecutionState.CANCELING, survivingVertex.getCurrentExecutionAttempt().getState());

	// Unblock the recovery action
	failoverGate.complete(null);

	// Acknowledge the cancellation of the vertex that had not failed
	survivingVertex.getCurrentExecutionAttempt().completeCancelling();

	assertEquals(JobStatus.FAILED, graph.getState());
	assertTrue(failingVertex.getCurrentExecutionAttempt().getState().isTerminal());
	assertTrue(survivingVertex.getCurrentExecutionAttempt().getState().isTerminal());

	// All slots must be back in the provider
	assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots());
}