Java Code Examples for org.apache.flink.runtime.state.SharedStateRegistry#DEFAULT_FACTORY
The following examples show how to use
org.apache.flink.runtime.state.SharedStateRegistry#DEFAULT_FACTORY.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CheckpointCoordinatorMasterHooksTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link CheckpointCoordinator} used by the tests of this class.
 *
 * <p>Only the acknowledging vertices are configurable; the two other vertex arrays handed to
 * the coordinator are empty.
 *
 * @param jid the job the coordinator is created for
 * @param ackVertices the vertices passed as the coordinator's acknowledging vertices
 * @return a newly constructed coordinator
 */
private static CheckpointCoordinator instantiateCheckpointCoordinator(JobID jid, ExecutionVertex... ackVertices) {
    // numeric settings, in the order the constructor expects them
    final long interval = 10000000L;
    final long timeout = 600000L;
    final long minPause = 0L;
    final int maxConcurrent = 1;

    // two separate empty arrays surround the acknowledging vertices
    final ExecutionVertex[] verticesBeforeAck = new ExecutionVertex[0];
    final ExecutionVertex[] verticesAfterAck = new ExecutionVertex[0];

    return new CheckpointCoordinator(
            jid,
            interval,
            timeout,
            minPause,
            maxConcurrent,
            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
            verticesBeforeAck,
            ackVertices,
            verticesAfterAck,
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(10),
            new MemoryStateBackend(),
            Executors.directExecutor(),
            SharedStateRegistry.DEFAULT_FACTORY);
}
Example 2
Source File: CheckpointCoordinatorMasterHooksTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link CheckpointCoordinator} used by the tests of this class.
 *
 * <p>The coordinator gets a fixed {@link CheckpointCoordinatorConfiguration}, empty vertex
 * arrays around the given acknowledging vertices, and a {@link CheckpointFailureManager}
 * wired to {@code NoOpFailJobCall.INSTANCE}.
 *
 * @param jid the job the coordinator is created for
 * @param ackVertices the vertices passed as the coordinator's acknowledging vertices
 * @return a newly constructed coordinator
 */
private static CheckpointCoordinator instantiateCheckpointCoordinator(JobID jid, ExecutionVertex... ackVertices) {
    final CheckpointCoordinatorConfiguration configuration =
            new CheckpointCoordinatorConfiguration(
                    10000000L,
                    600000L,
                    0L,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0);

    final ExecutionVertex[] verticesBeforeAck = new ExecutionVertex[0];
    final ExecutionVertex[] verticesAfterAck = new ExecutionVertex[0];
    final CheckpointFailureManager noOpFailureManager =
            new CheckpointFailureManager(0, NoOpFailJobCall.INSTANCE);

    return new CheckpointCoordinator(
            jid,
            configuration,
            verticesBeforeAck,
            ackVertices,
            verticesAfterAck,
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(10),
            new MemoryStateBackend(),
            Executors.directExecutor(),
            SharedStateRegistry.DEFAULT_FACTORY,
            noOpFailureManager);
}
Example 3
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link CheckpointCoordinator} in which both given vertices appear in each of the
 * three vertex arrays handed to the coordinator.
 *
 * @param jobId the job the coordinator is created for
 * @param vertex1 first vertex placed in every vertex array
 * @param vertex2 second vertex placed in every vertex array
 * @param failureManager failure manager passed through to the coordinator
 * @return a newly constructed coordinator
 */
private CheckpointCoordinator getCheckpointCoordinator(
        final JobID jobId,
        final ExecutionVertex vertex1,
        final ExecutionVertex vertex2,
        final CheckpointFailureManager failureManager) {

    // three distinct arrays with identical content
    final ExecutionVertex[] firstVertexArray = { vertex1, vertex2 };
    final ExecutionVertex[] secondVertexArray = { vertex1, vertex2 };
    final ExecutionVertex[] thirdVertexArray = { vertex1, vertex2 };

    final CheckpointCoordinatorConfiguration configuration =
            new CheckpointCoordinatorConfiguration(
                    600000,
                    600000,
                    0,
                    Integer.MAX_VALUE,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0);

    return new CheckpointCoordinator(
            jobId,
            configuration,
            firstVertexArray,
            secondVertexArray,
            thirdVertexArray,
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(1),
            new MemoryStateBackend(),
            Executors.directExecutor(),
            SharedStateRegistry.DEFAULT_FACTORY,
            failureManager);
}
Example 4
Source File: CheckpointCoordinatorMasterHooksTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link CheckpointCoordinator} used by the tests of this class, wired to the
 * supplied scheduled executor.
 *
 * @param jid the job the coordinator is created for
 * @param testingScheduledExecutor scheduled executor handed to the coordinator
 * @param ackVertices the vertices passed as the coordinator's acknowledging vertices
 * @return a newly constructed coordinator
 */
private CheckpointCoordinator instantiateCheckpointCoordinator(
        JobID jid,
        ScheduledExecutor testingScheduledExecutor,
        ExecutionVertex... ackVertices) {

    final CheckpointCoordinatorConfiguration configuration =
            new CheckpointCoordinatorConfiguration(
                    10000000L,
                    600000L,
                    0L,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    false,
                    0);

    final ExecutionVertex[] verticesBeforeAck = new ExecutionVertex[0];
    final ExecutionVertex[] verticesAfterAck = new ExecutionVertex[0];
    final CheckpointFailureManager noOpFailureManager =
            new CheckpointFailureManager(0, NoOpFailJobCall.INSTANCE);

    return new CheckpointCoordinator(
            jid,
            configuration,
            verticesBeforeAck,
            ackVertices,
            verticesAfterAck,
            Collections.emptyList(),
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(10),
            new MemoryStateBackend(),
            Executors.directExecutor(),
            testingScheduledExecutor,
            SharedStateRegistry.DEFAULT_FACTORY,
            noOpFailureManager);
}
Example 5
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that no minimum delay between savepoints is enforced. */ @Test public void testMinDelayBetweenSavepoints() throws Exception { JobID jobId = new JobID(); final ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); ExecutionVertex vertex1 = mockExecutionVertex(attemptID1); CheckpointCoordinator coord = new CheckpointCoordinator( jobId, 100000, 200000, 100000000L, // very long min delay => should not affect savepoints 1, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); String savepointDir = tmpFolder.newFolder().getAbsolutePath(); CompletableFuture<CompletedCheckpoint> savepoint0 = coord.triggerSavepoint(0, savepointDir); assertFalse("Did not trigger savepoint", savepoint0.isDone()); CompletableFuture<CompletedCheckpoint> savepoint1 = coord.triggerSavepoint(1, savepointDir); assertFalse("Did not trigger savepoint", savepoint1.isDone()); }
Example 6
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that the pending checkpoint stats callbacks are created. */ @Test public void testCheckpointStatsTrackerPendingCheckpointCallback() { final long timestamp = System.currentTimeMillis(); ExecutionVertex vertex1 = mockExecutionVertex(new ExecutionAttemptID()); // set up the coordinator and validate the initial state CheckpointCoordinator coord = new CheckpointCoordinator( new JobID(), 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[]{vertex1}, new ExecutionVertex[]{vertex1}, new ExecutionVertex[]{vertex1}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); CheckpointStatsTracker tracker = mock(CheckpointStatsTracker.class); coord.setCheckpointStatsTracker(tracker); when(tracker.reportPendingCheckpoint(anyLong(), anyLong(), any(CheckpointProperties.class))) .thenReturn(mock(PendingCheckpointStats.class)); // Trigger a checkpoint and verify callback assertTrue(coord.triggerCheckpoint(timestamp, false)); verify(tracker, times(1)) .reportPendingCheckpoint(eq(1L), eq(timestamp), eq(CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION))); }
Example 7
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testCheckpointTimeoutIsolated() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock execution vertices final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID(); final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID(); ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID); ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1); ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2); ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID); // set up the coordinator // the timeout for the checkpoint is a 200 milliseconds CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 600000, 200, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); // trigger a checkpoint, partially acknowledged assertTrue(coord.triggerCheckpoint(timestamp, false)); assertEquals(1, coord.getNumberOfPendingCheckpoints()); PendingCheckpoint checkpoint = coord.getPendingCheckpoints().values().iterator().next(); assertFalse(checkpoint.isDiscarded()); OperatorID opID1 = OperatorID.fromJobVertexID(ackVertex1.getJobvertexId()); TaskStateSnapshot taskOperatorSubtaskStates1 = spy(new TaskStateSnapshot()); OperatorSubtaskState subtaskState1 = mock(OperatorSubtaskState.class); taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1); 
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpoint.getCheckpointId(), new CheckpointMetrics(), taskOperatorSubtaskStates1), TASK_MANAGER_LOCATION_INFO); // wait until the checkpoint must have expired. // we check every 250 msecs conservatively for 5 seconds // to give even slow build servers a very good chance of completing this long deadline = System.currentTimeMillis() + 5000; do { Thread.sleep(250); } while (!checkpoint.isDiscarded() && coord.getNumberOfPendingCheckpoints() > 0 && System.currentTimeMillis() < deadline); assertTrue("Checkpoint was not canceled by the timeout", checkpoint.isDiscarded()); assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // validate that the received states have been discarded verify(subtaskState1, times(1)).discardState(); // no confirm message must have been sent verify(commitVertex.getCurrentExecutionAttempt(), times(0)).notifyCheckpointComplete(anyLong(), anyLong()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 8
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testCheckpointAbortsIfAckTasksAreNotExecuted() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that need to ack the checkpoint final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID(); ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1); ExecutionVertex triggerVertex2 = mockExecutionVertex(triggerAttemptID2); // create some mock Execution vertices that receive the checkpoint trigger messages ExecutionVertex ackVertex1 = mock(ExecutionVertex.class); ExecutionVertex ackVertex2 = mock(ExecutionVertex.class); // set up the coordinator and validate the initial state CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); // nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // trigger the first checkpoint. this should not succeed assertFalse(coord.triggerCheckpoint(timestamp, false)); // still, nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 9
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testCheckpointAbortsIfTriggerTasksAreFinished() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID(); ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1); JobVertexID jobVertexID2 = new JobVertexID(); ExecutionVertex triggerVertex2 = mockExecutionVertex( triggerAttemptID2, jobVertexID2, Collections.singletonList(OperatorID.fromJobVertexID(jobVertexID2)), 1, 1, ExecutionState.FINISHED); // create some mock Execution vertices that need to ack the checkpoint final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID(); ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1); ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2); // set up the coordinator and validate the initial state CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); // nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // trigger the first checkpoint. 
this should not succeed assertFalse(coord.triggerCheckpoint(timestamp, false)); // still, nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 10
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) { try { final JobID jid = new JobID(); // create some mock execution vertices and trigger some checkpoint final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID(); ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID); ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID); ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID); final AtomicInteger numCalls = new AtomicInteger(); final Execution execution = triggerVertex.getCurrentExecutionAttempt(); doAnswer(invocation -> { numCalls.incrementAndGet(); return null; }).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class)); doAnswer(invocation -> { numCalls.incrementAndGet(); return null; }).when(execution).notifyCheckpointComplete(anyLong(), anyLong()); CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 10, // periodic interval is 10 ms 200000, // timeout is very long (200 s) 0L, // no extra delay maxConcurrentAttempts, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); coord.startCheckpointScheduler(); // after a while, there should be exactly as many checkpoints // as concurrently permitted long now = System.currentTimeMillis(); long timeout = now + 60000; long minDuration = now + 100; do { Thread.sleep(20); } while ((now = System.currentTimeMillis()) < minDuration || (numCalls.get() < maxConcurrentAttempts 
&& now < timeout)); assertEquals(maxConcurrentAttempts, numCalls.get()); verify(triggerVertex.getCurrentExecutionAttempt(), times(maxConcurrentAttempts)) .triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class)); // now, once we acknowledge one checkpoint, it should trigger the next one coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 1L), TASK_MANAGER_LOCATION_INFO); // this should have immediately triggered a new checkpoint now = System.currentTimeMillis(); timeout = now + 60000; do { Thread.sleep(20); } while (numCalls.get() < maxConcurrentAttempts + 1 && now < timeout); assertEquals(maxConcurrentAttempts + 1, numCalls.get()); // no further checkpoints should happen Thread.sleep(200); assertEquals(maxConcurrentAttempts + 1, numCalls.get()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 11
Source File: FailoverStrategyCheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that {@link CheckpointCoordinator#abortPendingCheckpoints(CheckpointException)} * called by {@link AdaptedRestartPipelinedRegionStrategyNG} or {@link FailoverRegion} could handle * the {@code currentPeriodicTrigger} null situation well. */ @Test public void testAbortPendingCheckpointsWithTriggerValidation() { final int maxConcurrentCheckpoints = ThreadLocalRandom.current().nextInt(10) + 1; ExecutionVertex executionVertex = mockExecutionVertex(); CheckpointCoordinatorConfiguration checkpointCoordinatorConfiguration = new CheckpointCoordinatorConfiguration( Integer.MAX_VALUE, Integer.MAX_VALUE, 0, maxConcurrentCheckpoints, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinator( new JobID(), checkpointCoordinatorConfiguration, new ExecutionVertex[] { executionVertex }, new ExecutionVertex[] { executionVertex }, new ExecutionVertex[] { executionVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, mock(CheckpointFailureManager.class)); // switch current execution's state to running to allow checkpoint could be triggered. mockExecutionRunning(executionVertex); // use manual checkpoint timer to trigger period checkpoints as we expect. 
ManualCheckpointTimer manualCheckpointTimer = new ManualCheckpointTimer(manualThreadExecutor); // set the init delay as 0 to ensure first checkpoint could be triggered once we trigger the manual executor // this is used to avoid the randomness of when to trigger the first checkpoint (introduced via FLINK-9352) manualCheckpointTimer.setManualDelay(0L); Whitebox.setInternalState(checkpointCoordinator, "timer", manualCheckpointTimer); checkpointCoordinator.startCheckpointScheduler(); assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable()); manualThreadExecutor.triggerAll(); manualThreadExecutor.triggerScheduledTasks(); assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints()); for (int i = 1; i < maxConcurrentCheckpoints; i++) { checkpointCoordinator.triggerCheckpoint(System.currentTimeMillis(), false); assertEquals(i + 1, checkpointCoordinator.getNumberOfPendingCheckpoints()); assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable()); } // as we only support limited concurrent checkpoints, after checkpoint triggered more than the limits, // the currentPeriodicTrigger would been assigned as null. checkpointCoordinator.triggerCheckpoint(System.currentTimeMillis(), false); assertFalse(checkpointCoordinator.isCurrentPeriodicTriggerAvailable()); assertEquals(maxConcurrentCheckpoints, checkpointCoordinator.getNumberOfPendingCheckpoints()); checkpointCoordinator.abortPendingCheckpoints( new CheckpointException(CheckpointFailureReason.JOB_FAILOVER_REGION)); // after aborting checkpoints, we ensure currentPeriodicTrigger still available. assertTrue(checkpointCoordinator.isCurrentPeriodicTriggerAvailable()); assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints()); }
Example 12
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the restore callbacks are called if registered. */ @Test public void testCheckpointStatsTrackerRestoreCallback() throws Exception { ExecutionVertex vertex1 = mockExecutionVertex(new ExecutionAttemptID()); StandaloneCompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1); // set up the coordinator and validate the initial state CheckpointCoordinator coord = new CheckpointCoordinator( new JobID(), 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[]{vertex1}, new ExecutionVertex[]{vertex1}, new ExecutionVertex[]{vertex1}, new StandaloneCheckpointIDCounter(), store, new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); store.addCheckpoint(new CompletedCheckpoint( new JobID(), 0, 0, 0, Collections.<OperatorID, OperatorState>emptyMap(), Collections.<MasterState>emptyList(), CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new TestCompletedCheckpointStorageLocation())); CheckpointStatsTracker tracker = mock(CheckpointStatsTracker.class); coord.setCheckpointStatsTracker(tracker); assertTrue(coord.restoreLatestCheckpointedState(Collections.<JobVertexID, ExecutionJobVertex>emptyMap(), false, true)); verify(tracker, times(1)) .reportRestoredCheckpoint(any(RestoredCheckpointStats.class)); }
Example 13
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testCheckpointAbortsIfTriggerTasksAreNotExecuted() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages ExecutionVertex triggerVertex1 = mock(ExecutionVertex.class); ExecutionVertex triggerVertex2 = mock(ExecutionVertex.class); // create some mock Execution vertices that need to ack the checkpoint final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID(); ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1); ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2); // set up the coordinator and validate the initial state CheckpointCoordinator coord = new CheckpointCoordinator( jid, 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); // nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // trigger the first checkpoint. this should not succeed assertFalse(coord.triggerCheckpoint(timestamp, false)); // still, nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 14
Source File: CheckpointCoordinatorFailureTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a failure while storing a completed checkpoint in the completed checkpoint store * will properly fail the originating pending checkpoint and clean upt the completed checkpoint. */ @Test public void testFailingCompletedCheckpointStoreAdd() throws Exception { JobID jid = new JobID(); final ExecutionAttemptID executionAttemptId = new ExecutionAttemptID(); final ExecutionVertex vertex = CheckpointCoordinatorTest.mockExecutionVertex(executionAttemptId); final long triggerTimestamp = 1L; CheckpointFailureManager failureManager = new CheckpointFailureManager( 0, NoOpFailJobCall.INSTANCE); // set up the coordinator and validate the initial state CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[]{vertex}, new ExecutionVertex[]{vertex}, new ExecutionVertex[]{vertex}, new StandaloneCheckpointIDCounter(), new FailingCompletedCheckpointStore(), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); coord.triggerCheckpoint(triggerTimestamp, false); assertEquals(1, coord.getNumberOfPendingCheckpoints()); PendingCheckpoint pendingCheckpoint = coord.getPendingCheckpoints().values().iterator().next(); assertFalse(pendingCheckpoint.isDiscarded()); final long checkpointId = coord.getPendingCheckpoints().keySet().iterator().next(); KeyedStateHandle managedKeyedHandle = mock(KeyedStateHandle.class); KeyedStateHandle rawKeyedHandle = mock(KeyedStateHandle.class); OperatorStateHandle managedOpHandle = mock(OperatorStreamStateHandle.class); OperatorStateHandle rawOpHandle = mock(OperatorStreamStateHandle.class); final OperatorSubtaskState operatorSubtaskState = spy(new OperatorSubtaskState( managedOpHandle, rawOpHandle, managedKeyedHandle, rawKeyedHandle)); TaskStateSnapshot 
subtaskState = spy(new TaskStateSnapshot()); subtaskState.putSubtaskStateByOperatorID(new OperatorID(), operatorSubtaskState); when(subtaskState.getSubtaskStateByOperatorID(OperatorID.fromJobVertexID(vertex.getJobvertexId()))).thenReturn(operatorSubtaskState); AcknowledgeCheckpoint acknowledgeMessage = new AcknowledgeCheckpoint(jid, executionAttemptId, checkpointId, new CheckpointMetrics(), subtaskState); try { coord.receiveAcknowledgeMessage(acknowledgeMessage, "Unknown location"); fail("Expected a checkpoint exception because the completed checkpoint store could not " + "store the completed checkpoint."); } catch (CheckpointException e) { // ignore because we expected this exception } // make sure that the pending checkpoint has been discarded after we could not complete it assertTrue(pendingCheckpoint.isDiscarded()); // make sure that the subtask state has been discarded after we could not complete it. verify(operatorSubtaskState).discardState(); verify(operatorSubtaskState.getManagedOperatorState().iterator().next()).discardState(); verify(operatorSubtaskState.getRawOperatorState().iterator().next()).discardState(); verify(operatorSubtaskState.getManagedKeyedState().iterator().next()).discardState(); verify(operatorSubtaskState.getRawKeyedState().iterator().next()).discardState(); }
Example 15
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testMaxConcurrentAttempsWithSubsumption() { try { final int maxConcurrentAttempts = 2; final JobID jid = new JobID(); // create some mock execution vertices and trigger some checkpoint final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID(); ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID); ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID); ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID); CheckpointCoordinator coord = new CheckpointCoordinator( jid, 10, // periodic interval is 10 ms 200000, // timeout is very long (200 s) 0L, // no extra delay maxConcurrentAttempts, // max two concurrent checkpoints CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); coord.startCheckpointScheduler(); // after a while, there should be exactly as many checkpoints // as concurrently permitted long now = System.currentTimeMillis(); long timeout = now + 60000; long minDuration = now + 100; do { Thread.sleep(20); } while ((now = System.currentTimeMillis()) < minDuration || (coord.getNumberOfPendingCheckpoints() < maxConcurrentAttempts && now < timeout)); // validate that the pending checkpoints are there assertEquals(maxConcurrentAttempts, coord.getNumberOfPendingCheckpoints()); assertNotNull(coord.getPendingCheckpoints().get(1L)); assertNotNull(coord.getPendingCheckpoints().get(2L)); // now we acknowledge the second checkpoint, which should subsume the first checkpoint // and allow two more checkpoints to be triggered // now, once we acknowledge one checkpoint, it should trigger 
the next one coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 2L)); // after a while, there should be the new checkpoints final long newTimeout = System.currentTimeMillis() + 60000; do { Thread.sleep(20); } while (coord.getPendingCheckpoints().get(4L) == null && System.currentTimeMillis() < newTimeout); // do the final check assertEquals(maxConcurrentAttempts, coord.getNumberOfPendingCheckpoints()); assertNotNull(coord.getPendingCheckpoints().get(3L)); assertNotNull(coord.getPendingCheckpoints().get(4L)); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 16
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private void testMaxConcurrentAttempts(int maxConcurrentAttempts) { try { final JobID jid = new JobID(); // create some mock execution vertices and trigger some checkpoint final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID(); ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID); ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID); ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID); final AtomicInteger numCalls = new AtomicInteger(); final Execution execution = triggerVertex.getCurrentExecutionAttempt(); doAnswer(invocation -> { numCalls.incrementAndGet(); return null; }).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class)); doAnswer(invocation -> { numCalls.incrementAndGet(); return null; }).when(execution).notifyCheckpointComplete(anyLong(), anyLong()); CheckpointCoordinator coord = new CheckpointCoordinator( jid, 10, // periodic interval is 10 ms 200000, // timeout is very long (200 s) 0L, // no extra delay maxConcurrentAttempts, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); coord.startCheckpointScheduler(); // after a while, there should be exactly as many checkpoints // as concurrently permitted long now = System.currentTimeMillis(); long timeout = now + 60000; long minDuration = now + 100; do { Thread.sleep(20); } while ((now = System.currentTimeMillis()) < minDuration || (numCalls.get() < maxConcurrentAttempts && now < timeout)); assertEquals(maxConcurrentAttempts, numCalls.get()); verify(triggerVertex.getCurrentExecutionAttempt(), 
times(maxConcurrentAttempts)) .triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class)); // now, once we acknowledge one checkpoint, it should trigger the next one coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 1L)); // this should have immediately triggered a new checkpoint now = System.currentTimeMillis(); timeout = now + 60000; do { Thread.sleep(20); } while (numCalls.get() < maxConcurrentAttempts + 1 && now < timeout); assertEquals(maxConcurrentAttempts + 1, numCalls.get()); // no further checkpoints should happen Thread.sleep(200); assertEquals(maxConcurrentAttempts + 1, numCalls.get()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 17
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Triggers a savepoint and two checkpoints. The second checkpoint completes * and subsumes the first checkpoint, but not the first savepoint. Then we * trigger another checkpoint and savepoint. The 2nd savepoint completes and * subsumes the last checkpoint, but not the first savepoint. */ @Test public void testSavepointsAreNotSubsumed() throws Exception { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages final ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); ExecutionVertex vertex1 = mockExecutionVertex(attemptID1); ExecutionVertex vertex2 = mockExecutionVertex(attemptID2); StandaloneCheckpointIDCounter counter = new StandaloneCheckpointIDCounter(); // set up the coordinator and validate the initial state CheckpointCoordinator coord = new CheckpointCoordinator( jid, 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, counter, new StandaloneCompletedCheckpointStore(10), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); String savepointDir = tmpFolder.newFolder().getAbsolutePath(); // Trigger savepoint and checkpoint CompletableFuture<CompletedCheckpoint> savepointFuture1 = coord.triggerSavepoint(timestamp, savepointDir); long savepointId1 = counter.getLast(); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertTrue(coord.triggerCheckpoint(timestamp + 1, false)); assertEquals(2, coord.getNumberOfPendingCheckpoints()); assertTrue(coord.triggerCheckpoint(timestamp + 2, false)); long checkpointId2 = counter.getLast(); assertEquals(3, coord.getNumberOfPendingCheckpoints()); // 2nd checkpoint should subsume the 1st checkpoint, but not the savepoint 
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId2)); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId2)); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertFalse(coord.getPendingCheckpoints().get(savepointId1).isDiscarded()); assertFalse(savepointFuture1.isDone()); assertTrue(coord.triggerCheckpoint(timestamp + 3, false)); assertEquals(2, coord.getNumberOfPendingCheckpoints()); CompletableFuture<CompletedCheckpoint> savepointFuture2 = coord.triggerSavepoint(timestamp + 4, savepointDir); long savepointId2 = counter.getLast(); assertEquals(3, coord.getNumberOfPendingCheckpoints()); // 2nd savepoint should subsume the last checkpoint, but not the 1st savepoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, savepointId2)); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, savepointId2)); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertEquals(2, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertFalse(coord.getPendingCheckpoints().get(savepointId1).isDiscarded()); assertFalse(savepointFuture1.isDone()); assertTrue(savepointFuture2.isDone()); // Ack first savepoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, savepointId1)); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, savepointId1)); assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(3, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertTrue(savepointFuture1.isDone()); }
Example 18
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testHandleMessagesForNonExistingCheckpoints() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock execution vertices and trigger some checkpoint final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID(); final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID(); ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID); ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1); ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2); ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID); CheckpointCoordinator coord = new CheckpointCoordinator( jid, 200000, 200000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); assertTrue(coord.triggerCheckpoint(timestamp, false)); long checkpointId = coord.getPendingCheckpoints().keySet().iterator().next(); // send some messages that do not belong to either the job or the any // of the vertices that need to be acknowledged. // non of the messages should throw an exception // wrong job id coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), ackAttemptID1, checkpointId)); // unknown checkpoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, 1L)); // unknown ack vertex coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, new ExecutionAttemptID(), checkpointId)); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example 19
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Triggers a savepoint and two checkpoints. The second checkpoint completes * and subsumes the first checkpoint, but not the first savepoint. Then we * trigger another checkpoint and savepoint. The 2nd savepoint completes and * subsumes the last checkpoint, but not the first savepoint. */ @Test public void testSavepointsAreNotSubsumed() throws Exception { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages final ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); ExecutionVertex vertex1 = mockExecutionVertex(attemptID1); ExecutionVertex vertex2 = mockExecutionVertex(attemptID2); StandaloneCheckpointIDCounter counter = new StandaloneCheckpointIDCounter(); // set up the coordinator and validate the initial state CheckpointCoordinatorConfiguration chkConfig = new CheckpointCoordinatorConfiguration( 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0); CheckpointCoordinator coord = new CheckpointCoordinator( jid, chkConfig, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, counter, new StandaloneCompletedCheckpointStore(10), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY, failureManager); String savepointDir = tmpFolder.newFolder().getAbsolutePath(); // Trigger savepoint and checkpoint CompletableFuture<CompletedCheckpoint> savepointFuture1 = coord.triggerSavepoint(timestamp, savepointDir); long savepointId1 = counter.getLast(); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertTrue(coord.triggerCheckpoint(timestamp + 1, false)); assertEquals(2, coord.getNumberOfPendingCheckpoints()); assertTrue(coord.triggerCheckpoint(timestamp + 2, false)); long checkpointId2 = counter.getLast(); 
assertEquals(3, coord.getNumberOfPendingCheckpoints()); // 2nd checkpoint should subsume the 1st checkpoint, but not the savepoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId2), TASK_MANAGER_LOCATION_INFO); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId2), TASK_MANAGER_LOCATION_INFO); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertFalse(coord.getPendingCheckpoints().get(savepointId1).isDiscarded()); assertFalse(savepointFuture1.isDone()); assertTrue(coord.triggerCheckpoint(timestamp + 3, false)); assertEquals(2, coord.getNumberOfPendingCheckpoints()); CompletableFuture<CompletedCheckpoint> savepointFuture2 = coord.triggerSavepoint(timestamp + 4, savepointDir); long savepointId2 = counter.getLast(); assertEquals(3, coord.getNumberOfPendingCheckpoints()); // 2nd savepoint should subsume the last checkpoint, but not the 1st savepoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, savepointId2), TASK_MANAGER_LOCATION_INFO); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, savepointId2), TASK_MANAGER_LOCATION_INFO); assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertEquals(2, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertFalse(coord.getPendingCheckpoints().get(savepointId1).isDiscarded()); assertFalse(savepointFuture1.isDone()); assertTrue(savepointFuture2.isDone()); // Ack first savepoint coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, savepointId1), TASK_MANAGER_LOCATION_INFO); coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, savepointId1), TASK_MANAGER_LOCATION_INFO); assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(3, coord.getNumberOfRetainedSuccessfulCheckpoints()); assertTrue(savepointFuture1.isDone()); }
Example 20
Source File: CheckpointCoordinatorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testCheckpointAbortsIfTriggerTasksAreFinished() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID(); ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1); JobVertexID jobVertexID2 = new JobVertexID(); ExecutionVertex triggerVertex2 = mockExecutionVertex( triggerAttemptID2, jobVertexID2, Collections.singletonList(OperatorID.fromJobVertexID(jobVertexID2)), 1, 1, ExecutionState.FINISHED); // create some mock Execution vertices that need to ack the checkpoint final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID(); ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1); ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2); // set up the coordinator and validate the initial state CheckpointCoordinator coord = new CheckpointCoordinator( jid, 600000, 600000, 0, Integer.MAX_VALUE, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), new MemoryStateBackend(), Executors.directExecutor(), SharedStateRegistry.DEFAULT_FACTORY); // nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // trigger the first checkpoint. 
this should not succeed assertFalse(coord.triggerCheckpoint(timestamp, false)); // still, nothing should be happening assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }