org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway Java Examples
The following examples show how to use
org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link TaskExecutorTestingContext} around the given slot table.
 *
 * <p>A testing job master gateway is built and registered with the RPC service, a testing
 * task executor is created that uses the slot table, and the job manager leader retriever is
 * notified with the gateway's address and fencing token.
 *
 * @param taskSlotTable slot table passed to the task executor and to the returned context
 * @return context holding the gateway, the slot table and the task executor
 * @throws IOException declared by this helper; presumably raised while creating the local
 *     state stores manager (TODO confirm)
 */
private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException {
    // Triggered on every slot offer; nothing in this method waits on it.
    final OneShotLatch slotsOffered = new OneShotLatch();

    final TestingJobMasterGatewayBuilder gatewayBuilder = new TestingJobMasterGatewayBuilder()
        .setOfferSlotsFunction(
            (ignoredResourceId, offers) -> {
                slotsOffered.trigger();
                return CompletableFuture.completedFuture(offers);
            });
    final TestingJobMasterGateway gateway = gatewayBuilder.build();
    rpc.registerGateway(gateway.getAddress(), gateway);

    final JobLeaderService leaderService =
        new DefaultJobLeaderService(
            unresolvedTaskManagerLocation,
            RetryingRegistrationConfiguration.defaultConfiguration());
    final TaskExecutorLocalStateStoresManager localStateStores = createTaskExecutorLocalStateStoresManager();

    final TaskManagerServicesBuilder servicesBuilder = new TaskManagerServicesBuilder()
        .setTaskSlotTable(taskSlotTable)
        .setJobLeaderService(leaderService)
        .setTaskStateManager(localStateStores);
    final TestingTaskExecutor executor = createTestingTaskExecutor(servicesBuilder.build());

    jobManagerLeaderRetriever.notifyListener(
        gateway.getAddress(),
        gateway.getFencingToken().toUUID());

    return new TaskExecutorTestingContext(gateway, taskSlotTable, executor);
}
Example #2
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a testing job master gateway with two hooks.
 *
 * <p>Each slot offer triggers {@code offerSlotsLatch} and is answered with
 * {@code offerResultFuture}. Each task execution state update whose execution state is
 * terminal triggers {@code taskInTerminalState}; every update is acknowledged.
 *
 * @param offerSlotsLatch latch triggered whenever slots are offered
 * @param taskInTerminalState latch triggered when a terminal execution state is reported
 * @param offerResultFuture future returned as the result of every slot offer
 * @return the configured testing gateway
 */
private static TestingJobMasterGateway createJobMasterWithSlotOfferAndTaskTerminationHooks(
        OneShotLatch offerSlotsLatch,
        OneShotLatch taskInTerminalState,
        CompletableFuture<Collection<SlotOffer>> offerResultFuture) {
    final TestingJobMasterGatewayBuilder gatewayBuilder = new TestingJobMasterGatewayBuilder()
        .setOfferSlotsFunction(
            (ignoredResourceId, ignoredOffers) -> {
                offerSlotsLatch.trigger();
                return offerResultFuture;
            })
        .setUpdateTaskExecutionStateFunction(
            stateUpdate -> {
                final boolean terminal = stateUpdate.getExecutionState().isTerminal();
                if (terminal) {
                    taskInTerminalState.trigger();
                }
                return CompletableFuture.completedFuture(Acknowledge.get());
            });
    return gatewayBuilder.build();
}
Example #3
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the JobLeaderService won't try to reconnect to JobMaster after it * has lost the leadership. See FLINK-16836. */ @Test public void doesNotReconnectAfterTargetLostLeadership() throws Exception { final JobID jobId = new JobID(); final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(); final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService) .build(); final TestingJobMasterGateway jobMasterGateway = registerJobMaster(); final OneShotLatch jobManagerGainedLeadership = new OneShotLatch(); final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(ignored -> jobManagerGainedLeadership.trigger()); final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener); try { jobLeaderService.addJob(jobId, jobMasterGateway.getAddress()); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), UUID.randomUUID()); jobManagerGainedLeadership.await(); // revoke the leadership leaderRetrievalService.notifyListener(null, null); testingJobLeaderListener.waitUntilJobManagerLostLeadership(); jobLeaderService.reconnect(jobId); } finally { jobLeaderService.stop(); } }
Example #4
Source File: ResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testHeartbeatTimeoutWithJobMaster() throws Exception { final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>(); final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete) .setDisconnectResourceManagerConsumer(disconnectFuture::complete) .build(); rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final JobID jobId = new JobID(); final ResourceID jobMasterResourceId = ResourceID.generate(); final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> { assertThat(requestedJobId, is(equalTo(jobId))); return jobMasterLeaderRetrievalService; }); runHeartbeatTimeoutTest( resourceManagerGateway -> { final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), jobId, TIMEOUT); assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class)); }, resourceManagerResourceId -> { // might have been completed or not depending whether the timeout was triggered first final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null); assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue()))); assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId))); }); }
Example #5
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Stores the three collaborators that make up a task executor testing context.
 *
 * @param jobMasterGateway testing job master gateway kept by this context
 * @param taskSlotTable slot table kept by this context
 * @param taskExecutor testing task executor kept by this context
 */
private TaskExecutorTestingContext(
        TestingJobMasterGateway jobMasterGateway,
        TaskSlotTable taskSlotTable,
        TestingTaskExecutor taskExecutor) {
    // Plain field assignments; no validation or copying is performed.
    this.taskExecutor = taskExecutor;
    this.taskSlotTable = taskSlotTable;
    this.jobMasterGateway = jobMasterGateway;
}
Example #6
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Submits a {@code NoOpInvokable} task for the given allocation and blocks until the
 * submission future has completed.
 *
 * @param allocationId allocation id set on the task deployment descriptor
 * @param jobMasterGateway gateway whose fencing token is passed along with the submission
 * @param tmGateway task executor gateway that receives the task
 * @throws IOException declared by this helper; presumably raised while building the
 *     deployment descriptor (TODO confirm)
 */
private void submitNoOpInvokableTask(
        AllocationID allocationId,
        TestingJobMasterGateway jobMasterGateway,
        TaskExecutorGateway tmGateway) throws IOException {
    final TaskDeploymentDescriptor descriptor =
        TaskDeploymentDescriptorBuilder.newBuilder(jobId, NoOpInvokable.class)
            .setAllocationId(allocationId)
            .build();

    final CompletableFuture<?> submission =
        tmGateway.submitTask(descriptor, jobMasterGateway.getFencingToken(), timeout);
    // join() rethrows a failed submission as an unchecked exception
    submission.join();
}
Example #7
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adds and then removes a job whose leader retrieval service is a
 * {@code FailingSettableLeaderRetrievalService}, sends a leader notification afterwards and
 * checks that the job leader listener is not informed about a new leader within 10 ms.
 */
@Test
public void removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications() throws Exception {
    final FailingSettableLeaderRetrievalService leaderRetriever = new FailingSettableLeaderRetrievalService();
    final TestingHighAvailabilityServices highAvailability =
        new TestingHighAvailabilityServicesBuilder()
            .setJobMasterLeaderRetrieverFunction(ignored -> leaderRetriever)
            .build();

    final JobID removedJob = new JobID();
    final CompletableFuture<JobID> leaderAnnounced = new CompletableFuture<>();
    final TestingJobLeaderListener leaderListener = new TestingJobLeaderListener(leaderAnnounced::complete);

    final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder().build();
    rpcServiceResource.getTestingRpcService().registerGateway(gateway.getAddress(), gateway);

    final JobLeaderService service = createAndStartJobLeaderService(highAvailability, leaderListener);

    try {
        service.addJob(removedJob, "foobar");
        service.removeJob(removedJob);

        leaderRetriever.notifyListener(
            gateway.getAddress(),
            gateway.getFencingToken().toUUID());

        // The future must stay incomplete, so the timed get is expected to time out.
        boolean timedOut = false;
        try {
            leaderAnnounced.get(10, TimeUnit.MILLISECONDS);
        } catch (TimeoutException expected) {
            timedOut = true;
        }
        if (!timedOut) {
            fail("The leader future should not be completed.");
        }
    } finally {
        service.stop();
    }
}
Example #8
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the JobLeaderService can reconnect to an old leader which seemed * to have lost the leadership in between. See FLINK-14316. */ @Test public void canReconnectToOldLeaderWithSameLeaderAddress() throws Exception { final JobID jobId = new JobID(); final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(); final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService) .build(); final TestingJobMasterGateway jobMasterGateway = registerJobMaster(); final BlockingQueue<JobID> leadershipQueue = new ArrayBlockingQueue<>(1); final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(leadershipQueue::offer); final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener); try { jobLeaderService.addJob(jobId, jobMasterGateway.getAddress()); final UUID leaderSessionId = UUID.randomUUID(); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId); // wait for the first leadership assertThat(leadershipQueue.take(), is(jobId)); // revoke the leadership leaderRetrievalService.notifyListener(null, null); testingJobLeaderListener.waitUntilJobManagerLostLeadership(); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId); // check that we obtain the leadership a second time assertThat(leadershipQueue.take(), is(jobId)); } finally { jobLeaderService.stop(); } }
Example #9
Source File: ResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testHeartbeatTimeoutWithJobMaster() throws Exception { final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>(); final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete) .setDisconnectResourceManagerConsumer(disconnectFuture::complete) .build(); rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final JobID jobId = new JobID(); final ResourceID jobMasterResourceId = ResourceID.generate(); final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> { assertThat(requestedJobId, is(equalTo(jobId))); return jobMasterLeaderRetrievalService; }); runHeartbeatTimeoutTest( resourceManagerGateway -> { final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), jobId, TIMEOUT); assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class)); }, resourceManagerResourceId -> { // might have been completed or not depending whether the timeout was triggered first final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null); assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue()))); assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId))); }); }
Example #10
Source File: ResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testHeartbeatTimeoutWithJobMaster() throws Exception { final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>(); final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete) .setDisconnectResourceManagerConsumer(disconnectFuture::complete) .build(); rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final JobID jobId = new JobID(); final ResourceID jobMasterResourceId = ResourceID.generate(); final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> { assertThat(requestedJobId, is(equalTo(jobId))); return jobMasterLeaderRetrievalService; }); runHeartbeatTimeoutTest( resourceManagerGateway -> { final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), jobId, TIMEOUT); assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class)); }, resourceManagerResourceId -> { // might have been completed or not depending whether the timeout was triggered first final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null); assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue()))); assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId))); }); }
Example #11
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); final 
TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #12
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN), timerService); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, 
jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #13
Source File: TaskSubmissionTestEnvironment.java From flink with Apache License 2.0 | 4 votes |
/**
 * Sets the job master gateway stored on this builder.
 *
 * @param jobMasterGateway testing job master gateway to store
 * @return this builder, so that calls can be chained
 */
public Builder setJobMasterGateway(TestingJobMasterGateway jobMasterGateway) { this.jobMasterGateway = jobMasterGateway; return this; }
Example #14
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Verifies that the TaskExecutor reconciles its own view of the slots with the JobMaster's
 * view, using the AllocatedSlotReport carried by the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    final CountDownLatch slotsActivated = new CountDownLatch(2);
    final TaskSlotTable slotTable =
        new ActivateSlotNotifyingTaskSlotTable(
            Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
            timerService,
            slotsActivated);
    final TaskManagerServices services = new TaskManagerServicesBuilder().setTaskSlotTable(slotTable).build();

    final TaskExecutor executor = createTaskExecutor(services);

    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    final BlockingQueue<AllocationID> freedAllocations = new ArrayBlockingQueue<>(2);

    OneShotLatch firstSlotReport = new OneShotLatch();
    rmGateway.setSendSlotReportFunction(
        ignoredReport -> {
            firstSlotReport.trigger();
            return CompletableFuture.completedFuture(Acknowledge.get());
        });
    // f2 is the third tuple field; it is asserted against allocation ids below
    rmGateway.setNotifySlotAvailableConsumer(slotAvailable -> freedAllocations.offer(slotAvailable.f2));

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    resourceManagerLeaderRetriever.notifyListener(
        rmGateway.getAddress(),
        rmGateway.getFencingToken().toUUID());

    final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
    final ResourceID jobManagerResourceId = ResourceID.generate();
    final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
        .setFailSlotConsumer((ignoredResourceId, failedAllocation, ignoredCause) -> failedAllocations.offer(failedAllocation))
        .setOfferSlotsFunction((ignoredResourceId, offers) -> CompletableFuture.completedFuture(new ArrayList<>(offers)))
        .setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
        .build();

    final String jobManagerAddress = jmGateway.getAddress();
    rpc.registerGateway(jobManagerAddress, jmGateway);
    jobManagerLeaderRetriever.notifyListener(
        jobManagerAddress,
        jmGateway.getFencingToken().toUUID());

    executor.start();

    try {
        final TaskExecutorGateway executorGateway = executor.getSelfGateway(TaskExecutorGateway.class);

        firstSlotReport.await();

        final SlotID firstSlot = new SlotID(executor.getResourceID(), 0);
        final SlotID secondSlot = new SlotID(executor.getResourceID(), 1);
        final AllocationID allocationIdInBoth = new AllocationID();
        final AllocationID allocationIdOnlyInJM = new AllocationID();
        final AllocationID allocationIdOnlyInTM = new AllocationID();

        executorGateway.requestSlot(firstSlot, jobId, allocationIdInBoth, "foobar", rmGateway.getFencingToken(), timeout);
        executorGateway.requestSlot(secondSlot, jobId, allocationIdOnlyInTM, "foobar", rmGateway.getFencingToken(), timeout);

        slotsActivated.await();

        // the job master's view: slot 0 matches, slot 1 holds an allocation the TM does not know
        AllocatedSlotReport jobMasterView =
            new AllocatedSlotReport(
                jobId,
                Arrays.asList(
                    new AllocatedSlotInfo(0, allocationIdInBoth),
                    new AllocatedSlotInfo(1, allocationIdOnlyInJM)));
        executorGateway.heartbeatFromJobManager(jobManagerResourceId, jobMasterView);

        assertThat(failedAllocations.take(), is(allocationIdOnlyInJM));
        assertThat(freedAllocations.take(), is(allocationIdOnlyInTM));
        // nothing else must be reported on either side
        assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(freedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(executor, timeout);
    }
}
Example #15
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Verifies that the TaskExecutor reconciles its own view of the slots with the JobMaster's
 * view, using the AllocatedSlotReport carried by the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    final CountDownLatch slotsActivated = new CountDownLatch(2);
    final TaskSlotTable<Task> slotTable = new ActivateSlotNotifyingTaskSlotTable(2, slotsActivated);
    final TaskManagerServices services = new TaskManagerServicesBuilder().setTaskSlotTable(slotTable).build();

    final TaskExecutor executor = createTaskExecutor(services);

    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    final BlockingQueue<AllocationID> freedAllocations = new ArrayBlockingQueue<>(2);

    OneShotLatch firstSlotReport = new OneShotLatch();
    rmGateway.setSendSlotReportFunction(
        ignoredReport -> {
            firstSlotReport.trigger();
            return CompletableFuture.completedFuture(Acknowledge.get());
        });
    // f2 is the third tuple field; it is asserted against allocation ids below
    rmGateway.setNotifySlotAvailableConsumer(slotAvailable -> freedAllocations.offer(slotAvailable.f2));

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    resourceManagerLeaderRetriever.notifyListener(
        rmGateway.getAddress(),
        rmGateway.getFencingToken().toUUID());

    final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
    final ResourceID jobManagerResourceId = ResourceID.generate();
    final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
        .setFailSlotConsumer((ignoredResourceId, failedAllocation, ignoredCause) -> failedAllocations.offer(failedAllocation))
        .setOfferSlotsFunction((ignoredResourceId, offers) -> CompletableFuture.completedFuture(new ArrayList<>(offers)))
        .setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
        .build();

    final String jobManagerAddress = jmGateway.getAddress();
    rpc.registerGateway(jobManagerAddress, jmGateway);
    jobManagerLeaderRetriever.notifyListener(
        jobManagerAddress,
        jmGateway.getFencingToken().toUUID());

    executor.start();

    try {
        final TaskExecutorGateway executorGateway = executor.getSelfGateway(TaskExecutorGateway.class);

        firstSlotReport.await();

        final SlotID firstSlot = new SlotID(executor.getResourceID(), 0);
        final SlotID secondSlot = new SlotID(executor.getResourceID(), 1);
        final AllocationID allocationIdInBoth = new AllocationID();
        final AllocationID allocationIdOnlyInJM = new AllocationID();
        final AllocationID allocationIdOnlyInTM = new AllocationID();

        executorGateway.requestSlot(firstSlot, jobId, allocationIdInBoth, ResourceProfile.ZERO, "foobar", rmGateway.getFencingToken(), timeout);
        executorGateway.requestSlot(secondSlot, jobId, allocationIdOnlyInTM, ResourceProfile.ZERO, "foobar", rmGateway.getFencingToken(), timeout);

        slotsActivated.await();

        // the job master's view: slot 0 matches, slot 1 holds an allocation the TM does not know
        AllocatedSlotReport jobMasterView =
            new AllocatedSlotReport(
                jobId,
                Arrays.asList(
                    new AllocatedSlotInfo(0, allocationIdInBoth),
                    new AllocatedSlotInfo(1, allocationIdOnlyInJM)));
        executorGateway.heartbeatFromJobManager(jobManagerResourceId, jobMasterView);

        assertThat(failedAllocations.take(), is(allocationIdOnlyInJM));
        assertThat(freedAllocations.take(), is(allocationIdOnlyInTM));
        // nothing else must be reported on either side
        assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(freedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(executor, timeout);
    }
}
Example #16
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final 
TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, ResourceProfile.ZERO, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #17
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Runs a sender and a receiver task that share one shuffle descriptor and checks that both
 * end up in {@code FINISHED}.
 *
 * <p>Each task is submitted into its own slot (index 0 and 1). The test waits for the RUNNING
 * and FINISHED notifications of both execution attempts before it inspects the task slot
 * table.
 */
@Test(timeout = 10000L)
public void testRunJobWithForwardChannel() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
        createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    // JobMaster stub that acknowledges every schedule-or-update-consumers call.
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFencingTokenSupplier(() -> jobMasterId)
        .setScheduleOrUpdateConsumersFunction(
            resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
        .build();

    try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
            .setResourceID(producerLocation)
            .setSlotSize(2)
            .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
            .addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
            .setJobMasterId(jobMasterId)
            .setJobMasterGateway(testingJobMasterGateway)
            .useRealNonMockShuffleEnvironment()
            .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable taskSlotTable = env.getTaskSlotTable();

        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FinishedFuture.get();
        task2FinishedFuture.get();

        // assertSame takes (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid1).getExecutionState());
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Example #18
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a sender and a receiver task. Every state update of the sender is rejected by the
 * JobMaster stub, and the test expects the sender to reach {@code FAILED}. The receiver is
 * then cancelled explicitly and is expected to reach {@code CANCELED}.
 */
@Test(timeout = 10000L)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
        createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFencingTokenSupplier(() -> jobMasterId)
        .setUpdateTaskExecutionStateFunction(taskExecutionState -> {
            if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
                // The rejected update belongs to eid1, so the message names eid1 as well.
                return FutureUtils.completedExceptionally(
                    new ExecutionGraphException("The execution attempt " + eid1 + " was not found."));
            } else {
                return CompletableFuture.completedFuture(Acknowledge.get());
            }
        })
        .build();

    try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
            .setResourceID(producerLocation)
            .setSlotSize(2)
            .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
            .addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
            .setJobMasterId(jobMasterId)
            .setJobMasterGateway(testingJobMasterGateway)
            .useRealNonMockShuffleEnvironment()
            .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable taskSlotTable = env.getTaskSlotTable();

        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FailedFuture.get();
        // assertSame takes (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());

        tmGateway.cancelTask(eid2, timeout);

        task2CanceledFuture.get();
        assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Example #19
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * This tests task executor receive SubmitTask before OfferSlot response. */ @Test public void testSubmitTaskBeforeAcceptSlot() throws Exception { final InstanceID registrationId = new InstanceID(); final OneShotLatch taskExecutorIsRegistered = new OneShotLatch(); final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>(); final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(registrationId, taskExecutorIsRegistered, availableSlotFuture); final AllocationID allocationId1 = new AllocationID(); final AllocationID allocationId2 = new AllocationID(); final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.ANY); final OneShotLatch offerSlotsLatch = new OneShotLatch(); final OneShotLatch taskInTerminalState = new OneShotLatch(); final CompletableFuture<Collection<SlotOffer>> offerResultFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = createJobMasterWithSlotOfferAndTaskTerminationHooks(offerSlotsLatch, taskInTerminalState, offerResultFuture); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2); final TaskManagerServices taskManagerServices = createTaskManagerServicesWithTaskSlotTable(taskSlotTable); final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices); try { taskManager.start(); taskManager.waitUntilStarted(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // wait until registered at the RM taskExecutorIsRegistered.await(); // request 2 slots for the given allocation ids requestSlots( tmGateway, Arrays.asList(allocationId1, allocationId2), resourceManagerGateway.getFencingToken(), jobMasterGateway.getAddress()); // notify job leader to start slot offering 
jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); // wait until slots have been offered offerSlotsLatch.await(); submitNoOpInvokableTask(allocationId1, jobMasterGateway, tmGateway); // acknowledge the offered slots offerResultFuture.complete(Collections.singleton(offer1)); // check that the rejected slot will be made available again final Tuple3<InstanceID, SlotID, AllocationID> instanceIDSlotIDAllocationIDTuple3 = availableSlotFuture.get(); assertThat(instanceIDSlotIDAllocationIDTuple3.f2, equalTo(allocationId2)); // wait for the task completion taskInTerminalState.await(); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #20
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that accepted slots go into state assigned and the others are returned to the resource * manager. */ @Test public void testSlotAcceptance() throws Exception { final InstanceID registrationId = new InstanceID(); final OneShotLatch taskExecutorIsRegistered = new OneShotLatch(); final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>(); final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(registrationId, taskExecutorIsRegistered, availableSlotFuture); final AllocationID allocationId1 = new AllocationID(); final AllocationID allocationId2 = new AllocationID(); final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.ANY); final OneShotLatch offerSlotsLatch = new OneShotLatch(); final OneShotLatch taskInTerminalState = new OneShotLatch(); final CompletableFuture<Collection<SlotOffer>> offerResultFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = createJobMasterWithSlotOfferAndTaskTerminationHooks(offerSlotsLatch, taskInTerminalState, offerResultFuture); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2); final TaskManagerServices taskManagerServices = createTaskManagerServicesWithTaskSlotTable(taskSlotTable); final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices); try { taskManager.start(); taskManager.waitUntilStarted(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // wait until registered at the RM taskExecutorIsRegistered.await(); // request 2 slots for the given allocation ids requestSlots( tmGateway, Arrays.asList(allocationId1, allocationId2), resourceManagerGateway.getFencingToken(), jobMasterGateway.getAddress()); // notify job leader to start 
slot offering jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); // wait until slots have been offered offerSlotsLatch.await(); offerResultFuture.complete(Collections.singletonList(offer1)); final Tuple3<InstanceID, SlotID, AllocationID> instanceIDSlotIDAllocationIDTuple3 = availableSlotFuture.get(); final Tuple3<InstanceID, SlotID, AllocationID> expectedResult = Tuple3.of(registrationId, new SlotID(unresolvedTaskManagerLocation.getResourceID(), 1), allocationId2); assertThat(instanceIDSlotIDAllocationIDTuple3, equalTo(expectedResult)); // the slot 1 can be activate for task submission submitNoOpInvokableTask(allocationId1, jobMasterGateway, tmGateway); // wait for the task completion taskInTerminalState.await(); // the slot 2 can NOT be activate for task submission try { submitNoOpInvokableTask(allocationId2, jobMasterGateway, tmGateway); fail("It should not be possible to submit task to acquired by JM slot with index 1 (allocationId2)"); } catch (CompletionException e) { assertThat(e.getCause(), instanceOf(TaskSubmissionException.class)); } // the slot 2 is free to request tmGateway .requestSlot( new SlotID(unresolvedTaskManagerLocation.getResourceID(), 1), jobId, allocationId2, ResourceProfile.UNKNOWN, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout) .join(); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #21
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 *
 * <p>The TaskExecutor gets two slot requests ({@code allocationIdInBoth} and
 * {@code allocationIdOnlyInTM}), while the heartbeat report lists {@code allocationIdInBoth}
 * and {@code allocationIdOnlyInJM}. The test expects exactly one failed slot on the JobMaster
 * side ({@code allocationIdOnlyInJM}) and exactly one slot reported as available to the
 * ResourceManager ({@code allocationIdOnlyInTM}).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    final CountDownLatch activeSlots = new CountDownLatch(2);
    final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
        Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
        timerService,
        activeSlots);
    final TaskManagerServices taskManagerServices =
        new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

    final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

    // Allocation ids the ResourceManager stub was told are available again.
    final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

    OneShotLatch initialSlotReporting = new OneShotLatch();

    testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
        initialSlotReporting.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });

    testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
        allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

    rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
    resourceManagerLeaderRetriever.notifyListener(
        testingResourceManagerGateway.getAddress(),
        testingResourceManagerGateway.getFencingToken().toUUID());

    // Allocation ids for which the JobMaster stub received a failSlot call.
    final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
    final ResourceID jobManagerResourceId = ResourceID.generate();
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFailSlotConsumer((resourceID, allocationID, throwable) ->
            failedAllocations.offer(allocationID))
        .setOfferSlotsFunction((resourceID, slotOffers) ->
            CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
        .setRegisterTaskManagerFunction((ignoredA, ignoredB) ->
            CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
        .build();
    final String jobManagerAddress = jobMasterGateway.getAddress();
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

    try {
        // Started inside the try block so that the endpoint is terminated even if start fails.
        taskExecutor.start();

        final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        initialSlotReporting.await();

        final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
        final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
        final AllocationID allocationIdInBoth = new AllocationID();
        final AllocationID allocationIdOnlyInJM = new AllocationID();
        final AllocationID allocationIdOnlyInTM = new AllocationID();

        taskExecutorGateway.requestSlot(
            slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
        taskExecutorGateway.requestSlot(
            slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

        // Released by ActivateSlotNotifyingTaskSlotTable, which was created with this latch.
        activeSlots.await();

        List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
            new AllocatedSlotInfo(0, allocationIdInBoth),
            new AllocatedSlotInfo(1, allocationIdOnlyInJM)
        );
        AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
        taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

        assertThat(failedAllocations.take(), is(allocationIdOnlyInJM));
        assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));

        // No further notification may arrive within the short polling window.
        assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example #22
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Builds a {@code TestingJobMasterGateway} with the builder's defaults and registers it at
 * the testing RPC service under the gateway's own address.
 *
 * @return the registered gateway
 */
private TestingJobMasterGateway registerJobMaster() {
    final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder().build();
    final String gatewayAddress = gateway.getAddress();

    rpcServiceResource
        .getTestingRpcService()
        .registerGateway(gatewayAddress, gateway);

    return gateway;
}
Example #23
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/** * Test that a failing schedule or update consumers call leads to the failing of the respective * task. * * <p>IMPORTANT: We have to make sure that the invokable's cancel method is called, because only * then the future is completed. We do this by not eagerly deploying consumer tasks and requiring * the invokable to fill one memory segment. The completed memory segment will trigger the * scheduling of the downstream operator since it is in pipeline mode. After we've filled the * memory segment, we'll block the invokable and wait for the task failure due to the failed * schedule or update consumers call. */ @Test(timeout = 10000L) public void testFailingScheduleOrUpdateConsumers() throws Exception { final Configuration configuration = new Configuration(); // set the memory segment to the smallest size possible, because we have to fill one // memory buffer to trigger the schedule or update consumers message to the downstream // operators configuration.setString(TaskManagerOptions.MEMORY_SEGMENT_SIZE, "4096"); NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate()); TaskDeploymentDescriptor tdd = createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class); ExecutionAttemptID eid = tdd.getExecutionAttemptId(); final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>(); final Exception exception = new Exception("Failed schedule or update consumers"); final JobMasterId jobMasterId = JobMasterId.generate(); TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder() .setFencingTokenSupplier(() -> jobMasterId) .setUpdateTaskExecutionStateFunction(resultPartitionID -> FutureUtils.completedExceptionally(exception)) .build(); try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId) .setSlotSize(1) .setConfiguration(configuration) .addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture) 
.setJobMasterId(jobMasterId) .setJobMasterGateway(testingJobMasterGateway) .useRealNonMockShuffleEnvironment() .build()) { TaskExecutorGateway tmGateway = env.getTaskExecutorGateway(); TaskSlotTable taskSlotTable = env.getTaskSlotTable(); TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture(); taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60)); tmGateway.submitTask(tdd, jobMasterId, timeout).get(); taskRunningFuture.get(); CompletableFuture<Boolean> cancelFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled(); assertTrue(cancelFuture.get()); assertTrue(ExceptionUtils.findThrowableWithMessage(taskSlotTable.getTask(eid).getFailureCause(), exception.getMessage()).isPresent()); } }
Example #24
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobManagerTable jobManagerTable = new JobManagerTable(); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobManagerTable(jobManagerTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = 
createTaskExecutor(taskManagerServices); try { taskManager.start(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #25
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobManagerTable jobManagerTable = new JobManagerTable(); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobManagerTable(jobManagerTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = 
new TaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, HEARTBEAT_SERVICES, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { taskManager.start(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #26
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/** * Test that a failing schedule or update consumers call leads to the failing of the respective * task. * * <p>IMPORTANT: We have to make sure that the invokable's cancel method is called, because only * then the future is completed. We do this by not eagerly deploying consumer tasks and requiring * the invokable to fill one memory segment. The completed memory segment will trigger the * scheduling of the downstream operator since it is in pipeline mode. After we've filled the * memory segment, we'll block the invokable and wait for the task failure due to the failed * schedule or update consumers call. */ @Test(timeout = TEST_TIMEOUT) public void testFailingScheduleOrUpdateConsumers() throws Exception { final Configuration configuration = new Configuration(); // set the memory segment to the smallest size possible, because we have to fill one // memory buffer to trigger the schedule or update consumers message to the downstream // operators configuration.set(TaskManagerOptions.MEMORY_SEGMENT_SIZE, MemorySize.parse("4096")); NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate()); TaskDeploymentDescriptor tdd = createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class); ExecutionAttemptID eid = tdd.getExecutionAttemptId(); final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>(); final Exception exception = new Exception("Failed schedule or update consumers"); final JobMasterId jobMasterId = JobMasterId.generate(); TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder() .setFencingTokenSupplier(() -> jobMasterId) .setUpdateTaskExecutionStateFunction(resultPartitionID -> FutureUtils.completedExceptionally(exception)) .build(); try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId) .setSlotSize(1) .setConfiguration(configuration) .addTaskManagerActionListener(eid, ExecutionState.RUNNING, 
taskRunningFuture) .setJobMasterId(jobMasterId) .setJobMasterGateway(testingJobMasterGateway) .useRealNonMockShuffleEnvironment() .build()) { TaskExecutorGateway tmGateway = env.getTaskExecutorGateway(); TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable(); TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture(); taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60)); tmGateway.submitTask(tdd, jobMasterId, timeout).get(); taskRunningFuture.get(); CompletableFuture<Boolean> cancelFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled(); assertTrue(cancelFuture.get()); assertTrue(ExceptionUtils.findThrowableWithMessage(taskSlotTable.getTask(eid).getFailureCause(), exception.getMessage()).isPresent()); } }
Example #27
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a sender and a receiver task. Every state update of the sender is rejected by the
 * JobMaster stub, and the test expects the sender to reach {@code FAILED}. The receiver is
 * then cancelled explicitly and is expected to reach {@code CANCELED}.
 */
@Test(timeout = TEST_TIMEOUT)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
        createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFencingTokenSupplier(() -> jobMasterId)
        .setUpdateTaskExecutionStateFunction(taskExecutionState -> {
            if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
                // The rejected update belongs to eid1, so the message names eid1 as well.
                return FutureUtils.completedExceptionally(
                    new ExecutionGraphException("The execution attempt " + eid1 + " was not found."));
            } else {
                return CompletableFuture.completedFuture(Acknowledge.get());
            }
        })
        .build();

    try (TaskSubmissionTestEnvironment env =
        new TaskSubmissionTestEnvironment.Builder(jobId)
            .setResourceID(producerLocation)
            .setSlotSize(2)
            .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
            .addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
            .addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
            .setJobMasterId(jobMasterId)
            .setJobMasterGateway(testingJobMasterGateway)
            .useRealNonMockShuffleEnvironment()
            .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FailedFuture.get();
        // assertSame takes (expected, actual); this order keeps failure messages accurate.
        assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());

        tmGateway.cancelTask(eid2, timeout);

        task2CanceledFuture.get();
        assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Example #28
Source File: TaskExecutorSubmissionTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests that a sender task and a receiver task created from the same shuffle
 * descriptor can be submitted to one task executor and both reach
 * {@link ExecutionState#FINISHED}.
 *
 * <p>The testing job master gateway acknowledges every schedule-or-update-consumers
 * call.
 */
@Test(timeout = TEST_TIMEOUT)
public void testRunJobWithForwardChannel() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd =
        createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFencingTokenSupplier(() -> jobMasterId)
        .setScheduleOrUpdateConsumersFunction(
            resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
        .build();

    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                .setResourceID(producerLocation)
                .setSlotSize(2)
                .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
                .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
                .addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
                .addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
                .setJobMasterId(jobMasterId)
                .setJobMasterGateway(testingJobMasterGateway)
                .useRealNonMockShuffleEnvironment()
                .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

        // Deploy the sender and wait until it is running.
        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();

        // Deploy the receiver and wait until it is running.
        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();

        task1FinishedFuture.get();
        task2FinishedFuture.get();

        // JUnit's assertSame takes (expected, actual).
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid1).getExecutionState());
        assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid2).getExecutionState());
    }
}
Example #29
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN), timerService); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, 
jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #30
Source File: TaskSubmissionTestEnvironment.java From flink with Apache License 2.0 | 4 votes |
/**
 * Stores the given job master gateway in this builder.
 *
 * @param jobMasterGateway the testing gateway to store
 * @return this builder, so that calls can be chained
 */
public Builder setJobMasterGateway(TestingJobMasterGateway jobMasterGateway) {
    this.jobMasterGateway = jobMasterGateway;
    return this;
}