Java Code Examples for org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setSendSlotReportFunction()

The following examples show how to use org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setSendSlotReportFunction(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link TestingResourceManagerGateway}, announces it as the ResourceManager leader
 * and installs hooks that let a test observe registration and slot notifications.
 *
 * @param registrationId instance id returned in every registration success response
 * @param taskExecutorIsRegistered latch triggered each time a slot report reaches the gateway
 * @param availableSlotFuture future completed with the arguments of a slot-available notification
 * @return the configured gateway
 */
private TestingResourceManagerGateway createRmWithTmRegisterAndNotifySlotHooks(
		InstanceID registrationId,
		OneShotLatch taskExecutorIsRegistered,
		CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture) {
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

	// the leader is announced before any of the hooks below are installed
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	// every registration attempt is answered with a fresh success response
	rmGateway.setRegisterTaskExecutorFunction(registration ->
		CompletableFuture.completedFuture(
			new TaskExecutorRegistrationSuccess(
				registrationId,
				rmGateway.getOwnResourceId(),
				new ClusterInformation("localhost", 1234))));

	rmGateway.setNotifySlotAvailableConsumer(slotInfo -> availableSlotFuture.complete(slotInfo));

	// an incoming slot report is used as the "registered" signal and is always acknowledged
	rmGateway.setSendSlotReportFunction(slotReportArgs -> {
		taskExecutorIsRegistered.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	return rmGateway;
}
 
Example 2
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@link TaskExecutor} sends a slot report to the ResourceManager
 * once the ResourceManager leader has been announced, and that the report carries
 * the task executor's own resource id.
 */
@Test
public void testInitialSlotReport() throws Exception {
	final TaskExecutor taskExecutor = createTaskExecutor(1);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
		final CompletableFuture<ResourceID> reportingResourceId = new CompletableFuture<>();

		// capture the sender of the first slot report and acknowledge it
		rmGateway.setSendSlotReportFunction(slotReportArgs -> {
			reportingResourceId.complete(slotReportArgs.f0);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(
			rmGateway.getAddress(),
			rmGateway.getFencingToken().toUUID());

		// blocks until the slot report has arrived
		final ResourceID reportedId = reportingResourceId.get();
		assertThat(reportedId, equalTo(taskExecutor.getResourceID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 3
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the {@link TaskExecutor} sends a slot report to the ResourceManager
 * once the ResourceManager leader has been announced, and that the report carries
 * the resource id of the configured task manager location.
 */
@Test
public void testInitialSlotReport() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerLocation tmLocation = new LocalTaskManagerLocation();
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.setTaskManagerLocation(tmLocation)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(services);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
		final CompletableFuture<ResourceID> reportingResourceId = new CompletableFuture<>();

		// capture the sender of the first slot report and acknowledge it
		rmGateway.setSendSlotReportFunction(slotReportArgs -> {
			reportingResourceId.complete(slotReportArgs.f0);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(
			rmGateway.getAddress(),
			rmGateway.getFencingToken().toUUID());

		// blocks until the slot report has arrived
		final ResourceID reportedId = reportingResourceId.get();
		assertThat(reportedId, equalTo(tmLocation.getResourceID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 4
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, after the ResourceManager leader is announced, the {@link TaskExecutor}
 * delivers a slot report whose sender is the resource id of the task manager location
 * it was configured with.
 */
@Test
public void testInitialSlotReport() throws Exception {
	final TaskSlotTable singleSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerLocation location = new LocalTaskManagerLocation();
	final TaskManagerServices tmServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(singleSlotTable)
		.setTaskManagerLocation(location)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(tmServices);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway gateway = new TestingResourceManagerGateway();
		final CompletableFuture<ResourceID> slotReportSender = new CompletableFuture<>();

		// remember who sent the slot report, then acknowledge it
		gateway.setSendSlotReportFunction(report -> {
			slotReportSender.complete(report.f0);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});

		rpc.registerGateway(gateway.getAddress(), gateway);
		resourceManagerLeaderRetriever.notifyListener(
			gateway.getAddress(),
			gateway.getFencingToken().toUUID());

		// waits for the slot report before comparing ids
		final ResourceID sender = slotReportSender.get();
		assertThat(sender, equalTo(location.getResourceID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 5
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report
 * fails.
 *
 * <p>The first slot report is answered with a failed future and the second one with an
 * acknowledgement. The test passes once the registration function of the testing gateway
 * has been invoked twice.
 */
@Test
public void testInitialSlotReportFailure() throws Exception {
	final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
	final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

		// responses handed out, in order, to successive slot reports
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);
		testingResourceManagerGateway.setSendSlotReportFunction(
			resourceIDInstanceIDSlotReportTuple3 -> {
				try {
					return responseQueue.take();
				} catch (InterruptedException e) {
					// restore the interrupt status so the calling thread can still observe the interruption
					Thread.currentThread().interrupt();
					return FutureUtils.completedExceptionally(e);
				}
			});

		final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(
			new TaskExecutorRegistrationSuccess(
				new InstanceID(),
				testingResourceManagerGateway.getOwnResourceId(),
				new ClusterInformation("foobar", 1234)));

		// counts the registration attempts; two are expected (initial one plus the reconnect)
		final CountDownLatch numberRegistrations = new CountDownLatch(2);

		testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
				numberRegistrations.countDown();
				return registrationResponse;
		});

		// first slot report fails, second one succeeds
		responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception")));
		responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get()));

		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

		//wait for the second registration attempt
		numberRegistrations.await();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 6
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a TaskManager holding a reserved slot for a job offers that slot to the
 * JobManager as soon as the job leader is announced.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(1);
	final JobLeaderService leaderService = new DefaultJobLeaderService(
		unresolvedTaskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final CompletableFuture<Void> slotReportReceived = new CompletableFuture<>();
	rmGateway.setSendSlotReportFunction(slotReportArgs -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	// the job master records a copy of the offered slots and accepts all of them
	final CompletableFuture<Collection<SlotOffer>> slotOffersReceived = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((tmResourceId, offers) -> {
			slotOffersReceived.complete(new ArrayList<>(offers));
			return CompletableFuture.completedFuture(offers);
		})
		.build();

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	rpc.registerGateway(jmGateway.getAddress(), jmGateway);

	final AllocationID expectedAllocation = new AllocationID();
	final SlotID firstSlot = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	try {
		taskExecutor.start();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// announce the ResourceManager leader to the task executor
		resourceManagerLeaderRetriever.notifyListener(
			rmGateway.getAddress(),
			rmGateway.getFencingToken().toUUID());

		// block until the slot report has reached the ResourceManager
		slotReportReceived.get();

		// reserve the slot for the job under the expected allocation id
		final CompletableFuture<Acknowledge> slotRequest = taskExecutorGateway.requestSlot(
			firstSlot,
			jobId,
			expectedAllocation,
			ResourceProfile.ZERO,
			jmGateway.getAddress(),
			rmGateway.getFencingToken(),
			timeout);

		slotRequest.get();

		// announcing the job leader should make the task executor offer its reserved slot
		jobManagerLeaderRetriever.notifyListener(
			jmGateway.getAddress(),
			jmGateway.getFencingToken().toUUID());

		final Collection<SlotOffer> receivedOffers = slotOffersReceived.get();
		final Collection<AllocationID> offeredAllocations = receivedOffers
			.stream()
			.map(SlotOffer::getAllocationId)
			.collect(Collectors.toList());
		assertThat(offeredAllocations, containsInAnyOrder(expectedAllocation));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 7
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the heartbeat response sent to the ResourceManager carries the expected
 * slot report and cluster partition report.
 *
 * <p>The slot table hands out two prepared slot reports in order: the first is expected in
 * the initial slot report, the second in the heartbeat payload.
 */
@Test
public void testHeartbeatReporting() throws Exception {
	final String rmAddress = "rm";
	final UUID rmLeaderId = UUID.randomUUID();

	// set up the testing resource manager gateway
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final CompletableFuture<ResourceID> registeredResourceId = new CompletableFuture<>();
	final ResourceID rmResourceId = rmGateway.getOwnResourceId();
	final CompletableFuture<RegistrationResponse> registrationSuccess = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(
			new InstanceID(),
			rmResourceId,
			new ClusterInformation("localhost", 1234)));

	rmGateway.setRegisterTaskExecutorFunction(registration -> {
		registeredResourceId.complete(registration.getResourceId());
		return registrationSuccess;
	});

	final CompletableFuture<SlotReport> firstSlotReport = new CompletableFuture<>();
	rmGateway.setSendSlotReportFunction(slotReportArgs -> {
		firstSlotReport.complete(slotReportArgs.f2);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<TaskExecutorHeartbeatPayload> heartbeatPayloadFuture = new CompletableFuture<>();
	rmGateway.setTaskExecutorHeartbeatConsumer((senderId, payload) -> heartbeatPayloadFuture.complete(payload));

	rpc.registerGateway(rmAddress, rmGateway);

	final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);
	final ResourceProfile profile = ResourceProfile.fromResources(1.0, 1);
	// report for the free slot
	final SlotReport freeSlotReport = new SlotReport(
		new SlotStatus(
			slotId,
			profile));
	// report for the same slot carrying a job id and an allocation id
	final SlotReport allocatedSlotReport = new SlotReport(
		new SlotStatus(
			slotId,
			profile,
			new JobID(),
			new AllocationID()));

	// the slot table returns the prepared reports one after the other
	final Queue<SlotReport> pendingReports = new ArrayDeque<>(Arrays.asList(freeSlotReport, allocatedSlotReport));
	final TaskSlotTable<Task> slotTable = TestingTaskSlotTable
		.<Task>newBuilder()
		.createSlotReportSupplier(pendingReports::poll)
		.closeAsyncReturns(CompletableFuture.completedFuture(null))
		.build();

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutorPartitionTracker partitionTracker = createPartitionTrackerWithFixedPartitionReport(services.getShuffleEnvironment());

	final TaskExecutor taskExecutor = createTaskExecutor(services, HEARTBEAT_SERVICES, partitionTracker);

	try {
		taskExecutor.start();

		// announce a leader and expect a registration to follow
		resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId);

		// the registration and the first slot report must have arrived before heartbeating
		assertThat(registeredResourceId.get(), equalTo(unresolvedTaskManagerLocation.getResourceID()));
		assertThat(firstSlotReport.get(), equalTo(freeSlotReport));

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// request a heartbeat; the response arrives asynchronously
		taskExecutorGateway.heartbeatFromResourceManager(rmResourceId);

		// block until the heartbeat payload has been delivered
		final TaskExecutorHeartbeatPayload receivedPayload = heartbeatPayloadFuture.get();

		// the heartbeat must carry the second prepared slot report
		final SlotReport heartbeatSlotReport = receivedPayload.getSlotReport();
		assertEquals(allocatedSlotReport, heartbeatSlotReport);

		final ClusterPartitionReport heartbeatPartitionReport = receivedPayload.getClusterPartitionReport();
		assertEquals(partitionTracker.createClusterPartitionReport(), heartbeatPartitionReport);
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 8
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link SlotReport} sent to the RM does not contain
 * out dated/stale information as slots are being requested from the
 * TM.
 *
 * <p>This is a probabilistic test case and needs to be executed
 * several times to produce a failure without the fix for FLINK-12865.
 *
 * <p>A background executor keeps sending heartbeats to the task executor while a slot is
 * requested. Every heartbeat response is checked by the heartbeat consumer installed on the
 * testing gateway; the outcome is propagated through {@code verifySlotReportFuture}.
 */
@Test
public void testSlotReportDoesNotContainStaleInformation() throws Exception {
	// NOTE(review): presumably triggered by AllocateSlotNotifyingTaskSlotTable when a slot is allocated; confirm against that class
	final OneShotLatch receivedSlotRequest = new OneShotLatch();
	// completed exceptionally on the first failed assertion, or normally once verification is told to stop
	final CompletableFuture<Void> verifySlotReportFuture = new CompletableFuture<>();
	// triggered by the test thread after the slot request has been acknowledged
	final OneShotLatch terminateSlotReportVerification = new OneShotLatch();
	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	// Assertions for this test
	testingResourceManagerGateway.setTaskExecutorHeartbeatConsumer((ignored, slotReport) -> {
		try {
			// exactly one slot status is expected per report
			final ArrayList<SlotStatus> slots = Lists.newArrayList(slotReport);
			assertThat(slots, hasSize(1));
			final SlotStatus slotStatus = slots.get(0);

			log.info("Received SlotStatus: {}", slotStatus);

			// once the latch is triggered, the report must carry an allocation id; before that it must not
			if (receivedSlotRequest.isTriggered()) {
				assertThat(slotStatus.getAllocationID(), is(notNullValue()));
			} else {
				assertThat(slotStatus.getAllocationID(), is(nullValue()));
			}
		} catch (AssertionError e) {
			// assertion failures are handed to the test thread through the future
			verifySlotReportFuture.completeExceptionally(e);
		}

		// has no effect if the future was already completed exceptionally above
		if (terminateSlotReportVerification.isTriggered()) {
			verifySlotReportFuture.complete(null);
		}
	});
	final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();

	// the first slot report serves as the signal that the test may proceed
	testingResourceManagerGateway.setSendSlotReportFunction(ignored -> {
		taskExecutorRegistrationFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(new AllocateSlotNotifyingTaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService, receivedSlotRequest))
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);
	final ResourceID taskExecutorResourceId = taskManagerServices.getTaskManagerLocation().getResourceID();

	taskExecutor.start();

	final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

	// dedicated thread that keeps sending heartbeats while the slot is being requested
	final ScheduledExecutorService heartbeatExecutor = java.util.concurrent.Executors.newSingleThreadScheduledExecutor();

	try {
		// wait for the first slot report
		taskExecutorRegistrationFuture.get();

		final OneShotLatch scheduleFirstHeartbeat = new OneShotLatch();
		final ResourceID resourceManagerResourceId = testingResourceManagerGateway.getOwnResourceId();
		final long heartbeatInterval = 5L;
		heartbeatExecutor.scheduleWithFixedDelay(
			() -> {
				scheduleFirstHeartbeat.trigger();
				taskExecutorGateway.heartbeatFromResourceManager(resourceManagerResourceId);
			},
			0L,
			heartbeatInterval,
			TimeUnit.MILLISECONDS);

		// make sure heartbeats are already being sent before the slot is requested
		scheduleFirstHeartbeat.await();

		SlotID slotId = new SlotID(taskExecutorResourceId, 0);
		final CompletableFuture<Acknowledge> requestSlotFuture = taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			new AllocationID(),
			"foobar",
			testingResourceManagerGateway.getFencingToken(),
			timeout);

		requestSlotFuture.get();

		// from now on the next heartbeat response completes the verification future
		terminateSlotReportVerification.trigger();

		// rethrows (wrapped) any assertion failure recorded by the heartbeat consumer
		verifySlotReportFuture.get();
	} finally {
		ExecutorUtils.gracefulShutdown(timeout.toMilliseconds(), TimeUnit.MILLISECONDS, heartbeatExecutor);
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 9
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 *
 * <p>Two slots are requested on the TaskExecutor. The JobMaster heartbeat then reports one
 * allocation known to both sides and one known only to the JobMaster. The test expects the
 * JobMaster-only allocation to be failed towards the JobMaster and the TaskExecutor-only
 * allocation to be reported as available to the ResourceManager.
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	// NOTE(review): presumably counted down by ActivateSlotNotifyingTaskSlotTable once per activated slot; confirm against that class
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
			timerService,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	// allocation ids for which the ResourceManager was notified that the slot is available
	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// allocation ids for which the JobMaster received a fail-slot call
	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	// the job master accepts every offered slot and every task manager registration
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait until the slot report has reached the ResourceManager
		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		// the task executor ends up with allocationIdInBoth and allocationIdOnlyInTM
		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		// wait until the latch handed to the slot table has been counted down twice
		activeSlots.await();

		// the job master's view contains allocationIdInBoth and allocationIdOnlyInJM
		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		// exactly one failed slot and one freed slot are expected; the short polls check that nothing else follows
		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 10
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the JobLeaderService stops tracking a job once the TaskExecutor has freed
 * the last slot that was assigned to this job.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(1);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services);

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
		final CompletableFuture<Void> slotReportReceived = new CompletableFuture<>();
		rmGateway.setSendSlotReportFunction(slotReportArgs -> {
			slotReportReceived.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId rmId = rmGateway.getFencingToken();

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmId.toUUID());

		// futures handed to the leader retrieval service for this job (start / stop)
		final CompletableFuture<LeaderRetrievalListener> retrieverStarted = new CompletableFuture<>();
		final CompletableFuture<Void> retrieverStopped = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobLeaderRetriever =
			new StartStopNotifyingLeaderRetrievalService(retrieverStarted, retrieverStopped);
		haServices.setJobMasterLeaderRetriever(jobId, jobLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID firstSlot = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);
		final AllocationID allocation = new AllocationID();

		// nothing has been requested for the job yet, so it must not be tracked
		assertThat(retrieverStarted.isDone(), is(false));
		final JobLeaderService leaderService = services.getJobLeaderService();
		assertThat(leaderService.containsJob(jobId), is(false));

		// block until the slot report has reached the ResourceManager
		slotReportReceived.get();

		final CompletableFuture<Acknowledge> slotRequest = taskExecutorGateway.requestSlot(
			firstSlot,
			jobId,
			allocation,
			ResourceProfile.ZERO,
			"foobar",
			rmId,
			timeout);
		slotRequest.get();

		// the job leader retrieval service for jobId must have been started by now
		retrieverStarted.get();
		assertThat(leaderService.containsJob(jobId), is(true));

		final CompletableFuture<Acknowledge> slotRelease =
			taskExecutorGateway.freeSlot(allocation, new FlinkException("Test exception"), timeout);
		slotRelease.get();

		// wait until the job leader retrieval service for jobId is stopped because the job should get removed
		retrieverStopped.get();
		assertThat(leaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 11
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report
 * fails.
 *
 * <p>The first slot report is answered with a failed future and the second one with an
 * acknowledgement. The test passes once the registration function of the testing gateway
 * has been invoked twice.
 */
@Test
public void testInitialSlotReportFailure() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.setTaskManagerLocation(taskManagerLocation)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

		// responses handed out, in order, to successive slot reports
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);
		testingResourceManagerGateway.setSendSlotReportFunction(
			resourceIDInstanceIDSlotReportTuple3 -> {
				try {
					return responseQueue.take();
				} catch (InterruptedException e) {
					// restore the interrupt status so the calling thread can still observe the interruption
					Thread.currentThread().interrupt();
					return FutureUtils.completedExceptionally(e);
				}
			});

		final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(
			new TaskExecutorRegistrationSuccess(
				new InstanceID(),
				testingResourceManagerGateway.getOwnResourceId(),
				new ClusterInformation("foobar", 1234)));

		// counts the registration attempts; two are expected (initial one plus the reconnect)
		final CountDownLatch numberRegistrations = new CountDownLatch(2);

		testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
			numberRegistrations.countDown();
			return registrationResponse;
		});

		// first slot report fails, second one succeeds
		responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception")));
		responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get()));

		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

		//wait for the second registration attempt
		numberRegistrations.await();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 12
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the heartbeat response sent to the ResourceManager carries the expected
 * slot report.
 *
 * <p>The slot table is created from two prepared slot reports: the first is expected in
 * the initial slot report, the second in the heartbeat response.
 */
@Test
public void testHeartbeatSlotReporting() throws Exception {
	final String rmAddress = "rm";
	final UUID rmLeaderId = UUID.randomUUID();

	// set up the testing resource manager gateway
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final CompletableFuture<ResourceID> registeredResourceId = new CompletableFuture<>();
	final ResourceID rmResourceId = rmGateway.getOwnResourceId();
	final CompletableFuture<RegistrationResponse> registrationSuccess = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(
			new InstanceID(),
			rmResourceId,
			new ClusterInformation("localhost", 1234)));

	rmGateway.setRegisterTaskExecutorFunction(registrationArgs -> {
		registeredResourceId.complete(registrationArgs.f1);
		return registrationSuccess;
	});

	final CompletableFuture<SlotReport> firstSlotReport = new CompletableFuture<>();
	rmGateway.setSendSlotReportFunction(slotReportArgs -> {
		firstSlotReport.complete(slotReportArgs.f2);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<SlotReport> heartbeatReport = new CompletableFuture<>();
	rmGateway.setTaskExecutorHeartbeatConsumer((senderId, report) -> heartbeatReport.complete(report));

	rpc.registerGateway(rmAddress, rmGateway);

	final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
	final ResourceProfile profile = new ResourceProfile(1.0, 1);
	// report for the free slot
	final SlotReport freeSlotReport = new SlotReport(
		new SlotStatus(
			slotId,
			profile));
	// report for the same slot carrying a job id and an allocation id
	final SlotReport allocatedSlotReport = new SlotReport(
		new SlotStatus(
			slotId,
			profile,
			new JobID(),
			new AllocationID()));

	final TestingTaskSlotTable slotTable = new TestingTaskSlotTable(
		new ArrayDeque<>(Arrays.asList(freeSlotReport, allocatedSlotReport)));

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();

		// announce a leader and expect a registration to follow
		resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId);

		// the registration and the first slot report must have arrived before heartbeating
		assertThat(registeredResourceId.get(), equalTo(taskManagerLocation.getResourceID()));
		assertThat(firstSlotReport.get(), equalTo(freeSlotReport));

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// request a heartbeat; the response arrives asynchronously
		taskExecutorGateway.heartbeatFromResourceManager(rmResourceId);

		// block until the heartbeat response has been delivered
		final SlotReport receivedReport = heartbeatReport.get();

		// the heartbeat must carry the second prepared slot report
		assertEquals(allocatedSlotReport, receivedReport);
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 13
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the TaskManager notices the leader of a job for which it holds reserved slots
 * and, once that leader is known, offers the reserved slots to the JobManager.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jmTable = new JobManagerTable();
	final JobLeaderService leaderService = new JobLeaderService(
		taskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	// resource manager stub: completes the future as soon as a slot report arrives
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final CompletableFuture<Void> slotReportReceived = new CompletableFuture<>();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	// job master stub: records a copy of the offered slots and accepts all of them
	final CompletableFuture<Collection<SlotOffer>> slotsOffered = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredResourceId, offers) -> {
			slotsOffered.complete(new ArrayList<>(offers));
			return CompletableFuture.completedFuture(offers);
		})
		.build();

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	rpc.registerGateway(jmGateway.getAddress(), jmGateway);

	final AllocationID expectedAllocation = new AllocationID();
	final SlotID requestedSlot = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setJobManagerTable(jmTable)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	try {
		taskExecutor.start();

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// announce the resource manager leader and block until the slot report reached it
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());
		slotReportReceived.get();

		// reserve the slot for the job; the request has to be acknowledged before we continue
		selfGateway.requestSlot(
			requestedSlot,
			jobId,
			expectedAllocation,
			jmGateway.getAddress(),
			rmGateway.getFencingToken(),
			timeout).get();

		// only now does the task manager learn who leads the job
		jobManagerLeaderRetriever.notifyListener(jmGateway.getAddress(), jmGateway.getFencingToken().toUUID());

		// exactly the reserved allocation must show up in the offer
		final Collection<AllocationID> offeredAllocations = slotsOffered.get()
			.stream()
			.map(SlotOffer::getAllocationId)
			.collect(Collectors.toList());
		assertThat(offeredAllocations, containsInAnyOrder(expectedAllocation));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 14
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the heartbeat response sent to the ResourceManager carries the expected
 * slot report.
 */
@Test
public void testHeartbeatSlotReporting() throws Exception {
	final String resourceManagerAddress = "rm";
	final UUID resourceManagerLeaderId = UUID.randomUUID();

	// set up the testing resource manager gateway and capture what the task executor sends to it
	final TestingResourceManagerGateway resourceManager = new TestingResourceManagerGateway();
	final CompletableFuture<ResourceID> registeredTaskExecutor = new CompletableFuture<>();
	final ResourceID resourceManagerResourceId = resourceManager.getOwnResourceId();
	final CompletableFuture<RegistrationResponse> registrationAnswer = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(
			new InstanceID(),
			resourceManagerResourceId,
			new ClusterInformation("localhost", 1234)));

	resourceManager.setRegisterTaskExecutorFunction(registration -> {
		registeredTaskExecutor.complete(registration.f1);
		return registrationAnswer;
	});

	final CompletableFuture<SlotReport> reportAfterRegistration = new CompletableFuture<>();
	resourceManager.setSendSlotReportFunction(slotReportMessage -> {
		reportAfterRegistration.complete(slotReportMessage.f2);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<SlotReport> reportFromHeartbeat = new CompletableFuture<>();
	resourceManager.setTaskExecutorHeartbeatConsumer(
		(ignoredSender, report) -> reportFromHeartbeat.complete(report));

	rpc.registerGateway(resourceManagerAddress, resourceManager);

	// two different reports for the same slot: first without, then with an allocation
	final SlotID slot = new SlotID(taskManagerLocation.getResourceID(), 0);
	final ResourceProfile profile = new ResourceProfile(1.0, 1);
	final SlotStatus unallocatedStatus = new SlotStatus(slot, profile);
	final SlotReport firstReport = new SlotReport(unallocatedStatus);
	final SlotStatus allocatedStatus = new SlotStatus(slot, profile, new JobID(), new AllocationID());
	final SlotReport secondReport = new SlotReport(allocatedStatus);

	final TestingTaskSlotTable slotTable = new TestingTaskSlotTable(
		new ArrayDeque<>(Arrays.asList(firstReport, secondReport)));

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	try {
		taskExecutor.start();

		// announcing a leader has to result in a registration
		resourceManagerLeaderRetriever.notifyListener(resourceManagerAddress, resourceManagerLeaderId);

		// a successful registration starts the heartbeat monitoring between tm and rm
		final ResourceID registeredId = registeredTaskExecutor.get();
		assertThat(registeredId, equalTo(taskManagerLocation.getResourceID()));
		final SlotReport initiallyReported = reportAfterRegistration.get();
		assertThat(initiallyReported, equalTo(firstReport));

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// the heartbeat request is handled asynchronously
		selfGateway.heartbeatFromResourceManager(resourceManagerResourceId);

		// block until the heartbeat response arrives; it must carry the second report
		assertEquals(secondReport, reportFromHeartbeat.get());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 15
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link SlotReport} sent to the RM does not contain
 * outdated/stale information as slots are being requested from the
 * TM.
 *
 * <p>Heartbeats are requested continuously from a background executor while a single slot
 * request is issued. Every slot report delivered with a heartbeat is checked: before the slot
 * request has been seen the slot must carry no allocation id, afterwards it must carry one.
 *
 * <p>This is a probabilistic test case and needs to be executed
 * several times to produce a failure without the fix for FLINK-12865.
 */
@Test
public void testSlotReportDoesNotContainStaleInformation() throws Exception {
	// handed to the AllocateSlotNotifyingTaskSlotTable below; presumably triggered by that
	// table when a slot gets allocated -- confirm against its implementation
	final OneShotLatch receivedSlotRequest = new OneShotLatch();
	// outcome of the verification: failed by the first assertion error, completed normally
	// by the first heartbeat that is handled after termination has been requested
	final CompletableFuture<Void> verifySlotReportFuture = new CompletableFuture<>();
	final OneShotLatch terminateSlotReportVerification = new OneShotLatch();
	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	// Assertions for this test
	testingResourceManagerGateway.setTaskExecutorHeartbeatConsumer((ignored, slotReport) -> {
		try {
			final ArrayList<SlotStatus> slots = Lists.newArrayList(slotReport);
			assertThat(slots, hasSize(1));
			final SlotStatus slotStatus = slots.get(0);

			log.info("Received SlotStatus: {}", slotStatus);

			if (receivedSlotRequest.isTriggered()) {
				assertThat(slotStatus.getAllocationID(), is(notNullValue()));
			} else {
				assertThat(slotStatus.getAllocationID(), is(nullValue()));
			}
		} catch (AssertionError e) {
			// forward the failure to the test thread via the future
			verifySlotReportFuture.completeExceptionally(e);
		}

		// has no effect if the future was already completed exceptionally above
		if (terminateSlotReportVerification.isTriggered()) {
			verifySlotReportFuture.complete(null);
		}
	});
	// completed (with null) when the slot report function below is invoked
	final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();

	testingResourceManagerGateway.setSendSlotReportFunction(ignored -> {
		taskExecutorRegistrationFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(new AllocateSlotNotifyingTaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService, receivedSlotRequest))
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);
	final ResourceID taskExecutorResourceId = taskManagerServices.getTaskManagerLocation().getResourceID();

	taskExecutor.start();

	final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

	// single thread that keeps requesting heartbeats from the task executor
	final ScheduledExecutorService heartbeatExecutor = java.util.concurrent.Executors.newSingleThreadScheduledExecutor();

	try {
		// wait until the slot report function has been called on the resource manager gateway
		taskExecutorRegistrationFuture.get();

		final OneShotLatch scheduleFirstHeartbeat = new OneShotLatch();
		final ResourceID resourceManagerResourceId = testingResourceManagerGateway.getOwnResourceId();
		// delay in milliseconds between the end of one heartbeat request and the start of the next
		final long heartbeatInterval = 5L;
		heartbeatExecutor.scheduleWithFixedDelay(
			() -> {
				scheduleFirstHeartbeat.trigger();
				taskExecutorGateway.heartbeatFromResourceManager(resourceManagerResourceId);
			},
			0L,
			heartbeatInterval,
			TimeUnit.MILLISECONDS);

		// make sure the heartbeat task has started running before the slot is requested
		scheduleFirstHeartbeat.await();

		SlotID slotId = new SlotID(taskExecutorResourceId, 0);
		final CompletableFuture<Acknowledge> requestSlotFuture = taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			new AllocationID(),
			"foobar",
			testingResourceManagerGateway.getFencingToken(),
			timeout);

		requestSlotFuture.get();

		// from now on the next handled heartbeat completes the verification future
		terminateSlotReportVerification.trigger();

		// rethrows (wrapped) any assertion error recorded by the heartbeat consumer
		verifySlotReportFuture.get();
	} finally {
		ExecutorUtils.gracefulShutdown(timeout.toMilliseconds(), TimeUnit.MILLISECONDS, heartbeatExecutor);
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 16
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the TaskExecutor reconciles its own view of the slots with the JobMaster's view,
 * which arrives as an AllocatedSlotReport with the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	final CountDownLatch slotsActivated = new CountDownLatch(2);
	final TaskSlotTable slotTable = new ActivateSlotNotifyingTaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService,
		slotsActivated);
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	// resource manager stub: signals the slot report and collects the allocations reported as free
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final BlockingQueue<AllocationID> freedAllocations = new ArrayBlockingQueue<>(2);
	final OneShotLatch slotReportSent = new OneShotLatch();

	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportSent.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rmGateway.setNotifySlotAvailableConsumer(
		slotAvailable -> freedAllocations.offer(slotAvailable.f2));

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	// job master stub: collects failed allocations and accepts every slot offer
	final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
	final ResourceID jmResourceId = ResourceID.generate();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setFailSlotConsumer(
			(ignoredTaskManager, failedAllocation, ignoredCause) -> failedAllocations.offer(failedAllocation))
		.setOfferSlotsFunction(
			(ignoredTaskManager, offers) -> CompletableFuture.completedFuture(new ArrayList<>(offers)))
		.setRegisterTaskManagerFunction(
			(ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jmResourceId)))
		.build();
	final String jmAddress = jmGateway.getAddress();
	rpc.registerGateway(jmAddress, jmGateway);
	jobManagerLeaderRetriever.notifyListener(jmAddress, jmGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		slotReportSent.await();

		final AllocationID knownToBoth = new AllocationID();
		final AllocationID knownToJobMasterOnly = new AllocationID();
		final AllocationID knownToTaskExecutorOnly = new AllocationID();
		final SlotID firstSlot = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID secondSlot = new SlotID(taskExecutor.getResourceID(), 1);

		// the task executor holds one shared allocation and one the job master does not know
		selfGateway.requestSlot(firstSlot, jobId, knownToBoth, "foobar", rmGateway.getFencingToken(), timeout);
		selfGateway.requestSlot(secondSlot, jobId, knownToTaskExecutorOnly, "foobar", rmGateway.getFencingToken(), timeout);

		slotsActivated.await();

		// the job master reports the shared allocation and one the task executor does not know
		final List<AllocatedSlotInfo> jobMasterView = Arrays.asList(
			new AllocatedSlotInfo(0, knownToBoth),
			new AllocatedSlotInfo(1, knownToJobMasterOnly));
		selfGateway.heartbeatFromJobManager(jmResourceId, new AllocatedSlotReport(jobId, jobMasterView));

		// each side is told about exactly one mismatch and nothing else
		assertThat(failedAllocations.take(), is(knownToJobMasterOnly));
		assertThat(freedAllocations.take(), is(knownToTaskExecutorOnly));
		assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(freedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 17
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a slot offer to the job master is retried after it timed out.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(2);
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	final AllocationID requestedAllocation = new AllocationID();

	// resource manager stub: completes the future as soon as a slot report arrives
	final CompletableFuture<ResourceID> slotReportReceived = new CompletableFuture<>();
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	// job master stub: answers with a timeout until the third offer, which it accepts
	final CountDownLatch remainingOffers = new CountDownLatch(3);
	final CompletableFuture<AllocationID> acceptedAllocation = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredTaskManager, offers) -> {
			assertThat(offers.size(), is(1));
			remainingOffers.countDown();

			if (remainingOffers.getCount() != 0) {
				return FutureUtils.completedExceptionally(new TimeoutException());
			}

			acceptedAllocation.complete(offers.iterator().next().getAllocationId());
			return CompletableFuture.completedFuture(offers);
		})
		.build();
	final String jmAddress = jmGateway.getAddress();
	rpc.registerGateway(jmAddress, jmGateway);
	jobManagerLeaderRetriever.notifyListener(jmAddress, jmGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// the ResourceManager connection is up once the slot report has been received
		slotReportReceived.get();

		final SlotID firstSlot = new SlotID(taskExecutor.getResourceID(), 0);
		selfGateway.requestSlot(
			firstSlot,
			jobId,
			requestedAllocation,
			ResourceProfile.ZERO,
			jmAddress,
			rmGateway.getFencingToken(),
			timeout).get();

		// block until all three offers have reached the job master
		remainingOffers.await();

		assertThat(acceptedAllocation.get(), is(requestedAllocation));
		assertTrue(slotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 18
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report
 * fails.
 *
 * <p>The testing ResourceManager answers the first slot report with a failure and the second
 * one with an acknowledgement. The test succeeds once two registration attempts have been
 * counted.
 */
@Test
public void testInitialSlotReportFailure() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.setTaskManagerLocation(taskManagerLocation)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

		// prepared answers for the slot reports, handed out in FIFO order
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);
		testingResourceManagerGateway.setSendSlotReportFunction(
			resourceIDInstanceIDSlotReportTuple3 -> {
				try {
					return responseQueue.take();
				} catch (InterruptedException e) {
					// restore the interrupt status so that the calling thread can still observe it
					Thread.currentThread().interrupt();
					return FutureUtils.completedExceptionally(e);
				}
			});

		final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(
			new TaskExecutorRegistrationSuccess(
				new InstanceID(),
				testingResourceManagerGateway.getOwnResourceId(),
				new ClusterInformation("foobar", 1234)));

		// counts the registration attempts; the second one is the reconnect we are waiting for
		final CountDownLatch numberRegistrations = new CountDownLatch(2);

		testingResourceManagerGateway.setRegisterTaskExecutorFunction(new Function<Tuple4<String, ResourceID, Integer, HardwareDescription>, CompletableFuture<RegistrationResponse>>() {
			@Override
			public CompletableFuture<RegistrationResponse> apply(Tuple4<String, ResourceID, Integer, HardwareDescription> stringResourceIDIntegerHardwareDescriptionTuple4) {
				numberRegistrations.countDown();
				return registrationResponse;
			}
		});

		// first slot report fails, second one is acknowledged
		responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception")));
		responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get()));

		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

		// wait for the second registration attempt
		numberRegistrations.await();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 19
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the JobLeaderService drops a job as soon as the TaskExecutor no longer
 * holds any slot assigned to that job.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = new TestingTaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		// resource manager stub: completes the future as soon as a slot report arrives
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
		final CompletableFuture<Void> slotReportReceived = new CompletableFuture<>();
		rmGateway.setSendSlotReportFunction(ignored -> {
			slotReportReceived.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId rmId = rmGateway.getFencingToken();

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmId.toUUID());

		// leader retrieval service for the job which reports when it is started and stopped
		final CompletableFuture<LeaderRetrievalListener> retrievalStarted = new CompletableFuture<>();
		final CompletableFuture<Void> retrievalStopped = new CompletableFuture<>();
		final StartStopNotifyingLeaderRetrievalService jobLeaderRetriever =
			new StartStopNotifyingLeaderRetrievalService(retrievalStarted, retrievalStopped);
		haServices.setJobMasterLeaderRetriever(jobId, jobLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID requestedSlot = new SlotID(taskManagerLocation.getResourceID(), 0);
		final AllocationID allocation = new AllocationID();

		// before any slot is requested the job must be unknown to the job leader service
		assertThat(retrievalStarted.isDone(), is(false));
		final JobLeaderService leaderService = services.getJobLeaderService();
		assertThat(leaderService.containsJob(jobId), is(false));

		// the initial slot report has to arrive before the slot can be requested
		slotReportReceived.get();

		selfGateway.requestSlot(requestedSlot, jobId, allocation, "foobar", rmId, timeout).get();

		// the slot request starts the leader retrieval service for jobId
		retrievalStarted.get();
		assertThat(leaderService.containsJob(jobId), is(true));

		selfGateway.freeSlot(allocation, new FlinkException("Test exception"), timeout).get();

		// freeing the last slot stops the leader retrieval service because the job gets removed
		retrievalStopped.get();
		assertThat(leaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 20
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the TaskManager notices the leader of a job for which it holds reserved slots
 * and, once that leader is known, offers the reserved slots to the JobManager.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jmTable = new JobManagerTable();
	final JobLeaderService leaderService = new JobLeaderService(
		taskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	// resource manager stub: completes the future as soon as a slot report arrives
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final CompletableFuture<Void> slotReportReceived = new CompletableFuture<>();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	// job master stub: records a copy of the offered slots and accepts all of them
	final CompletableFuture<Collection<SlotOffer>> slotsOffered = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredResourceId, offers) -> {
			slotsOffered.complete(new ArrayList<>(offers));
			return CompletableFuture.completedFuture(offers);
		})
		.build();

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	rpc.registerGateway(jmGateway.getAddress(), jmGateway);

	final AllocationID expectedAllocation = new AllocationID();
	final SlotID requestedSlot = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setJobManagerTable(jmTable)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// announce the resource manager leader and block until the slot report reached it
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());
		slotReportReceived.get();

		// reserve the slot for the job; the request has to be acknowledged before we continue
		selfGateway.requestSlot(
			requestedSlot,
			jobId,
			expectedAllocation,
			jmGateway.getAddress(),
			rmGateway.getFencingToken(),
			timeout).get();

		// only now does the task manager learn who leads the job
		jobManagerLeaderRetriever.notifyListener(jmGateway.getAddress(), jmGateway.getFencingToken().toUUID());

		// exactly the reserved allocation must show up in the offer
		final Collection<AllocationID> offeredAllocations = slotsOffered.get()
			.stream()
			.map(SlotOffer::getAllocationId)
			.collect(Collectors.toList());
		assertThat(offeredAllocations, containsInAnyOrder(expectedAllocation));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}