Java Code Examples for org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#getFencingToken()

The following examples show how to use org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#getFencingToken() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
private ResourceManagerId createAndRegisterResourceManager(
		CompletableFuture<Tuple3<ResourceID, InstanceID, SlotReport>> initialSlotReportFuture) {
	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
	resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);

	// tell the task manager about the rm leader
	resourceManagerLeaderRetriever.notifyListener(
		resourceManagerGateway.getAddress(),
		resourceManagerGateway.getFencingToken().toUUID());

	return resourceManagerGateway.getFencingToken();
}
 
Example 2
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that we continue reconnecting to the latest known RM after a disconnection
 * message.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
		final TestingResourceManagerGateway testingResourceManagerGateway = createAndRegisterTestingResourceManagerGateway();
		final BlockingQueue<JobMasterId> registrationsQueue = new ArrayBlockingQueue<>(1);

		testingResourceManagerGateway.setRegisterJobManagerConsumer(
			jobMasterIdResourceIDStringJobIDTuple4 -> registrationsQueue.offer(jobMasterIdResourceIDStringJobIDTuple4.f0));

		final ResourceManagerId resourceManagerId = testingResourceManagerGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(testingResourceManagerGateway);

		// wait for first registration attempt
		final JobMasterId firstRegistrationAttempt = registrationsQueue.take();

		assertThat(firstRegistrationAttempt, equalTo(jobMasterId));

		assertThat(registrationsQueue.isEmpty(), is(true));
		jobMasterGateway.disconnectResourceManager(resourceManagerId, new FlinkException("Test exception"));

		// wait for the second registration attempt after the disconnect call
		assertThat(registrationsQueue.take(), equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 3
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that we continue reconnecting to the latest known RM after a disconnection
 * message.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
		final TestingResourceManagerGateway testingResourceManagerGateway = createAndRegisterTestingResourceManagerGateway();
		final BlockingQueue<JobMasterId> registrationsQueue = new ArrayBlockingQueue<>(1);

		testingResourceManagerGateway.setRegisterJobManagerConsumer(
			jobMasterIdResourceIDStringJobIDTuple4 -> registrationsQueue.offer(jobMasterIdResourceIDStringJobIDTuple4.f0));

		final ResourceManagerId resourceManagerId = testingResourceManagerGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(testingResourceManagerGateway);

		// wait for first registration attempt
		final JobMasterId firstRegistrationAttempt = registrationsQueue.take();

		assertThat(firstRegistrationAttempt, equalTo(jobMasterId));

		assertThat(registrationsQueue.isEmpty(), is(true));
		jobMasterGateway.disconnectResourceManager(resourceManagerId, new FlinkException("Test exception"));

		// wait for the second registration attempt after the disconnect call
		assertThat(registrationsQueue.take(), equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 4
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a job is removed from the JobLeaderService once a TaskExecutor has
 * no more slots assigned to this job.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Collections.singleton(ResourceProfile.UNKNOWN),
		timerService);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = new TestingTaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
		final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>();
		resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
			initialSlotReport.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken();

		rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID());

		final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>();
		final CompletableFuture<Void> stopFuture = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService(
			startFuture,
			stopFuture);
		haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
		final AllocationID allocationId = new AllocationID();

		assertThat(startFuture.isDone(), is(false));
		final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService();
		assertThat(jobLeaderService.containsJob(jobId), is(false));

		// wait for the initial slot report
		initialSlotReport.get();

		taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			"foobar",
			resourceManagerId,
			timeout).get();

		// wait until the job leader retrieval service for jobId is started
		startFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(true));

		taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get();

		// wait that the job leader retrieval service for jobId stopped becaue it should get removed
		stopFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 5
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a job is removed from the JobLeaderService once a TaskExecutor has
 * no more slots assigned to this job.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Collections.singleton(ResourceProfile.UNKNOWN),
		timerService);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices);

	try {
		final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
		final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>();
		resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
			initialSlotReport.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken();

		rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID());

		final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>();
		final CompletableFuture<Void> stopFuture = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService(
			startFuture,
			stopFuture);
		haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
		final AllocationID allocationId = new AllocationID();

		assertThat(startFuture.isDone(), is(false));
		final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService();
		assertThat(jobLeaderService.containsJob(jobId), is(false));

		// wait for the initial slot report
		initialSlotReport.get();

		taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			"foobar",
			resourceManagerId,
			timeout).get();

		// wait until the job leader retrieval service for jobId is started
		startFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(true));

		taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get();

		// wait that the job leader retrieval service for jobId stopped becaue it should get removed
		stopFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 6
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that we continue reconnecting to the latest known RM after a disconnection
 * message.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices);

	final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
		final TestingResourceManagerGateway testingResourceManagerGateway = createAndRegisterTestingResourceManagerGateway();
		final BlockingQueue<JobMasterId> registrationsQueue = new ArrayBlockingQueue<>(1);

		testingResourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> {
			registrationsQueue.offer(jobMasterId);
			return CompletableFuture.completedFuture(testingResourceManagerGateway.getJobMasterRegistrationSuccess());
		});

		final ResourceManagerId resourceManagerId = testingResourceManagerGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(testingResourceManagerGateway);

		// wait for first registration attempt
		final JobMasterId firstRegistrationAttempt = registrationsQueue.take();

		assertThat(firstRegistrationAttempt, equalTo(jobMasterId));

		assertThat(registrationsQueue.isEmpty(), is(true));
		jobMasterGateway.disconnectResourceManager(resourceManagerId, new FlinkException("Test exception"));

		// wait for the second registration attempt after the disconnect call
		assertThat(registrationsQueue.take(), equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 7
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a job is removed from the JobLeaderService once a TaskExecutor has
 * no more slots assigned to this job.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices);

	try {
		final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
		final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>();
		resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
			initialSlotReport.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken();

		rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID());

		final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>();
		final CompletableFuture<Void> stopFuture = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService(
			startFuture,
			stopFuture);
		haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);
		final AllocationID allocationId = new AllocationID();

		assertThat(startFuture.isDone(), is(false));
		final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService();
		assertThat(jobLeaderService.containsJob(jobId), is(false));

		// wait for the initial slot report
		initialSlotReport.get();

		taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			ResourceProfile.ZERO,
			"foobar",
			resourceManagerId,
			timeout).get();

		// wait until the job leader retrieval service for jobId is started
		startFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(true));

		taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get();

		// wait that the job leader retrieval service for jobId stopped becaue it should get removed
		stopFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}