Java Code Examples for org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setRegisterJobManagerConsumer()

The following examples show how to use org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setRegisterJobManagerConsumer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a connection to a ResourceManager which has not been established yet
 * can be closed, so that the JobMaster starts registering at a second ResourceManager.
 */
@Test
public void testCloseUnestablishedResourceManagerConnection() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		jobMaster.start(JobMasterId.generate()).get();

		final TestingResourceManagerGateway firstRm = createAndRegisterTestingResourceManagerGateway();
		final TestingResourceManagerGateway secondRm = createAndRegisterTestingResourceManagerGateway();

		// each latch fires as soon as the corresponding gateway receives a registration call
		final OneShotLatch registeredAtFirstRm = new OneShotLatch();
		firstRm.setRegisterJobManagerConsumer(registration -> registeredAtFirstRm.trigger());

		final OneShotLatch registeredAtSecondRm = new OneShotLatch();
		secondRm.setRegisterJobManagerConsumer(registration -> registeredAtSecondRm.trigger());

		// notify the leader listeners about the first RM and block until the first
		// registration attempt has been observed
		notifyResourceManagerLeaderListeners(firstRm);
		registeredAtFirstRm.await();

		// switching to the second RM should stop the connection attempts towards the first RM ...
		notifyResourceManagerLeaderListeners(secondRm);

		// ... and the JobMaster should start registering at the second RM
		registeredAtSecondRm.await();
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 2
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, after receiving a disconnection message, the JobMaster keeps
 * reconnecting to the latest known ResourceManager.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final JobMasterGateway selfGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);

	try {
		// block until the JobMaster has been started
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final TestingResourceManagerGateway rmGateway = createAndRegisterTestingResourceManagerGateway();

		// records the JobMasterId (field f0) of every registration call seen by the gateway
		final BlockingQueue<JobMasterId> seenRegistrations = new ArrayBlockingQueue<>(1);
		rmGateway.setRegisterJobManagerConsumer(
			registration -> seenRegistrations.offer(registration.f0));

		final ResourceManagerId rmId = rmGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(rmGateway);

		// first registration attempt
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));

		// no further attempt may be pending before the disconnect is issued
		assertThat(seenRegistrations.isEmpty(), is(true));
		selfGateway.disconnectResourceManager(rmId, new FlinkException("Test exception"));

		// second registration attempt, following the disconnect call
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 3
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a JM connects to the leading RM after regaining leadership.
 *
 * <p>The JobMaster is started, suspended and started again with a new
 * {@link JobMasterId}; the second registration attempt must carry the new id.
 */
@Test
public void testResourceManagerConnectionAfterRegainingLeadership() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);

	try {
		// block until the JobMaster has been started
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final TestingResourceManagerGateway rmGateway = createAndRegisterTestingResourceManagerGateway();

		// records the JobMasterId (field f0) of every registration call seen by the gateway
		final BlockingQueue<JobMasterId> seenRegistrations = new ArrayBlockingQueue<>(1);
		rmGateway.setRegisterJobManagerConsumer(
			registration -> seenRegistrations.offer(registration.f0));

		notifyResourceManagerLeaderListeners(rmGateway);

		// the first attempt is made under the initial JobMasterId
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));

		jobMaster.suspend(new FlinkException("Test exception.")).get();

		// restart the JobMaster under a freshly generated id
		final JobMasterId regainedLeaderId = JobMasterId.generate();
		jobMaster.start(regainedLeaderId).get();

		// the next attempt must be made under the new JobMasterId
		assertThat(seenRegistrations.take(), equalTo(regainedLeaderId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 4
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a connection to a ResourceManager which has not been established yet
 * can be closed, so that the JobMaster starts registering at a second ResourceManager.
 */
@Test
public void testCloseUnestablishedResourceManagerConnection() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		jobMaster.start(JobMasterId.generate()).get();

		final TestingResourceManagerGateway firstRm = createAndRegisterTestingResourceManagerGateway();
		final TestingResourceManagerGateway secondRm = createAndRegisterTestingResourceManagerGateway();

		// each latch fires as soon as the corresponding gateway receives a registration call
		final OneShotLatch registeredAtFirstRm = new OneShotLatch();
		firstRm.setRegisterJobManagerConsumer(registration -> registeredAtFirstRm.trigger());

		final OneShotLatch registeredAtSecondRm = new OneShotLatch();
		secondRm.setRegisterJobManagerConsumer(registration -> registeredAtSecondRm.trigger());

		// notify the leader listeners about the first RM and block until the first
		// registration attempt has been observed
		notifyResourceManagerLeaderListeners(firstRm);
		registeredAtFirstRm.await();

		// switching to the second RM should stop the connection attempts towards the first RM ...
		notifyResourceManagerLeaderListeners(secondRm);

		// ... and the JobMaster should start registering at the second RM
		registeredAtSecondRm.await();
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 5
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, after receiving a disconnection message, the JobMaster keeps
 * reconnecting to the latest known ResourceManager.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final JobMasterGateway selfGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);

	try {
		// block until the JobMaster has been started
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final TestingResourceManagerGateway rmGateway = createAndRegisterTestingResourceManagerGateway();

		// records the JobMasterId (field f0) of every registration call seen by the gateway
		final BlockingQueue<JobMasterId> seenRegistrations = new ArrayBlockingQueue<>(1);
		rmGateway.setRegisterJobManagerConsumer(
			registration -> seenRegistrations.offer(registration.f0));

		final ResourceManagerId rmId = rmGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(rmGateway);

		// first registration attempt
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));

		// no further attempt may be pending before the disconnect is issued
		assertThat(seenRegistrations.isEmpty(), is(true));
		selfGateway.disconnectResourceManager(rmId, new FlinkException("Test exception"));

		// second registration attempt, following the disconnect call
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 6
Source File: JobMasterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a JM connects to the leading RM after regaining leadership.
 *
 * <p>The JobMaster is started, suspended and started again with a new
 * {@link JobMasterId}; the second registration attempt must carry the new id.
 */
@Test
public void testResourceManagerConnectionAfterRegainingLeadership() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);

	try {
		// block until the JobMaster has been started
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final TestingResourceManagerGateway rmGateway = createAndRegisterTestingResourceManagerGateway();

		// records the JobMasterId (field f0) of every registration call seen by the gateway
		final BlockingQueue<JobMasterId> seenRegistrations = new ArrayBlockingQueue<>(1);
		rmGateway.setRegisterJobManagerConsumer(
			registration -> seenRegistrations.offer(registration.f0));

		notifyResourceManagerLeaderListeners(rmGateway);

		// the first attempt is made under the initial JobMasterId
		assertThat(seenRegistrations.take(), equalTo(jobMasterId));

		jobMaster.suspend(new FlinkException("Test exception.")).get();

		// restart the JobMaster under a freshly generated id
		final JobMasterId regainedLeaderId = JobMasterId.generate();
		jobMaster.start(regainedLeaderId).get();

		// the next attempt must be made under the new JobMasterId
		assertThat(seenRegistrations.take(), equalTo(regainedLeaderId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 7
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the JobMaster's behaviour around a heartbeat timeout with the ResourceManager.
 *
 * <p>The test registers a {@link TestingResourceManagerGateway}, announces it as leader and
 * then expects, in order: a registration carrying the JobMaster's id, resource id and job id;
 * a disconnect call for the job (expected to be caused by the heartbeat timeout); and a
 * second registration attempt.
 */
@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
	final String resourceManagerAddress = "rm";
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(
		resourceManagerId,
		rmResourceId,
		resourceManagerAddress,
		"localhost");

	final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	// counts down once per registration call; reaches zero on the second attempt
	final CountDownLatch registrationAttempts = new CountDownLatch(2);

	resourceManagerGateway.setRegisterJobManagerConsumer(tuple -> {
		// only the first registration completes the future; later calls are no-ops on it
		jobManagerRegistrationFuture.complete(
			Tuple3.of(
				tuple.f0,
				tuple.f1,
				tuple.f3));
		registrationAttempts.countDown();
	});

	resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0));

	rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start operation to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// define a leader and see that a registration happens
		rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());

		// register job manager success will trigger monitor heartbeat target between jm and rm
		final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get(
			testingTimeout.toMilliseconds(),
			TimeUnit.MILLISECONDS);

		assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
		assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
		assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// heartbeat timeout should trigger disconnect JobManager from ResourceManager
		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// the JobMaster should try to reconnect to the RM
		registrationAttempts.await();
	} finally {
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			// terminate the endpoint even if shutting down the shared services failed,
			// so that a failing shutdown cannot leak the running JobMaster
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}
 
Example 8
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the JobMaster's behaviour around a heartbeat timeout with the ResourceManager.
 *
 * <p>The test registers a {@link TestingResourceManagerGateway}, announces it as leader and
 * then expects, in order: a registration carrying the JobMaster's id, resource id and job id;
 * a disconnect call for the job (expected to be caused by the heartbeat timeout); and a
 * second registration attempt.
 */
@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
	final String resourceManagerAddress = "rm";
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(
		resourceManagerId,
		rmResourceId,
		resourceManagerAddress,
		"localhost");

	final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	// counts down once per registration call; reaches zero on the second attempt
	final CountDownLatch registrationAttempts = new CountDownLatch(2);

	resourceManagerGateway.setRegisterJobManagerConsumer(tuple -> {
		// only the first registration completes the future; later calls are no-ops on it
		jobManagerRegistrationFuture.complete(
			Tuple3.of(
				tuple.f0,
				tuple.f1,
				tuple.f3));
		registrationAttempts.countDown();
	});

	resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0));

	rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start operation to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// define a leader and see that a registration happens
		rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());

		// register job manager success will trigger monitor heartbeat target between jm and rm
		final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get(
			testingTimeout.toMilliseconds(),
			TimeUnit.MILLISECONDS);

		assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
		assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
		assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// heartbeat timeout should trigger disconnect JobManager from ResourceManager
		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// the JobMaster should try to reconnect to the RM
		registrationAttempts.await();
	} finally {
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			// terminate the endpoint even if shutting down the shared services failed,
			// so that a failing shutdown cannot leak the running JobMaster
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}