Java Code Examples for org.apache.flink.runtime.rpc.RpcUtils#terminateRpcEndpoint()

The following examples show how to use org.apache.flink.runtime.rpc.RpcUtils#terminateRpcEndpoint(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: DispatcherTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Releases the per-test resources: rethrows any error recorded by
 * {@code fatalErrorHandler}, terminates the dispatcher and then closes the
 * HA services and the blob server.
 *
 * <p>The cleanup steps are nested in {@code finally} blocks so that a failure in an
 * earlier step (for example a rethrown fatal error) does not prevent the remaining
 * resources from being released.
 *
 * @throws Exception if a fatal error was recorded or any cleanup step fails
 */
@After
public void tearDown() throws Exception {
	try {
		fatalErrorHandler.rethrowError();
	} finally {
		try {
			if (dispatcher != null) {
				RpcUtils.terminateRpcEndpoint(dispatcher, TIMEOUT);
			}
		} finally {
			try {
				if (haServices != null) {
					haServices.closeAndCleanupAllData();
				}
			} finally {
				if (blobServer != null) {
					blobServer.close();
				}
			}
		}
	}
}
 
Example 2
Source File: AkkaRpcActorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that the {@link AkkaRpcActor} only completes after the asynchronous
 * post stop action has completed.
 *
 * <p>The endpoint is closed asynchronously while {@code onStopFuture} is still
 * pending; the termination future must not be done until that future is completed.
 *
 * @throws Exception if starting, stopping or terminating the endpoint fails
 */
@Test
public void testActorTerminationWithAsynchronousOnStopAction() throws Exception {
	final CompletableFuture<Void> onStopFuture = new CompletableFuture<>();
	final AsynchronousOnStopEndpoint endpoint = new AsynchronousOnStopEndpoint(akkaRpcService, onStopFuture);

	try {
		endpoint.start();

		final CompletableFuture<Void> terminationFuture = endpoint.closeAsync();

		// termination must still be pending while the on-stop future is incomplete
		assertFalse(terminationFuture.isDone());

		onStopFuture.complete(null);

		// the onStopFuture completion should allow the endpoint to terminate
		terminationFuture.get();
	} finally {
		// Release the on-stop action even if an assertion above failed; otherwise the
		// termination below could block until the timeout expires and mask the original
		// failure. complete() has no effect when the future is already completed.
		onStopFuture.complete(null);
		RpcUtils.terminateRpcEndpoint(endpoint, timeout);
	}
}
 
Example 3
Source File: AkkaRpcActorHandshakeTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that an endpoint started on the first RPC service can be reached through
 * a gateway obtained from the second RPC service and answers with the configured value.
 */
@Test
public void testVersionMatchBetweenRpcComponents() throws Exception {
	final int expectedFoobar = 42;

	AkkaRpcActorTest.DummyRpcEndpoint endpoint = new AkkaRpcActorTest.DummyRpcEndpoint(akkaRpcService1);
	endpoint.setFoobar(expectedFoobar);
	endpoint.start();

	try {
		// connect from the second service to the endpoint hosted by the first one
		final AkkaRpcActorTest.DummyRpcGateway remoteGateway = akkaRpcService2
			.connect(endpoint.getAddress(), AkkaRpcActorTest.DummyRpcGateway.class)
			.get();

		assertThat(remoteGateway.foobar().get(), equalTo(expectedFoobar));
	} finally {
		RpcUtils.terminateRpcEndpoint(endpoint, timeout);
	}
}
 
Example 4
Source File: AkkaRpcActorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an {@link AkkaRpcActor} drops messages for as long as its
 * {@link RpcEndpoint} has not been started, and answers them once it has.
 */
@Test
public void testMessageDiscarding() throws Exception {
	final int foobarValue = 1337;

	final DummyRpcEndpoint endpoint = new DummyRpcEndpoint(akkaRpcService);
	final DummyRpcGateway selfGateway = endpoint.getSelfGateway(DummyRpcGateway.class);

	// sent before start(): expected to be dropped and answered with an AkkaRpcException
	final CompletableFuture<Integer> discardedCall = selfGateway.foobar();

	try {
		discardedCall.get(timeout.getSize(), timeout.getUnit());
		fail("Expected an AkkaRpcException.");
	} catch (ExecutionException expected) {
		// the endpoint was not running yet, so the call must have failed
		final boolean causedByRpcException = expected.getCause() instanceof AkkaRpcException;
		assertTrue(causedByRpcException);
	}

	// value the started endpoint is supposed to hand back
	endpoint.setFoobar(foobarValue);

	// from here on the endpoint processes incoming messages
	endpoint.start();

	try {
		// repeat the call, which should now be answered
		final Integer returnedValue = selfGateway
			.foobar()
			.get(timeout.getSize(), timeout.getUnit());

		assertThat("The new foobar value should have been returned.", returnedValue, Is.is(foobarValue));
	} finally {
		RpcUtils.terminateRpcEndpoint(endpoint, timeout);
	}
}
 
Example 5
Source File: JobMasterPartitionReleaseTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Terminates the job master, if one exists, and deletes the temporary folder
 * regardless of whether the termination succeeded.
 */
public void close() throws Exception {
	if (jobMaster == null) {
		// nothing to terminate, only the folder has to go
		temporaryFolder.delete();
		return;
	}

	try {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	} finally {
		temporaryFolder.delete();
	}
}
 
Example 6
Source File: AkkaRpcActorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that multiple termination calls won't trigger the onStop action multiple times.
 * Note that this test is a probabilistic test which only fails sometimes without the fix.
 * See FLINK-16703.
 *
 * <p>The test sends two {@code TERMINATE} control messages to the endpoint's actor,
 * calls {@code waitUntilOnStopHasBeenCalled()}, completes {@code onStopFuture}, waits
 * for the termination future and finally asserts that exactly one onStop call was counted.
 */
@Test
public void callsOnStopOnlyOnce() throws Exception {
	final CompletableFuture<Void> onStopFuture = new CompletableFuture<>();
	final OnStopCountingRpcEndpoint endpoint = new OnStopCountingRpcEndpoint(akkaRpcService, onStopFuture);

	try {
		endpoint.start();

		final AkkaBasedEndpoint selfGateway = endpoint.getSelfGateway(AkkaBasedEndpoint.class);

		// try to terminate the actor twice
		selfGateway.getActorRef().tell(ControlMessages.TERMINATE, ActorRef.noSender());
		selfGateway.getActorRef().tell(ControlMessages.TERMINATE, ActorRef.noSender());

		endpoint.waitUntilOnStopHasBeenCalled();

		// presumably lets the pending onStop action finish -- depends on
		// OnStopCountingRpcEndpoint, confirm there
		onStopFuture.complete(null);

		// block until the endpoint reports termination before inspecting the counter
		endpoint.getTerminationFuture().get();

		assertThat(endpoint.getNumOnStopCalls(), is(1));
	} finally {
		// completing again has no effect if already completed; it covers the case
		// where the try block failed before reaching its own complete() call
		onStopFuture.complete(null);
		RpcUtils.terminateRpcEndpoint(endpoint, timeout);
	}
}
 
Example 7
Source File: MiniDispatcherTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@link MiniDispatcher} running in {@link ClusterEntrypoint.ExecutionMode#NORMAL}
 * has not terminated before the {@link org.apache.flink.runtime.jobmaster.JobResult}
 * was requested, and that the served result belongs to the submitted job.
 */
@Test
public void testJobResultRetrieval() throws Exception {
	final MiniDispatcher dispatcherUnderTest = createMiniDispatcher(ClusterEntrypoint.ExecutionMode.NORMAL);
	dispatcherUnderTest.start();

	try {
		// block until the Dispatcher has become the leader
		final UUID leaderSessionId = UUID.randomUUID();
		dispatcherLeaderElectionService.isLeader(leaderSessionId).get();

		// block until the job has been submitted
		jobGraphFuture.get();

		resultFuture.complete(archivedExecutionGraph);

		// the result has not been requested yet, so termination must still be pending
		final boolean alreadyTerminated = dispatcherUnderTest.getTerminationFuture().isDone();
		assertFalse(alreadyTerminated);

		final JobResult servedResult = dispatcherUnderTest
			.getSelfGateway(DispatcherGateway.class)
			.requestJobResult(jobGraph.getJobID(), timeout)
			.get();

		assertThat(servedResult.getJobId(), is(jobGraph.getJobID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(dispatcherUnderTest, timeout);
	}
}
 
Example 8
Source File: DispatcherTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Releases the per-test resources: terminates the dispatcher, then closes the
 * HA services and the blob server.
 *
 * <p>The steps are nested in {@code finally} blocks so that a failing dispatcher
 * termination (for example a timeout) does not leave the HA services or the blob
 * server open.
 *
 * @throws Exception if any cleanup step fails
 */
@After
public void tearDown() throws Exception {
	try {
		if (dispatcher != null) {
			RpcUtils.terminateRpcEndpoint(dispatcher, TIMEOUT);
		}
	} finally {
		try {
			if (haServices != null) {
				haServices.closeAndCleanupAllData();
			}
		} finally {
			if (blobServer != null) {
				blobServer.close();
			}
		}
	}
}
 
Example 9
Source File: ResourceManagerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Releases the per-test resources: terminates the resource manager, cleans up the
 * high availability services and rethrows any error recorded by
 * {@code testingFatalErrorHandler}.
 *
 * <p>The steps are nested in {@code finally} blocks so that a failing termination
 * neither leaks the HA services nor hides a recorded fatal error check.
 *
 * @throws Exception if any cleanup step fails or a fatal error was recorded
 */
@After
public void after() throws Exception {
	try {
		if (resourceManager != null) {
			RpcUtils.terminateRpcEndpoint(resourceManager, TIMEOUT);
		}
	} finally {
		try {
			if (highAvailabilityServices != null) {
				highAvailabilityServices.closeAndCleanupAllData();
			}
		} finally {
			if (testingFatalErrorHandler.hasExceptionOccurred()) {
				testingFatalErrorHandler.rethrowError();
			}
		}
	}
}
 
Example 10
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a job is removed from the JobLeaderService once a TaskExecutor has
 * no more slots assigned to this job.
 *
 * <p>The test requests a slot for {@code jobId}, checks that the job leader service
 * then contains the job, frees the slot again and checks that the job is gone.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Collections.singleton(ResourceProfile.UNKNOWN),
		timerService);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices);

	try {
		final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
		// completed by the gateway callback below when a slot report is sent
		final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>();
		resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
			initialSlotReport.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken();

		rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID());

		// futures handed to the leader retrieval service registered for jobId below
		final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>();
		final CompletableFuture<Void> stopFuture = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService(
			startFuture,
			stopFuture);
		haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
		final AllocationID allocationId = new AllocationID();

		// before any slot is requested the job must be unknown to the job leader service
		assertThat(startFuture.isDone(), is(false));
		final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService();
		assertThat(jobLeaderService.containsJob(jobId), is(false));

		// wait for the initial slot report
		initialSlotReport.get();

		taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			"foobar",
			resourceManagerId,
			timeout).get();

		// wait until the job leader retrieval service for jobId is started
		startFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(true));

		taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get();

		// wait until the job leader retrieval service for jobId has stopped, because it should get removed
		stopFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 11
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that registering the same KvState name for a second vertex is rejected
 * with a "Registration name clash" error and that the job status is reported as
 * {@code FAILED} afterwards.
 */
@Test
public void testDuplicatedKvStateRegistrationsFailTask() throws Exception {
	final JobGraph kvJobGraph = createKvJobGraph();
	final List<JobVertex> sortedVertices = kvJobGraph.getVerticesSortedTopologicallyFromSources();
	final JobVertex firstVertex = sortedVertices.get(0);
	final JobVertex secondVertex = sortedVertices.get(1);

	final JobMaster jobMaster = createJobMaster(
		configuration,
		kvJobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices);

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);
	final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	try {
		// block until the JobMaster start has completed
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// parameters shared by both registration attempts
		final String sharedName = "duplicate-me";
		final KvStateID stateId = new KvStateID();
		final KeyGroupRange range = new KeyGroupRange(0, 0);
		final InetSocketAddress serverAddress = new InetSocketAddress(InetAddress.getLocalHost(), 4396);

		// first registration, for the first vertex
		gateway.notifyKvStateRegistered(
			kvJobGraph.getJobID(),
			firstVertex.getID(),
			range,
			sharedName,
			stateId,
			serverAddress).get();

		try {
			// second registration: other vertex, identical name
			gateway.notifyKvStateRegistered(
				kvJobGraph.getJobID(),
				secondVertex.getID(),
				range,
				sharedName,
				stateId,
				serverAddress).get();
			fail("Expected to fail because of clashing registration message.");
		} catch (Exception clash) {
			final boolean mentionsNameClash =
				ExceptionUtils.findThrowableWithMessage(clash, "Registration name clash").isPresent();
			assertTrue(mentionsNameClash);
			assertEquals(JobStatus.FAILED, gateway.requestJobStatus(testingTimeout).get());
		}
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 12
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises the JobMaster against a testing ResourceManager gateway: after the
 * JobMaster has registered, the test waits for the ResourceManager to be told that
 * the JobManager disconnected and then for a second registration attempt.
 *
 * <p>NOTE(review): the method name attributes the disconnect to a heartbeat timeout;
 * the heartbeat settings are configured outside this method -- confirm there.
 *
 * @throws Exception if any awaited future fails or times out, or cleanup fails
 */
@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
	final String resourceManagerAddress = "rm";
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(
		resourceManagerId,
		rmResourceId,
		resourceManagerAddress,
		"localhost");

	final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	// counted down once per registration call; two calls mean a reconnect happened
	final CountDownLatch registrationAttempts = new CountDownLatch(2);

	resourceManagerGateway.setRegisterJobManagerConsumer(tuple -> {
		jobManagerRegistrationFuture.complete(
			Tuple3.of(
				tuple.f0,
				tuple.f1,
				tuple.f3));
		registrationAttempts.countDown();
	});

	resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0));

	rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start operation to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// define a leader and see that a registration happens
		rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());

		// register job manager success will trigger monitor heartbeat target between jm and rm
		final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get(
			testingTimeout.toMilliseconds(),
			TimeUnit.MILLISECONDS);

		assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
		assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
		assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		// heartbeat timeout should trigger disconnect JobManager from ResourceManager
		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// the JobMaster should try to reconnect to the RM
		// NOTE(review): this wait is unbounded, unlike the timed waits above
		registrationAttempts.await();
	} finally {
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			// terminate the JobMaster even if shutting down the shared services failed
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}
 
Example 13
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code updateGlobalAggregate} on the JobMaster gateway with a sequence of
 * updates to two named aggregates and checks the value returned after each update.
 */
@Test
public void testJobMasterAggregatesValuesCorrectly() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices);

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);
	final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	try {
		// block until the JobMaster start has completed
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final AggregateFunction<Integer, Integer, Integer> function = createAggregateFunction();
		ClosureCleaner.clean(function, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true);
		final byte[] serializedFunction = InstantiationUtil.serializeObject(function);

		// one column per update: aggregate name, value sent, value expected back
		final String[] aggregateNames = {"agg1", "agg1", "agg1", "agg1", "agg2", "agg2"};
		final int[] updates = {1, 2, 3, 4, 10, 23};
		final int[] expectedResults = {1, 3, 6, 10, 10, 33};

		for (int i = 0; i < updates.length; i++) {
			final CompletableFuture<Object> updateFuture =
				gateway.updateGlobalAggregate(aggregateNames[i], updates[i], serializedFunction);
			assertThat(updateFuture.get(), equalTo(expectedResults[i]));
		}
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 14
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises the JobMaster against a testing TaskExecutor gateway: after the task
 * manager has been registered, the test waits for the gateway to be told that the
 * JobManager disconnected and checks the job id it was told about.
 *
 * <p>NOTE(review): the method name attributes the disconnect to a heartbeat timeout;
 * the heartbeat settings are configured outside this method -- confirm there.
 *
 * @throws Exception if any awaited future fails or times out, or cleanup fails
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
	final CompletableFuture<ResourceID> heartbeatResourceIdFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, ignored) -> heartbeatResourceIdFuture.complete(taskManagerId))
		.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId))
		.createTestingTaskExecutorGateway();

	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			unresolvedTaskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration
		registrationResponse.get();

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// a heartbeat may or may not have been received by now, so null is accepted too
		final ResourceID heartbeatResourceId = heartbeatResourceIdFuture.getNow(null);

		assertThat(heartbeatResourceId, anyOf(nullValue(), equalTo(jmResourceId)));
	} finally {
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			// terminate the JobMaster even if shutting down the shared services failed
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}
 
Example 15
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that, after the JobMaster has been told to disconnect from the
 * ResourceManager, it registers again at the most recently announced one.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices);

	final JobMasterGateway selfGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

	final CompletableFuture<Acknowledge> started = jobMaster.start(jobMasterId);

	try {
		// block until the JobMaster start has completed
		started.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final TestingResourceManagerGateway rmGateway = createAndRegisterTestingResourceManagerGateway();

		// capacity of one: holds at most a single not-yet-consumed registration
		final BlockingQueue<JobMasterId> registrations = new ArrayBlockingQueue<>(1);

		rmGateway.setRegisterJobManagerFunction((registeringJobMasterId, resourceID, address, jobID) -> {
			registrations.offer(registeringJobMasterId);
			return CompletableFuture.completedFuture(rmGateway.getJobMasterRegistrationSuccess());
		});

		final ResourceManagerId rmId = rmGateway.getFencingToken();
		notifyResourceManagerLeaderListeners(rmGateway);

		// first registration attempt
		final JobMasterId firstAttempt = registrations.take();
		assertThat(firstAttempt, equalTo(jobMasterId));

		// nothing else may be queued before the disconnect is issued
		assertThat(registrations.isEmpty(), is(true));
		selfGateway.disconnectResourceManager(rmId, new FlinkException("Test exception"));

		// second registration attempt, triggered by the disconnect call
		final JobMasterId secondAttempt = registrations.take();
		assertThat(secondAttempt, equalTo(jobMasterId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example 16
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that offers slots to job master timeout and retry.
 *
 * <p>The testing job master gateway answers the first two slot offers with a
 * {@link TimeoutException} and accepts the third one; the test then checks that the
 * accepted offer carries the requested allocation id and that slot index 1 is free.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final AllocationID allocationId = new AllocationID();

	// completed (with null) by the gateway callback below when a slot report is sent
	final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>();

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());

	});
	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// counts slot offers; the offer that brings the count to zero is the accepted one
	final CountDownLatch slotOfferings = new CountDownLatch(3);
	final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			slotOfferings.countDown();

			if (slotOfferings.getCount() == 0) {
				// third offer: record the allocation id and accept all offered slots
				offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId());
				return CompletableFuture.completedFuture(slotOffers);
			} else {
				// earlier offers: answer with a failed future carrying a TimeoutException
				return FutureUtils.completedExceptionally(new TimeoutException());
			}
		})
		.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait for the connection to the ResourceManager
		initialSlotReportFuture.get();

		taskExecutorGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			allocationId,
			jobManagerAddress,
			testingResourceManagerGateway.getFencingToken(),
			timeout).get();

		// returns once three offers have reached the job master gateway
		slotOfferings.await();

		assertThat(offeredSlotFuture.get(), is(allocationId));
		// only slot index 0 was requested, so index 1 must still be free
		assertTrue(taskSlotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 17
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that slot offers to the job master are retried after they timed out:
 * the first two offers are answered with a {@link TimeoutException}, the third one
 * is accepted and must carry the requested allocation id.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService);
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	final AllocationID requestedAllocation = new AllocationID();

	// signals that a slot report has reached the ResourceManager gateway
	final CompletableFuture<ResourceID> slotReportReceived = new CompletableFuture<>();

	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(slotReport -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	// three offers in total: two rejected ones followed by the accepted one
	final CountDownLatch remainingOffers = new CountDownLatch(3);
	final CompletableFuture<AllocationID> acceptedAllocation = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			remainingOffers.countDown();

			if (remainingOffers.getCount() != 0) {
				// not the last expected offer yet: answer with a timeout
				return FutureUtils.completedExceptionally(new TimeoutException());
			}

			acceptedAllocation.complete(slotOffers.iterator().next().getAllocationId());
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();
	final String jobMasterAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobMasterAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// block until the ResourceManager gateway has seen a slot report
		slotReportReceived.get();

		selfGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			requestedAllocation,
			jobMasterAddress,
			rmGateway.getFencingToken(),
			timeout).get();

		remainingOffers.await();

		assertThat(acceptedAllocation.get(), is(requestedAllocation));
		// slot index 1 was never requested
		assertTrue(slotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 18
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Starts a TaskExecutor with a registration timeout of "100 ms" against a testing
 * ResourceManager gateway that accepts only the first registration and declines
 * every later one, then expects the fatal error handler to receive a
 * {@link RegistrationTimeoutException}.
 *
 * <p>NOTE(review): the method name suggests the second registration follows a
 * connection loss caused by the short heartbeat settings -- confirm against
 * {@code TaskExecutor} and {@code HeartbeatServices}.
 */
@Test
public void testMaximumRegistrationDurationAfterConnectionLoss() throws Exception {
	configuration.setString(TaskManagerOptions.REGISTRATION_TIMEOUT, "100 ms");
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

	final long heartbeatInterval = 10L;
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();
	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		TaskManagerConfiguration.fromConfiguration(configuration),
		haServices,
		taskManagerServices,
		new HeartbeatServices(heartbeatInterval, 10L),
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	taskExecutor.start();

	// completed by the first registration call; later calls find it already completed
	final CompletableFuture<ResourceID> registrationFuture = new CompletableFuture<>();
	// triggered when a registration call arrives after the first one
	final OneShotLatch secondRegistration = new OneShotLatch();
	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
		testingResourceManagerGateway.setRegisterTaskExecutorFunction(
			tuple -> {
				// complete() returns true only for the call that actually completes the future
				if (registrationFuture.complete(tuple.f1)) {
					return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(
						new InstanceID(),
						testingResourceManagerGateway.getOwnResourceId(),
						new ClusterInformation("localhost", 1234)));
				} else {
					secondRegistration.trigger();
					return CompletableFuture.completedFuture(new RegistrationResponse.Decline("Only the first registration should succeed."));
				}
			}
		);
		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

		resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), UUID.randomUUID());

		final ResourceID registrationResourceId = registrationFuture.get();

		assertThat(registrationResourceId, equalTo(taskManagerServices.getTaskManagerLocation().getResourceID()));

		secondRegistration.await();

		// the error reported to the fatal error handler must be a registration timeout
		final Throwable error = testingFatalErrorHandler.getErrorFuture().get();
		assertThat(error, is(notNullValue()));
		assertThat(ExceptionUtils.stripExecutionException(error), instanceOf(RegistrationTimeoutException.class));

		// the error was expected, so clear it from the handler
		testingFatalErrorHandler.clearError();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 19
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a TaskExecutor registers again at the ResourceManager after it has
 * been explicitly told to disconnect from it.
 */
@Test
public void testReconnectionAttemptIfExplicitlyDisconnected() throws Exception {
	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(1);
	final UnresolvedTaskManagerLocation location = new LocalUnresolvedTaskManagerLocation();
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.setUnresolvedTaskManagerLocation(location)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(services);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
		final ClusterInformation clusterInfo = new ClusterInformation("foobar", 1234);

		// every registration call is answered with this same, already completed success
		final TaskExecutorRegistrationSuccess success =
			new TaskExecutorRegistrationSuccess(new InstanceID(), ResourceID.generate(), clusterInfo);
		final CompletableFuture<RegistrationResponse> cannedResponse = CompletableFuture.completedFuture(success);

		// capacity of one: holds at most a single not-yet-consumed registration
		final BlockingQueue<ResourceID> registrations = new ArrayBlockingQueue<>(1);

		rmGateway.setRegisterTaskExecutorFunction(registration -> {
			registrations.offer(registration.getResourceId());
			return cannedResponse;
		});
		rpc.registerGateway(rmGateway.getAddress(), rmGateway);

		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

		// first registration attempt
		final ResourceID firstAttempt = registrations.take();
		assertThat(firstAttempt, equalTo(location.getResourceID()));

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// nothing else may be queued before the disconnect is issued
		assertThat(registrations, is(empty()));

		selfGateway.disconnectResourceManager(new FlinkException("Test exception"));

		// second registration attempt, expected after the explicit disconnect
		final ResourceID secondAttempt = registrations.take();
		assertThat(secondAttempt, equalTo(location.getResourceID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 20
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that offers slots to job master timeout and retry.
 *
 * <p>The testing job master gateway answers the first two slot offers with a
 * {@link TimeoutException} and accepts the third one; the test then checks that the
 * accepted offer carries the requested allocation id and that slot index 1 is free.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final AllocationID allocationId = new AllocationID();

	// completed (with null) by the gateway callback below when a slot report is sent
	final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>();

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());

	});
	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// counts slot offers; the offer that brings the count to zero is the accepted one
	final CountDownLatch slotOfferings = new CountDownLatch(3);
	final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			slotOfferings.countDown();

			if (slotOfferings.getCount() == 0) {
				// third offer: record the allocation id and accept all offered slots
				offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId());
				return CompletableFuture.completedFuture(slotOffers);
			} else {
				// earlier offers: answer with a failed future carrying a TimeoutException
				return FutureUtils.completedExceptionally(new TimeoutException());
			}
		})
		.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait for the connection to the ResourceManager
		initialSlotReportFuture.get();

		taskExecutorGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			allocationId,
			ResourceProfile.ZERO,
			jobManagerAddress,
			testingResourceManagerGateway.getFencingToken(),
			timeout).get();

		// returns once three offers have reached the job master gateway
		slotOfferings.await();

		assertThat(offeredSlotFuture.get(), is(allocationId));
		// only slot index 0 was requested, so index 1 must still be free
		assertTrue(taskSlotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}