Java Code Examples for org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder

The following examples show how to use org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TestingJobMasterGateway} whose slot-offer and task-state-update callbacks
 * signal the supplied latches.
 *
 * @param offerSlotsLatch triggered every time slots are offered to the gateway
 * @param taskInTerminalState triggered whenever a reported task execution state is terminal
 * @param offerResultFuture future handed back as the result of every slot offer
 * @return the configured testing gateway
 */
private static TestingJobMasterGateway createJobMasterWithSlotOfferAndTaskTerminationHooks(
		OneShotLatch offerSlotsLatch,
		OneShotLatch taskInTerminalState,
		CompletableFuture<Collection<SlotOffer>> offerResultFuture) {
	return new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredResourceId, ignoredSlotOffers) -> {
			// the offer itself is not inspected; the caller controls the outcome via the future
			offerSlotsLatch.trigger();
			return offerResultFuture;
		})
		.setUpdateTaskExecutionStateFunction(reportedState -> {
			final boolean reachedTerminalState = reportedState.getExecutionState().isTerminal();
			if (reachedTerminalState) {
				taskInTerminalState.trigger();
			}
			// every state update is acknowledged, terminal or not
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.build();
}
 
Example 2
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wires up a {@link TaskExecutorTestingContext} around the given slot table.
 *
 * <p>A testing job master gateway that accepts every offered slot is registered with the RPC
 * service, a testing task executor is created, and the gateway is announced through the job
 * manager leader retriever before the context is returned.
 *
 * @param taskSlotTable slot table handed to the task executor under test
 * @return context bundling the job master gateway, the slot table and the task executor
 * @throws IOException declared by the signature; NOTE(review): presumably raised while creating
 *     the local state stores manager - confirm
 */
private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException {
	final OneShotLatch slotsOfferedLatch = new OneShotLatch();
	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredResourceId, offeredSlots) -> {
			slotsOfferedLatch.trigger();
			// accept everything that was offered
			return CompletableFuture.completedFuture(offeredSlots);
		})
		.build();
	final String gatewayAddress = gateway.getAddress();
	rpc.registerGateway(gatewayAddress, gateway);

	final JobLeaderService leaderService = new DefaultJobLeaderService(
		unresolvedTaskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	final TaskExecutorLocalStateStoresManager localStateStores = createTaskExecutorLocalStateStoresManager();
	final TestingTaskExecutor executor = createTestingTaskExecutor(
		new TaskManagerServicesBuilder()
			.setTaskSlotTable(taskSlotTable)
			.setJobLeaderService(leaderService)
			.setTaskStateManager(localStateStores)
			.build());

	// announce the testing gateway as job manager leader
	jobManagerLeaderRetriever.notifyListener(gatewayAddress, gateway.getFencingToken().toUUID());
	return new TaskExecutorTestingContext(gateway, taskSlotTable, executor);
}
 
Example 3
Source Project: Flink-CEPplus   Source File: ResourceManagerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a testing job master at the resource manager and hands two callbacks to
 * {@code runHeartbeatTimeoutTest}: one performing the registration, one verifying that the
 * job master was asked to disconnect from this resource manager.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatOriginFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectedFromFuture = new CompletableFuture<>();
	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatOriginFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectedFromFuture::complete)
		.build();
	final String gatewayAddress = gateway.getAddress();
	rpcService.registerGateway(gatewayAddress, gateway);

	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService leaderRetrieval = new SettableLeaderRetrievalService(
		gatewayAddress,
		gateway.getFencingToken().toUUID());

	// only the job id created above may be looked up
	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return leaderRetrieval;
	});

	runHeartbeatTimeoutTest(
		rmGateway -> {
			final CompletableFuture<RegistrationResponse> registration = rmGateway.registerJobManager(
				gateway.getFencingToken(),
				jobMasterResourceId,
				gatewayAddress,
				jobId,
				TIMEOUT);

			assertThat(registration.get(), instanceOf(RegistrationResponse.Success.class));
		},
		rmResourceId -> {
			// the heartbeat request may or may not have arrived before the timeout fired
			final ResourceID heartbeatOriginOrNull = heartbeatOriginFuture.getNow(null);
			assertThat(heartbeatOriginOrNull, anyOf(is(rmResourceId), is(nullValue())));
			assertThat(disconnectedFromFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example 4
Source Project: flink   Source File: ResourceManagerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a testing job master at the resource manager and hands two callbacks to
 * {@code runHeartbeatTimeoutTest}: one performing the registration, one verifying that the
 * job master was asked to disconnect from this resource manager.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatOriginFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectedFromFuture = new CompletableFuture<>();
	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatOriginFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectedFromFuture::complete)
		.build();
	final String gatewayAddress = gateway.getAddress();
	rpcService.registerGateway(gatewayAddress, gateway);

	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService leaderRetrieval = new SettableLeaderRetrievalService(
		gatewayAddress,
		gateway.getFencingToken().toUUID());

	// only the job id created above may be looked up
	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return leaderRetrieval;
	});

	runHeartbeatTimeoutTest(
		rmGateway -> {
			final CompletableFuture<RegistrationResponse> registration = rmGateway.registerJobManager(
				gateway.getFencingToken(),
				jobMasterResourceId,
				gatewayAddress,
				jobId,
				TIMEOUT);

			assertThat(registration.get(), instanceOf(RegistrationResponse.Success.class));
		},
		rmResourceId -> {
			// the heartbeat request may or may not have arrived before the timeout fired
			final ResourceID heartbeatOriginOrNull = heartbeatOriginFuture.getNow(null);
			assertThat(heartbeatOriginOrNull, anyOf(is(rmResourceId), is(nullValue())));
			assertThat(disconnectedFromFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example 5
Source Project: flink   Source File: DefaultJobLeaderServiceTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds a job to the job leader service, removes it again, and then publishes a leader through
 * the (failing) leader retrieval service. The job leader listener must not be notified about
 * the new leader within the short wait below.
 */
@Test
public void removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications() throws Exception {
	final FailingSettableLeaderRetrievalService failingRetrievalService = new FailingSettableLeaderRetrievalService();
	final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder()
		.setJobMasterLeaderRetrieverFunction(ignoredJobId -> failingRetrievalService)
		.build();

	final JobID jobId = new JobID();
	final CompletableFuture<JobID> leaderNotificationFuture = new CompletableFuture<>();
	final TestingJobLeaderListener leaderListener = new TestingJobLeaderListener(leaderNotificationFuture::complete);

	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder().build();
	final String gatewayAddress = gateway.getAddress();
	rpcServiceResource.getTestingRpcService().registerGateway(gatewayAddress, gateway);

	final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, leaderListener);

	try {
		jobLeaderService.addJob(jobId, "foobar");

		jobLeaderService.removeJob(jobId);

		// publish a leader after the job was removed
		failingRetrievalService.notifyListener(gatewayAddress, gateway.getFencingToken().toUUID());

		try {
			leaderNotificationFuture.get(10, TimeUnit.MILLISECONDS);
			fail("The leader future should not be completed.");
		} catch (TimeoutException expected) {
			// timing out is the success path: the listener was not notified
		}
	} finally {
		jobLeaderService.stop();
	}
}
 
Example 6
/**
 * Creates a task submission test environment, submits a single task to it and blocks until
 * that task has been reported as {@code RUNNING}.
 *
 * <p>The job master gateway used by the environment throws a {@link RuntimeException} from its
 * operator event sender.
 *
 * @param jobId job the task belongs to
 * @param executionAttemptId execution attempt the running-state listener is registered for
 * @param invokableClass invokable class passed to the task deployment descriptor factory
 * @return the environment hosting the running task
 * @throws Exception if setup, submission or waiting for the running state fails
 */
private TaskSubmissionTestEnvironment createExecutorWithRunningTask(
		JobID jobId,
		ExecutionAttemptID executionAttemptId,
		Class<? extends AbstractInvokable> invokableClass) throws Exception {

	final TaskDeploymentDescriptor deploymentDescriptor = createTaskDeploymentDescriptor(
			jobId, executionAttemptId, invokableClass);

	final CompletableFuture<Void> runningFuture = new CompletableFuture<>();

	final JobMasterId fencingToken = JobMasterId.generate();
	final TaskSubmissionTestEnvironment environment = new TaskSubmissionTestEnvironment.Builder(jobId)
			.setJobMasterId(fencingToken)
			.setSlotSize(1)
			.addTaskManagerActionListener(executionAttemptId, ExecutionState.RUNNING, runningFuture)
			.setMetricQueryServiceAddress(metricRegistry.getMetricQueryServiceGatewayRpcAddress())
			.setJobMasterGateway(new TestingJobMasterGatewayBuilder()
				.setFencingTokenSupplier(() -> fencingToken)
				.setOperatorEventSender((ignoredAttemptId, ignoredOperatorId, ignoredEvent) -> {
					// sending an operator event always fails
					throw new RuntimeException();
				})
				.build())
			.build();

	// reserve slot 0 for the task before submitting it
	environment.getTaskSlotTable().allocateSlot(0, jobId, deploymentDescriptor.getAllocationId(), Time.seconds(60));

	final TaskExecutorGateway taskExecutorGateway = environment.getTaskExecutorGateway();
	taskExecutorGateway.submitTask(deploymentDescriptor, environment.getJobMasterId(), Time.seconds(10)).get();
	runningFuture.get();

	return environment;
}
 
Example 7
Source Project: flink   Source File: DefaultJobTableTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Connects the given job using a default testing job master gateway and freshly created
 * no-op / testing collaborators for every other argument.
 *
 * @param job job to connect
 * @param resourceId resource id passed as the first argument of {@code job.connect}
 * @return the connection returned by {@code job.connect}
 */
private JobTable.Connection connectJob(JobTable.Job job, ResourceID resourceId) {
	return job.connect(
			resourceId,
			new TestingJobMasterGatewayBuilder().build(),
			new NoOpTaskManagerActions(),
			NoOpCheckpointResponder.INSTANCE,
			new TestGlobalAggregateManager(),
			new NoOpResultPartitionConsumableNotifier(),
			new NoOpPartitionProducerStateChecker());
}
 
Example 8
Source Project: flink   Source File: ResourceManagerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a testing job master at the resource manager and hands two callbacks to
 * {@code runHeartbeatTimeoutTest}: one performing the registration, one verifying that the
 * job master was asked to disconnect from this resource manager.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatOriginFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectedFromFuture = new CompletableFuture<>();
	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatOriginFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectedFromFuture::complete)
		.build();
	final String gatewayAddress = gateway.getAddress();
	rpcService.registerGateway(gatewayAddress, gateway);

	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService leaderRetrieval = new SettableLeaderRetrievalService(
		gatewayAddress,
		gateway.getFencingToken().toUUID());

	// only the job id created above may be looked up
	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return leaderRetrieval;
	});

	runHeartbeatTimeoutTest(
		rmGateway -> {
			final CompletableFuture<RegistrationResponse> registration = rmGateway.registerJobManager(
				gateway.getFencingToken(),
				jobMasterResourceId,
				gatewayAddress,
				jobId,
				TIMEOUT);

			assertThat(registration.get(), instanceOf(RegistrationResponse.Success.class));
		},
		rmResourceId -> {
			// the heartbeat request may or may not have arrived before the timeout fired
			final ResourceID heartbeatOriginOrNull = heartbeatOriginFuture.getNow(null);
			assertThat(heartbeatOriginOrNull, anyOf(is(rmResourceId), is(nullValue())));
			assertThat(disconnectedFromFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example 9
Source Project: Flink-CEPplus   Source File: TestingJobMasterService.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the current job master gateway with a freshly built default testing gateway and
 * then delegates to {@code startFunction}.
 *
 * @param jobMasterId id forwarded unchanged to {@code startFunction}
 * @return whatever {@code startFunction} returns for the given id
 */
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
		// a new gateway instance is created on every call
		jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
		return startFunction.apply(jobMasterId);
}
 
Example 10
Source Project: Flink-CEPplus   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting
 * the job leader, it will offer all reserved slots to the JobManager.
 *
 * <p>Flow: start the task executor, announce the resource manager leader, wait for the initial
 * slot report, request one slot, then announce the job master leader and verify that exactly
 * the requested allocation is offered to the job master.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jobManagerTable = new JobManagerTable();
	final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
	// completed as soon as the resource manager gateway receives a slot report
	CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
	resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	// captures a copy of the slots offered to the job master; every offer is accepted in full
	final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {

			offeredSlotsFuture.complete(new ArrayList<>(slotOffers));
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
	rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final AllocationID allocationId = new AllocationID();
	final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setJobManagerTable(jobManagerTable)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(localStateStoresManager)
		.build();

	TaskExecutor taskManager = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// tell the task manager about the rm leader
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());

		// wait for the initial slot report
		initialSlotReportFuture.get();

		// request slots from the task manager under the given allocation id
		CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			jobMasterGateway.getAddress(),
			resourceManagerGateway.getFencingToken(),
			timeout);

		slotRequestAck.get();

		// now inform the task manager about the new job leader
		jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

		// blocks until the job master gateway has received a slot offer
		final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get();
		final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList());
		assertThat(allocationIds, containsInAnyOrder(allocationId));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example 11
Source Project: Flink-CEPplus   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests that offering slots to the job master is retried after the offer timed out.
 *
 * <p>The testing job master gateway fails the first two offers with a
 * {@link TimeoutException} and accepts the third one.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final AllocationID allocationId = new AllocationID();

	// completed (with null) as soon as the resource manager gateway receives a slot report
	final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>();

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());

	});
	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// counts the offers: the latch reaches zero on the third call
	final CountDownLatch slotOfferings = new CountDownLatch(3);
	final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			slotOfferings.countDown();

			if (slotOfferings.getCount() == 0) {
				// third offer: record the offered allocation and accept it
				offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId());
				return CompletableFuture.completedFuture(slotOffers);
			} else {
				// earlier offers: simulate a timed-out offer
				return FutureUtils.completedExceptionally(new TimeoutException());
			}
		})
		.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait for the connection to the ResourceManager
		initialSlotReportFuture.get();

		taskExecutorGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			allocationId,
			jobManagerAddress,
			testingResourceManagerGateway.getFencingToken(),
			timeout).get();

		// blocks until the job master has been offered slots three times
		slotOfferings.await();

		assertThat(offeredSlotFuture.get(), is(allocationId));
		// slot index 1 was never requested in this test, so it must still be free
		assertTrue(taskSlotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 12
Source Project: Flink-CEPplus   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 *
 * <p>Two slots are requested on the task executor. The heartbeat then carries a report that
 * shares one allocation with the task executor and names one allocation the task executor
 * does not hold. The test expects exactly one failed-slot call on the job master and exactly
 * one slot-available notification on the resource manager.
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	// passed to the slot table below; awaited before the heartbeat is sent
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
			timerService,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	// allocation ids the resource manager gateway was told are available again
	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// allocation ids for which the job master gateway received a failSlot call
	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		// the task executor holds allocationIdInBoth (slot 0) and allocationIdOnlyInTM (slot 1)
		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		activeSlots.await();

		// the job master's report names allocationIdInBoth (slot 0) and allocationIdOnlyInJM (slot 1)
		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		// exactly one failed slot and exactly one freed slot are expected; the short polls
		// check that nothing further arrives
		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 13
/**
 * Builds a default testing job master gateway, stores it in {@code jobMasterGateway} and
 * registers it with the RPC service under the gateway's own address.
 */
private void createAndRegisterJobMasterGateway() {
	jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
}
 
Example 14
Source Project: flink   Source File: TestingJobMasterService.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the current job master gateway with a freshly built default testing gateway and
 * then delegates to {@code startFunction}.
 *
 * @param jobMasterId id forwarded unchanged to {@code startFunction}
 * @return whatever {@code startFunction} returns for the given id
 */
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
		// a new gateway instance is created on every call
		jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
		return startFunction.apply(jobMasterId);
}
 
Example 15
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Submits a sender and a receiver task that share one shuffle descriptor to the same task
 * executor and verifies that both tasks reach {@code FINISHED}.
 *
 * <p>The job master gateway acknowledges every schedule-or-update-consumers call.
 */
@Test(timeout = 10000L)
public void testRunJobWithForwardChannel() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setScheduleOrUpdateConsumersFunction(
			resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable taskSlotTable = env.getTaskSlotTable();

		// sender goes into slot 0 and must be running before the receiver is submitted
		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FinishedFuture.get();
		task2FinishedFuture.get();

		// JUnit's assertSame takes (expected, actual); keeping that order gives correct failure messages
		assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid1).getExecutionState());
		assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid2).getExecutionState());
	}
}
 
Example 16
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * This test creates two tasks. The sender sends data but fails to send its state update
 * back to the job manager; the receiver blocks until it is canceled.
 *
 * <p>The job master gateway rejects every state update for the sender's execution attempt
 * and acknowledges all others. The test expects the sender to end up {@code FAILED} and the
 * receiver to be {@code CANCELED} after an explicit cancel call.
 */
@Test(timeout = 10000L)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setUpdateTaskExecutionStateFunction(taskExecutionState -> {
			if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
				// the rejected update belongs to the sender (eid1), so the message names eid1
				return FutureUtils.completedExceptionally(
					new ExecutionGraphException("The execution attempt " + eid1 + " was not found."));
			} else {
				return CompletableFuture.completedFuture(Acknowledge.get());
			}
		})
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FailedFuture.get();
		// JUnit's assertSame takes (expected, actual); keeping that order gives correct failure messages
		assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());

		// the receiver does not stop on its own; cancel it explicitly
		tmGateway.cancelTask(eid2, timeout);

		task2CanceledFuture.get();
		assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
	}
}
 
Example 17
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Test that a failing schedule or update consumers call leads to the failing of the respective
 * task.
 *
 * <p>IMPORTANT: The invokable's cancel method has to be called, because only then is the
 * awaited future completed. According to the original author, this is achieved by not eagerly
 * deploying consumer tasks and by requiring the invokable to fill one memory segment, which
 * triggers the scheduling of the downstream operator in pipelined mode; the invokable then
 * blocks until the task fails.
 *
 * <p>NOTE(review): the gateway below makes the <em>update task execution state</em> function
 * complete exceptionally, not a schedule-or-update-consumers function - confirm that this is
 * the intended failure injection point.
 */
@Test(timeout = 10000L)
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final Configuration config = new Configuration();

	// use a small memory segment size: one full buffer is what triggers the schedule or
	// update consumers message to the downstream operators
	config.setString(TaskManagerOptions.MEMORY_SEGMENT_SIZE, "4096");

	final NettyShuffleDescriptor shuffleDescriptor =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
	final TaskDeploymentDescriptor senderDescriptor =
		createSender(shuffleDescriptor, TestingAbstractInvokables.TestInvokableRecordCancel.class);
	final ExecutionAttemptID senderAttemptId = senderDescriptor.getExecutionAttemptId();

	final CompletableFuture<Void> senderRunningFuture = new CompletableFuture<>();

	final Exception injectedFailure = new Exception("Failed schedule or update consumers");

	final JobMasterId jobMasterId = JobMasterId.generate();
	final TestingJobMasterGateway failingGateway =
		new TestingJobMasterGatewayBuilder()
			.setFencingTokenSupplier(() -> jobMasterId)
			.setUpdateTaskExecutionStateFunction(
				ignoredTaskExecutionState -> FutureUtils.completedExceptionally(injectedFailure))
			.build();

	try (TaskSubmissionTestEnvironment environment =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(1)
			.setConfiguration(config)
			.addTaskManagerActionListener(senderAttemptId, ExecutionState.RUNNING, senderRunningFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(failingGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		final TaskExecutorGateway taskExecutorGateway = environment.getTaskExecutorGateway();
		final TaskSlotTable slotTable = environment.getTaskSlotTable();

		// start from a fresh "got canceled" future before the task is submitted
		TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();

		slotTable.allocateSlot(0, jobId, senderDescriptor.getAllocationId(), Time.seconds(60));
		taskExecutorGateway.submitTask(senderDescriptor, jobMasterId, timeout).get();
		senderRunningFuture.get();

		final CompletableFuture<Boolean> canceledFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled();

		assertTrue(canceledFuture.get());
		// the injected exception must show up in the task's failure cause chain
		assertTrue(
			ExceptionUtils.findThrowableWithMessage(
				slotTable.getTask(senderAttemptId).getFailureCause(),
				injectedFailure.getMessage())
			.isPresent());
	}
}
 
Example 18
Source Project: flink   Source File: TaskExecutorPartitionLifecycleTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the job master connection is kept while partitions of the job are still
 * tracked, and that the job master gets disconnected after the tracked partition was released.
 */
@Test
public void testConnectionTerminationAfterExternalRelease() throws Exception {
	final CompletableFuture<Void> jobMasterDisconnected = new CompletableFuture<>();
	final JobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setDisconnectTaskManagerFunction(ignored -> {
			jobMasterDisconnected.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.build();

	// register the connection for the job up front, so the executor starts with it in place
	final JobManagerTable connections = new JobManagerTable();
	connections.put(
		jobId,
		TaskSubmissionTestEnvironment.createJobManagerConnection(
			jobId, gateway, RPC, new NoOpTaskManagerActions(), timeout));

	final TestingShuffleEnvironment shuffleEnv = new TestingShuffleEnvironment();
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setJobManagerTable(connections)
		.setShuffleEnvironment(shuffleEnv)
		.setTaskSlotTable(createTaskSlotTable())
		.build();

	final ResultPartitionID trackedPartition = new ResultPartitionID();
	final PartitionTable<JobID> trackedPartitions = new PartitionTable<>();

	final TestingTaskExecutor executor = createTestingTaskExecutor(services, trackedPartitions);

	try {
		executor.start();
		executor.waitUntilStarted();

		final TaskExecutorGateway executorGateway = executor.getSelfGateway(TaskExecutorGateway.class);

		// sanity check: the connection registered during setup is present
		runInTaskExecutorThreadAndWait(executor, () -> assertTrue(connections.contains(jobId)));

		runInTaskExecutorThreadAndWait(
			executor,
			() -> trackedPartitions.startTrackingPartitions(jobId, Collections.singletonList(trackedPartition)));

		final CompletableFuture<Collection<ResultPartitionID>> firstReleaseCall = new CompletableFuture<>();
		runInTaskExecutorThreadAndWait(executor, () -> shuffleEnv.releasePartitionsLocallyFuture = firstReleaseCall);

		// release some other partition; the tracked one is not part of this call
		executorGateway.releasePartitions(jobId, Collections.singletonList(new ResultPartitionID()));

		// completion only tells us that the executor entered releasePartitions; whether it already
		// decided about disconnecting is unknown at this point
		firstReleaseCall.get();

		// partitions are still tracked, hence the connection has to be alive; running this check in
		// the executor thread also guarantees that releasePartitions and its connection checks are done
		runInTaskExecutorThreadAndWait(executor, () -> assertTrue(connections.contains(jobId)));

		final CompletableFuture<Collection<ResultPartitionID>> secondReleaseCall = new CompletableFuture<>();
		runInTaskExecutorThreadAndWait(executor, () -> shuffleEnv.releasePartitionsLocallyFuture = secondReleaseCall);

		// releasing the tracked partition should make the executor notice that nothing is stored
		// anymore and close the connection
		executorGateway.releasePartitions(jobId, Collections.singletonList(trackedPartition));

		jobMasterDisconnected.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(executor, timeout);
	}
}
 
Example 19
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the task executor offers a slot it has reserved for a job to the job master
 * once it is notified about the leader of that job.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jobManagers = new JobManagerTable();
	final JobLeaderService leaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final CompletableFuture<Void> slotReportSent = new CompletableFuture<>();
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportSent.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<Collection<SlotOffer>> receivedOffers = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			// hand a copy of the offers to the test thread and accept every offer
			receivedOffers.complete(new ArrayList<>(slotOffers));
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	rpc.registerGateway(jmGateway.getAddress(), jmGateway);

	final AllocationID requestedAllocation = new AllocationID();
	final SlotID requestedSlot = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setJobManagerTable(jobManagers)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskManager = createTaskExecutor(services);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// announce the resource manager leader and wait for the first slot report to reach it
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());
		slotReportSent.get();

		// reserve the slot under the allocation id and block until the request is acknowledged
		tmGateway
			.requestSlot(
				requestedSlot,
				jobId,
				requestedAllocation,
				jmGateway.getAddress(),
				rmGateway.getFencingToken(),
				timeout)
			.get();

		// only now does the task manager learn about the job leader
		jobManagerLeaderRetriever.notifyListener(jmGateway.getAddress(), jmGateway.getFencingToken().toUUID());

		final Collection<AllocationID> offeredAllocations = new ArrayList<>();
		for (SlotOffer offer : receivedOffers.get()) {
			offeredAllocations.add(offer.getAllocationId());
		}
		assertThat(offeredAllocations, containsInAnyOrder(requestedAllocation));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example 20
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a slot offer is repeated after the job master answered it with a timeout and
 * that the offer eventually succeeds.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService);

	final TaskExecutor taskExecutor = createTaskExecutor(
		new TaskManagerServicesBuilder()
			.setTaskSlotTable(slotTable)
			.build());

	final AllocationID requestedAllocation = new AllocationID();

	final CompletableFuture<ResourceID> slotReportSent = new CompletableFuture<>();

	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportSent.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	final CountDownLatch remainingOffers = new CountDownLatch(3);
	final CompletableFuture<AllocationID> acceptedAllocation = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			remainingOffers.countDown();

			// while the latch has not reached zero, answer the offer with a timeout
			if (remainingOffers.getCount() != 0) {
				return FutureUtils.completedExceptionally(new TimeoutException());
			}

			acceptedAllocation.complete(slotOffers.iterator().next().getAllocationId());
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();
	final String jmAddress = jmGateway.getAddress();
	rpc.registerGateway(jmAddress, jmGateway);
	jobManagerLeaderRetriever.notifyListener(jmAddress, jmGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway executorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// the initial slot report tells us that the ResourceManager connection is established
		slotReportSent.get();

		final SlotID firstSlot = new SlotID(taskExecutor.getResourceID(), 0);
		executorGateway
			.requestSlot(
				firstSlot,
				jobId,
				requestedAllocation,
				jmAddress,
				rmGateway.getFencingToken(),
				timeout)
			.get();

		remainingOffers.await();

		assertThat(acceptedAllocation.get(), is(requestedAllocation));
		assertTrue(slotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 21
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 *
 * <p>The TaskExecutor is asked for two slots, then receives a heartbeat whose report shares
 * only one allocation with it. The test expects the allocation known only to the JobMaster
 * to be failed towards the JobMaster, and the allocation known only to the TaskExecutor to be
 * reported as available to the ResourceManager.
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	// awaited below after both slot requests were sent
	// NOTE(review): presumably counted down by ActivateSlotNotifyingTaskSlotTable whenever a slot
	// is marked active; confirm against that class
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
			timerService,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	// collects the allocation ids passed to the ResourceManager's notify-slot-available consumer
	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	// triggered by the first slot report that reaches the ResourceManager gateway
	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	// f2 is the third tuple field; judging by the parameter name it is the allocation id
	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	// despite the name this queue holds allocation ids: the ones handed to the fail-slot consumer
	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// do not request slots before the first slot report was sent
		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		// TaskExecutor side: slot 0 -> allocationIdInBoth, slot 1 -> allocationIdOnlyInTM
		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		activeSlots.await();

		// JobMaster side: slot 0 -> allocationIdInBoth, slot 1 -> allocationIdOnlyInJM
		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		// each mismatching allocation must show up exactly once in its queue ...
		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		// ... and nothing else may follow within the short poll interval
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 22
Source Project: flink   Source File: ResourceManagerJobMasterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a default testing job master gateway, stores it in {@code jobMasterGateway} and
 * registers it at the RPC service under the gateway's own address.
 */
private void createAndRegisterJobMasterGateway() {
	this.jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcService.registerGateway(
		this.jobMasterGateway.getAddress(),
		this.jobMasterGateway);
}
 
Example 23
Source Project: flink   Source File: TestingJobMasterService.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the held gateway with a freshly built default testing gateway and delegates the
 * actual start to the configured start function.
 */
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
	// the gateway is assigned before the start function is invoked
	jobMasterGateway = new TestingJobMasterGatewayBuilder().build();

	final CompletableFuture<Acknowledge> startResult = startFunction.apply(jobMasterId);
	return startResult;
}
 
Example 24
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Submits a sender and a receiver task that share one result partition to the task executor
 * and checks that both tasks reach the FINISHED state.
 */
@Test(timeout = TEST_TIMEOUT)
public void testRunJobWithForwardChannel() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setScheduleOrUpdateConsumersFunction(
			resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

		// the sender is deployed first and has to be running before the receiver is submitted
		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FinishedFuture.get();
		task2FinishedFuture.get();

		// expected value first, so that a failure message reports the states the right way round
		assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid1).getExecutionState());
		assertSame(ExecutionState.FINISHED, taskSlotTable.getTask(eid2).getExecutionState());
	}
}
 
Example 25
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * This test creates two tasks. The state updates of the first one (the sender) are rejected
 * by the job master, which is expected to fail that task. The second one (the receiver) keeps
 * running until it is canceled explicitly.
 */
@Test(timeout = TEST_TIMEOUT)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setUpdateTaskExecutionStateFunction(taskExecutionState -> {
			// only state updates of the first task are rejected; the message names the attempt
			// whose update is being rejected
			if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
				return FutureUtils.completedExceptionally(
					new ExecutionGraphException("The execution attempt " + eid1 + " was not found."));
			} else {
				return CompletableFuture.completedFuture(Acknowledge.get());
			}
		})
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FailedFuture.get();
		// expected value first, so that a failure message reports the states the right way round
		assertSame(ExecutionState.FAILED, taskSlotTable.getTask(eid1).getExecutionState());

		// the second task does not end on its own, it has to be canceled
		tmGateway.cancelTask(eid2, timeout);

		task2CanceledFuture.get();
		assertSame(ExecutionState.CANCELED, taskSlotTable.getTask(eid2).getExecutionState());
	}
}
 
Example 26
Source Project: flink   Source File: TaskExecutorSubmissionTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a task is failed, with the original exception as its failure cause, when a call
 * to the job master fails.
 *
 * <p>IMPORTANT: The future this test waits on is only completed by the invokable's cancel
 * method, so the test has to make sure that this method gets called. Consumer tasks are
 * therefore not deployed eagerly and the invokable has to fill one memory segment. Since the
 * partition is pipelined, the completed segment triggers the scheduling of the downstream
 * operator. Afterwards the invokable blocks until the task is failed because of the failing
 * schedule or update consumers call.
 */
@Test(timeout = TEST_TIMEOUT)
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	// use the smallest possible memory segment: one full buffer is needed before the schedule
	// or update consumers message is sent to the downstream operators
	final Configuration config = new Configuration();
	config.set(TaskManagerOptions.MEMORY_SEGMENT_SIZE, MemorySize.parse("4096"));

	NettyShuffleDescriptor shuffleDescriptor =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
	TaskDeploymentDescriptor senderDescriptor =
		createSender(shuffleDescriptor, TestingAbstractInvokables.TestInvokableRecordCancel.class);
	ExecutionAttemptID senderAttempt = senderDescriptor.getExecutionAttemptId();

	final CompletableFuture<Void> senderRunning = new CompletableFuture<>();

	final Exception expectedFailure = new Exception("Failed schedule or update consumers");

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway gateway =
		new TestingJobMasterGatewayBuilder()
			.setFencingTokenSupplier(() -> jobMasterId)
			.setUpdateTaskExecutionStateFunction(
				taskExecutionState -> FutureUtils.completedExceptionally(expectedFailure))
			.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(1)
			.setConfiguration(config)
			.addTaskManagerActionListener(senderAttempt, ExecutionState.RUNNING, senderRunning)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(gateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> slotTable = env.getTaskSlotTable();

		// start from a fresh cancel future before the task is submitted
		TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();

		slotTable.allocateSlot(0, jobId, senderDescriptor.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(senderDescriptor, jobMasterId, timeout).get();
		senderRunning.get();

		assertTrue(TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled().get());

		final Throwable failureCause = slotTable.getTask(senderAttempt).getFailureCause();
		assertTrue(ExceptionUtils.findThrowableWithMessage(failureCause, expectedFailure.getMessage()).isPresent());
	}
}
 
Example 27
Source Project: flink   Source File: DefaultJobLeaderServiceTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a default testing job master gateway and registers it at the testing RPC service
 * under the gateway's own address.
 *
 * @return the registered gateway
 */
private TestingJobMasterGateway registerJobMaster() {
	final TestingJobMasterGateway gateway = new TestingJobMasterGatewayBuilder().build();
	final String gatewayAddress = gateway.getAddress();

	rpcServiceResource.getTestingRpcService().registerGateway(gatewayAddress, gateway);
	return gateway;
}
 
Example 28
Source Project: flink   Source File: TaskExecutorPartitionLifecycleTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Shared test body: the job master connection has to survive a release call while the
 * partition tracker still reports tracked partitions for the job, and the job master has to be
 * disconnected after {@code releaseOrPromoteCall} ran with nothing tracked anymore.
 *
 * @param releaseOrPromoteCall the gateway call (release or promotion) that is expected to lead to
 *     the disconnect
 */
private void testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(TriConsumer<TaskExecutorGateway, JobID, ResultPartitionID> releaseOrPromoteCall) throws Exception {
	final CompletableFuture<Void> jobMasterDisconnected = new CompletableFuture<>();
	final JobMasterGateway gateway = new TestingJobMasterGatewayBuilder()
		.setDisconnectTaskManagerFunction(ignored -> {
			jobMasterDisconnected.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.build();

	final DefaultJobTable jobs = DefaultJobTable.create();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setJobTable(jobs)
		.setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
		.setTaskSlotTable(createTaskSlotTable())
		.build();

	final TestingTaskExecutorPartitionTracker tracker = new TestingTaskExecutorPartitionTracker();

	// the flag lets the test decide what the tracker answers when asked about tracked partitions
	final AtomicBoolean stillTracking = new AtomicBoolean(false);
	tracker.setIsTrackingPartitionsForFunction(ignoredJobId -> stillTracking.get());

	final CompletableFuture<Collection<ResultPartitionID>> firstReleaseCall = new CompletableFuture<>();
	tracker.setStopTrackingAndReleasePartitionsConsumer(partitions -> firstReleaseCall.complete(partitions));

	final ResultPartitionID blockingPartition = PartitionTestUtils
		.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING)
		.getShuffleDescriptor()
		.getResultPartitionID();

	final TestingTaskExecutor executor = createTestingTaskExecutor(services, tracker);

	try {
		executor.start();
		executor.waitUntilStarted();

		TaskSubmissionTestEnvironment.registerJobMasterConnection(
			jobs,
			jobId,
			rpc,
			gateway,
			new NoOpTaskManagerActions(),
			timeout,
			executor.getMainThreadExecutableForTesting());

		final TaskExecutorGateway executorGateway = executor.getSelfGateway(TaskExecutorGateway.class);

		stillTracking.set(true);
		assertThat(firstReleaseCall.isDone(), is(false));

		executorGateway.releaseOrPromotePartitions(jobId, Collections.singleton(new ResultPartitionID()), Collections.emptySet());

		// completion only tells us that the executor entered releasePartitions; whether it already
		// decided about disconnecting is unknown at this point
		firstReleaseCall.get();

		// the tracker still reports partitions, so no disconnect may have happened
		assertThat(jobMasterDisconnected.isDone(), is(false));

		stillTracking.set(false);

		// with nothing tracked anymore, this call should make the executor close the connection
		releaseOrPromoteCall.accept(executorGateway, jobId, blockingPartition);

		jobMasterDisconnected.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(executor, timeout);
	}
}
 
Example 29
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the task executor offers a slot it has reserved for a job to the job master
 * once it is notified about the leader of that job.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(1);
	final JobLeaderService leaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final CompletableFuture<Void> slotReportSent = new CompletableFuture<>();
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(ignored -> {
		slotReportSent.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<Collection<SlotOffer>> receivedOffers = new CompletableFuture<>();
	final TestingJobMasterGateway jmGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			// hand a copy of the offers to the test thread and accept every offer
			receivedOffers.complete(new ArrayList<>(slotOffers));
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	rpc.registerGateway(jmGateway.getAddress(), jmGateway);

	final AllocationID requestedAllocation = new AllocationID();
	final SlotID requestedSlot = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(slotTable)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskManager = createTaskExecutor(services);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// announce the resource manager leader and wait for the first slot report to reach it
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());
		slotReportSent.get();

		// reserve the slot under the allocation id and block until the request is acknowledged
		tmGateway
			.requestSlot(
				requestedSlot,
				jobId,
				requestedAllocation,
				ResourceProfile.ZERO,
				jmGateway.getAddress(),
				rmGateway.getFencingToken(),
				timeout)
			.get();

		// only now does the task manager learn about the job leader
		jobManagerLeaderRetriever.notifyListener(jmGateway.getAddress(), jmGateway.getFencingToken().toUUID());

		final Collection<AllocationID> offeredAllocations = new ArrayList<>();
		for (SlotOffer offer : receivedOffers.get()) {
			offeredAllocations.add(offer.getAllocationId());
		}
		assertThat(offeredAllocations, containsInAnyOrder(requestedAllocation));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example 30
Source Project: flink   Source File: TaskExecutorTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a slot offer to the job master is repeated after earlier offers timed out.
 *
 * <p>The testing job master gateway answers the first two offers with a
 * {@code TimeoutException} and accepts the third one. The test then verifies that the
 * accepted offer carries the requested allocation id.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final AllocationID allocationId = new AllocationID();

	final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(2);
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.build();
	final TaskExecutor taskExecutor = createTaskExecutor(services);

	// resource manager gateway that signals once the first slot report has arrived
	final CompletableFuture<ResourceID> slotReportReceived = new CompletableFuture<>();
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	rmGateway.setSendSlotReportFunction(ignoredSlotReport -> {
		slotReportReceived.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});
	rpc.registerGateway(rmGateway.getAddress(), rmGateway);
	resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

	// job master gateway that lets offers time out until the latch reaches zero
	final CountDownLatch pendingOffers = new CountDownLatch(3);
	final CompletableFuture<AllocationID> acceptedAllocation = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((ignoredResourceId, offers) -> {
			assertThat(offers.size(), is(1));
			pendingOffers.countDown();

			if (pendingOffers.getCount() != 0) {
				// not the final expected offer yet: answer with a timeout
				return FutureUtils.completedExceptionally(new TimeoutException());
			}

			acceptedAllocation.complete(offers.iterator().next().getAllocationId());
			return CompletableFuture.completedFuture(offers);
		})
		.build();
	final String jobMasterAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobMasterAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway gateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// block until the slot report has reached the resource manager gateway
		slotReportReceived.get();

		final SlotID requestedSlot = new SlotID(taskExecutor.getResourceID(), 0);
		final CompletableFuture<Acknowledge> requestAck = gateway.requestSlot(
			requestedSlot,
			jobId,
			allocationId,
			ResourceProfile.ZERO,
			jobMasterAddress,
			rmGateway.getFencingToken(),
			timeout);
		requestAck.get();

		// wait until all three offers have reached the job master gateway
		pendingOffers.await();

		assertThat(acceptedAllocation.get(), is(allocationId));
		// only slot index 0 was requested above
		assertTrue(slotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}