org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder Java Examples

The following examples show how to use org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException {
	final OneShotLatch offerSlotsLatch = new OneShotLatch();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			offerSlotsLatch.trigger();
			return CompletableFuture.completedFuture(slotOffers);
		}).build();
	rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final JobLeaderService jobLeaderService = new DefaultJobLeaderService(
		unresolvedTaskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();
	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(stateStoresManager)
		.build());

	jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
	return new TaskExecutorTestingContext(jobMasterGateway, taskSlotTable, taskExecutor);
}
 
Example #2
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
private static TestingJobMasterGateway createJobMasterWithSlotOfferAndTaskTerminationHooks(
		OneShotLatch offerSlotsLatch,
		OneShotLatch taskInTerminalState,
		CompletableFuture<Collection<SlotOffer>> offerResultFuture) {
	return new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			offerSlotsLatch.trigger();
			return offerResultFuture;
		})
		.setUpdateTaskExecutionStateFunction(taskExecutionState -> {
			if (taskExecutionState.getExecutionState().isTerminal()) {
				taskInTerminalState.trigger();
			}
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.build();
}
 
Example #3
Source File: ResourceManagerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectFuture::complete)
		.build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return jobMasterLeaderRetrievalService;
	});

	runHeartbeatTimeoutTest(
		resourceManagerGateway -> {
			final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager(
				jobMasterGateway.getFencingToken(),
				jobMasterResourceId,
				jobMasterGateway.getAddress(),
				jobId,
				TIMEOUT);

			assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class));
		},
		resourceManagerResourceId -> {
			// might have been completed or not depending whether the timeout was triggered first
			final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null);
			assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue())));
			assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example #4
Source File: DefaultJobTableTest.java    From flink with Apache License 2.0 5 votes vote down vote up
private JobTable.Connection connectJob(JobTable.Job job, ResourceID resourceId) {
	return job.connect(
			resourceId,
			new TestingJobMasterGatewayBuilder().build(),
			new NoOpTaskManagerActions(),
			NoOpCheckpointResponder.INSTANCE,
			new TestGlobalAggregateManager(),
			new NoOpResultPartitionConsumableNotifier(),
			new NoOpPartitionProducerStateChecker());
}
 
Example #5
Source File: ResourceManagerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectFuture::complete)
		.build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return jobMasterLeaderRetrievalService;
	});

	runHeartbeatTimeoutTest(
		resourceManagerGateway -> {
			final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager(
				jobMasterGateway.getFencingToken(),
				jobMasterResourceId,
				jobMasterGateway.getAddress(),
				jobId,
				TIMEOUT);

			assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class));
		},
		resourceManagerResourceId -> {
			// might have been completed or not depending whether the timeout was triggered first
			final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null);
			assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue())));
			assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example #6
Source File: ResourceManagerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
	final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>();
	final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setResourceManagerHeartbeatConsumer(heartbeatRequestFuture::complete)
		.setDisconnectResourceManagerConsumer(disconnectFuture::complete)
		.build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
	final JobID jobId = new JobID();
	final ResourceID jobMasterResourceId = ResourceID.generate();
	final LeaderRetrievalService jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

	highAvailabilityServices.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
		assertThat(requestedJobId, is(equalTo(jobId)));
		return jobMasterLeaderRetrievalService;
	});

	runHeartbeatTimeoutTest(
		resourceManagerGateway -> {
			final CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager(
				jobMasterGateway.getFencingToken(),
				jobMasterResourceId,
				jobMasterGateway.getAddress(),
				jobId,
				TIMEOUT);

			assertThat(registrationFuture.get(), instanceOf(RegistrationResponse.Success.class));
		},
		resourceManagerResourceId -> {
			// might have been completed or not depending whether the timeout was triggered first
			final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null);
			assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue())));
			assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId)));
		});
}
 
Example #7
Source File: DefaultJobLeaderServiceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications() throws Exception {
	final FailingSettableLeaderRetrievalService leaderRetrievalService = new FailingSettableLeaderRetrievalService();
	final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder()
		.setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService)
		.build();

	final JobID jobId = new JobID();
	final CompletableFuture<JobID> newLeaderFuture = new CompletableFuture<>();
	final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(newLeaderFuture::complete);

	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcServiceResource.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener);

	try {
		jobLeaderService.addJob(jobId, "foobar");

		jobLeaderService.removeJob(jobId);

		leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

		try {
			newLeaderFuture.get(10, TimeUnit.MILLISECONDS);
			fail("The leader future should not be completed.");
		} catch (TimeoutException expected) {}
	} finally {
		jobLeaderService.stop();
	}
}
 
Example #8
Source File: TaskExecutorOperatorEventHandlingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
private TaskSubmissionTestEnvironment createExecutorWithRunningTask(
		JobID jobId,
		ExecutionAttemptID executionAttemptId,
		Class<? extends AbstractInvokable> invokableClass) throws Exception {

	final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(
			jobId, executionAttemptId, invokableClass);

	final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();

	final JobMasterId token = JobMasterId.generate();
	final TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId)
			.setJobMasterId(token)
			.setSlotSize(1)
			.addTaskManagerActionListener(executionAttemptId, ExecutionState.RUNNING, taskRunningFuture)
			.setMetricQueryServiceAddress(metricRegistry.getMetricQueryServiceGatewayRpcAddress())
			.setJobMasterGateway(new TestingJobMasterGatewayBuilder()
				.setFencingTokenSupplier(() -> token)
				.setOperatorEventSender((eio, oid, value) -> {
					throw new RuntimeException();
				})
				.build())
			.build();

	env.getTaskSlotTable().allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));

	final TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
	tmGateway.submitTask(tdd, env.getJobMasterId(), Time.seconds(10)).get();
	taskRunningFuture.get();

	return env;
}
 
Example #9
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * This tests creates two tasks. The sender sends data but fails to send the
 * state update back to the job manager.
 * the second one blocks to be canceled
 */
@Test(timeout = TEST_TIMEOUT)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setUpdateTaskExecutionStateFunction(taskExecutionState -> {
			if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
				return FutureUtils.completedExceptionally(
					new ExecutionGraphException("The execution attempt " + eid2 + " was not found."));
			} else {
				return CompletableFuture.completedFuture(Acknowledge.get());
			}
		})
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FailedFuture.get();
		assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FAILED);

		tmGateway.cancelTask(eid2, timeout);

		task2CanceledFuture.get();
		assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.CANCELED);
	}
}
 
Example #10
Source File: TestingJobMasterService.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
		jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
		return startFunction.apply(jobMasterId);
}
 
Example #11
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Test that a failing schedule or update consumers call leads to the failing of the respective
 * task.
 *
 * <p>IMPORTANT: We have to make sure that the invokable's cancel method is called, because only
 * then the future is completed. We do this by not eagerly deploying consumer tasks and requiring
 * the invokable to fill one memory segment. The completed memory segment will trigger the
 * scheduling of the downstream operator since it is in pipeline mode. After we've filled the
 * memory segment, we'll block the invokable and wait for the task failure due to the failed
 * schedule or update consumers call.
 */
@Test(timeout = TEST_TIMEOUT)
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final Configuration configuration = new Configuration();

	// set the memory segment to the smallest size possible, because we have to fill one
	// memory buffer to trigger the schedule or update consumers message to the downstream
	// operators
	configuration.set(TaskManagerOptions.MEMORY_SEGMENT_SIZE, MemorySize.parse("4096"));

	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
	TaskDeploymentDescriptor tdd = createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class);
	ExecutionAttemptID eid = tdd.getExecutionAttemptId();

	final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();

	final Exception exception = new Exception("Failed schedule or update consumers");

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
			.setFencingTokenSupplier(() -> jobMasterId)
			.setUpdateTaskExecutionStateFunction(resultPartitionID -> FutureUtils.completedExceptionally(exception))
			.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(1)
			.setConfiguration(configuration)
			.addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

		TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();

		taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd, jobMasterId, timeout).get();
		taskRunningFuture.get();

		CompletableFuture<Boolean> cancelFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled();

		assertTrue(cancelFuture.get());
		assertTrue(ExceptionUtils.findThrowableWithMessage(taskSlotTable.getTask(eid).getFailureCause(), exception.getMessage()).isPresent());
	}
}
 
Example #12
Source File: DefaultJobLeaderServiceTest.java    From flink with Apache License 2.0 4 votes vote down vote up
private TestingJobMasterGateway registerJobMaster() {
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcServiceResource.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	return jobMasterGateway;
}
 
Example #13
Source File: TaskExecutorPartitionLifecycleTest.java    From flink with Apache License 2.0 4 votes vote down vote up
private void testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(TriConsumer<TaskExecutorGateway, JobID, ResultPartitionID> releaseOrPromoteCall) throws Exception {
	final CompletableFuture<Void> disconnectFuture = new CompletableFuture<>();
	final JobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setDisconnectTaskManagerFunction(resourceID -> {
			disconnectFuture.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		}).build();

	final DefaultJobTable jobTable = DefaultJobTable.create();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setJobTable(jobTable)
		.setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
		.setTaskSlotTable(createTaskSlotTable())
		.build();

	final TestingTaskExecutorPartitionTracker partitionTracker = new TestingTaskExecutorPartitionTracker();

	final AtomicBoolean trackerIsTrackingPartitions = new AtomicBoolean(false);
	partitionTracker.setIsTrackingPartitionsForFunction(jobId -> trackerIsTrackingPartitions.get());

	final CompletableFuture<Collection<ResultPartitionID>> firstReleasePartitionsCallFuture = new CompletableFuture<>();
	partitionTracker.setStopTrackingAndReleasePartitionsConsumer(firstReleasePartitionsCallFuture::complete);

	final ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor = PartitionTestUtils.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING);
	final ResultPartitionID resultPartitionId = resultPartitionDeploymentDescriptor.getShuffleDescriptor().getResultPartitionID();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices, partitionTracker);

	try {
		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		TaskSubmissionTestEnvironment.registerJobMasterConnection(
			jobTable,
			jobId,
			rpc,
			jobMasterGateway,
			new NoOpTaskManagerActions(),
			timeout,
			taskExecutor.getMainThreadExecutableForTesting());

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		trackerIsTrackingPartitions.set(true);
		assertThat(firstReleasePartitionsCallFuture.isDone(), is(false));

		taskExecutorGateway.releaseOrPromotePartitions(jobId, Collections.singleton(new ResultPartitionID()), Collections.emptySet());

		// at this point we only know that the TE has entered releasePartitions; we cannot be certain whether it
		// has already checked whether it should disconnect or not
		firstReleasePartitionsCallFuture.get();

		// connection should be kept alive since the table still contains partitions
		assertThat(disconnectFuture.isDone(), is(false));

		trackerIsTrackingPartitions.set(false);

		// the TM should check whether partitions are still stored, and afterwards terminate the connection
		releaseOrPromoteCall.accept(taskExecutorGateway, jobId, resultPartitionId);

		disconnectFuture.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #14
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting
 * the job leader, it will offer all reserved slots to the JobManager.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
	final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
	CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
	resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {

			offeredSlotsFuture.complete(new ArrayList<>(slotOffers));
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
	rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final AllocationID allocationId = new AllocationID();
	final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(localStateStoresManager)
		.build();

	TaskExecutor taskManager = createTaskExecutor(taskManagerServices);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// tell the task manager about the rm leader
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());

		// wait for the initial slot report
		initialSlotReportFuture.get();

		// request slots from the task manager under the given allocation id
		CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			ResourceProfile.ZERO,
			jobMasterGateway.getAddress(),
			resourceManagerGateway.getFencingToken(),
			timeout);

		slotRequestAck.get();

		// now inform the task manager about the new job leader
		jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

		final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get();
		final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList());
		assertThat(allocationIds, containsInAnyOrder(allocationId));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example #15
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that offers slots to job master timeout and retry.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final AllocationID allocationId = new AllocationID();

	final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>();

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());

	});
	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final CountDownLatch slotOfferings = new CountDownLatch(3);
	final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			slotOfferings.countDown();

			if (slotOfferings.getCount() == 0) {
				offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId());
				return CompletableFuture.completedFuture(slotOffers);
			} else {
				return FutureUtils.completedExceptionally(new TimeoutException());
			}
		})
		.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait for the connection to the ResourceManager
		initialSlotReportFuture.get();

		taskExecutorGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			allocationId,
			ResourceProfile.ZERO,
			jobManagerAddress,
			testingResourceManagerGateway.getFencingToken(),
			timeout).get();

		slotOfferings.await();

		assertThat(offeredSlotFuture.get(), is(allocationId));
		assertTrue(taskSlotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #16
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable<Task> taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			2,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, ResourceProfile.ZERO, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, ResourceProfile.ZERO, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		activeSlots.await();

		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #17
Source File: ResourceManagerJobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
private void createAndRegisterJobMasterGateway() {
	jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
}
 
Example #18
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test(timeout = TEST_TIMEOUT)
public void testRunJobWithForwardChannel() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setScheduleOrUpdateConsumersFunction(
			resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FinishedFuture.get();
		task2FinishedFuture.get();

		assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FINISHED);
		assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.FINISHED);
	}
}
 
Example #19
Source File: TestingJobMasterService.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
		jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
		return startFunction.apply(jobMasterId);
}
 
Example #20
Source File: ResourceManagerJobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
private void createAndRegisterJobMasterGateway() {
	jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
}
 
Example #21
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
			timerService,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		activeSlots.await();

		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #22
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that offers slots to job master timeout and retry.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
		timerService);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskSlotTable(taskSlotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final AllocationID allocationId = new AllocationID();

	final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>();

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());

	});
	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final CountDownLatch slotOfferings = new CountDownLatch(3);
	final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {
			assertThat(slotOffers.size(), is(1));
			slotOfferings.countDown();

			if (slotOfferings.getCount() == 0) {
				offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId());
				return CompletableFuture.completedFuture(slotOffers);
			} else {
				return FutureUtils.completedExceptionally(new TimeoutException());
			}
		})
		.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	try {
		taskExecutor.start();
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// wait for the connection to the ResourceManager
		initialSlotReportFuture.get();

		taskExecutorGateway.requestSlot(
			new SlotID(taskExecutor.getResourceID(), 0),
			jobId,
			allocationId,
			jobManagerAddress,
			testingResourceManagerGateway.getFencingToken(),
			timeout).get();

		slotOfferings.await();

		assertThat(offeredSlotFuture.get(), is(allocationId));
		assertTrue(taskSlotTable.isSlotFree(1));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #23
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting
 * the job leader, it will offer all reserved slots to the JobManager.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jobManagerTable = new JobManagerTable();
	final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
	CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
	resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {

			offeredSlotsFuture.complete(new ArrayList<>(slotOffers));
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
	rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final AllocationID allocationId = new AllocationID();
	final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setJobManagerTable(jobManagerTable)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(localStateStoresManager)
		.build();

	TaskExecutor taskManager = createTaskExecutor(taskManagerServices);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// tell the task manager about the rm leader
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());

		// wait for the initial slot report
		initialSlotReportFuture.get();

		// request slots from the task manager under the given allocation id
		CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			jobMasterGateway.getAddress(),
			resourceManagerGateway.getFencingToken(),
			timeout);

		slotRequestAck.get();

		// now inform the task manager about the new job leader
		jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

		final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get();
		final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList());
		assertThat(allocationIds, containsInAnyOrder(allocationId));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example #24
Source File: TaskExecutorPartitionLifecycleTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test
public void testConnectionTerminationAfterExternalRelease() throws Exception {
	final CompletableFuture<Void> disconnectFuture = new CompletableFuture<>();
	final JobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setDisconnectTaskManagerFunction(resourceID -> {
			disconnectFuture.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		}).build();

	final JobManagerConnection jobManagerConnection = TaskSubmissionTestEnvironment.createJobManagerConnection(
		jobId, jobMasterGateway, RPC, new NoOpTaskManagerActions(), timeout);

	final JobManagerTable jobManagerTable = new JobManagerTable();
	jobManagerTable.put(jobId, jobManagerConnection);

	final TestingShuffleEnvironment shuffleEnvironment = new TestingShuffleEnvironment();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setJobManagerTable(jobManagerTable)
		.setShuffleEnvironment(shuffleEnvironment)
		.setTaskSlotTable(createTaskSlotTable())
		.build();

	final PartitionTable<JobID> partitionTable = new PartitionTable<>();
	final ResultPartitionID resultPartitionId = new ResultPartitionID();

	final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices, partitionTable);

	try {
		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// baseline, jobmanager was added in test setup
		runInTaskExecutorThreadAndWait(taskExecutor, () -> assertTrue(jobManagerTable.contains(jobId)));

		runInTaskExecutorThreadAndWait(taskExecutor, () -> partitionTable.startTrackingPartitions(jobId, Collections.singletonList(resultPartitionId)));

		final CompletableFuture<Collection<ResultPartitionID>> firstReleasePartitionsCallFuture = new CompletableFuture<>();
		runInTaskExecutorThreadAndWait(taskExecutor, () -> shuffleEnvironment.releasePartitionsLocallyFuture = firstReleasePartitionsCallFuture);

		taskExecutorGateway.releasePartitions(jobId, Collections.singletonList(new ResultPartitionID()));

		// at this point we only know that the TE has entered releasePartitions; we cannot be certain whether it
		// has already checked whether it should disconnect or not
		firstReleasePartitionsCallFuture.get();

		// connection should be kept alive since the table still contains partitions
		// once this returns we know that the TE has exited releasePartitions and associated connection checks
		runInTaskExecutorThreadAndWait(taskExecutor, () -> assertTrue(jobManagerTable.contains(jobId)));

		final CompletableFuture<Collection<ResultPartitionID>> secondReleasePartitionsCallFuture = new CompletableFuture<>();
		runInTaskExecutorThreadAndWait(taskExecutor, () -> shuffleEnvironment.releasePartitionsLocallyFuture = secondReleasePartitionsCallFuture);

		// the TM should check whether partitions are still stored, and afterwards terminate the connection
		taskExecutorGateway.releasePartitions(jobId, Collections.singletonList(resultPartitionId));

		disconnectFuture.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #25
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Test that a failing schedule or update consumers call leads to the failing of the respective
 * task.
 *
 * <p>IMPORTANT: We have to make sure that the invokable's cancel method is called, because only
 * then the future is completed. We do this by not eagerly deploying consumer tasks and requiring
 * the invokable to fill one memory segment. The completed memory segment will trigger the
 * scheduling of the downstream operator since it is in pipeline mode. After we've filled the
 * memory segment, we'll block the invokable and wait for the task failure due to the failed
 * schedule or update consumers call.
 */
@Test(timeout = 10000L)
public void testFailingScheduleOrUpdateConsumers() throws Exception {
	final Configuration configuration = new Configuration();

	// set the memory segment to the smallest size possible, because we have to fill one
	// memory buffer to trigger the schedule or update consumers message to the downstream
	// operators
	configuration.setString(TaskManagerOptions.MEMORY_SEGMENT_SIZE, "4096");

	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
	TaskDeploymentDescriptor tdd = createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class);
	ExecutionAttemptID eid = tdd.getExecutionAttemptId();

	final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();

	final Exception exception = new Exception("Failed schedule or update consumers");

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
			.setFencingTokenSupplier(() -> jobMasterId)
			.setUpdateTaskExecutionStateFunction(resultPartitionID -> FutureUtils.completedExceptionally(exception))
			.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(1)
			.setConfiguration(configuration)
			.addTaskManagerActionListener(eid, ExecutionState.RUNNING, taskRunningFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable taskSlotTable = env.getTaskSlotTable();

		TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();

		taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd, jobMasterId, timeout).get();
		taskRunningFuture.get();

		CompletableFuture<Boolean> cancelFuture = TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled();

		assertTrue(cancelFuture.get());
		assertTrue(ExceptionUtils.findThrowableWithMessage(taskSlotTable.getTask(eid).getFailureCause(), exception.getMessage()).isPresent());
	}
}
 
Example #26
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * This tests creates two tasks. The sender sends data but fails to send the
 * state update back to the job manager.
 * the second one blocks to be canceled
 */
@Test(timeout = 10000L)
public void testCancellingDependentAndStateUpdateFails() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setUpdateTaskExecutionStateFunction(taskExecutionState -> {
			if (taskExecutionState != null && taskExecutionState.getID().equals(eid1)) {
				return FutureUtils.completedExceptionally(
					new ExecutionGraphException("The execution attempt " + eid2 + " was not found."));
			} else {
				return CompletableFuture.completedFuture(Acknowledge.get());
			}
		})
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FailedFuture.get();
		assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FAILED);

		tmGateway.cancelTask(eid2, timeout);

		task2CanceledFuture.get();
		assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.CANCELED);
	}
}
 
Example #27
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test(timeout = 10000L)
public void testRunJobWithForwardChannel() throws Exception {
	ResourceID producerLocation = ResourceID.generate();
	NettyShuffleDescriptor sdd =
		createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);

	TaskDeploymentDescriptor tdd1 = createSender(sdd);
	TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
	ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
	ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();

	final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task1FinishedFuture = new CompletableFuture<>();
	final CompletableFuture<Void> task2FinishedFuture = new CompletableFuture<>();

	final JobMasterId jobMasterId = JobMasterId.generate();
	TestingJobMasterGateway testingJobMasterGateway =
		new TestingJobMasterGatewayBuilder()
		.setFencingTokenSupplier(() -> jobMasterId)
		.setScheduleOrUpdateConsumersFunction(
			resultPartitionID -> CompletableFuture.completedFuture(Acknowledge.get()))
		.build();

	try (TaskSubmissionTestEnvironment env =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setResourceID(producerLocation)
			.setSlotSize(2)
			.addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
			.addTaskManagerActionListener(eid1, ExecutionState.FINISHED, task1FinishedFuture)
			.addTaskManagerActionListener(eid2, ExecutionState.FINISHED, task2FinishedFuture)
			.setJobMasterId(jobMasterId)
			.setJobMasterGateway(testingJobMasterGateway)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
		TaskSlotTable taskSlotTable = env.getTaskSlotTable();

		taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
		task1RunningFuture.get();

		taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
		tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
		task2RunningFuture.get();

		task1FinishedFuture.get();
		task2FinishedFuture.get();

		assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FINISHED);
		assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.FINISHED);
	}
}
 
Example #28
Source File: TestingJobMasterService.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public CompletableFuture<Acknowledge> start(JobMasterId jobMasterId) {
		jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
		return startFunction.apply(jobMasterId);
}
 
Example #29
Source File: ResourceManagerJobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
private void createAndRegisterJobMasterGateway() {
	jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
	rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
}
 
Example #30
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view
 * via the AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
	final CountDownLatch activeSlots = new CountDownLatch(2);
	final TaskSlotTable taskSlotTable = new ActivateSlotNotifyingTaskSlotTable(
			Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
			timerService,
			activeSlots);
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();

	final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

	final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();

	final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);

	OneShotLatch initialSlotReporting = new OneShotLatch();
	testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReporting.trigger();
		return CompletableFuture.completedFuture(Acknowledge.get());

	});

	testingResourceManagerGateway.setNotifySlotAvailableConsumer(instanceIDSlotIDAllocationIDTuple3 ->
			allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));

	rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
	resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());

	final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
	final ResourceID jobManagerResourceId = ResourceID.generate();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
			.setFailSlotConsumer((resourceID, allocationID, throwable) ->
				failedSlotFutures.offer(allocationID))
			.setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
			.setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
			.build();
	final String jobManagerAddress = jobMasterGateway.getAddress();
	rpc.registerGateway(jobManagerAddress, jobMasterGateway);
	jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

	taskExecutor.start();

	try {
		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		initialSlotReporting.await();

		final SlotID slotId1 = new SlotID(taskExecutor.getResourceID(), 0);
		final SlotID slotId2 = new SlotID(taskExecutor.getResourceID(), 1);
		final AllocationID allocationIdInBoth = new AllocationID();
		final AllocationID allocationIdOnlyInJM = new AllocationID();
		final AllocationID allocationIdOnlyInTM = new AllocationID();

		taskExecutorGateway.requestSlot(slotId1, jobId, allocationIdInBoth, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
		taskExecutorGateway.requestSlot(slotId2, jobId, allocationIdOnlyInTM, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);

		activeSlots.await();

		List<AllocatedSlotInfo> allocatedSlotInfos = Arrays.asList(
				new AllocatedSlotInfo(0, allocationIdInBoth),
				new AllocatedSlotInfo(1, allocationIdOnlyInJM)
		);
		AllocatedSlotReport allocatedSlotReport = new AllocatedSlotReport(jobId, allocatedSlotInfos);
		taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

		assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
		assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
		assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
		assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}