org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway Java Examples

The following examples show how to use org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JobMasterPartitionReleaseTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Disconnects the task manager from the job master and, once the task executor gateway has
 * seen the disconnect, verifies that the resource ID exposed by
 * {@code getStopTrackingPartitionsTargetResourceId()} equals the task executor's resource ID.
 */
@Test
public void testPartitionTableCleanupOnDisconnect() throws Exception {
	final CompletableFuture<JobID> disconnectedJobIdFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectedJobIdFuture.complete(jobID))
		.createTestingTaskExecutorGateway();

	try (final TestSetup setup = new TestSetup(rpcService, testingFatalErrorHandler, taskExecutorGateway)) {
		final JobMasterGateway gateway = setup.jobMaster.getSelfGateway(JobMasterGateway.class);
		gateway.disconnectTaskManager(setup.getTaskExecutorResourceID(), new Exception("test"));

		// block until the gateway's disconnect callback has fired
		disconnectedJobIdFuture.get();

		assertThat(
			setup.getStopTrackingPartitionsTargetResourceId().get(),
			equalTo(setup.getTaskExecutorResourceID()));
	}
}
 
Example #2
Source File: JobMasterPartitionReleaseTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Reports {@code finalExecutionState} for the task that was submitted to the task executor and
 * verifies that the future chosen by {@code taskExecutorCallSelector} completes with the
 * resource ID of the task executor.
 *
 * @param taskExecutorCallSelector picks the future on the {@link TestSetup} that is expected to complete
 * @param finalExecutionState execution state reported to the job master for the submitted task
 * @throws Exception if the setup fails or one of the awaited futures completes exceptionally
 */
private void testPartitionReleaseOrPromotionOnJobTermination(Function<TestSetup, CompletableFuture<ResourceID>> taskExecutorCallSelector, ExecutionState finalExecutionState) throws Exception {
	final CompletableFuture<TaskDeploymentDescriptor> submittedTaskFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setSubmitTaskConsumer((tdd, ignored) -> {
			submittedTaskFuture.complete(tdd);
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.createTestingTaskExecutorGateway();

	try (final TestSetup setup = new TestSetup(rpcService, testingFatalErrorHandler, taskExecutorGateway)) {
		final JobMasterGateway gateway = setup.getJobMasterGateway();

		// wait for the task submission, then move that execution to the target state;
		// this should trigger the job to finish
		final TaskDeploymentDescriptor submittedTask = submittedTaskFuture.get();
		final TaskExecutionState targetState = new TaskExecutionState(
			submittedTask.getJobId(),
			submittedTask.getExecutionAttemptId(),
			finalExecutionState);
		gateway.updateTaskExecutionState(targetState);

		final CompletableFuture<ResourceID> expectedCallFuture = taskExecutorCallSelector.apply(setup);
		assertThat(expectedCallFuture.get(), equalTo(setup.getTaskExecutorResourceID()));
	}
}
 
Example #3
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a task executor at the given slot manager using a slot report created for two
 * slots. Whenever the task executor receives a slot request, the {@code f1} field of the
 * request tuple (a {@link JobID}) is used to complete {@code firstRequestSlotFuture}.
 *
 * @param slotManager slot manager to register the task executor at
 * @param firstRequestSlotFuture future completed with the job ID of a received slot request
 */
private void registerTaskExecutorWithTwoSlots(SlotManagerImpl slotManager, CompletableFuture<JobID> firstRequestSlotFuture) {
	final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
		.setRequestSlotFunction(requestSlotParameters -> {
			firstRequestSlotFuture.complete(requestSlotParameters.f1);
			return CompletableFuture.completedFuture(Acknowledge.get());
		})
		.createTestingTaskExecutorGateway();

	final TaskExecutorConnection connection = createTaskExecutorConnection(gateway);
	final int numberOfSlots = 2;
	final SlotReport slotReport = createSlotReport(connection.getResourceID(), numberOfSlots);

	slotManager.registerTaskManager(connection, slotReport);
}
 
Example #4
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a slot request is retried if the request fails (for example because it timed
 * out) on the task manager side.
 *
 * <p>The task executor gateway hands out two futures in order: the first one is completed
 * exceptionally with a {@link SlotAllocationException}, the second one is acknowledged. The
 * slot used by the second attempt must end up allocated for the request; if a different slot
 * was used for the failed attempt, that slot must be free again.
 */
@Test
public void testTaskManagerSlotRequestTimeoutHandling() throws Exception {
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceActions resourceManagerActions = new TestingResourceActionsBuilder().build();

	final JobID jobId = new JobID();
	final AllocationID allocationId = new AllocationID();
	final ResourceProfile resourceProfile = ResourceProfile.fromResources(42.0, 1337);
	final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile, "foobar");
	final CompletableFuture<Acknowledge> slotRequestFuture1 = new CompletableFuture<>();
	final CompletableFuture<Acknowledge> slotRequestFuture2 = new CompletableFuture<>();
	final Iterator<CompletableFuture<Acknowledge>> slotRequestFutureIterator = Arrays.asList(slotRequestFuture1, slotRequestFuture2).iterator();
	// records the slot every requestSlot call was targeted at, in call order
	final ArrayBlockingQueue<SlotID> slotIds = new ArrayBlockingQueue<>(2);

	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setRequestSlotFunction(FunctionUtils.uncheckedFunction(
			requestSlotParameters -> {
				slotIds.put(requestSlotParameters.f0);
				return slotRequestFutureIterator.next();
			}))
		.createTestingTaskExecutorGateway();

	final ResourceID resourceId = ResourceID.generate();
	final TaskExecutorConnection taskManagerConnection = new TaskExecutorConnection(resourceId, taskExecutorGateway);

	final SlotID slotId1 = new SlotID(resourceId, 0);
	final SlotID slotId2 = new SlotID(resourceId, 1);
	final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile);
	final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile);
	final SlotReport slotReport = new SlotReport(Arrays.asList(slotStatus1, slotStatus2));

	try (SlotManagerImpl slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) {

		slotManager.registerTaskManager(taskManagerConnection, slotReport);

		slotManager.registerSlotRequest(slotRequest);

		final SlotID firstSlotId = slotIds.take();
		assertThat(slotIds, is(empty()));

		TaskManagerSlot failedSlot = slotManager.getSlot(firstSlotId);

		// let the first attempt fail --> this should trigger a second attempt
		slotRequestFuture1.completeExceptionally(new SlotAllocationException("Test exception."));

		// the second attempt succeeds
		slotRequestFuture2.complete(Acknowledge.get());

		final SlotID secondSlotId = slotIds.take();
		assertThat(slotIds, is(empty()));

		TaskManagerSlot slot = slotManager.getSlot(secondSlotId);

		// assertEquals reports the actual state on failure, unlike assertTrue(a == b)
		assertEquals(TaskManagerSlot.State.ALLOCATED, slot.getState());
		assertEquals(allocationId, slot.getAllocationId());

		// the retry may have picked the same slot again; only a different slot must be free
		if (!failedSlot.getSlotId().equals(slot.getSlotId())) {
			assertEquals(TaskManagerSlot.State.FREE, failedSlot.getState());
		}
	}
}
 
Example #5
Source File: SlotManagerTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link TaskExecutorConnection} that pairs a freshly generated {@link ResourceID}
 * with a {@link TestingTaskExecutorGateway} built from an unconfigured builder.
 *
 * @return the new task executor connection
 */
private TaskExecutorConnection createTaskExecutorConnection() {
	final TestingTaskExecutorGatewayBuilder gatewayBuilder = new TestingTaskExecutorGatewayBuilder();
	final TestingTaskExecutorGateway gateway = gatewayBuilder.createTestingTaskExecutorGateway();
	final ResourceID resourceId = ResourceID.generate();
	return new TaskExecutorConnection(resourceId, gateway);
}
 
Example #6
Source File: LegacySchedulerBatchSchedulingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a batch job can be executed with fewer slots than its parallelism.
 * See FLINK-13187 for more information.
 *
 * <p>The job graph is created for a parallelism of 5 while only a single slot is offered to
 * the slot pool. Each submitted task is finished by the test as soon as it shows up in the
 * submission queue, and the job is finally expected to reach {@link JobStatus#FINISHED}.
 */
@Test
public void testSchedulingOfJobWithFewerSlotsThanParallelism() throws Exception {
	final int parallelism = 5;
	final Time batchSlotTimeout = Time.milliseconds(5L);
	final JobGraph jobGraph = createJobGraph(parallelism);
	jobGraph.setScheduleMode(ScheduleMode.LAZY_FROM_SOURCES_WITH_BATCH_SLOT_REQUEST);

	try (final SlotPoolImpl slotPool = createSlotPool(mainThreadExecutor, batchSlotTimeout)) {
		// collects the execution attempt of every task submitted to the task executor
		final ArrayBlockingQueue<ExecutionAttemptID> submittedTasksQueue = new ArrayBlockingQueue<>(parallelism);
		TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
			.setSubmitTaskConsumer(
				(tdd, ignored) -> {
					submittedTasksQueue.offer(tdd.getExecutionAttemptId());
					return CompletableFuture.completedFuture(Acknowledge.get());
				})
			.createTestingTaskExecutorGateway();

		// register a single slot at the slot pool
		SlotPoolUtils.offerSlots(
			slotPool,
			mainThreadExecutor,
			Collections.singletonList(ResourceProfile.ANY),
			new RpcTaskManagerGateway(testingTaskExecutorGateway, JobMasterId.generate()));

		final LegacyScheduler legacyScheduler = createLegacyScheduler(jobGraph, slotPool, mainThreadExecutor, batchSlotTimeout);

		final GloballyTerminalJobStatusListener jobStatusListener = new GloballyTerminalJobStatusListener();
		legacyScheduler.registerJobStatusListener(jobStatusListener);
		startScheduling(legacyScheduler, mainThreadExecutor);

		// wait until the batch slot timeout has been reached
		Thread.sleep(batchSlotTimeout.toMilliseconds());

		final CompletableFuture<JobStatus> terminationFuture = jobStatusListener.getTerminationFuture();

		// expect exactly one task submission per parallel subtask
		for (int i = 0; i < parallelism; i++) {
			// take() blocks, so it runs asynchronously and is raced against the job's termination
			final CompletableFuture<ExecutionAttemptID> submittedTaskFuture = CompletableFuture.supplyAsync(CheckedSupplier.unchecked(submittedTasksQueue::take));

			// wait until one of them is completed
			CompletableFuture.anyOf(submittedTaskFuture, terminationFuture).join();

			if (submittedTaskFuture.isDone()) {
				finishExecution(submittedTaskFuture.get(), legacyScheduler, mainThreadExecutor);
			} else {
				// the job terminated although a task submission was still outstanding
				fail(String.format("Job reached a globally terminal state %s before all executions were finished.", terminationFuture.get()));
			}
		}

		assertThat(terminationFuture.get(), is(JobStatus.FINISHED));
	}
}
 
Example #7
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a task manager at the job master and expects the task executor gateway to be
 * told, within the testing timeout, to disconnect from the job manager of the test job. If a
 * heartbeat request reached the task executor by then, it must carry {@code jmResourceId}.
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
	final CompletableFuture<ResourceID> heartbeatSenderFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> jobManagerDisconnectFuture = new CompletableFuture<>();
	final UnresolvedTaskManagerLocation taskManagerLocation = new LocalUnresolvedTaskManagerLocation();
	final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((resourceId, ignored) -> heartbeatSenderFuture.complete(resourceId))
		.setDisconnectJobManagerConsumer((jobId, throwable) -> jobManagerDisconnectFuture.complete(jobId))
		.createTestingTaskExecutorGateway();

	final String gatewayAddress = gateway.getAddress();
	rpcService.registerGateway(gatewayAddress, gateway);

	final JobManagerSharedServices sharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(configuration, jobGraph, haServices, sharedServices);

	final CompletableFuture<Acknowledge> jobMasterStarted = jobMaster.start(jobMasterId);

	try {
		// the job master has to be running before anything can register with it
		final long timeoutMillis = testingTimeout.toMilliseconds();
		jobMasterStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// registering the task manager makes the job master monitor it as a heartbeat target
		// and schedule heartbeat requests at the configured interval
		final CompletableFuture<RegistrationResponse> registration = jobMasterGateway.registerTaskManager(
			gateway.getAddress(),
			taskManagerLocation,
			testingTimeout);

		// the registration must have gone through before the disconnect is awaited
		registration.get();

		final JobID disconnectedJobId = jobManagerDisconnectFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
		assertThat(disconnectedJobId, Matchers.equalTo(jobGraph.getJobID()));

		// a heartbeat may or may not have arrived; null stands for "none received"
		final ResourceID heartbeatSender = heartbeatSenderFuture.getNow(null);
		assertThat(heartbeatSender, anyOf(nullValue(), equalTo(jmResourceId)));
	} finally {
		jobManagerSharedServices_shutdown:
		{
			sharedServices.shutdown();
		}
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #8
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not
 * stale information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without
 * the fix for FLINK-12863.
 *
 * <p>Every heartbeat request received by the task executor gateway is checked: before the
 * {@code hasReceivedSlotOffers} latch has been triggered the report must be empty, afterwards
 * it must contain exactly one slot. The outcome is communicated through {@code assertionFuture}.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
	final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
	final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
	final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
	final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, allocatedSlotReport) -> {
			// an AssertionError is forwarded to the future instead of being thrown inside the
			// callback, so that the test method observes it via assertionFuture.get()
			try {
				if (hasReceivedSlotOffers.isTriggered()) {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
				} else {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
				}
			} catch (AssertionError e) {
				assertionFuture.completeExceptionally(e);
			}

			// once the test asks for termination, signal success; this has no effect if the
			// future has already been completed exceptionally
			if (terminateHeartbeatVerification.get()) {
				assertionFuture.complete(null);
			}
		})
		.createTestingTaskExecutorGateway();

	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	// NOTE(review): these shared services are only shut down in the finally block and are not
	// passed to the JobMasterBuilder below - confirm whether that is intended
	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	// NOTE(review): HeartbeatServices(5L, 1000L) presumably means a short heartbeat interval
	// and a longer timeout - confirm the argument order against the class
	final JobMaster jobMaster = new JobMasterBuilder(JobGraphTestUtils.createSingleVertexJobGraph(), rpcService)
		.withHeartbeatServices(new HeartbeatServices(5L, 1000L))
		.withSlotPoolFactory(new TestingSlotPoolFactory(hasReceivedSlotOffers))
		.createJobMaster();

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			unresolvedTaskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration
		registrationResponse.get();

		final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);

		final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(unresolvedTaskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);

		// the single offered slot must have been accepted
		assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));

		// from now on the next verified heartbeat completes the assertion future
		terminateHeartbeatVerification.set(true);

		// make sure that no assertion has been violated
		assertionFuture.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		jobManagerSharedServices.shutdown();
	}
}
 
Example #9
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor is released if all of its slots have been freed.
 *
 * <p>A single slot is registered at the job master and its allocation is then reported as
 * failed. The task executor gateway must first be asked to free that allocation and must
 * afterwards be told to disconnect from the job manager of the test job.
 */
@Test
public void testReleasingTaskExecutorIfNoMoreSlotsRegistered() throws Exception {
	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobGraph jobGraph = createSingleVertexJobWithRestartStrategy();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices,
		heartbeatServices);

	// completed by the gateway callbacks below with the job / allocation they were called for
	final CompletableFuture<JobID> disconnectTaskExecutorFuture = new CompletableFuture<>();
	final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setFreeSlotFunction(
			(allocationID, throwable) -> {
				freedSlotFuture.complete(allocationID);
				return CompletableFuture.completedFuture(Acknowledge.get());
			})
		.setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectTaskExecutorFuture.complete(jobID))
		.createTestingTaskExecutorGateway();

	try {
		jobMaster.start(jobMasterId).get();

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		final Collection<SlotOffer> slotOffers = registerSlotsAtJobMaster(1, jobMasterGateway, testingTaskExecutorGateway);

		// check that we accepted the offered slot
		assertThat(slotOffers, hasSize(1));
		final AllocationID allocationId = slotOffers.iterator().next().getAllocationId();

		// now fail the allocation and check that we close the connection to the TaskExecutor
		jobMasterGateway.notifyAllocationFailure(allocationId, new FlinkException("Fail allocation test exception"));

		// we should free the slot and then disconnect from the TaskExecutor because we no longer use slots from it
		assertThat(freedSlotFuture.get(), equalTo(allocationId));
		assertThat(disconnectTaskExecutorFuture.get(), equalTo(jobGraph.getJobID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #10
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the allocation of the only registered slot while the partition tracker reports that
 * it is still tracking partitions. The slot must be freed on the task executor, but the task
 * executor must not be told to disconnect from the job manager.
 */
@Test
public void testTaskExecutorNotReleasedOnFailedAllocationIfPartitionIsAllocated() throws Exception {
	final JobManagerSharedServices sharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobGraph jobGraph = JobGraphTestUtils.createSingleVertexJobGraph();

	final LocalUnresolvedTaskManagerLocation taskManagerLocation = new LocalUnresolvedTaskManagerLocation();

	// the tracker answers every "is tracking partitions for" query with the flag's value
	final AtomicBoolean trackingPartitions = new AtomicBoolean(true);
	final TestingJobMasterPartitionTracker tracker = new TestingJobMasterPartitionTracker();
	tracker.setIsTrackingPartitionsForFunction(ignored -> trackingPartitions.get());

	final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService)
		.withConfiguration(configuration)
		.withHighAvailabilityServices(haServices)
		.withJobManagerSharedServices(sharedServices)
		.withHeartbeatServices(heartbeatServices)
		.withPartitionTrackerFactory(ignored -> tracker)
		.createJobMaster();

	final CompletableFuture<JobID> disconnectFuture = new CompletableFuture<>();
	final CompletableFuture<AllocationID> freedAllocationFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
		.setFreeSlotFunction(
			(allocationID, throwable) -> {
				freedAllocationFuture.complete(allocationID);
				return CompletableFuture.completedFuture(Acknowledge.get());
			})
		.setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectFuture.complete(jobID))
		.createTestingTaskExecutorGateway();

	try {
		jobMaster.start(jobMasterId).get();

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		final Collection<SlotOffer> acceptedOffers = registerSlotsAtJobMaster(1, jobMasterGateway, gateway, taskManagerLocation);

		// exactly the one offered slot must have been accepted
		assertThat(acceptedOffers, hasSize(1));
		final SlotOffer acceptedOffer = acceptedOffers.iterator().next();
		final AllocationID allocationId = acceptedOffer.getAllocationId();

		jobMasterGateway.notifyAllocationFailure(allocationId, new FlinkException("Fail allocation test exception"));

		// the slot is freed, but the TaskExecutor stays connected because a partition is still tracked
		assertThat(freedAllocationFuture.get(), equalTo(allocationId));

		// issue another request and wait for its answer so that the processing of the
		// allocation failure is guaranteed to be complete before the negative check
		jobMasterGateway.requestJobStatus(Time.seconds(5)).get();
		assertThat(disconnectFuture.isDone(), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #11
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Starts a job master for a single-vertex job, registers one slot, reports the deployed task
 * as {@link ExecutionState#RUNNING}, invokes {@code jobReachedRunningState} and finally
 * expects the job to reach the globally terminal state {@link JobStatus#FAILED}.
 *
 * @param heartbeatServices heartbeat services handed to the job master
 * @param jobReachedRunningState callback invoked after the task was reported as running
 * @param heartbeatConsumerFunction creates the heartbeat consumer installed on the task
 *     executor gateway from the job master gateway and the task manager's resource ID
 * @throws Exception if the job master cannot be started or an awaited future fails
 */
private void runJobFailureWhenTaskExecutorTerminatesTest(
		HeartbeatServices heartbeatServices,
		BiConsumer<LocalUnresolvedTaskManagerLocation, JobMasterGateway> jobReachedRunningState,
		BiFunction<JobMasterGateway, ResourceID, BiConsumer<ResourceID, AllocatedSlotReport>> heartbeatConsumerFunction) throws Exception {
	final JobGraph jobGraph = JobGraphTestUtils.createSingleVertexJobGraph();
	final JobMasterBuilder.TestingOnCompletionActions completionActions = new JobMasterBuilder.TestingOnCompletionActions();
	final JobMaster jobMaster = createJobMaster(
		new Configuration(),
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices,
		completionActions);

	try {
		jobMaster.start(jobMasterId).get();

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		final LocalUnresolvedTaskManagerLocation taskManagerLocation = new LocalUnresolvedTaskManagerLocation();
		final CompletableFuture<ExecutionAttemptID> deployedAttemptFuture = new CompletableFuture<>();
		final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
			.setSubmitTaskConsumer((tdd, ignored) -> {
				deployedAttemptFuture.complete(tdd.getExecutionAttemptId());
				return CompletableFuture.completedFuture(Acknowledge.get());
			})
			.setHeartbeatJobManagerConsumer(heartbeatConsumerFunction.apply(jobMasterGateway, taskManagerLocation.getResourceID()))
			.createTestingTaskExecutorGateway();

		final Collection<SlotOffer> acceptedOffers = registerSlotsAtJobMaster(1, jobMasterGateway, gateway, taskManagerLocation);
		assertThat(acceptedOffers, hasSize(1));

		// wait for the task submission and report that execution attempt as running
		final ExecutionAttemptID deployedAttempt = deployedAttemptFuture.get();
		final TaskExecutionState runningState = new TaskExecutionState(jobGraph.getJobID(), deployedAttempt, ExecutionState.RUNNING);
		jobMasterGateway.updateTaskExecutionState(runningState).get();

		jobReachedRunningState.accept(taskManagerLocation, jobMasterGateway);

		final ArchivedExecutionGraph terminalGraph = completionActions.getJobReachedGloballyTerminalStateFuture().get();
		assertThat(terminalGraph.getState(), is(JobStatus.FAILED));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #12
Source File: SlotManagerTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the job manager is notified about failed allocations when a task manager is
 * failed/killed.
 *
 * <p>Two task managers are registered and slot requests of three jobs are filed. After each
 * task manager has been unregistered, the allocation failures recorded by the resource
 * actions are compared with the allocation IDs of the corresponding slot requests.
 */
@Test
public void testNotifyFailedAllocationWhenTaskManagerTerminated() throws Exception {

	// every allocation failure reported to the resource actions ends up in this queue
	final Queue<Tuple2<JobID, AllocationID>> reportedFailures = new ArrayDeque<>(5);

	final TestingResourceActions resourceActions = new TestingResourceActionsBuilder()
		.setNotifyAllocationFailureConsumer(
			(Tuple3<JobID, AllocationID, Exception> failure) ->
				reportedFailures.offer(Tuple2.of(failure.f0, failure.f1)))
		.build();

	try (final SlotManager slotManager = createSlotManager(
		ResourceManagerId.generate(),
		resourceActions)) {

		// job1 asks for two slots
		final JobID firstJobId = new JobID();
		final SlotRequest firstJobRequestA = createSlotRequest(firstJobId);
		final SlotRequest firstJobRequestB = createSlotRequest(firstJobId);
		slotManager.registerSlotRequest(firstJobRequestA);
		slotManager.registerSlotRequest(firstJobRequestB);

		// task-manager-1 comes with 2 slots
		final ResourceID firstTaskManagerId = ResourceID.generate();
		final TestingTaskExecutorGateway firstGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
		final TaskExecutorConnection firstConnection = new TaskExecutorConnection(firstTaskManagerId, firstGateway);
		final SlotReport firstSlotReport = createSlotReport(firstTaskManagerId, 2);

		// registering task-manager-1 triggers the slot allocation for job1
		slotManager.registerTaskManager(firstConnection, firstSlotReport);

		// job2 asks for two slots
		final JobID secondJobId = new JobID();
		final SlotRequest secondJobRequestA = createSlotRequest(secondJobId);
		final SlotRequest secondJobRequestB = createSlotRequest(secondJobId);
		slotManager.registerSlotRequest(secondJobRequestA);
		slotManager.registerSlotRequest(secondJobRequestB);

		// job3 asks for one slot
		final JobID thirdJobId = new JobID();
		final SlotRequest thirdJobRequest = createSlotRequest(thirdJobId);
		slotManager.registerSlotRequest(thirdJobRequest);

		// task-manager-2 comes with 3 slots
		final ResourceID secondTaskManagerId = ResourceID.generate();
		final TestingTaskExecutorGateway secondGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
		final TaskExecutorConnection secondConnection = new TaskExecutorConnection(secondTaskManagerId, secondGateway);
		final SlotReport secondSlotReport = createSlotReport(secondTaskManagerId, 3);

		// registering task-manager-2 triggers the slot allocation for job2 and job3
		slotManager.registerTaskManager(secondConnection, secondSlotReport);

		// losing task-manager-1 must fail both allocations of job1
		slotManager.unregisterTaskManager(firstConnection.getInstanceID());

		assertThat(reportedFailures, hasSize(2));

		// drain the queue so that it only holds the failures of the second task manager later on
		final Set<AllocationID> firstJobFailures = new HashSet<>(2);
		for (Tuple2<JobID, AllocationID> failure = reportedFailures.poll(); failure != null; failure = reportedFailures.poll()) {
			assertThat(failure.f0, equalTo(firstJobId));
			firstJobFailures.add(failure.f1);
		}

		assertThat(
			firstJobFailures,
			containsInAnyOrder(firstJobRequestA.getAllocationId(), firstJobRequestB.getAllocationId()));

		// losing task-manager-2 must fail the allocations of job2 and job3
		slotManager.unregisterTaskManager(secondConnection.getInstanceID());

		assertThat(reportedFailures, hasSize(3));

		final Map<JobID, List<Tuple2<JobID, AllocationID>>> failuresByJob = reportedFailures
			.stream()
			.collect(Collectors.groupingBy(tuple -> tuple.f0));

		assertThat(failuresByJob.entrySet(), hasSize(2));

		final Set<AllocationID> secondJobFailures = extractFailedAllocationsForJob(secondJobId, failuresByJob);
		final Set<AllocationID> thirdJobFailures = extractFailedAllocationsForJob(thirdJobId, failuresByJob);

		assertThat(
			secondJobFailures,
			containsInAnyOrder(secondJobRequestA.getAllocationId(), secondJobRequestB.getAllocationId()));
		assertThat(thirdJobFailures, containsInAnyOrder(thirdJobRequest.getAllocationId()));
	}
}
 
Example #13
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that pending slot requests are rejected if a slot report with a different allocation
 * is received.
 *
 * <p>The first {@code requestSlot} call is answered with a future that is never completed by
 * this test, so the request stays pending. A new slot report then marks the requested slot as
 * allocated for another job/allocation, after which a second {@code requestSlot} call is
 * expected to target the remaining free slot.
 */
@Test
public void testSlotReportWhileActiveSlotRequest() throws Exception {
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceActions resourceManagerActions = new TestingResourceActionsBuilder().build();

	final JobID jobId = new JobID();
	final AllocationID allocationId = new AllocationID();
	final ResourceProfile resourceProfile = ResourceProfile.fromResources(42.0, 1337);
	final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile, "foobar");
	final CompletableFuture<Acknowledge> slotRequestFuture1 = new CompletableFuture<>();

	// answers for the requestSlot calls in call order: a pending future, then an acknowledgement
	final Iterator<CompletableFuture<Acknowledge>> slotRequestFutureIterator = Arrays.asList(
		slotRequestFuture1,
		CompletableFuture.completedFuture(Acknowledge.get())).iterator();
	// records the slot every requestSlot call was targeted at
	final ArrayBlockingQueue<SlotID> slotIds = new ArrayBlockingQueue<>(2);

	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setRequestSlotFunction(FunctionUtils.uncheckedFunction(
			requestSlotParameters -> {
				slotIds.put(requestSlotParameters.f0);
				return slotRequestFutureIterator.next();
			}))
		.createTestingTaskExecutorGateway();

	final ResourceID resourceId = ResourceID.generate();
	final TaskExecutorConnection taskManagerConnection = new TaskExecutorConnection(resourceId, taskExecutorGateway);

	final SlotID slotId1 = new SlotID(resourceId, 0);
	final SlotID slotId2 = new SlotID(resourceId, 1);
	final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile);
	final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile);
	final SlotReport slotReport = new SlotReport(Arrays.asList(slotStatus1, slotStatus2));

	// the same executor is used to build and start the slot manager and to run all
	// slot manager calls issued by this test
	final ScheduledExecutor mainThreadExecutor = TestingUtils.defaultScheduledExecutor();

	final SlotManagerImpl slotManager = createSlotManagerBuilder()
		.setScheduledExecutor(mainThreadExecutor)
		.build();

	try {

		slotManager.start(resourceManagerId, mainThreadExecutor, resourceManagerActions);

		// register the task manager on the main thread executor, then file the slot request
		CompletableFuture<Void> registrationFuture = CompletableFuture.supplyAsync(
			() -> {
				slotManager.registerTaskManager(taskManagerConnection, slotReport);

				return null;
			},
			mainThreadExecutor)
		.thenAccept(
			(Object value) -> {
				try {
					slotManager.registerSlotRequest(slotRequest);
				} catch (ResourceManagerException e) {
					throw new RuntimeException("Could not register slots.", e);
				}
			});

		// check that no exception has been thrown
		registrationFuture.get();

		// the slot the first request went to; the other of the two slots is still free
		final SlotID requestedSlotId = slotIds.take();
		final SlotID freeSlotId = requestedSlotId.equals(slotId1) ? slotId2 : slotId1;

		// report the requested slot as allocated for a different job and allocation
		final SlotStatus newSlotStatus1 = new SlotStatus(requestedSlotId, resourceProfile, new JobID(), new AllocationID());
		final SlotStatus newSlotStatus2 = new SlotStatus(freeSlotId, resourceProfile);
		final SlotReport newSlotReport = new SlotReport(Arrays.asList(newSlotStatus1, newSlotStatus2));

		CompletableFuture<Boolean> reportSlotStatusFuture = CompletableFuture.supplyAsync(
			// this should update the slot with the pending slot request triggering the reassignment of it
			() -> slotManager.reportSlotStatus(taskManagerConnection.getInstanceID(), newSlotReport),
			mainThreadExecutor);

		assertTrue(reportSlotStatusFuture.get());

		// the reassigned request must have been sent to the remaining free slot
		final SlotID requestedSlotId2 = slotIds.take();

		assertEquals(freeSlotId, requestedSlotId2);
	} finally {
		// the close is only scheduled on the main thread executor; its completion is not awaited
		CompletableFuture.runAsync(
			ThrowingRunnable.unchecked(slotManager::close),
			mainThreadExecutor);
	}
}
 
Example #14
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that free slots which are reported as allocated won't be considered for fulfilling
 * other pending slot requests.
 *
 * <p>A single slot is first registered as free and then reported as allocated. A slot request
 * registered afterwards must stay unassigned.
 *
 * <p>See: FLINK-8505
 */
@Test
public void testReportAllocatedSlot() throws Exception {
	final ResourceID taskManagerId = ResourceID.generate();
	final ResourceActions resourceActions = new TestingResourceActionsBuilder().build();
	final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
	final TaskExecutorConnection connection = new TaskExecutorConnection(taskManagerId, gateway);

	try (final SlotManagerImpl slotManager = createSlotManager(ResourceManagerId.generate(), resourceActions)) {

		// step 1: the task manager registers with a single free slot
		final SlotID slotId = new SlotID(taskManagerId, 0);
		final SlotStatus freeStatus = new SlotStatus(slotId, ResourceProfile.ANY);
		final SlotReport freeReport = new SlotReport(freeStatus);

		slotManager.registerTaskManager(connection, freeReport);

		assertThat(slotManager.getNumberRegisteredSlots(), is(equalTo(1)));

		// step 2: the very same slot is now reported as allocated
		final SlotStatus allocatedStatus = new SlotStatus(slotId, ResourceProfile.ANY, new JobID(), new AllocationID());
		final SlotReport allocatedReport = new SlotReport(allocatedStatus);

		slotManager.reportSlotStatus(connection.getInstanceID(), allocatedReport);

		// step 3: a new slot request must not be fulfilled by that slot
		final AllocationID allocationId = new AllocationID();
		final SlotRequest slotRequest = new SlotRequest(new JobID(), allocationId, ResourceProfile.UNKNOWN, "foobar");

		// registering the request used to trigger an IllegalStateException
		slotManager.registerSlotRequest(slotRequest);

		final boolean requestAssigned = slotManager.getSlotRequest(allocationId).isAssigned();
		assertThat(requestAssigned, is(false));
	}
}
 
Example #15
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call
 * fails.
 *
 * <p>The testing gateway records the arguments of every requestSlot call in
 * {@code requestSlotQueue} and answers the call with the next future taken from
 * {@code responseQueue}. This lets the test decide how each individual request completes.
 */
@Test
public void testSlotRequestFailure() throws Exception {
	try (final SlotManagerImpl slotManager = createSlotManager(ResourceManagerId.generate(),
		new TestingResourceActionsBuilder().build())) {

		// the request is registered before any task executor is known to the slot manager
		final SlotRequest slotRequest = new SlotRequest(new JobID(), new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(slotRequest);

		final BlockingQueue<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1);
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1);

		final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
			.setRequestSlotFunction(requestSlotArguments -> {
				requestSlotQueue.offer(requestSlotArguments);
				try {
					return responseQueue.take();
				} catch (InterruptedException e) {
					// restore the interrupt status so that the calling thread can still observe the interruption
					Thread.currentThread().interrupt();
					return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.", e));
				}
			})
			.createTestingTaskExecutorGateway();

		final ResourceID taskExecutorResourceId = ResourceID.generate();
		final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
		final SlotReport slotReport = new SlotReport(createEmptySlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.ANY));

		// the response must be queued before the registration, because the gateway blocks on responseQueue.take()
		final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(firstManualSlotRequestResponse);

		slotManager.registerTaskManager(taskExecutionConnection, slotReport);

		final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> firstRequest = requestSlotQueue.take();

		// queue the answer for the retry before failing the first attempt
		final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(secondManualSlotRequestResponse);

		// fail first request
		firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception"));

		final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> secondRequest = requestSlotQueue.take();

		// the retry must target the same slot (f0) with the same allocation id (f2)
		assertThat(secondRequest.f2, equalTo(firstRequest.f2));
		assertThat(secondRequest.f0, equalTo(firstRequest.f0));

		secondManualSlotRequestResponse.complete(Acknowledge.get());

		final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0);
		assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED));
		assertThat(slot.getAllocationId(), equalTo(secondRequest.f2));
	}
}
 
Example #16
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that pending request is removed if task executor reports a slot with its allocation id.
 *
 * <p>The first requestSlot call is failed with a timeout, the retry (now carrying the second
 * request's allocation id) is failed with a {@code SlotOccupiedException} that names the first
 * request's allocation id. The slot is then expected to be allocated for the first request.
 */
@Test
public void testSlotRequestRemovedIfTMReportAllocation() throws Exception {
	try (final SlotManagerImpl slotManager = createSlotManager(ResourceManagerId.generate(),
			new TestingResourceActionsBuilder().build())) {

		final JobID jobID = new JobID();
		final SlotRequest slotRequest1 = new SlotRequest(jobID, new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(slotRequest1);

		final BlockingQueue<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1);
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1);

		// records every requestSlot call and answers it with the next manually provided future
		final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
				.setRequestSlotFunction(requestSlotArguments -> {
					requestSlotQueue.offer(requestSlotArguments);
					try {
						return responseQueue.take();
					} catch (InterruptedException e) {
						// restore the interrupt status so that the calling thread can still observe the interruption
						Thread.currentThread().interrupt();
						return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.", e));
					}
				})
				.createTestingTaskExecutorGateway();

		final ResourceID taskExecutorResourceId = ResourceID.generate();
		final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
		final SlotReport slotReport = new SlotReport(createEmptySlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.ANY));

		// the response must be queued before the registration, because the gateway blocks on responseQueue.take()
		final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(firstManualSlotRequestResponse);

		slotManager.registerTaskManager(taskExecutionConnection, slotReport);

		final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> firstRequest = requestSlotQueue.take();

		final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(secondManualSlotRequestResponse);

		final SlotRequest slotRequest2 = new SlotRequest(jobID, new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(slotRequest2);

		// fail first request
		firstManualSlotRequestResponse.completeExceptionally(new TimeoutException("Test exception to fail first allocation"));

		final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> secondRequest = requestSlotQueue.take();

		// fail second request, reporting the slot as occupied by the first request's allocation
		secondManualSlotRequestResponse.completeExceptionally(new SlotOccupiedException("Test exception", slotRequest1.getAllocationId(), jobID));

		assertThat(firstRequest.f2, equalTo(slotRequest1.getAllocationId()));
		assertThat(secondRequest.f2, equalTo(slotRequest2.getAllocationId()));
		assertThat(secondRequest.f0, equalTo(firstRequest.f0));

		// NOTE: the second response is already completed exceptionally above; a further
		// complete(...) call on it would be a no-op, hence none is issued here.

		final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0);
		assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED));
		assertThat(slot.getAllocationId(), equalTo(firstRequest.f2));

		assertThat(slotManager.getNumberRegisteredSlots(), is(1));
	}
}
 
Example #17
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that allocation failures are reported through the resource actions when the
 * task manager holding the allocated slots is unregistered.
 */
@Test
public void testNotifyFailedAllocationWhenTaskManagerTerminated() throws Exception {

	// collects (job id, allocation id) of every allocation failure notification
	final Queue<Tuple2<JobID, AllocationID>> reportedFailures = new ArrayDeque<>(5);

	final TestingResourceActions actions = new TestingResourceActionsBuilder()
		.setNotifyAllocationFailureConsumer(
			(Tuple3<JobID, AllocationID, Exception> notification) ->
				reportedFailures.offer(Tuple2.of(notification.f0, notification.f1)))
		.build();

	try (final SlotManager slotManager = createSlotManager(ResourceManagerId.generate(), actions)) {

		// the first job asks for two slots
		final JobID firstJob = new JobID();
		final SlotRequest firstJobRequestA = createSlotRequest(firstJob);
		final SlotRequest firstJobRequestB = createSlotRequest(firstJob);
		slotManager.registerSlotRequest(firstJobRequestA);
		slotManager.registerSlotRequest(firstJobRequestB);

		// the first task manager brings 2 slots; registering it triggers the allocation for the first job
		final ResourceID firstTmId = ResourceID.generate();
		final TaskExecutorConnection firstTm = new TaskExecutorConnection(
			firstTmId,
			new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());
		slotManager.registerTaskManager(firstTm, createSlotReport(firstTmId, 2));

		// the second job asks for two slots
		final JobID secondJob = new JobID();
		final SlotRequest secondJobRequestA = createSlotRequest(secondJob);
		final SlotRequest secondJobRequestB = createSlotRequest(secondJob);
		slotManager.registerSlotRequest(secondJobRequestA);
		slotManager.registerSlotRequest(secondJobRequestB);

		// the third job asks for a single slot
		final JobID thirdJob = new JobID();
		final SlotRequest thirdJobRequest = createSlotRequest(thirdJob);
		slotManager.registerSlotRequest(thirdJobRequest);

		// the second task manager brings 3 slots; registering it triggers the allocation for the second and third job
		final ResourceID secondTmId = ResourceID.generate();
		final TaskExecutorConnection secondTm = new TaskExecutorConnection(
			secondTmId,
			new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());
		slotManager.registerTaskManager(secondTm, createSlotReport(secondTmId, 3));

		// losing the first task manager must fail exactly the two allocations of the first job
		slotManager.unregisterTaskManager(firstTm.getInstanceID(), TEST_EXCEPTION);

		assertThat(reportedFailures, hasSize(2));

		final Set<AllocationID> firstJobFailures = new HashSet<>(2);
		while (!reportedFailures.isEmpty()) {
			final Tuple2<JobID, AllocationID> drained = reportedFailures.poll();
			assertThat(drained.f0, equalTo(firstJob));
			firstJobFailures.add(drained.f1);
		}

		assertThat(firstJobFailures, containsInAnyOrder(firstJobRequestA.getAllocationId(), firstJobRequestB.getAllocationId()));

		// losing the second task manager must fail the allocations of the second and third job
		slotManager.unregisterTaskManager(secondTm.getInstanceID(), TEST_EXCEPTION);

		assertThat(reportedFailures, hasSize(3));

		final Map<JobID, List<Tuple2<JobID, AllocationID>>> failuresByJob = reportedFailures
			.stream()
			.collect(Collectors.groupingBy(entry -> entry.f0));

		assertThat(failuresByJob.entrySet(), hasSize(2));

		final Set<AllocationID> secondJobFailures = extractFailedAllocationsForJob(secondJob, failuresByJob);
		final Set<AllocationID> thirdJobFailures = extractFailedAllocationsForJob(thirdJob, failuresByJob);

		assertThat(secondJobFailures, containsInAnyOrder(secondJobRequestA.getAllocationId(), secondJobRequestB.getAllocationId()));
		assertThat(thirdJobFailures, containsInAnyOrder(thirdJobRequest.getAllocationId()));
	}
}
 
Example #18
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a task executor connection that is backed by a testing gateway built with the
 * builder's default settings.
 */
private TaskExecutorConnection createTaskExecutorConnection() {
	final TestingTaskExecutorGateway defaultGateway =
		new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();

	return createTaskExecutorConnection(defaultGateway);
}
 
Example #19
Source File: DefaultSchedulerBatchSchedulingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a batch job can be executed with fewer slots than its parallelism.
 * See FLINK-13187 for more information.
 *
 * <p>Only a single slot is offered to the slot pool while the job graph is created with a
 * parallelism of 5. Every submitted execution is finished as soon as the test observes it,
 * and the job is expected to end up in {@code JobStatus.FINISHED}.
 */
@Test
public void testSchedulingOfJobWithFewerSlotsThanParallelism() throws Exception {
	final int parallelism = 5;
	final Time batchSlotTimeout = Time.milliseconds(5L);
	final JobGraph jobGraph = createJobGraph(parallelism);
	jobGraph.setScheduleMode(ScheduleMode.LAZY_FROM_SOURCES_WITH_BATCH_SLOT_REQUEST);

	try (final SlotPoolImpl slotPool = createSlotPool(mainThreadExecutor, batchSlotTimeout)) {
		// receives the attempt id of every task that is submitted to the testing gateway
		final ArrayBlockingQueue<ExecutionAttemptID> submittedTasksQueue = new ArrayBlockingQueue<>(parallelism);
		TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
			.setSubmitTaskConsumer(
				(tdd, ignored) -> {
					submittedTasksQueue.offer(tdd.getExecutionAttemptId());
					return CompletableFuture.completedFuture(Acknowledge.get());
				})
			.createTestingTaskExecutorGateway();

		// register a single slot at the slot pool
		SlotPoolUtils.offerSlots(
			slotPool,
			mainThreadExecutor,
			Collections.singletonList(ResourceProfile.ANY),
			new RpcTaskManagerGateway(testingTaskExecutorGateway, JobMasterId.generate()));

		final SlotProvider slotProvider = createSlotProvider(slotPool, mainThreadExecutor);
		final SchedulerNG scheduler = createScheduler(jobGraph, slotProvider, batchSlotTimeout);

		final GloballyTerminalJobStatusListener jobStatusListener = new GloballyTerminalJobStatusListener();
		scheduler.registerJobStatusListener(jobStatusListener);
		startScheduling(scheduler, mainThreadExecutor);

		// wait until the batch slot timeout has been reached
		Thread.sleep(batchSlotTimeout.toMilliseconds());

		final CompletableFuture<JobStatus> terminationFuture = jobStatusListener.getTerminationFuture();

		// one iteration per expected execution: wait for its submission, then finish it
		for (int i = 0; i < parallelism; i++) {
			// the blocking take() runs asynchronously so that the wait below can also be ended by the job terminating
			final CompletableFuture<ExecutionAttemptID> submittedTaskFuture = CompletableFuture.supplyAsync(CheckedSupplier.unchecked(submittedTasksQueue::take));

			// wait until one of them is completed
			CompletableFuture.anyOf(submittedTaskFuture, terminationFuture).join();

			if (submittedTaskFuture.isDone()) {
				finishExecution(submittedTaskFuture.get(), scheduler, mainThreadExecutor);
			} else {
				// the job terminated although not all executions have been submitted yet
				fail(String.format("Job reached a globally terminal state %s before all executions were finished.", terminationFuture.get()));
			}
		}

		assertThat(terminationFuture.get(), is(JobStatus.FINISHED));
	}
}
 
Example #20
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not
 * stale information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without
 * the fix for FLINK-12863.
 *
 * <p>Every heartbeat received by the testing task executor gateway is checked against the
 * state of the {@code hasReceivedSlotOffers} latch; the outcome of these checks is handed
 * back to the test through {@code assertionFuture}.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
	final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
	// handed to the TestingSlotPoolFactory below; presumably triggered once the slot pool received the offers -- TODO confirm
	final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, allocatedSlotReport) -> {
			try {
				// once the latch is triggered the report must contain exactly one slot, before that it must be empty
				if (hasReceivedSlotOffers.isTriggered()) {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
				} else {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
				}
			} catch (AssertionError e) {
				// forward the failure to the test body, which waits on assertionFuture
				assertionFuture.completeExceptionally(e);
			}

			// no effect if the future has already been completed exceptionally above
			if (terminateHeartbeatVerification.get()) {
				assertionFuture.complete(null);
			}
		})
		.createTestingTaskExecutorGateway();

	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = new JobMasterBuilder()
		.withJobGraph(createSingleVertexJobGraph())
		// NOTE(review): presumably (heartbeat interval, heartbeat timeout) in milliseconds -- confirm against HeartbeatServices
		.withHeartbeatServices(new HeartbeatServices(5L, 1000L))
		.withSlotPoolFactory(new TestingSlotPoolFactory(hasReceivedSlotOffers))
		.createJobMaster();

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			taskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration
		registrationResponse.get();

		final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.UNKNOWN);

		final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(taskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);

		assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));

		// from now on the next heartbeat completes assertionFuture unless a check has failed
		terminateHeartbeatVerification.set(true);

		// make sure that no assertion has been violated
		assertionFuture.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		jobManagerSharedServices.shutdown();
	}
}
 
Example #21
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the {@link AllocatedSlotReport} contains up to date information and not
 * stale information about the allocated slots on the {@link JobMaster}.
 *
 * <p>This is a probabilistic test case which only fails if executed repeatedly without
 * the fix for FLINK-12863.
 *
 * <p>Every heartbeat received by the testing task executor gateway is checked against the
 * state of the {@code hasReceivedSlotOffers} latch; the outcome of these checks is handed
 * back to the test through {@code assertionFuture}.
 */
@Test
public void testAllocatedSlotReportDoesNotContainStaleInformation() throws Exception {
	final CompletableFuture<Void> assertionFuture = new CompletableFuture<>();
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final AtomicBoolean terminateHeartbeatVerification = new AtomicBoolean(false);
	// handed to the TestingSlotPoolFactory below; presumably triggered once the slot pool received the offers -- TODO confirm
	final OneShotLatch hasReceivedSlotOffers = new OneShotLatch();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, allocatedSlotReport) -> {
			try {
				// once the latch is triggered the report must contain exactly one slot, before that it must be empty
				if (hasReceivedSlotOffers.isTriggered()) {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), hasSize(1));
				} else {
					assertThat(allocatedSlotReport.getAllocatedSlotInfos(), empty());
				}
			} catch (AssertionError e) {
				// forward the failure to the test body, which waits on assertionFuture
				assertionFuture.completeExceptionally(e);
			}

			// no effect if the future has already been completed exceptionally above
			if (terminateHeartbeatVerification.get()) {
				assertionFuture.complete(null);
			}
		})
		.createTestingTaskExecutorGateway();

	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = new JobMasterBuilder()
		.withJobGraph(createSingleVertexJobGraph())
		// NOTE(review): presumably (heartbeat interval, heartbeat timeout) in milliseconds -- confirm against HeartbeatServices
		.withHeartbeatServices(new HeartbeatServices(5L, 1000L))
		.withSlotPoolFactory(new TestingSlotPoolFactory(hasReceivedSlotOffers))
		.createJobMaster();

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			taskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration
		registrationResponse.get();

		final SlotOffer slotOffer = new SlotOffer(new AllocationID(), 0, ResourceProfile.UNKNOWN);

		final CompletableFuture<Collection<SlotOffer>> slotOfferFuture = jobMasterGateway.offerSlots(taskManagerLocation.getResourceID(), Collections.singleton(slotOffer), testingTimeout);

		assertThat(slotOfferFuture.get(), containsInAnyOrder(slotOffer));

		// from now on the next heartbeat completes assertionFuture unless a check has failed
		terminateHeartbeatVerification.set(true);

		// make sure that no assertion has been violated
		assertionFuture.get();
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		jobManagerSharedServices.shutdown();
	}
}
 
Example #22
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskExecutor is released if all of its slots have been freed.
 *
 * <p>A single slot is registered at the JobMaster and its allocation is then failed. The
 * testing gateway is expected to see the slot being freed and the job manager disconnecting.
 */
@Test
public void testReleasingTaskExecutorIfNoMoreSlotsRegistered() throws Exception {
	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobGraph jobGraph = createSingleVertexJobWithRestartStrategy();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices,
		heartbeatServices);

	// completed by the testing gateway when the corresponding call arrives
	final CompletableFuture<JobID> disconnectTaskExecutorFuture = new CompletableFuture<>();
	final CompletableFuture<AllocationID> freedSlotFuture = new CompletableFuture<>();
	final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setFreeSlotFunction(
			(allocationID, throwable) -> {
				freedSlotFuture.complete(allocationID);
				return CompletableFuture.completedFuture(Acknowledge.get());
			})
		.setDisconnectJobManagerConsumer((jobID, throwable) -> disconnectTaskExecutorFuture.complete(jobID))
		.createTestingTaskExecutorGateway();

	try {
		jobMaster.start(jobMasterId).get();

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		final Collection<SlotOffer> slotOffers = registerSlotsAtJobMaster(1, jobMasterGateway, testingTaskExecutorGateway);

		// check that we accepted the offered slot
		assertThat(slotOffers, hasSize(1));
		final AllocationID allocationId = slotOffers.iterator().next().getAllocationId();

		// now fail the allocation and check that we close the connection to the TaskExecutor
		jobMasterGateway.notifyAllocationFailure(allocationId, new FlinkException("Fail alloction test exception"));

		// we should free the slot and then disconnect from the TaskExecutor because we no longer use any of its slots
		assertThat(freedSlotFuture.get(), equalTo(allocationId));
		assertThat(disconnectTaskExecutorFuture.get(), equalTo(jobGraph.getJobID()));
	} finally {
		try {
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		} finally {
			// release the shared services created for this test, even if the endpoint termination failed
			jobManagerSharedServices.shutdown();
		}
	}
}
 
Example #23
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Shared driver for tests which expect the job to fail after its task executor went away.
 *
 * <p>Starts a JobMaster for a single vertex job, registers one slot, reports the deployed
 * task as RUNNING, invokes {@code jobReachedRunningState} and finally asserts that the job
 * reached the FAILED state.
 *
 * @param heartbeatServices heartbeat services handed to the JobMaster
 * @param jobReachedRunningState callback which is run once the task has been reported as running
 * @param heartbeatConsumerFunction produces the heartbeat consumer which is installed in the testing task executor gateway
 */
private void runJobFailureWhenTaskExecutorTerminatesTest(
		HeartbeatServices heartbeatServices,
		BiConsumer<LocalTaskManagerLocation, JobMasterGateway> jobReachedRunningState,
		BiFunction<JobMasterGateway, ResourceID, BiConsumer<ResourceID, AllocatedSlotReport>> heartbeatConsumerFunction) throws Exception {
	final JobGraph singleVertexJob = createSingleVertexJobGraph();
	final TestingOnCompletionActions completionActions = new TestingOnCompletionActions();
	final JobMaster jobMaster = createJobMaster(
		new Configuration(),
		singleVertexJob,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build(),
		heartbeatServices,
		completionActions);

	try {
		jobMaster.start(jobMasterId).get();

		final JobMasterGateway gateway = jobMaster.getSelfGateway(JobMasterGateway.class);
		final LocalTaskManagerLocation tmLocation = new LocalTaskManagerLocation();

		// completed with the attempt id of the first task which gets submitted
		final CompletableFuture<ExecutionAttemptID> deployedAttempt = new CompletableFuture<>();

		final BiConsumer<ResourceID, AllocatedSlotReport> heartbeatConsumer =
			heartbeatConsumerFunction.apply(gateway, tmLocation.getResourceID());

		final TestingTaskExecutorGateway taskExecutor = new TestingTaskExecutorGatewayBuilder()
			.setSubmitTaskConsumer((tdd, ignored) -> {
				deployedAttempt.complete(tdd.getExecutionAttemptId());
				return CompletableFuture.completedFuture(Acknowledge.get());
			})
			.setHeartbeatJobManagerConsumer(heartbeatConsumer)
			.createTestingTaskExecutorGateway();

		final Collection<SlotOffer> acceptedOffers = registerSlotsAtJobMaster(1, gateway, taskExecutor, tmLocation);
		assertThat(acceptedOffers, hasSize(1));

		// report the deployed task as running
		final ExecutionAttemptID attemptId = deployedAttempt.get();
		final TaskExecutionState runningState = new TaskExecutionState(singleVertexJob.getJobID(), attemptId, ExecutionState.RUNNING);
		gateway.updateTaskExecutionState(runningState).get();

		jobReachedRunningState.accept(tmLocation, gateway);

		final ArchivedExecutionGraph terminalGraph = completionActions.getJobReachedGloballyTerminalStateFuture().get();

		assertThat(terminalGraph.getState(), is(JobStatus.FAILED));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
 
Example #24
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a slot which was registered as free, but is subsequently reported as
 * allocated by its task executor, is not used to fulfil a later pending slot request.
 *
 * <p>See: FLINK-8505
 */
@Test
public void testReportAllocatedSlot() throws Exception {
	final ResourceID tmResourceId = ResourceID.generate();
	final ResourceActions actions = new TestingResourceActionsBuilder().build();
	final TaskExecutorConnection connection = new TaskExecutorConnection(
		tmResourceId,
		new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway());

	try (final SlotManager slotManager = SlotManagerBuilder.newBuilder().build()) {

		slotManager.start(ResourceManagerId.generate(), Executors.directExecutor(), actions);

		final SlotID reportedSlot = new SlotID(tmResourceId, 0);

		// the task manager registers with its single slot being free
		final SlotStatus freeStatus = new SlotStatus(reportedSlot, ResourceProfile.UNKNOWN);
		slotManager.registerTaskManager(connection, new SlotReport(freeStatus));

		assertThat(slotManager.getNumberRegisteredSlots(), is(equalTo(1)));

		// the very same slot is now reported as being occupied
		final SlotStatus occupiedStatus = new SlotStatus(reportedSlot, ResourceProfile.UNKNOWN, new JobID(), new AllocationID());
		slotManager.reportSlotStatus(connection.getInstanceID(), new SlotReport(occupiedStatus));

		// a new request must stay unassigned; registering it used to fail with an IllegalStateException
		final AllocationID pendingAllocationId = new AllocationID();
		final SlotRequest pendingRequest = new SlotRequest(new JobID(), pendingAllocationId, ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(pendingRequest);

		assertThat(slotManager.getSlotRequest(pendingAllocationId).isAssigned(), is(false));
	}
}
 
Example #25
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call
 * fails.
 *
 * <p>The testing gateway records the arguments of every requestSlot call in
 * {@code requestSlotQueue} and answers the call with the next future taken from
 * {@code responseQueue}. This lets the test decide how each individual request completes.
 */
@Test
public void testSlotRequestFailure() throws Exception {
	try (final SlotManager slotManager = createSlotManager(ResourceManagerId.generate(),
		new TestingResourceActionsBuilder().build())) {

		// the request is registered before any task executor is known to the slot manager
		final SlotRequest slotRequest = new SlotRequest(new JobID(), new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(slotRequest);

		final BlockingQueue<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1);
		final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1);

		final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
			.setRequestSlotFunction(requestSlotArguments -> {
				requestSlotQueue.offer(requestSlotArguments);
				try {
					return responseQueue.take();
				} catch (InterruptedException e) {
					// restore the interrupt status so that the calling thread can still observe the interruption
					Thread.currentThread().interrupt();
					return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.", e));
				}
			})
			.createTestingTaskExecutorGateway();

		final ResourceID taskExecutorResourceId = ResourceID.generate();
		final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway);
		final SlotReport slotReport = new SlotReport(new SlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.UNKNOWN));

		// the response must be queued before the registration, because the gateway blocks on responseQueue.take()
		final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(firstManualSlotRequestResponse);

		slotManager.registerTaskManager(taskExecutionConnection, slotReport);

		final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> firstRequest = requestSlotQueue.take();

		// queue the answer for the retry before failing the first attempt
		final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>();
		responseQueue.offer(secondManualSlotRequestResponse);

		// fail first request
		firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception"));

		final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> secondRequest = requestSlotQueue.take();

		// the retry must target the same slot (f0) with the same allocation id (f2)
		assertThat(secondRequest.f2, equalTo(firstRequest.f2));
		assertThat(secondRequest.f0, equalTo(firstRequest.f0));

		secondManualSlotRequestResponse.complete(Acknowledge.get());

		final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0);
		assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED));
		assertThat(slot.getAllocationId(), equalTo(secondRequest.f2));
	}
}
 
Example #26
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a pending slot request is removed when the task executor reports that the slot
 * is already occupied by that request's allocation id.
 *
 * <p>Scenario: the first slot request is sent to the task executor and fails with a timeout;
 * the follow-up request for the same slot (carrying the second allocation id) is then rejected
 * with a {@code SlotOccupiedException} naming the first allocation id. Afterwards the slot
 * must be allocated under the first allocation id.
 */
@Test
public void testSlotRequestRemovedIfTMReportAllocation() throws Exception {
	try (final SlotManager slotManager = createSlotManager(ResourceManagerId.generate(),
			new TestingResourceActionsBuilder().build())) {

		final JobID jobId = new JobID();
		final SlotRequest firstSlotRequest = new SlotRequest(jobId, new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(firstSlotRequest);

		// Single-element queues: one records each requestSlot call seen by the gateway,
		// the other hands out the future that the gateway returns for that call.
		final BlockingQueue<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>> observedRequests = new ArrayBlockingQueue<>(1);
		final BlockingQueue<CompletableFuture<Acknowledge>> preparedResponses = new ArrayBlockingQueue<>(1);

		final TestingTaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
				.setRequestSlotFunction(request -> {
					observedRequests.offer(request);
					try {
						return preparedResponses.take();
					} catch (InterruptedException ignored) {
						return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted."));
					}
				})
				.createTestingTaskExecutorGateway();

		// a task executor offering exactly one slot (index 0)
		final ResourceID taskExecutorId = ResourceID.generate();
		final TaskExecutorConnection connection = new TaskExecutorConnection(taskExecutorId, gateway);
		final SlotID offeredSlotId = new SlotID(taskExecutorId, 0);
		final SlotStatus offeredSlotStatus = new SlotStatus(offeredSlotId, ResourceProfile.UNKNOWN);
		final SlotReport slotReport = new SlotReport(offeredSlotStatus);

		// the response must be queued before registration, because the gateway blocks on take()
		final CompletableFuture<Acknowledge> firstResponse = new CompletableFuture<>();
		preparedResponses.offer(firstResponse);

		slotManager.registerTaskManager(connection, slotReport);

		final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> firstRequest = observedRequests.take();

		final CompletableFuture<Acknowledge> secondResponse = new CompletableFuture<>();
		preparedResponses.offer(secondResponse);

		final SlotRequest secondSlotRequest = new SlotRequest(jobId, new AllocationID(), ResourceProfile.UNKNOWN, "foobar");
		slotManager.registerSlotRequest(secondSlotRequest);

		// let the first allocation attempt fail with a timeout
		firstResponse.completeExceptionally(new TimeoutException("Test exception to fail first allocation"));

		final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> secondRequest = observedRequests.take();

		// reject the second attempt: the slot is reported as occupied by the first allocation
		secondResponse.completeExceptionally(new SlotOccupiedException("Test exception", firstSlotRequest.getAllocationId(), jobId));

		// both attempts targeted the same slot, each with its own allocation id
		assertThat(firstRequest.f2, equalTo(firstSlotRequest.getAllocationId()));
		assertThat(secondRequest.f2, equalTo(secondSlotRequest.getAllocationId()));
		assertThat(secondRequest.f0, equalTo(firstRequest.f0));

		// NOTE(review): secondResponse was already completed exceptionally above, so this call
		// should have no effect - confirm whether it is intentional.
		secondResponse.complete(Acknowledge.get());

		final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0);
		assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED));
		assertThat(slot.getAllocationId(), equalTo(firstRequest.f2));

		assertThat(slotManager.getNumberRegisteredSlots(), is(1));
	}
}
 
Example #27
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that allocation failures are reported through the resource actions when a task
 * manager is unregistered from the slot manager (e.g. because it failed or was killed).
 *
 * <p>Two task managers are registered; unregistering each one must report exactly the
 * allocations of the slot requests that were registered before it joined.
 */
@Test
public void testNotifyFailedAllocationWhenTaskManagerTerminated() throws Exception {

	// collects every (job id, allocation id) pair passed to the allocation-failure callback
	final Queue<Tuple2<JobID, AllocationID>> reportedFailures = new ArrayDeque<>(5);

	final TestingResourceActions resourceActions = new TestingResourceActionsBuilder()
		.setNotifyAllocationFailureConsumer(
			(Tuple3<JobID, AllocationID, Exception> failure) ->
				reportedFailures.offer(Tuple2.of(failure.f0, failure.f1)))
		.build();

	try (final SlotManager slotManager = createSlotManager(
		ResourceManagerId.generate(),
		resourceActions)) {

		// job 1: two slot requests
		JobID firstJobId = new JobID();
		final SlotRequest firstJobRequestA = createSlotRequest(firstJobId);
		final SlotRequest firstJobRequestB = createSlotRequest(firstJobId);
		slotManager.registerSlotRequest(firstJobRequestA);
		slotManager.registerSlotRequest(firstJobRequestB);

		// first task manager, reporting 2 slots
		final ResourceID firstTaskExecutorId = ResourceID.generate();
		final TestingTaskExecutorGateway firstGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
		final TaskExecutorConnection firstConnection = new TaskExecutorConnection(firstTaskExecutorId, firstGateway);
		final SlotReport firstSlotReport = createSlotReport(firstTaskExecutorId, 2);

		// registering the first task manager triggers the slot allocation for job 1
		slotManager.registerTaskManager(firstConnection, firstSlotReport);

		// job 2: two slot requests
		JobID secondJobId = new JobID();
		final SlotRequest secondJobRequestA = createSlotRequest(secondJobId);
		final SlotRequest secondJobRequestB = createSlotRequest(secondJobId);
		slotManager.registerSlotRequest(secondJobRequestA);
		slotManager.registerSlotRequest(secondJobRequestB);

		// job 3: a single slot request
		JobID thirdJobId = new JobID();
		final SlotRequest thirdJobRequest = createSlotRequest(thirdJobId);
		slotManager.registerSlotRequest(thirdJobRequest);

		// second task manager, reporting 3 slots
		final ResourceID secondTaskExecutorId = ResourceID.generate();
		final TestingTaskExecutorGateway secondGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
		final TaskExecutorConnection secondConnection = new TaskExecutorConnection(secondTaskExecutorId, secondGateway);
		final SlotReport secondSlotReport = createSlotReport(secondTaskExecutorId, 3);

		// registering the second task manager triggers the slot allocation for job 2 and job 3
		slotManager.registerTaskManager(secondConnection, secondSlotReport);

		// losing the first task manager must fail both allocations of job 1
		slotManager.unregisterTaskManager(firstConnection.getInstanceID());

		assertThat(reportedFailures, hasSize(2));

		// drain the queue so that the second round of checks only sees new failures
		final Set<AllocationID> firstJobFailedAllocations = new HashSet<>(2);
		for (Tuple2<JobID, AllocationID> failure = reportedFailures.poll(); failure != null; failure = reportedFailures.poll()) {
			assertThat(failure.f0, equalTo(firstJobId));
			firstJobFailedAllocations.add(failure.f1);
		}

		assertThat(firstJobFailedAllocations, containsInAnyOrder(firstJobRequestA.getAllocationId(), firstJobRequestB.getAllocationId()));

		// losing the second task manager must fail the allocations of job 2 and job 3
		slotManager.unregisterTaskManager(secondConnection.getInstanceID());

		assertThat(reportedFailures, hasSize(3));

		Map<JobID, List<Tuple2<JobID, AllocationID>>> failuresByJob = reportedFailures.stream().collect(Collectors.groupingBy(failure -> failure.f0));

		assertThat(failuresByJob.entrySet(), hasSize(2));

		final Set<AllocationID> secondJobFailedAllocations = extractFailedAllocationsForJob(secondJobId, failuresByJob);
		final Set<AllocationID> thirdJobFailedAllocations = extractFailedAllocationsForJob(thirdJobId, failuresByJob);

		assertThat(secondJobFailedAllocations, containsInAnyOrder(secondJobRequestA.getAllocationId(), secondJobRequestB.getAllocationId()));
		assertThat(thirdJobFailedAllocations, containsInAnyOrder(thirdJobRequest.getAllocationId()));
	}
}
 
Example #28
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link TaskExecutorConnection} that pairs a freshly generated resource id with a
 * testing task executor gateway built from the builder's defaults.
 *
 * @return a new task executor connection for use in tests
 */
private TaskExecutorConnection createTaskExecutorConnection() {
	final TestingTaskExecutorGateway defaultGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
	final ResourceID generatedResourceId = ResourceID.generate();
	return new TaskExecutorConnection(generatedResourceId, defaultGateway);
}
 
Example #29
Source File: JobMasterTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the job master disconnects from a registered task manager, as suggested by the
 * test name after a heartbeat timeout (the heartbeat configuration is set up outside this
 * method).
 *
 * <p>The testing gateway only records the heartbeat requests and the disconnect call it
 * receives. The test passes once the gateway has been told to disconnect from the job manager
 * of the job under test.
 *
 * @throws Exception if any of the bounded waits times out or the job master fails
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
	final CompletableFuture<ResourceID> heartbeatResourceIdFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, ignored) -> heartbeatResourceIdFuture.complete(taskManagerId))
		.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId))
		.createTestingTaskExecutorGateway();

	// make the gateway resolvable under the address used for the registration below
	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			taskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration; bounded like every other wait in this
		// test so that a stuck registration fails the test instead of hanging it
		registrationResponse.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// a heartbeat request may or may not have been recorded by now, hence null is accepted
		final ResourceID heartbeatResourceId = heartbeatResourceIdFuture.getNow(null);

		assertThat(heartbeatResourceId, anyOf(nullValue(), equalTo(jmResourceId)));
	} finally {
		// terminate the RPC endpoint even if shutting down the shared services throws
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}
 
Example #30
Source File: JobMasterTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the job master disconnects from a registered task manager, as suggested by the
 * test name after a heartbeat timeout (the heartbeat configuration is set up outside this
 * method).
 *
 * <p>The testing gateway only records the heartbeat requests and the disconnect call it
 * receives. The test passes once the gateway has been told to disconnect from the job manager
 * of the job under test.
 *
 * @throws Exception if any of the bounded waits times out or the job master fails
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
	final CompletableFuture<ResourceID> heartbeatResourceIdFuture = new CompletableFuture<>();
	final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder()
		.setHeartbeatJobManagerConsumer((taskManagerId, ignored) -> heartbeatResourceIdFuture.complete(taskManagerId))
		.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId))
		.createTestingTaskExecutorGateway();

	// make the gateway resolvable under the address used for the registration below
	rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);

	final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		// register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time
		CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(
			taskExecutorGateway.getAddress(),
			taskManagerLocation,
			testingTimeout);

		// wait for the completion of the registration; bounded like every other wait in this
		// test so that a stuck registration fails the test instead of hanging it
		registrationResponse.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

		// a heartbeat request may or may not have been recorded by now, hence null is accepted
		final ResourceID heartbeatResourceId = heartbeatResourceIdFuture.getNow(null);

		assertThat(heartbeatResourceId, anyOf(nullValue(), equalTo(jmResourceId)));
	} finally {
		// terminate the RPC endpoint even if shutting down the shared services throws
		try {
			jobManagerSharedServices.shutdown();
		} finally {
			RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
		}
	}
}