org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException Java Examples

The following examples show how to use org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Starts the TaskExecutor services and, once they are up, the registration timeout.
 *
 * <p>If starting the services throws, the failure is wrapped in a
 * {@link TaskManagerException}, passed to {@code onFatalError} and then rethrown;
 * the registration timeout is not started in that case.
 *
 * @throws Exception the wrapping {@link TaskManagerException} if the services could not be started
 */
@Override
public void onStart() throws Exception {
	try {
		startTaskExecutorServices();
	} catch (Exception startupFailure) {
		final String description = String.format("Could not start the TaskExecutor %s", getAddress());
		final TaskManagerException fatal = new TaskManagerException(description, startupFailure);

		// Report first, then propagate the very same exception instance to the caller.
		onFatalError(fatal);
		throw fatal;
	}

	startRegistrationTimeout();
}
 
Example #2
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Reacts to a heartbeat timeout reported for the given resource id.
 *
 * <p>When the id equals the ResourceManager resource id of the currently established
 * connection, {@code reconnectToResourceManager} is called with a
 * {@link TaskManagerException} describing the timeout. Otherwise the timeout is
 * treated as outdated and only logged at debug level.
 *
 * @param resourceId id of the resource whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceId) {
	validateRunsInMainThread();

	// The timeout only matters if it refers to the ResourceManager we are connected to right now.
	final boolean timeoutStillValid = establishedResourceManagerConnection != null
		&& establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId);

	if (!timeoutStillValid) {
		log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId);
		return;
	}

	log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId);

	final String reason = String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId);
	reconnectToResourceManager(new TaskManagerException(reason));
}
 
Example #3
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Brings up the TaskExecutor services and afterwards starts the registration timeout.
 *
 * <p>A failure while starting the services is wrapped in a {@link TaskManagerException}
 * (keeping the original exception as its cause), handed to {@code onFatalError} and
 * rethrown to the caller.
 *
 * @throws Exception the {@link TaskManagerException} wrapping the start-up failure
 */
@Override
public void onStart() throws Exception {
	try {
		startTaskExecutorServices();
	} catch (Exception cause) {
		final String errorMessage = String.format("Could not start the TaskExecutor %s", getAddress());
		final TaskManagerException startFailure = new TaskManagerException(errorMessage, cause);

		onFatalError(startFailure);
		throw startFailure;
	}

	// Only reached when the services started without an exception.
	startRegistrationTimeout();
}
 
Example #4
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Handles a heartbeat timeout notification for {@code resourceId}.
 *
 * <p>If there is an established ResourceManager connection whose resource id equals
 * {@code resourceId}, the timeout is logged at info level and
 * {@code reconnectToResourceManager} is invoked with a {@link TaskManagerException}
 * as the cause. In every other case the notification is logged at debug level and ignored.
 *
 * @param resourceId id of the resource whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceId) {
	validateRunsInMainThread();

	final boolean matchesCurrentConnection = establishedResourceManagerConnection != null
		&& establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId);

	if (!matchesCurrentConnection) {
		// Stale notification: it does not belong to the connection we currently hold.
		log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId);
		return;
	}

	log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId);

	final TaskManagerException timeoutCause = new TaskManagerException(
		String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId));
	reconnectToResourceManager(timeoutCause);
}
 
Example #5
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Start-up hook: starts the TaskExecutor services, then the registration timeout.
 *
 * <p>Should {@code startTaskExecutorServices()} throw, the exception is wrapped in a
 * {@link TaskManagerException} whose message contains the value of {@code getAddress()},
 * reported through {@code onFatalError} and thrown again.
 *
 * @throws Exception the wrapping {@link TaskManagerException} when the services fail to start
 */
@Override
public void onStart() throws Exception {
	try {
		startTaskExecutorServices();
	} catch (Exception serviceStartError) {
		final String text = String.format("Could not start the TaskExecutor %s", getAddress());
		final TaskManagerException wrapped = new TaskManagerException(text, serviceStartError);

		onFatalError(wrapped);
		throw wrapped;
	}

	startRegistrationTimeout();
}
 
Example #6
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Callback invoked when the heartbeat of the resource with the given id timed out.
 *
 * <p>The timeout is acted upon only if an established ResourceManager connection exists
 * and its ResourceManager resource id equals {@code resourceId}; then
 * {@code reconnectToResourceManager} is called with a {@link TaskManagerException}.
 * Any other timeout is logged at debug level and otherwise ignored.
 *
 * @param resourceId id of the resource whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceId) {
	validateRunsInMainThread();

	// first check whether the timeout is still valid
	final boolean isCurrentResourceManager = establishedResourceManagerConnection != null
		&& establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId);

	if (!isCurrentResourceManager) {
		log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId);
		return;
	}

	log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId);

	final String timeoutMessage = String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId);
	reconnectToResourceManager(new TaskManagerException(timeoutMessage));
}
 
Example #7
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a slot request is rejected while the TaskExecutor is not
 * registered at a ResourceManager.
 *
 * <p>The registration future returned by the testing gateway is never completed
 * in this test; the slot request is expected to fail with a
 * {@link TaskManagerException}.
 */
@Test
public void testIgnoringSlotRequestsIfNotRegistered() throws Exception {
	final TaskExecutor taskExecutor = createTaskExecutor(1);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

		// Left incomplete on purpose so that the registration never finishes.
		final CompletableFuture<RegistrationResponse> pendingRegistration = new CompletableFuture<>();
		// Completed with the resource id the TaskExecutor sends in its registration attempt.
		final CompletableFuture<ResourceID> announcedResourceId = new CompletableFuture<>();

		rmGateway.setRegisterTaskExecutorFunction(registration -> {
			announcedResourceId.complete(registration.getResourceId());
			return pendingRegistration;
		});

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// Blocks until the registration attempt has reached the testing gateway.
		final ResourceID taskExecutorId = announcedResourceId.get();

		final SlotID requestedSlot = new SlotID(taskExecutorId, 0);
		final CompletableFuture<Acknowledge> slotResponse = selfGateway.requestSlot(
			requestedSlot,
			jobId,
			new AllocationID(),
			ResourceProfile.ZERO,
			"foobar",
			rmGateway.getFencingToken(),
			timeout);

		try {
			slotResponse.get();
			fail("We should not be able to request slots before the TaskExecutor is registered at the ResourceManager.");
		} catch (ExecutionException expected) {
			final Throwable rootFailure = ExceptionUtils.stripExecutionException(expected);
			assertThat(rootFailure, instanceOf(TaskManagerException.class));
		}
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #8
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a slot request is rejected while the TaskExecutor is not
 * registered at a ResourceManager.
 *
 * <p>The registration future returned by the testing gateway is never completed
 * in this test; the slot request is expected to fail with a
 * {@link TaskManagerException}.
 */
@Test
public void testIgnoringSlotRequestsIfNotRegistered() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskSlotTable(slotTable)
		.build();

	final TaskExecutor taskExecutor = createTaskExecutor(services);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

		// Left incomplete on purpose so that the registration never finishes.
		final CompletableFuture<RegistrationResponse> pendingRegistration = new CompletableFuture<>();
		// Completed with field f1 of the registration tuple, which is used as the resource id below.
		final CompletableFuture<ResourceID> announcedResourceId = new CompletableFuture<>();

		rmGateway.setRegisterTaskExecutorFunction(registrationTuple -> {
			announcedResourceId.complete(registrationTuple.f1);
			return pendingRegistration;
		});

		rpc.registerGateway(rmGateway.getAddress(), rmGateway);
		resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// Blocks until the registration attempt has reached the testing gateway.
		final ResourceID taskExecutorId = announcedResourceId.get();

		final SlotID requestedSlot = new SlotID(taskExecutorId, 0);
		final CompletableFuture<Acknowledge> slotResponse = selfGateway.requestSlot(
			requestedSlot,
			jobId,
			new AllocationID(),
			"foobar",
			rmGateway.getFencingToken(),
			timeout);

		try {
			slotResponse.get();
			fail("We should not be able to request slots before the TaskExecutor is registered at the ResourceManager.");
		} catch (ExecutionException expected) {
			final Throwable rootFailure = ExceptionUtils.stripExecutionException(expected);
			assertThat(rootFailure, instanceOf(TaskManagerException.class));
		}
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #9
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that requesting a slot fails as long as the TaskExecutor is not
 * registered at a ResourceManager.
 *
 * <p>The testing gateway answers the registration with a future that this test
 * never completes. The subsequent slot request must complete exceptionally with
 * a {@link TaskManagerException}.
 */
@Test
public void testIgnoringSlotRequestsIfNotRegistered() throws Exception {
	final TaskSlotTable singleSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerServicesBuilder servicesBuilder = new TaskManagerServicesBuilder().setTaskSlotTable(singleSlotTable);
	final TaskManagerServices tmServices = servicesBuilder.build();

	final TaskExecutor taskExecutor = createTaskExecutor(tmServices);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway resourceManager = new TestingResourceManagerGateway();

		// Never completed here, so the registration stays unanswered.
		final CompletableFuture<RegistrationResponse> unansweredRegistration = new CompletableFuture<>();
		final CompletableFuture<ResourceID> resourceIdOfRegistration = new CompletableFuture<>();

		resourceManager.setRegisterTaskExecutorFunction(tuple -> {
			resourceIdOfRegistration.complete(tuple.f1);
			return unansweredRegistration;
		});

		rpc.registerGateway(resourceManager.getAddress(), resourceManager);
		resourceManagerLeaderRetriever.notifyListener(resourceManager.getAddress(), resourceManager.getFencingToken().toUUID());

		final TaskExecutorGateway gateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// Wait for the registration attempt before sending the slot request.
		final ResourceID ownResourceId = resourceIdOfRegistration.get();

		final SlotID firstSlot = new SlotID(ownResourceId, 0);
		final CompletableFuture<Acknowledge> requestResult = gateway.requestSlot(
			firstSlot,
			jobId,
			new AllocationID(),
			"foobar",
			resourceManager.getFencingToken(),
			timeout);

		try {
			requestResult.get();
			fail("We should not be able to request slots before the TaskExecutor is registered at the ResourceManager.");
		} catch (ExecutionException executionException) {
			final Throwable stripped = ExceptionUtils.stripExecutionException(executionException);
			assertThat(stripped, instanceOf(TaskManagerException.class));
		}
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example #10
Source File: TaskExecutor.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a slot request sent on behalf of a resource manager.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Reject the request with a {@link TaskManagerException} if
 *       {@code isConnectedToResourceManager(resourceManagerId)} is false.</li>
 *   <li>Allocate the slot; a {@code SlotAllocationException} is returned as an
 *       exceptionally completed future.</li>
 *   <li>Get or create the job entry in the job table. If that fails, the slot is freed
 *       again, local state for the allocation id is released, and a
 *       {@code SlotAllocationException} wrapping the failure is returned.</li>
 *   <li>If the job is connected, offer slots to its job manager.</li>
 * </ol>
 *
 * @param slotId id of the requested slot
 * @param jobId id of the job the slot is requested for
 * @param allocationId allocation id of the request
 * @param resourceProfile resource profile passed on to the slot allocation
 * @param targetAddress address passed to {@code registerNewJobAndCreateServices} when a new job entry is created
 * @param resourceManagerId leader id of the requesting resource manager
 * @param timeout not read in this method body
 * @return a future completed with {@code Acknowledge.get()} on success, otherwise an
 *         exceptionally completed future
 */
@Override
public CompletableFuture<Acknowledge> requestSlot(
	final SlotID slotId,
	final JobID jobId,
	final AllocationID allocationId,
	final ResourceProfile resourceProfile,
	final String targetAddress,
	final ResourceManagerId resourceManagerId,
	final Time timeout) {
	// TODO: Filter invalid requests from the resource manager by using the instance/registration Id

	log.info("Receive slot request {} for job {} from resource manager with leader id {}.",
		allocationId, jobId, resourceManagerId);

	final boolean connectedToRequester = isConnectedToResourceManager(resourceManagerId);
	if (!connectedToRequester) {
		final String notConnectedMessage = String.format("TaskManager is not connected to the resource manager %s.", resourceManagerId);
		log.debug(notConnectedMessage);
		return FutureUtils.completedExceptionally(new TaskManagerException(notConnectedMessage));
	}

	try {
		allocateSlot(slotId, jobId, allocationId, resourceProfile);
	} catch (SlotAllocationException allocationFailure) {
		return FutureUtils.completedExceptionally(allocationFailure);
	}

	final JobTable.Job job;

	try {
		job = jobTable.getOrCreateJob(jobId, () -> registerNewJobAndCreateServices(jobId, targetAddress));
	} catch (Exception jobCreationFailure) {
		// Roll back the allocation made above: free the slot first ...
		try {
			taskSlotTable.freeSlot(allocationId);
		} catch (SlotNotFoundException missingSlot) {
			// The slot was allocated just above, so it should still exist;
			// treat its absence as a fatal error.
			onFatalError(missingSlot);
		}

		// ... then release local state under the allocation id.
		localStateStoresManager.releaseLocalStateForAllocationId(allocationId);

		// Sanity check: the slot must be reported as free after the rollback.
		final boolean slotIsFree = taskSlotTable.isSlotFree(slotId.getSlotNumber());
		if (!slotIsFree) {
			onFatalError(new Exception("Could not free slot " + slotId));
		}

		return FutureUtils.completedExceptionally(new SlotAllocationException("Could not create new job.", jobCreationFailure));
	}

	if (job.isConnected()) {
		offerSlotsToJobManager(jobId);
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}