org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException Java Examples

The following examples show how to use org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SimpleSlotProvider.java    From Flink-CEPplus with Apache License 2.0 7 votes vote down vote up
/**
 * Hands out the next free slot, if there is one.
 *
 * <p>While holding {@code lock}, the head of {@code slots} is removed, wrapped in a
 * {@link SimpleSlot} and recorded in {@code allocatedSlots} under the given request id.
 * When no slot can be taken, the result of {@code FutureUtils.completedExceptionally} with a
 * fresh {@link NoResourceAvailableException} is returned instead.
 *
 * <p>{@code task}, {@code slotProfile}, {@code allowQueued} and {@code allocationTimeout} are
 * not read by this implementation.
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
		SlotRequestId slotRequestId,
		ScheduledUnit task,
		SlotProfile slotProfile,
		boolean allowQueued,
		Time allocationTimeout) {
	synchronized (lock) {
		final SlotContext freeSlot = slots.isEmpty() ? null : slots.removeFirst();

		// nothing left to hand out
		if (freeSlot == null) {
			return FutureUtils.completedExceptionally(new NoResourceAvailableException());
		}

		final SimpleSlot logicalSlot = new SimpleSlot(freeSlot, this, 0);
		allocatedSlots.put(slotRequestId, freeSlot);
		return CompletableFuture.completedFuture(logicalSlot);
	}
}
 
Example #2
Source File: RestartIndividualStrategy.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to the failure of a single task execution.
 *
 * <p>A {@link NoResourceAvailableException} is handed to {@code executionGraph.failGlobal}.
 * Any other cause increments {@code numTaskFailures} and chains a restart of the affected
 * vertex onto the execution's terminal state future.
 *
 * @param taskExecution the execution that failed
 * @param cause the reason for the failure
 */
@Override
public void onTaskFailure(Execution taskExecution, Throwable cause) {
	executionGraph.getJobMasterMainThreadExecutor().assertRunningInMainThread();

	if (!(cause instanceof NoResourceAvailableException)) {
		LOG.info("Recovering task failure for {} (#{}) via individual restart.", 
				taskExecution.getVertex().getTaskNameWithSubtaskIndex(), taskExecution.getAttemptNumber());

		numTaskFailures.inc();

		// The restart is attached to the terminal state future rather than triggered directly.
		// Per the original note, all tasks passed here are currently already terminal, so the
		// future is not strictly needed; it is kept because it is cheap and eases testing.
		taskExecution.getTerminalStateFuture().thenRun(
			() -> performExecutionVertexRestart(taskExecution.getVertex(), taskExecution.getGlobalModVersion()));
		return;
	}

	// Missing resources (potentially caused by a scale-in) are treated as a global failure.
	LOG.info("Not enough resources to schedule {} - triggering full recovery.", taskExecution);
	executionGraph.failGlobal(cause);
}
 
Example #3
Source File: DefaultSchedulerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the job ends up {@code FAILED} with a {@link NoResourceAvailableException} in
 * its failure cause when the pending slot requests are timed out and the restart strategy is
 * configured not to restart.
 */
@Test
public void failJobIfNotEnoughResources() throws Exception {
	final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
	testRestartBackoffTimeStrategy.setCanRestart(false);
	// keep the slot requests pending so that they can be timed out explicitly below
	testExecutionSlotAllocator.disableAutoCompletePendingRequests();

	final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

	testExecutionSlotAllocator.timeoutPendingRequests();

	waitForTermination(scheduler);
	final JobStatus jobStatus = scheduler.requestJobStatus();
	assertThat(jobStatus, is(equalTo(JobStatus.FAILED)));

	final Throwable failureCause = scheduler.requestJob()
		.getFailureInfo()
		.getException()
		.deserializeError(DefaultSchedulerTest.class.getClassLoader());
	assertTrue(findThrowable(failureCause, NoResourceAvailableException.class).isPresent());
	assertTrue(
		findThrowableWithMessage(
			failureCause,
			"Could not allocate the required slot within slot request timeout.").isPresent());
	// The job status was already asserted above on the same final local; the duplicate
	// assertion that used to follow here could not observe anything new and was dropped.
}
 
Example #4
Source File: MiniClusterITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.LAZY_FROM_SOURCES} and expects a
 * {@link JobExecutionException} that carries a {@link NoResourceAvailableException} together
 * with either the legacy or the new scheduler's error message.
 */
@Test
public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception {
	//TODO: remove the legacy scheduler message check once legacy scheduler is removed
	final String legacySchedulerErrorMessage = "Could not allocate enough slots";
	final String ngSchedulerErrorMessage = "Could not allocate the required slot within slot request timeout";

	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		assertTrue(findThrowableWithMessage(e, "Job execution failed.").isPresent());
		assertTrue(findThrowable(e, NoResourceAvailableException.class).isPresent());

		// either scheduler's message is accepted; the second lookup only runs if the first misses
		if (!findThrowableWithMessage(e, legacySchedulerErrorMessage).isPresent()) {
			assertTrue(findThrowableWithMessage(e, ngSchedulerErrorMessage).isPresent());
		}
	}
}
 
Example #5
Source File: MiniClusterITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.EAGER} and expects a
 * {@link JobExecutionException} that carries a {@link NoResourceAvailableException} together
 * with either the legacy or the new scheduler's error message.
 */
@Test
public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception {
	//TODO: remove the legacy scheduler message check once legacy scheduler is removed
	final String legacySchedulerErrorMessage = "Slots required: 2, slots allocated: 1";
	final String ngSchedulerErrorMessage = "Could not allocate the required slot within slot request timeout";

	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		assertTrue(findThrowableWithMessage(e, "Job execution failed.").isPresent());
		assertTrue(findThrowable(e, NoResourceAvailableException.class).isPresent());

		// either scheduler's message is accepted; the second lookup only runs if the first misses
		if (!findThrowableWithMessage(e, legacySchedulerErrorMessage).isPresent()) {
			assertTrue(findThrowableWithMessage(e, ngSchedulerErrorMessage).isPresent());
		}
	}
}
 
Example #6
Source File: SlotPoolImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a failed slot request to the ResourceManager.
 *
 * <p>If a pending request is registered for the id and the failure may not be ignored for it,
 * the request is removed and its allocated slot future is completed exceptionally with a
 * {@link NoResourceAvailableException} wrapping {@code failure}.
 *
 * @param slotRequestID id of the slot request that failed
 * @param failure the reason reported for the failed request
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
	final PendingRequest pending = pendingRequests.getValueByKeyA(slotRequestID);

	// no request registered under this id: only leave a trace in the debug log
	if (pending == null) {
		if (log.isDebugEnabled()) {
			log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
		}
		return;
	}

	if (isBatchRequestAndFailureCanBeIgnored(pending, failure)) {
		log.debug("Ignoring failed request to the resource manager for a batch slot request.");
		return;
	}

	removePendingRequest(slotRequestID);
	pending.getAllocatedSlotFuture().completeExceptionally(
		new NoResourceAvailableException(
			"No pooled slot available and request to ResourceManager for new slot failed", failure));
}
 
Example #7
Source File: SlotPoolImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a failed slot request to the ResourceManager.
 *
 * <p>If a pending request is found for the id and the failure may not be ignored for it, the
 * entry is removed from {@code pendingRequests} and its allocated slot future is completed
 * exceptionally with a {@link NoResourceAvailableException} wrapping {@code failure}.
 *
 * @param slotRequestID id of the slot request that failed
 * @param failure the reason reported for the failed request
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
	final PendingRequest pending = pendingRequests.getKeyA(slotRequestID);

	// no request registered under this id: only leave a trace in the debug log
	if (pending == null) {
		if (log.isDebugEnabled()) {
			log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
		}
		return;
	}

	if (isBatchRequestAndFailureCanBeIgnored(pending, failure)) {
		log.debug("Ignoring failed request to the resource manager for a batch slot request.");
		return;
	}

	pendingRequests.removeKeyA(slotRequestID);
	pending.getAllocatedSlotFuture().completeExceptionally(
		new NoResourceAvailableException(
			"No pooled slot available and request to ResourceManager for new slot failed", failure));
}
 
Example #8
Source File: RestartIndividualStrategy.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to the failure of a single task execution.
 *
 * <p>A {@link NoResourceAvailableException} is handed to {@code executionGraph.failGlobal}.
 * Any other cause increments {@code numTaskFailures} and chains a restart of the affected
 * vertex onto the execution's terminal state future.
 *
 * @param taskExecution the execution that failed
 * @param cause the reason for the failure
 */
@Override
public void onTaskFailure(Execution taskExecution, Throwable cause) {
	executionGraph.getJobMasterMainThreadExecutor().assertRunningInMainThread();

	if (!(cause instanceof NoResourceAvailableException)) {
		LOG.info("Recovering task failure for {} (#{}) via individual restart.", 
				taskExecution.getVertex().getTaskNameWithSubtaskIndex(), taskExecution.getAttemptNumber());

		numTaskFailures.inc();

		// The restart is attached to the terminal state future rather than triggered directly.
		// Per the original note, all tasks passed here are currently already terminal, so the
		// future is not strictly needed; it is kept because it is cheap and eases testing.
		taskExecution.getTerminalStateFuture().thenRun(
			() -> performExecutionVertexRestart(taskExecution.getVertex(), taskExecution.getGlobalModVersion()));
		return;
	}

	// Missing resources (potentially caused by a scale-in) are treated as a global failure.
	LOG.info("Not enough resources to schedule {} - triggering full recovery.", taskExecution);
	executionGraph.failGlobal(cause);
}
 
Example #9
Source File: MiniClusterITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.LAZY_FROM_SOURCES} and expects a
 * {@link JobExecutionException} carrying a {@link NoResourceAvailableException} and the
 * "Could not allocate enough slots" message.
 */
@Test
public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception {
	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		final boolean reportsJobFailure =
			findThrowableWithMessage(e, "Job execution failed.").isPresent();
		assertTrue(reportsJobFailure);

		final boolean hasNoResourceCause =
			findThrowable(e, NoResourceAvailableException.class).isPresent();
		assertTrue(hasNoResourceCause);

		final boolean reportsMissingSlots =
			findThrowableWithMessage(e, "Could not allocate enough slots").isPresent();
		assertTrue(reportsMissingSlots);
	}
}
 
Example #10
Source File: SimpleSlotProvider.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Hands out the next free slot, if there is one.
 *
 * <p>While holding {@code lock}, the head of {@code slots} is removed, a
 * {@link TestingLogicalSlot} is built from it and the slot is recorded in
 * {@code allocatedSlots} under the given request id. When no slot can be taken, the result of
 * {@code FutureUtils.completedExceptionally} with a fresh
 * {@link NoResourceAvailableException} is returned instead.
 *
 * <p>{@code slotProfile} and {@code allocationTimeout} are not read by this implementation.
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
		SlotRequestId slotRequestId,
		ScheduledUnit task,
		SlotProfile slotProfile,
		Time allocationTimeout) {
	synchronized (lock) {
		final SlotContext freeSlot = slots.isEmpty() ? null : slots.removeFirst();

		// nothing left to hand out
		if (freeSlot == null) {
			return FutureUtils.completedExceptionally(new NoResourceAvailableException());
		}

		final TestingLogicalSlot logicalSlot = new TestingLogicalSlotBuilder()
			.setTaskManagerLocation(freeSlot.getTaskManagerLocation())
			.setTaskManagerGateway(freeSlot.getTaskManagerGateway())
			.setSlotNumber(freeSlot.getPhysicalSlotNumber())
			.setAllocationId(freeSlot.getAllocationId())
			.setSlotRequestId(slotRequestId)
			.setSlotSharingGroupId(task.getSlotSharingGroupId())
			.setSlotOwner(this)
			.createTestingLogicalSlot();
		allocatedSlots.put(slotRequestId, freeSlot);
		return CompletableFuture.completedFuture(logicalSlot);
	}
}
 
Example #11
Source File: DefaultScheduler.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given failure in a {@link NoResourceAvailableException} if, after
 * {@code ExceptionUtils.stripCompletionException}, it is a {@link TimeoutException}.
 *
 * @param failure the failure to inspect
 * @return a {@link NoResourceAvailableException} with {@code failure} as its cause, or
 * 		{@code failure} itself when it is not a timeout
 */
private static Throwable maybeWrapWithNoResourceAvailableException(final Throwable failure) {
	final boolean timedOut =
		ExceptionUtils.stripCompletionException(failure) instanceof TimeoutException;
	if (!timedOut) {
		return failure;
	}

	// the original (unstripped) failure is kept as the cause
	return new NoResourceAvailableException("Could not allocate the required slot within slot request timeout. " +
		"Please make sure that the cluster has enough resources.", failure);
}
 
Example #12
Source File: ThrowableClassifierTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code ThrowableClassifier.getThrowableType} reports
 * {@code ThrowableType.NonRecoverableError} for a {@link SuppressRestartsException} and for a
 * {@link NoResourceAvailableException}.
 */
@Test
public void testThrowableType_NonRecoverable() {
	final Throwable suppressedRestart = new SuppressRestartsException(new Exception(""));
	assertEquals(
		ThrowableType.NonRecoverableError,
		ThrowableClassifier.getThrowableType(suppressedRestart));

	final Throwable missingResources = new NoResourceAvailableException();
	assertEquals(
		ThrowableType.NonRecoverableError,
		ThrowableClassifier.getThrowableType(missingResources));
}
 
Example #13
Source File: SimpleSlotProvider.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Hands out the next free slot, if there is one.
 *
 * <p>While holding {@code lock}, the head of {@code slots} is removed, a
 * {@link TestingLogicalSlot} is built from it and the slot is recorded in
 * {@code allocatedSlots} under the given request id. When no slot can be taken, the result of
 * {@code FutureUtils.completedExceptionally} with a fresh
 * {@link NoResourceAvailableException} is returned instead.
 *
 * <p>{@code task}, {@code slotProfile}, {@code allowQueued} and {@code allocationTimeout} are
 * not read by this implementation.
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
		SlotRequestId slotRequestId,
		ScheduledUnit task,
		SlotProfile slotProfile,
		boolean allowQueued,
		Time allocationTimeout) {
	synchronized (lock) {
		final SlotContext freeSlot = slots.isEmpty() ? null : slots.removeFirst();

		// nothing left to hand out
		if (freeSlot == null) {
			return FutureUtils.completedExceptionally(new NoResourceAvailableException());
		}

		final TestingLogicalSlot logicalSlot = new TestingLogicalSlotBuilder()
			.setTaskManagerLocation(freeSlot.getTaskManagerLocation())
			.setTaskManagerGateway(freeSlot.getTaskManagerGateway())
			.setSlotNumber(freeSlot.getPhysicalSlotNumber())
			.setAllocationId(freeSlot.getAllocationId())
			.setSlotRequestId(slotRequestId)
			.setSlotOwner(this)
			.createTestingLogicalSlot();
		allocatedSlots.put(slotRequestId, freeSlot);
		return CompletableFuture.completedFuture(logicalSlot);
	}
}
 
Example #14
Source File: MiniClusterITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.EAGER} and expects a
 * {@link JobExecutionException} carrying a {@link NoResourceAvailableException} and the
 * "Slots required: 2, slots allocated: 1" message.
 */
@Test
public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception {
	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		final boolean reportsJobFailure =
			findThrowableWithMessage(e, "Job execution failed.").isPresent();
		assertTrue(reportsJobFailure);

		final boolean hasNoResourceCause =
			findThrowable(e, NoResourceAvailableException.class).isPresent();
		assertTrue(hasNoResourceCause);

		final boolean reportsSlotCounts =
			findThrowableWithMessage(e, "Slots required: 2, slots allocated: 1").isPresent();
		assertTrue(reportsSlotCounts);
	}
}
 
Example #15
Source File: ThrowableClassifierTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code ThrowableClassifier.getThrowableType} reports
 * {@code ThrowableType.NonRecoverableError} for a {@link SuppressRestartsException} and for a
 * {@link NoResourceAvailableException}.
 */
@Test
public void testThrowableType_NonRecoverable() {
	final Throwable suppressedRestart = new SuppressRestartsException(new Exception(""));
	assertEquals(
		ThrowableType.NonRecoverableError,
		ThrowableClassifier.getThrowableType(suppressedRestart));

	final Throwable missingResources = new NoResourceAvailableException();
	assertEquals(
		ThrowableType.NonRecoverableError,
		ThrowableClassifier.getThrowableType(missingResources));
}
 
Example #16
Source File: MiniClusterITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.LAZY_FROM_SOURCES} and expects a
 * {@link JobExecutionException} carrying a {@link NoResourceAvailableException} and the
 * "Could not allocate enough slots" message.
 */
@Test
public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception {
	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		final boolean reportsJobFailure =
			findThrowableWithMessage(e, "Job execution failed.").isPresent();
		assertTrue(reportsJobFailure);

		final boolean hasNoResourceCause =
			findThrowable(e, NoResourceAvailableException.class).isPresent();
		assertTrue(hasNoResourceCause);

		final boolean reportsMissingSlots =
			findThrowableWithMessage(e, "Could not allocate enough slots").isPresent();
		assertTrue(reportsMissingSlots);
	}
}
 
Example #17
Source File: MiniClusterITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the not-enough-slots scenario with {@code ScheduleMode.EAGER} and expects a
 * {@link JobExecutionException} carrying a {@link NoResourceAvailableException} and the
 * "Slots required: 2, slots allocated: 1" message.
 */
@Test
public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception {
	try {
		setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER);
		fail("Job should fail.");
	} catch (JobExecutionException e) {
		final boolean reportsJobFailure =
			findThrowableWithMessage(e, "Job execution failed.").isPresent();
		assertTrue(reportsJobFailure);

		final boolean hasNoResourceCause =
			findThrowable(e, NoResourceAvailableException.class).isPresent();
		assertTrue(hasNoResourceCause);

		final boolean reportsSlotCounts =
			findThrowableWithMessage(e, "Slots required: 2, slots allocated: 1").isPresent();
		assertTrue(reportsSlotCounts);
	}
}
 
Example #18
Source File: SlotPoolImpl.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Handles a failed slot request to the ResourceManager.
 *
 * <p>The entry for the id is removed from {@code pendingRequests}; if one existed, its
 * allocated slot future is completed exceptionally with a
 * {@link NoResourceAvailableException} wrapping {@code failure}.
 *
 * @param slotRequestID id of the slot request that failed
 * @param failure the reason reported for the failed request
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
	final PendingRequest removed = pendingRequests.removeKeyA(slotRequestID);

	// no request registered under this id: only leave a trace in the debug log
	if (removed == null) {
		if (log.isDebugEnabled()) {
			log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
		}
		return;
	}

	removed.getAllocatedSlotFuture().completeExceptionally(
		new NoResourceAvailableException(
			"No pooled slot available and request to ResourceManager for new slot failed", failure));
}
 
Example #19
Source File: ExecutionGraphRestartTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Schedules a source/sink job with slot sharing on a scheduler that was created with one slot
 * fewer than the parallelism, waits for the configured number of full restarts and then
 * expects the graph to be {@code FAILED} with a {@link NoResourceAvailableException} as cause.
 */
@Test
public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception {
	// this test is inconclusive if not used with a proper multi-threaded executor
	assertTrue("test assumptions violated", ((ThreadPoolExecutor) executor).getCorePoolSize() > 1);

	final int maxRestarts = 10;
	final int dop = 20;

	try (SlotPool pool = createSlotPoolImpl()) {
		// one slot short of what the job needs
		final Scheduler slotProvider = createSchedulerWithSlots(
			dop - 1, pool, new LocalTaskManagerLocation());

		final SlotSharingGroup sharedGroup = new SlotSharingGroup();

		final JobVertex producer = new JobVertex("source");
		producer.setInvokableClass(NoOpInvokable.class);
		producer.setParallelism(dop);
		producer.setSlotSharingGroup(sharedGroup);

		final JobVertex consumer = new JobVertex("sink");
		consumer.setInvokableClass(NoOpInvokable.class);
		consumer.setParallelism(dop);
		consumer.setSlotSharingGroup(sharedGroup);
		consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED);

		final TestRestartStrategy restarts = new TestRestartStrategy(maxRestarts, false);

		final ExecutionGraph graph = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(TEST_JOB_ID, producer, consumer)
			.setSlotProvider(slotProvider)
			.setRestartStrategy(restarts)
			.setIoExecutor(executor)
			.setFutureExecutor(executor)
			.setScheduleMode(ScheduleMode.EAGER)
			.build();

		graph.start(mainThreadExecutor);
		graph.scheduleForExecution();

		// poll until the expected number of full restarts has been reached
		while (graph.getNumberOfFullRestarts() < maxRestarts) {
			Thread.sleep(1);
		}

		assertEquals(JobStatus.FAILED, graph.getState());

		// any cause other than missing resources is rethrown so that it fails the test
		final Throwable failureCause = graph.getFailureCause();
		if (!(failureCause instanceof NoResourceAvailableException)) {
			ExceptionUtils.rethrowException(failureCause, failureCause.getMessage());
		}
	}
}
 
Example #20
Source File: SchedulerImpl.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a logical slot for the given unit inside its slot sharing group.
 *
 * <p>The {@link SlotSharingManager} for the unit's slot sharing group id is taken from
 * {@code slotSharingManagers} (created on first use). A multi task slot is then obtained via
 * the co-location path when the unit has a co-location constraint and via the plain path
 * otherwise, and a single task slot for the unit's job vertex is allocated in it.
 *
 * @return the logical slot future of the allocated single task slot, or the result of
 * 		{@code FutureUtils.completedExceptionally} if a {@link NoResourceAvailableException}
 * 		was thrown while obtaining the multi task slot
 */
private CompletableFuture<LogicalSlot> allocateSharedSlot(
	SlotRequestId slotRequestId,
	ScheduledUnit scheduledUnit,
	SlotProfile slotProfile,
	boolean allowQueuedScheduling,
	Time allocationTimeout) {
	final SlotSharingManager sharingManager = slotSharingManagers.computeIfAbsent(
		scheduledUnit.getSlotSharingGroupId(),
		groupId -> new SlotSharingManager(groupId, slotPool, this));

	final SlotSharingManager.MultiTaskSlotLocality rootSlotLocality;
	try {
		if (scheduledUnit.getCoLocationConstraint() == null) {
			rootSlotLocality = allocateMultiTaskSlot(
				scheduledUnit.getJobVertexId(),
				sharingManager,
				slotProfile,
				allowQueuedScheduling,
				allocationTimeout);
		} else {
			rootSlotLocality = allocateCoLocatedMultiTaskSlot(
				scheduledUnit.getCoLocationConstraint(),
				sharingManager,
				slotProfile,
				allowQueuedScheduling,
				allocationTimeout);
		}
	} catch (NoResourceAvailableException e) {
		// surface the shortage through the returned future instead of throwing
		return FutureUtils.completedExceptionally(e);
	}

	// sanity check: the job vertex must not already be present in the multi task slot
	Preconditions.checkState(!rootSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId()));

	return rootSlotLocality.getMultiTaskSlot()
		.allocateSingleTaskSlot(
			slotRequestId,
			scheduledUnit.getJobVertexId(),
			rootSlotLocality.getLocality())
		.getLogicalSlotFuture();
}
 
Example #21
Source File: SchedulerImpl.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a logical slot for the given unit inside its slot sharing group.
 *
 * <p>The {@link SlotSharingManager} for the unit's slot sharing group id is taken from
 * {@code slotSharingManagers} (created on first use). A multi task slot is then obtained via
 * the co-location path when the unit has a co-location constraint and via the plain path
 * otherwise, and a single task slot with the profile's task resource profile is allocated in
 * it for the unit's job vertex.
 *
 * @return the logical slot future of the allocated single task slot, or the result of
 * 		{@code FutureUtils.completedExceptionally} if a {@link NoResourceAvailableException}
 * 		was thrown while obtaining the multi task slot
 */
private CompletableFuture<LogicalSlot> allocateSharedSlot(
	SlotRequestId slotRequestId,
	ScheduledUnit scheduledUnit,
	SlotProfile slotProfile,
	@Nullable Time allocationTimeout) {
	final SlotSharingManager sharingManager = slotSharingManagers.computeIfAbsent(
		scheduledUnit.getSlotSharingGroupId(),
		groupId -> new SlotSharingManager(groupId, slotPool, this));

	final SlotSharingManager.MultiTaskSlotLocality rootSlotLocality;
	try {
		if (scheduledUnit.getCoLocationConstraint() == null) {
			rootSlotLocality = allocateMultiTaskSlot(
				scheduledUnit.getJobVertexId(),
				sharingManager,
				slotProfile,
				allocationTimeout);
		} else {
			rootSlotLocality = allocateCoLocatedMultiTaskSlot(
				scheduledUnit.getCoLocationConstraint(),
				sharingManager,
				slotProfile,
				allocationTimeout);
		}
	} catch (NoResourceAvailableException e) {
		// surface the shortage through the returned future instead of throwing
		return FutureUtils.completedExceptionally(e);
	}

	// sanity check: the job vertex must not already be present in the multi task slot
	Preconditions.checkState(!rootSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId()));

	return rootSlotLocality.getMultiTaskSlot()
		.allocateSingleTaskSlot(
			slotRequestId,
			slotProfile.getTaskResourceProfile(),
			scheduledUnit.getJobVertexId(),
			rootSlotLocality.getLocality())
		.getLogicalSlotFuture();
}
 
Example #22
Source File: SchedulerImpl.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a co-located {@link SlotSharingManager.MultiTaskSlot} for the given {@link CoLocationConstraint}.
 *
 * <p>If the constraint already references a slot request whose task slot is still known to the
 * slot sharing manager, that slot is reused. Otherwise a new multi task slot is allocated, a
 * nested multi task slot for the constraint's group is created in it, and the constraint is
 * pointed at the new slot request.
 *
 * <p>The returned {@link SlotSharingManager.MultiTaskSlot} can be uncompleted.
 *
 * @param coLocationConstraint for which to allocate a {@link SlotSharingManager.MultiTaskSlot}
 * @param multiTaskSlotManager responsible for the slot sharing group for which to allocate the slot
 * @param slotProfile specifying the requirements for the requested slot
 * @param allocationTimeout timeout before the slot allocation times out
 * @return A {@link SlotSharingManager.MultiTaskSlotLocality} which contains the allocated
 * 		{@link SlotSharingManager.MultiTaskSlot} and its locality wrt the given location preferences
 * @throws NoResourceAvailableException if the already assigned slot may not have enough resources
 * 		for the task, or if the constraint is assigned and the newly allocated slot is not local
 */
private SlotSharingManager.MultiTaskSlotLocality allocateCoLocatedMultiTaskSlot(
	CoLocationConstraint coLocationConstraint,
	SlotSharingManager multiTaskSlotManager,
	SlotProfile slotProfile,
	@Nullable Time allocationTimeout) throws NoResourceAvailableException {
	final SlotRequestId coLocationSlotRequestId = coLocationConstraint.getSlotRequestId();

	if (coLocationSlotRequestId != null) {
		// we have a slot assigned --> try to retrieve it
		final SlotSharingManager.TaskSlot taskSlot = multiTaskSlotManager.getTaskSlot(coLocationSlotRequestId);

		if (taskSlot != null) {
			Preconditions.checkState(taskSlot instanceof SlotSharingManager.MultiTaskSlot);

			SlotSharingManager.MultiTaskSlot multiTaskSlot = (SlotSharingManager.MultiTaskSlot) taskSlot;

			if (multiTaskSlot.mayHaveEnoughResourcesToFulfill(slotProfile.getTaskResourceProfile())) {
				return SlotSharingManager.MultiTaskSlotLocality.of(multiTaskSlot, Locality.LOCAL);
			}

			throw new NoResourceAvailableException("Not enough resources in the slot for all co-located tasks.");
		} else {
			// the slot may have been cancelled in the mean time
			coLocationConstraint.setSlotRequestId(null);
		}
	}

	if (coLocationConstraint.isAssigned()) {
		// refine the preferred locations of the slot profile
		slotProfile = SlotProfile.priorAllocation(
			slotProfile.getTaskResourceProfile(),
			slotProfile.getPhysicalSlotResourceProfile(),
			Collections.singleton(coLocationConstraint.getLocation()),
			slotProfile.getPreferredAllocations(),
			slotProfile.getPreviousExecutionGraphAllocations());
	}

	// get a new multi task slot
	SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = allocateMultiTaskSlot(
		coLocationConstraint.getGroupId(),
		multiTaskSlotManager,
		slotProfile,
		allocationTimeout);

	// check whether we fulfill the co-location constraint
	if (coLocationConstraint.isAssigned() && multiTaskSlotLocality.getLocality() != Locality.LOCAL) {
		multiTaskSlotLocality.getMultiTaskSlot().release(
			new FlinkException("Multi task slot is not local and, thus, does not fulfill the co-location constraint."));

		throw new NoResourceAvailableException("Could not allocate a local multi task slot for the " +
			"co location constraint " + coLocationConstraint + '.');
	}

	final SlotRequestId slotRequestId = new SlotRequestId();
	final SlotSharingManager.MultiTaskSlot coLocationSlot =
		multiTaskSlotLocality.getMultiTaskSlot().allocateMultiTaskSlot(
			slotRequestId,
			coLocationConstraint.getGroupId());

	// mark the requested slot as co-located slot for other co-located tasks
	coLocationConstraint.setSlotRequestId(slotRequestId);

	// lock the co-location constraint once we have obtained the allocated slot
	coLocationSlot.getSlotContextFuture().whenComplete(
		(SlotContext slotContext, Throwable throwable) -> {
			if (throwable == null) {
				// check whether we are still assigned to the co-location constraint
				if (Objects.equals(coLocationConstraint.getSlotRequestId(), slotRequestId)) {
					coLocationConstraint.lockLocation(slotContext.getTaskManagerLocation());
				} else {
					log.debug("Failed to lock colocation constraint {} because assigned slot " +
							"request {} differs from fulfilled slot request {}.",
						coLocationConstraint.getGroupId(),
						coLocationConstraint.getSlotRequestId(),
						slotRequestId);
				}
			} else {
				// Report the request this future belongs to. The constraint's current slot
				// request id may have been reset or replaced by the time the future fails,
				// so reading it here could name a different (or null) request.
				log.debug("Failed to lock colocation constraint {} because the slot " +
						"allocation for slot request {} failed.",
					coLocationConstraint.getGroupId(),
					slotRequestId,
					throwable);
			}
		});

	return SlotSharingManager.MultiTaskSlotLocality.of(coLocationSlot, multiTaskSlotLocality.getLocality());
}
 
Example #23
Source File: SchedulerImpl.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Allocates a logical slot for the given unit inside its slot sharing group.
 *
 * <p>The {@link SlotSharingManager} for the unit's slot sharing group id is taken from
 * {@code slotSharingManagers} (created on first use). A multi task slot is then obtained via
 * the co-location path when the unit has a co-location constraint and via the plain path
 * otherwise, and a single task slot with the profile's resource profile is allocated in it
 * for the unit's job vertex.
 *
 * @return the logical slot future of the allocated single task slot, or the result of
 * 		{@code FutureUtils.completedExceptionally} if a {@link NoResourceAvailableException}
 * 		was thrown while obtaining the multi task slot
 */
private CompletableFuture<LogicalSlot> allocateSharedSlot(
	SlotRequestId slotRequestId,
	ScheduledUnit scheduledUnit,
	SlotProfile slotProfile,
	boolean allowQueuedScheduling,
	@Nullable Time allocationTimeout) {
	final SlotSharingManager sharingManager = slotSharingManagers.computeIfAbsent(
		scheduledUnit.getSlotSharingGroupId(),
		groupId -> new SlotSharingManager(groupId, slotPool, this));

	final SlotSharingManager.MultiTaskSlotLocality rootSlotLocality;
	try {
		if (scheduledUnit.getCoLocationConstraint() == null) {
			rootSlotLocality = allocateMultiTaskSlot(
				scheduledUnit.getJobVertexId(),
				sharingManager,
				slotProfile,
				allowQueuedScheduling,
				allocationTimeout);
		} else {
			rootSlotLocality = allocateCoLocatedMultiTaskSlot(
				scheduledUnit.getCoLocationConstraint(),
				sharingManager,
				slotProfile,
				allowQueuedScheduling,
				allocationTimeout);
		}
	} catch (NoResourceAvailableException e) {
		// surface the shortage through the returned future instead of throwing
		return FutureUtils.completedExceptionally(e);
	}

	// sanity check: the job vertex must not already be present in the multi task slot
	Preconditions.checkState(!rootSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId()));

	return rootSlotLocality.getMultiTaskSlot()
		.allocateSingleTaskSlot(
			slotRequestId,
			slotProfile.getResourceProfile(),
			scheduledUnit.getJobVertexId(),
			rootSlotLocality.getLocality())
		.getLogicalSlotFuture();
}
 
Example #24
Source File: ExecutionGraphRestartTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Schedules a source/sink job with slot sharing on a scheduler that was created with one
 * instance fewer than the parallelism, waits for the configured number of full restarts and
 * then expects the graph to be {@code FAILED} with a {@link NoResourceAvailableException} as
 * cause.
 */
@Test
public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception {
	// this test is inconclusive if not used with a proper multi-threaded executor
	assertTrue("test assumptions violated", ((ThreadPoolExecutor) executor).getCorePoolSize() > 1);

	final int maxRestarts = 10;
	final int dop = 20;

	// one instance short of what the job needs
	final TaskManagerGateway gateway = new SimpleAckingTaskManagerGateway();
	final Scheduler slotProvider = createSchedulerWithInstances(dop - 1, gateway);

	final SlotSharingGroup sharedGroup = new SlotSharingGroup();

	final JobVertex producer = new JobVertex("source");
	producer.setInvokableClass(NoOpInvokable.class);
	producer.setParallelism(dop);
	producer.setSlotSharingGroup(sharedGroup);

	final JobVertex consumer = new JobVertex("sink");
	consumer.setInvokableClass(NoOpInvokable.class);
	consumer.setParallelism(dop);
	consumer.setSlotSharingGroup(sharedGroup);
	consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED);

	final TestRestartStrategy restarts = new TestRestartStrategy(maxRestarts, false);

	final ExecutionGraph graph = ExecutionGraphTestUtils.createExecutionGraph(
		new JobID(), slotProvider, restarts, executor, producer, consumer);

	graph.start(mainThreadExecutor);
	graph.setScheduleMode(ScheduleMode.EAGER);
	graph.scheduleForExecution();

	// poll until the expected number of full restarts has been reached
	while (graph.getNumberOfFullRestarts() < maxRestarts) {
		Thread.sleep(1);
	}

	assertEquals(JobStatus.FAILED, graph.getState());

	// any cause other than missing resources is rethrown so that it fails the test
	final Throwable failureCause = graph.getFailureCause();
	if (!(failureCause instanceof NoResourceAvailableException)) {
		ExceptionUtils.rethrowException(failureCause, failureCause.getMessage());
	}
}
 
Example #25
Source File: ExecutionGraphNotEnoughResourceTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Schedules a source/sink job with slot sharing on a scheduler that was created with one slot
 * fewer than the parallelism and expects the graph to reach {@code FAILED} with a
 * {@link NoResourceAvailableException} as cause after {@code numRestarts + 1} counted
 * restarts.
 *
 * <p>Scheduling and all reads of the execution graph's state are submitted to
 * {@code mainThreadExecutor}.
 */
@Test
public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception {
	final int maxRestarts = 10;
	final int dop = 20;

	SlotPool pool = null;
	try {
		pool = new TestingSlotPoolImpl(TEST_JOB_ID);
		// one slot short of what the job needs
		final Scheduler slotProvider = createSchedulerWithSlots(
			dop - 1, pool, new LocalTaskManagerLocation());

		final SlotSharingGroup sharedGroup = new SlotSharingGroup();

		final JobVertex producer = new JobVertex("source");
		producer.setInvokableClass(NoOpInvokable.class);
		producer.setParallelism(dop);
		producer.setSlotSharingGroup(sharedGroup);

		final JobVertex consumer = new JobVertex("sink");
		consumer.setInvokableClass(NoOpInvokable.class);
		consumer.setParallelism(dop);
		consumer.setSlotSharingGroup(sharedGroup);
		consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED);

		final JobGraph jobGraph = new JobGraph(TEST_JOB_ID, "Test Job", producer, consumer);
		jobGraph.setScheduleMode(ScheduleMode.EAGER);

		final TestRestartStrategy restarts = new TestRestartStrategy(maxRestarts, false);

		final ExecutionGraph graph = TestingExecutionGraphBuilder
			.newBuilder()
			.setJobGraph(jobGraph)
			.setSlotProvider(slotProvider)
			.setRestartStrategy(restarts)
			.setAllocationTimeout(Time.milliseconds(1L))
			.build();

		graph.start(mainThreadExecutor);

		mainThreadExecutor.execute(ThrowingRunnable.unchecked(graph::scheduleForExecution));

		CommonTestUtils.waitUntilCondition(
			() -> CompletableFuture.supplyAsync(graph::getState, mainThreadExecutor).join() == JobStatus.FAILED,
			Deadline.fromNow(Duration.ofMillis(2000)));

		// the last suppressed restart is also counted
		final long observedRestarts =
			CompletableFuture.supplyAsync(graph::getNumberOfRestarts, mainThreadExecutor).join().longValue();
		assertEquals(maxRestarts + 1, observedRestarts);

		// any cause other than missing resources is rethrown so that it fails the test
		final Throwable failureCause =
			CompletableFuture.supplyAsync(graph::getFailureCause, mainThreadExecutor).join();
		if (!(failureCause instanceof NoResourceAvailableException)) {
			ExceptionUtils.rethrowException(failureCause, failureCause.getMessage());
		}
	} finally {
		if (pool != null) {
			CompletableFuture.runAsync(pool::close, mainThreadExecutor).join();
		}
	}
}