org.apache.flink.runtime.clusterframework.types.ResourceID Java Examples

The following examples show how to use org.apache.flink.runtime.clusterframework.types.ResourceID. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionVertexLocalityTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that target vertices fed by a large all-to-all input expose
 * no preferred location at all.
 */
@Test
public void testNoLocalityInputLargeAllToAll() throws Exception {
	final int parallelism = 100;

	final ExecutionGraph graph = createTestGraph(parallelism, true);

	// place every source subtask on its own location (fresh resource ID, distinct port)
	final ExecutionVertex[] sourceVertices = graph.getAllVertices().get(sourceVertexId).getTaskVertices();
	for (int subtask = 0; subtask < parallelism; subtask++) {
		final int dataPort = 10000 + subtask;
		initializeLocation(
			sourceVertices[subtask],
			new TaskManagerLocation(ResourceID.generate(), InetAddress.getLoopbackAddress(), dataPort));
	}

	// none of the target subtasks may report a preferred location
	final ExecutionVertex[] targetVertices = graph.getAllVertices().get(targetVertexId).getTaskVertices();
	for (int subtask = 0; subtask < parallelism; subtask++) {
		assertFalse(targetVertices[subtask].getPreferredLocations().iterator().hasNext());
	}
}
 
Example #2
Source File: YarnResourceManagerTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Create mock RM dependencies.
 */
Context() throws Exception {
	rpcService = new TestingRpcService();
	rmServices = new MockResourceManagerRuntimeServices();

	// resource manager
	rmResourceID = ResourceID.generate();
	resourceManager =
			new TestingYarnResourceManager(
					rpcService,
					RM_ADDRESS,
					rmResourceID,
					flinkConfig,
					env,
					rmServices.highAvailabilityServices,
					rmServices.heartbeatServices,
					rmServices.slotManager,
					rmServices.metricRegistry,
					rmServices.jobLeaderIdService,
					new ClusterInformation("localhost", 1234),
					testingFatalErrorHandler,
					null,
					mockResourceManagerClient,
					mockNMClient,
					mockJMMetricGroup);
}
 
Example #3
Source File: TaskManagerInfo.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a task manager description; annotated as the JSON creator, with
 * the resource ID read through {@link ResourceIDDeserializer}.
 *
 * @param resourceId resource ID of the task manager, must not be null
 * @param address address of the task manager, must not be null
 * @param dataPort data port value
 * @param lastHeartbeat last heartbeat value
 * @param numberSlots number of slots
 * @param numberAvailableSlots number of available slots
 * @param hardwareDescription hardware description, must not be null
 */
@JsonCreator
public TaskManagerInfo(
		@JsonDeserialize(using = ResourceIDDeserializer.class) @JsonProperty(FIELD_NAME_RESOURCE_ID) ResourceID resourceId,
		@JsonProperty(FIELD_NAME_ADDRESS) String address,
		@JsonProperty(FIELD_NAME_DATA_PORT) int dataPort,
		@JsonProperty(FIELD_NAME_LAST_HEARTBEAT) long lastHeartbeat,
		@JsonProperty(FIELD_NAME_NUMBER_SLOTS) int numberSlots,
		@JsonProperty(FIELD_NAME_NUMBER_AVAILABLE_SLOTS) int numberAvailableSlots,
		@JsonProperty(FIELD_NAME_HARDWARE) HardwareDescription hardwareDescription) {
	// reference-typed arguments are mandatory
	this.resourceId = Preconditions.checkNotNull(resourceId);
	this.address = Preconditions.checkNotNull(address);
	this.hardwareDescription = Preconditions.checkNotNull(hardwareDescription);

	// primitive values are taken over unchanged
	this.dataPort = dataPort;
	this.lastHeartbeat = lastHeartbeat;
	this.numberSlots = numberSlots;
	this.numberAvailableSlots = numberAvailableSlots;
}
 
Example #4
Source File: HeartbeatManagerSenderImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Convenience constructor that forwards all arguments to the constructor
 * taking an explicit heartbeat monitor factory, supplying a new
 * {@code HeartbeatMonitorImpl.Factory} for that argument.
 *
 * @param heartbeatPeriod forwarded unchanged
 * @param heartbeatTimeout forwarded unchanged
 * @param ownResourceID forwarded unchanged
 * @param heartbeatListener forwarded unchanged
 * @param mainThreadExecutor forwarded unchanged
 * @param log forwarded unchanged
 */
HeartbeatManagerSenderImpl(
		long heartbeatPeriod,
		long heartbeatTimeout,
		ResourceID ownResourceID,
		HeartbeatListener<I, O> heartbeatListener,
		ScheduledExecutor mainThreadExecutor,
		Logger log) {
	this(
		heartbeatPeriod,
		heartbeatTimeout,
		ownResourceID,
		heartbeatListener,
		mainThreadExecutor,
		log,
		new HeartbeatMonitorImpl.Factory<>());
}
 
Example #5
Source File: ResourceManager.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@link TaskManagerInfo} per entry of {@code taskExecutors} and
 * returns them in an already completed future.
 *
 * @param timeout not used by this implementation
 * @return completed future holding the collected task manager infos
 */
@Override
public CompletableFuture<Collection<TaskManagerInfo>> requestTaskManagerInfo(Time timeout) {

	final ArrayList<TaskManagerInfo> infos = new ArrayList<>(taskExecutors.size());

	taskExecutors.forEach((workerId, registration) ->
		infos.add(
			new TaskManagerInfo(
				workerId,
				registration.getTaskExecutorGateway().getAddress(),
				registration.getDataPort(),
				taskManagerHeartbeatManager.getLastHeartbeatFrom(workerId),
				slotManager.getNumberRegisteredSlotsOf(registration.getInstanceID()),
				slotManager.getNumberFreeSlotsOf(registration.getInstanceID()),
				slotManager.getRegisteredResourceOf(registration.getInstanceID()),
				slotManager.getFreeResourceOf(registration.getInstanceID()),
				registration.getHardwareDescription())));

	return CompletableFuture.completedFuture(infos);
}
 
Example #6
Source File: ResourceManagerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a testing task executor and checks that the heartbeat timeout
 * disconnects it with a {@link TimeoutException}.
 */
@Test
public void testHeartbeatTimeoutWithTaskExecutor() throws Exception {
	final ResourceID taskExecutorId = ResourceID.generate();
	final CompletableFuture<ResourceID> heartbeatOrigin = new CompletableFuture<>();
	final CompletableFuture<Exception> disconnectCause = new CompletableFuture<>();

	final TaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder()
		.setDisconnectResourceManagerConsumer(disconnectCause::complete)
		.setHeartbeatResourceManagerConsumer(heartbeatOrigin::complete)
		.createTestingTaskExecutorGateway();
	rpcService.registerGateway(gateway.getAddress(), gateway);

	runHeartbeatTimeoutTest(
		rmGateway -> registerTaskExecutor(rmGateway, taskExecutorId, gateway.getAddress()),
		rmResourceId -> {
			// the heartbeat request may or may not have arrived before the timeout fired
			final ResourceID requestOriginOrNull = heartbeatOrigin.getNow(null);
			assertThat(requestOriginOrNull, anyOf(is(rmResourceId), is(nullValue())));
			assertThat(disconnectCause.get(), instanceOf(TimeoutException.class));
		}
	);
}
 
Example #7
Source File: YarnResourceManager.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Handles completed containers via {@code runAsync}: drops each container
 * from the worker node map, requests a replacement if it was still tracked,
 * and closes the task manager connection.
 *
 * @param statuses statuses of the completed containers
 */
@Override
public void onContainersCompleted(final List<ContainerStatus> statuses) {
	runAsync(() -> {
		log.debug("YARN ResourceManager reported the following containers completed: {}.", statuses);

		for (final ContainerStatus status : statuses) {
			final ResourceID workerId = new ResourceID(status.getContainerId().toString());

			// a container that was still tracked completed unexpectedly ~> start a new one
			final boolean wasTracked = workerNodeMap.remove(workerId) != null;
			if (wasTracked) {
				requestYarnContainerIfRequired();
			}

			// eagerly close the connection with the task manager, tracked or not
			closeTaskManagerConnection(workerId, new Exception(status.getDiagnostics()));
		}
	});
}
 
Example #8
Source File: SlotSharingGroupAssignment.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Gets the number of shared slots into which the given group can place subtasks or
 * nested task groups.
 *
 * <p>If the group has an entry in {@code availableSlotsPerJid}, the result is the
 * number of distinct slots across all of its per-resource lists. Otherwise the
 * size of {@code allSlots} is returned.
 *
 * @param groupId The ID of the group.
 * @return The number of shared slots available to the given job vertex.
 */
public int getNumberOfAvailableSlotsForGroup(AbstractID groupId) {
	synchronized (lock) {
		final Map<ResourceID, List<SharedSlot>> available = availableSlotsPerJid.get(groupId);

		if (available == null) {
			// if no entry exists for a JobVertexID so far, then the vertex with that ID can
			// add a subtask into each shared slot of this group. Consequently, all
			// of them are available for that JobVertexID.
			return allSlots.size();
		}

		// collect into a set so a slot appearing in several lists is counted once
		final Set<SharedSlot> distinctSlots = new HashSet<>();
		for (List<SharedSlot> slotsOfResource : available.values()) {
			distinctSlots.addAll(slotsOfResource);
		}

		return distinctSlots.size();
	}
}
 
Example #9
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to a heartbeat timeout of the JobManager with the given ID: if a
 * connection is registered for it, the connection is closed with a
 * {@link TimeoutException} and a reconnect is triggered for the job.
 *
 * @param resourceID ID of the JobManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceID) {
	validateRunsInMainThread();
	log.info("The heartbeat of JobManager with id {} timed out.", resourceID);

	// one lookup is enough: get() also returns null when the ID is not registered
	final JobManagerConnection jobManagerConnection = jobManagerConnections.get(resourceID);
	if (jobManagerConnection == null) {
		return;
	}

	closeJobManagerConnection(
		jobManagerConnection.getJobID(),
		new TimeoutException("The heartbeat of JobManager with id " + resourceID + " timed out."));

	jobLeaderService.reconnect(jobManagerConnection.getJobID());
}
 
Example #10
Source File: StandaloneResourceManagerFactoryTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a standalone resource manager with a task manager heap size of
 * {@code 128 * 1024 * 1024} and a containerized heap cutoff minimum of 600.
 * The test passes when creation does not throw.
 */
@Test
public void createResourceManager_WithLessMemoryThanContainerizedHeapCutoffMin_ShouldSucceed() throws Exception {
	final StandaloneResourceManagerFactory factory = StandaloneResourceManagerFactory.INSTANCE;

	final TestingRpcService rpcService = new TestingRpcService();
	try {
		final Configuration configuration = new Configuration();
		final String taskManagerHeap = new MemorySize(128 * 1024 * 1024).toString();
		configuration.setString(TaskManagerOptions.TASK_MANAGER_HEAP_MEMORY, taskManagerHeap);
		configuration.setInteger(ResourceManagerOptions.CONTAINERIZED_HEAP_CUTOFF_MIN, 600);

		// the created instance is not inspected; only successful creation matters
		final ResourceManager<ResourceID> ignored = factory.createResourceManager(
			configuration,
			ResourceID.generate(),
			rpcService,
			new TestingHighAvailabilityServices(),
			new TestingHeartbeatServices(),
			NoOpMetricRegistry.INSTANCE,
			new TestingFatalErrorHandler(),
			new ClusterInformation("foobar", 1234),
			null,
			UnregisteredMetricGroups.createUnregisteredJobManagerMetricGroup());
	} finally {
		RpcUtils.terminateRpcService(rpcService, Time.seconds(10L));
	}
}
 
Example #11
Source File: KubernetesResourceManagerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a pod error reported after the task executor has registered
 * leaves no pods behind, i.e. no replacement pod is requested.
 */
@Test
public void testTaskManagerPodErrorAfterRegistration() throws Exception {
	new Context() {{
		runTest(() -> {
			registerSlotRequest();

			final Pod taskManagerPod = kubeClient.pods().list().getItems().get(0);
			resourceManager.onAdded(Collections.singletonList(new KubernetesPod(taskManagerPod)));

			final ResourceID taskManagerId = new ResourceID(taskManagerPod.getMetadata().getName());
			registerTaskExecutor(taskManagerId);

			// Error happens in the pod. Should not request a new pod.
			terminatePod(taskManagerPod);
			resourceManager.onError(Collections.singletonList(new KubernetesPod(taskManagerPod)));

			final int remainingPods = kubeClient.pods().list().getItems().size();
			assertEquals(0, remainingPods);
		});
	}};
}
 
Example #12
Source File: HeartbeatManagerSenderImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Full constructor: passes everything except the heartbeat period to the
 * superclass constructor, stores the period, and schedules this instance on
 * the given executor with a delay of zero milliseconds.
 *
 * @param heartbeatPeriod stored in {@code this.heartbeatPeriod}
 * @param heartbeatTimeout forwarded to the superclass
 * @param ownResourceID forwarded to the superclass
 * @param heartbeatListener forwarded to the superclass
 * @param mainThreadExecutor forwarded to the superclass and used for the initial scheduling
 * @param log forwarded to the superclass
 * @param heartbeatMonitorFactory forwarded to the superclass
 */
HeartbeatManagerSenderImpl(
		long heartbeatPeriod,
		long heartbeatTimeout,
		ResourceID ownResourceID,
		HeartbeatListener<I, O> heartbeatListener,
		ScheduledExecutor mainThreadExecutor,
		Logger log,
		HeartbeatMonitor.Factory<O> heartbeatMonitorFactory) {
	super(
		heartbeatTimeout,
		ownResourceID,
		heartbeatListener,
		mainThreadExecutor,
		log,
		heartbeatMonitorFactory);

	this.heartbeatPeriod = heartbeatPeriod;
	// NOTE(review): 'this' is handed to the executor from inside the constructor;
	// presumably safe because the period is already assigned above - confirm
	mainThreadExecutor.schedule(this, 0L, TimeUnit.MILLISECONDS);
}
 
Example #13
Source File: MetricRegistryImplTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Starts the metric query service of a {@link MetricRegistryImpl}, registers
 * a single counter and polls the query service gateway (at most 10 attempts)
 * until the dump reports exactly one counter.
 */
@Test
public void testMetricQueryServiceSetup() throws Exception {
	MetricRegistryImpl metricRegistry = new MetricRegistryImpl(MetricRegistryConfiguration.defaultMetricRegistryConfiguration());

	// no gateway address is available before the query service is started
	Assert.assertNull(metricRegistry.getMetricQueryServiceGatewayRpcAddress());

	metricRegistry.startQueryService(new TestingRpcService(), new ResourceID("mqs"));

	MetricQueryServiceGateway metricQueryServiceGateway = metricRegistry.getMetricQueryServiceGateway();
	Assert.assertNotNull(metricQueryServiceGateway);

	metricRegistry.register(new SimpleCounter(), "counter", UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup());

	boolean metricsSuccessfullyQueried = false;
	// stop polling as soon as the expected result was seen; no further queries are needed
	for (int x = 0; x < 10 && !metricsSuccessfullyQueried; x++) {
		MetricDumpSerialization.MetricSerializationResult metricSerializationResult = metricQueryServiceGateway.queryMetrics(Time.seconds(5))
			.get(5, TimeUnit.SECONDS);

		if (metricSerializationResult.numCounters == 1) {
			metricsSuccessfullyQueried = true;
		} else {
			// back off briefly before the next attempt
			Thread.sleep(50);
		}
	}
	Assert.assertTrue("metrics query did not return expected result", metricsSuccessfullyQueried);
}
 
Example #14
Source File: NettyShuffleEnvironment.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the supplied collaborators, creates an empty concurrent map for the
 * input gates (initial capacity 10) and marks the environment as not closed.
 */
NettyShuffleEnvironment(
		ResourceID taskExecutorResourceId,
		NettyShuffleEnvironmentConfiguration config,
		NetworkBufferPool networkBufferPool,
		ConnectionManager connectionManager,
		ResultPartitionManager resultPartitionManager,
		FileChannelManager fileChannelManager,
		ResultPartitionFactory resultPartitionFactory,
		SingleInputGateFactory singleInputGateFactory,
		Executor ioExecutor) {
	// identity and configuration
	this.taskExecutorResourceId = taskExecutorResourceId;
	this.config = config;

	// managers and pools handed in by the caller
	this.networkBufferPool = networkBufferPool;
	this.connectionManager = connectionManager;
	this.resultPartitionManager = resultPartitionManager;
	this.fileChannelManager = fileChannelManager;
	this.ioExecutor = ioExecutor;

	// factories
	this.resultPartitionFactory = resultPartitionFactory;
	this.singleInputGateFactory = singleInputGateFactory;

	// state owned by this instance
	this.inputGatesById = new ConcurrentHashMap<>(10);
	this.isClosed = false;
}
 
Example #15
Source File: JobMaster.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Disconnects the task manager with the given ID: stops heartbeat monitoring,
 * releases it from the slot pool, removes its registration and, if one
 * existed, tells its gateway to disconnect from this job.
 *
 * @param resourceID ID of the task manager to disconnect
 * @param cause reason for the disconnect
 * @return an already completed acknowledgement
 */
@Override
public CompletableFuture<Acknowledge> disconnectTaskManager(final ResourceID resourceID, final Exception cause) {
	log.debug("Disconnect TaskExecutor {} because: {}", resourceID, cause.getMessage());

	taskManagerHeartbeatManager.unmonitorTarget(resourceID);
	slotPool.releaseTaskManager(resourceID, cause);

	final Tuple2<TaskManagerLocation, TaskExecutorGateway> registration = registeredTaskManagers.remove(resourceID);
	if (registration != null) {
		// f1 is the gateway half of the (location, gateway) pair
		final TaskExecutorGateway gateway = registration.f1;
		gateway.disconnectJobManager(jobGraph.getJobID(), cause);
	}

	return CompletableFuture.completedFuture(Acknowledge.get());
}
 
Example #16
Source File: ResourceManagerPartitionTrackerImpl.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Lists every data set with metadata, pairing the number of partitions
 * currently tracked across all task executors with the data set's total
 * number of partitions.
 *
 * @return map from data set ID to its meta info
 */
@Override
public Map<IntermediateDataSetID, DataSetMetaInfo> listDataSets() {
	return dataSetMetaInfo.entrySet().stream()
		.collect(Collectors.toMap(
			Map.Entry::getKey,
			metaEntry -> {
				final Map<ResourceID, Set<ResultPartitionID>> partitionsByExecutor = dataSetToTaskExecutors.get(metaEntry.getKey());
				Preconditions.checkState(partitionsByExecutor != null, "Have metadata entry for dataset %s, but no partition is tracked.", metaEntry.getKey());

				// sum of the partition counts hosted by the individual task executors
				final int trackedPartitions = partitionsByExecutor.values().stream()
					.mapToInt(Set::size)
					.sum();

				return DataSetMetaInfo.withNumRegisteredPartitions(trackedPartitions, metaEntry.getValue().getNumTotalPartitions());
			}));
}
 
Example #17
Source File: TestingResourceManagerGateway.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the configured upload function if one is set; otherwise
 * answers with a completed future holding a new {@link TransientBlobKey}.
 *
 * @param taskManagerId first component of the tuple handed to the function
 * @param fileType second component of the tuple handed to the function
 * @param timeout not used by this implementation
 */
@Override
public CompletableFuture<TransientBlobKey> requestTaskManagerFileUpload(ResourceID taskManagerId, FileType fileType, Time timeout) {
	// read the field once so the null check and the call use the same reference
	final Function<Tuple2<ResourceID, FileType>, CompletableFuture<TransientBlobKey>> uploadFunction = requestTaskManagerFileUploadFunction;

	if (uploadFunction == null) {
		return CompletableFuture.completedFuture(new TransientBlobKey());
	}

	return uploadFunction.apply(Tuple2.of(taskManagerId, fileType));
}
 
Example #18
Source File: InstanceTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that marking an instance as dead cancels all slots allocated from
 * it and leaves neither allocated nor available slots behind.
 *
 * <p>Unexpected exceptions propagate to the test runner so that the full
 * stack trace appears in the test report.
 */
@Test
public void testInstanceDies() throws Exception {
	ResourceID resourceID = ResourceID.generate();
	HardwareDescription hardwareDescription = new HardwareDescription(4, 2L*1024*1024*1024, 1024*1024*1024, 512*1024*1024);
	InetAddress address = InetAddress.getByName("127.0.0.1");
	TaskManagerLocation connection = new TaskManagerLocation(resourceID, address, 10001);

	// an instance offering three slots
	Instance instance = new Instance(
		new ActorTaskManagerGateway(DummyActorGateway.INSTANCE),
		connection,
		new InstanceID(),
		hardwareDescription,
		3);

	assertEquals(3, instance.getNumberOfAvailableSlots());

	// take all three slots before the instance dies
	SimpleSlot slot1 = instance.allocateSimpleSlot();
	SimpleSlot slot2 = instance.allocateSimpleSlot();
	SimpleSlot slot3 = instance.allocateSimpleSlot();

	instance.markDead();

	assertEquals(0, instance.getNumberOfAllocatedSlots());
	assertEquals(0, instance.getNumberOfAvailableSlots());

	assertTrue(slot1.isCanceled());
	assertTrue(slot2.isCanceled());
	assertTrue(slot3.isCanceled());
}
 
Example #19
Source File: ResourceManagerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a task executor at the given resource manager gateway and
 * asserts that the registration response is a success.
 *
 * @param resourceManagerGateway gateway to register at
 * @param taskExecutorId resource ID of the task executor
 * @param taskExecutorAddress address of the task executor
 */
private void registerTaskExecutor(ResourceManagerGateway resourceManagerGateway, ResourceID taskExecutorId, String taskExecutorAddress) throws Exception {
	final CompletableFuture<RegistrationResponse> pendingRegistration =
		resourceManagerGateway.registerTaskExecutor(
			taskExecutorAddress,
			taskExecutorId,
			dataPort,
			hardwareDescription,
			TestingUtils.TIMEOUT());

	// wait for the registration to complete before checking its type
	final RegistrationResponse response = pendingRegistration.get();
	assertThat(response, instanceOf(RegistrationResponse.Success.class));
}
 
Example #20
Source File: JobMasterPartitionTrackerImpl.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Stops tracking all partitions of the given task executor and passes the
 * deployment descriptors of those partitions on for release or promotion.
 *
 * @param producingTaskExecutorId ID of the producing task executor, must not be null
 */
@Override
public void stopTrackingAndReleaseOrPromotePartitionsFor(ResourceID producingTaskExecutorId) {
	Preconditions.checkNotNull(producingTaskExecutorId);

	// project the no-longer-tracked entries onto their meta info (the deployment descriptors)
	final Collection<ResultPartitionDeploymentDescriptor> untrackedPartitionDescriptors = CollectionUtil.project(
		stopTrackingPartitionsFor(producingTaskExecutorId),
		PartitionTrackerEntry::getMetaInfo);

	internalReleaseOrPromotePartitions(producingTaskExecutorId, untrackedPartitionDescriptors);
}
 
Example #21
Source File: DefaultJobTableTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Connecting a job that was already closed is expected to fail with an
 * {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void connectJob_AfterBeingClosed_WillFail() {
	final JobTable.Job closedJob = jobTable.getOrCreateJob(jobId, DEFAULT_JOB_SERVICES_SUPPLIER);
	closedJob.close();

	// expected to throw
	connectJob(closedJob, ResourceID.generate());
}
 
Example #22
Source File: SimpleSlotTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a single-slot {@link Instance} located at 127.0.0.1:10001 and
 * returns a simple slot allocated from it.
 *
 * @return the slot allocated from the freshly created instance
 */
public static SimpleSlot getSlot() throws Exception {
	final TaskManagerLocation location = new TaskManagerLocation(
		ResourceID.generate(),
		InetAddress.getByName("127.0.0.1"),
		10001);
	final HardwareDescription hardware = new HardwareDescription(4, 2L*1024*1024*1024, 1024*1024*1024, 512*1024*1024);

	final Instance singleSlotInstance = new Instance(
		new ActorTaskManagerGateway(DummyActorGateway.INSTANCE),
		location,
		new InstanceID(),
		hardware,
		1);

	return singleSlotInstance.allocateSimpleSlot();
}
 
Example #23
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a timed-out task manager keeps its slots registered in the SlotManager.
 * The timeout is only expected to invoke the
 * {@link ResourceActions#releaseResource(InstanceID, Exception)} callback; the slots
 * disappear once the task manager is explicitly unregistered.
 *
 * <p>See FLINK-7793
 */
@Test
public void testTaskManagerTimeoutDoesNotRemoveSlots() throws Exception {
	final Time taskManagerTimeout = Time.milliseconds(10L);
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceID taskManagerId = ResourceID.generate();

	// completed with the instance ID handed to the release callback
	final CompletableFuture<InstanceID> releasedInstance = new CompletableFuture<>();
	final ResourceActions resourceActions = new TestingResourceActionsBuilder()
		.setReleaseResourceConsumer((releasedId, cause) -> releasedInstance.complete(releasedId))
		.build();

	final TaskExecutorGateway gateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway();
	final TaskExecutorConnection connection = new TaskExecutorConnection(taskManagerId, gateway);

	// the task manager reports exactly one slot
	final SlotReport initialSlotReport = new SlotReport(
		createEmptySlotStatus(new SlotID(taskManagerId, 0), ResourceProfile.fromResources(1.0, 1)));

	try (final SlotManager slotManager = createSlotManagerBuilder()
		.setTaskManagerTimeout(taskManagerTimeout)
		.buildAndStartWithDirectExec(resourceManagerId, resourceActions)) {

		slotManager.registerTaskManager(connection, initialSlotReport);
		assertEquals(1, slotManager.getNumberRegisteredSlots());

		// block until the timeout triggered the release callback
		assertThat(releasedInstance.get(), is(connection.getInstanceID()));

		// the slot is still registered after the timeout
		assertEquals(1, slotManager.getNumberRegisteredSlots());

		slotManager.unregisterTaskManager(connection.getInstanceID(), TEST_EXCEPTION);
		assertEquals(0, slotManager.getNumberRegisteredSlots());
	}
}
 
Example #24
Source File: ResourceManager.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reacts to a heartbeat timeout of the TaskManager with the given ID by
 * closing its connection with a {@link TimeoutException}.
 *
 * @param resourceID ID of the TaskManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceID) {
	validateRunsInMainThread();
	log.info("The heartbeat of TaskManager with id {} timed out.", resourceID);

	// same wording as the log line above (the original message had a doubled space)
	closeTaskManagerConnection(
		resourceID,
		new TimeoutException("The heartbeat of TaskManager with id " + resourceID + " timed out."));
}
 
Example #25
Source File: SlotManagerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a timed-out task manager keeps its slots registered in the SlotManager.
 * The timeout is only expected to invoke the
 * {@link ResourceActions#releaseResource(InstanceID, Exception)} callback; the slots
 * disappear once the task manager is explicitly unregistered.
 *
 * <p>See FLINK-7793
 */
@Test
public void testTaskManagerTimeoutDoesNotRemoveSlots() throws Exception {
	final Time taskManagerTimeout = Time.milliseconds(10L);
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceID taskManagerId = ResourceID.generate();

	final ResourceActions resourceActions = mock(ResourceActions.class);
	final TaskExecutorGateway gateway = mock(TaskExecutorGateway.class);
	when(gateway.canBeReleased()).thenReturn(CompletableFuture.completedFuture(true));

	final TaskExecutorConnection connection = new TaskExecutorConnection(taskManagerId, gateway);

	// the task manager reports exactly one slot
	final SlotReport initialSlotReport = new SlotReport(
		new SlotStatus(
			new SlotID(taskManagerId, 0),
			new ResourceProfile(1.0, 1)));

	try (final SlotManager slotManager = SlotManagerBuilder.newBuilder()
		.setTaskManagerTimeout(taskManagerTimeout)
		.build()) {

		slotManager.start(resourceManagerId, Executors.directExecutor(), resourceActions);

		slotManager.registerTaskManager(connection, initialSlotReport);
		assertEquals(1, slotManager.getNumberRegisteredSlots());

		// wait (up to 20 task manager timeouts) for the release callback to be invoked
		final long verificationTimeoutMillis = taskManagerTimeout.toMilliseconds() * 20L;
		verify(resourceActions, timeout(verificationTimeoutMillis).atLeast(1)).releaseResource(eq(connection.getInstanceID()), any(Exception.class));

		// the slot is still registered after the timeout
		assertEquals(1, slotManager.getNumberRegisteredSlots());

		slotManager.unregisterTaskManager(connection.getInstanceID());
		assertEquals(0, slotManager.getNumberRegisteredSlots());
	}
}
 
Example #26
Source File: ExecutionPartitionLifecycleTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a blocking-partition job, applies the given state transition to the
 * execution and checks which partition tracker callbacks were invoked.
 *
 * @param stateTransition transition applied to the execution under test
 * @param partitionReleaseResult expected tracker interaction after the transition
 */
private void testPartitionTrackingForStateTransition(final Consumer<Execution> stateTransition, final PartitionReleaseResult partitionReleaseResult) throws Exception {
	// one future per tracker callback, completed with the callback's arguments
	final CompletableFuture<Tuple2<ResourceID, ResultPartitionDeploymentDescriptor>> startTracking = new CompletableFuture<>();
	final CompletableFuture<Collection<ResultPartitionID>> stopTracking = new CompletableFuture<>();
	final CompletableFuture<Collection<ResultPartitionID>> stopTrackingAndRelease = new CompletableFuture<>();

	final TestingPartitionTracker partitionTracker = new TestingPartitionTracker();
	partitionTracker.setStartTrackingPartitionsConsumer(
		(producerId, partitionDescriptor) -> startTracking.complete(Tuple2.of(producerId, partitionDescriptor)));
	partitionTracker.setStopTrackingPartitionsConsumer(stopTracking::complete);
	partitionTracker.setStopTrackingAndReleasePartitionsConsumer(stopTrackingAndRelease::complete);

	setupExecutionGraphAndStartRunningJob(ResultPartitionType.BLOCKING, partitionTracker, new SimpleAckingTaskManagerGateway(), NettyShuffleMaster.INSTANCE);

	// tracking must have started for the expected task executor and partition
	final Tuple2<ResourceID, ResultPartitionDeploymentDescriptor> trackedPartition = startTracking.get();
	assertThat(trackedPartition.f0, equalTo(taskExecutorResourceId));
	assertThat(trackedPartition.f1, equalTo(descriptor));

	stateTransition.accept(execution);

	switch (partitionReleaseResult) {
		case NONE:
			assertFalse(stopTracking.isDone());
			assertFalse(stopTrackingAndRelease.isDone());
			break;
		case STOP_TRACKING:
			assertTrue(stopTracking.isDone());
			assertFalse(stopTrackingAndRelease.isDone());
			final Collection<ResultPartitionID> stoppedPartitions = stopTracking.get();
			assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), stoppedPartitions);
			break;
		case STOP_TRACKING_AND_RELEASE:
			assertFalse(stopTracking.isDone());
			assertTrue(stopTrackingAndRelease.isDone());
			final Collection<ResultPartitionID> releasedPartitions = stopTrackingAndRelease.get();
			assertEquals(Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()), releasedPartitions);
			break;
	}
}
 
Example #27
Source File: YarnResourceManager.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Cleans up after a container in which the TaskManager could not be started:
 * logs the failure, forgets and releases the container, reports the worker
 * as stopped and requests a replacement container if required.
 *
 * @param containerId ID of the failed container
 * @param throwable failure that prevented the TaskManager start
 */
private void releaseFailedContainerAndRequestNewContainerIfRequired(ContainerId containerId, Throwable throwable) {
	validateRunsInMainThread();

	log.error("Could not start TaskManager in container {}.", containerId, throwable);

	final ResourceID failedWorkerId = new ResourceID(containerId.toString());

	// drop the failed container locally and hand it back
	workerNodeMap.remove(failedWorkerId);
	resourceManagerClient.releaseAssignedContainer(containerId);
	notifyAllocatedWorkerStopped(failedWorkerId);

	// then ask for a new container, if one is still needed
	requestYarnContainerIfRequired();
}
 
Example #28
Source File: HeartbeatManagerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that receiving a heartbeat from a monitored target updates its monitor:
 * the previously scheduled timeout is cancelled once and a timeout is scheduled
 * twice in total.
 */
@Test
public void testHeartbeatMonitorUpdate() {
	final long heartbeatTimeout = 1000L;
	final ResourceID ownResourceID = new ResourceID("foobar");
	final ResourceID targetResourceID = new ResourceID("barfoo");
	final Object payload = new Object();

	@SuppressWarnings("unchecked")
	final HeartbeatListener<Object, Object> heartbeatListener = mock(HeartbeatListener.class);
	final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
	final ScheduledFuture<?> scheduledFuture = mock(ScheduledFuture.class);

	// every scheduling request hands back the same mocked future
	doReturn(scheduledFuture).when(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));
	when(heartbeatListener.retrievePayload(any(ResourceID.class))).thenReturn(CompletableFuture.completedFuture(payload));

	final HeartbeatManagerImpl<Object, Object> heartbeatManager = new HeartbeatManagerImpl<>(
		heartbeatTimeout,
		ownResourceID,
		heartbeatListener,
		scheduledExecutor,
		LOG);

	@SuppressWarnings("unchecked")
	final HeartbeatTarget<Object> heartbeatTarget = mock(HeartbeatTarget.class);

	heartbeatManager.monitorTarget(targetResourceID, heartbeatTarget);
	heartbeatManager.receiveHeartbeat(targetResourceID, payload);

	verify(scheduledFuture, times(1)).cancel(true);
	verify(scheduledExecutor, times(2)).schedule(any(Runnable.class), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
}
 
Example #29
Source File: JobManagerRegistration.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a registration from the given job ID, job manager resource ID and
 * job manager gateway. Each argument is passed through
 * {@code Preconditions.checkNotNull} before being stored.
 *
 * @param jobID ID of the job, must not be null
 * @param jobManagerResourceID resource ID of the job manager, must not be null
 * @param jobManagerGateway gateway of the job manager, must not be null
 */
public JobManagerRegistration(
		JobID jobID,
		ResourceID jobManagerResourceID,
		JobMasterGateway jobManagerGateway) {
	this.jobID = Preconditions.checkNotNull(jobID);
	this.jobManagerResourceID = Preconditions.checkNotNull(jobManagerResourceID);
	this.jobManagerGateway = Preconditions.checkNotNull(jobManagerGateway);
}
 
Example #30
Source File: SlotManagerImplTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that slots are updated with respect to the latest incoming slot report. This means that
 * slots for which a report was received are updated accordingly.
 *
 * <p>Two free slots are registered first; a second report then marks slot 2 as
 * allocated, and the test checks that the allocation ID is reflected afterwards.
 */
@Test
public void testUpdateSlotReport() throws Exception {
	final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
	final ResourceActions resourceManagerActions = new TestingResourceActionsBuilder().build();

	final JobID jobId = new JobID();
	final AllocationID allocationId = new AllocationID();

	final TaskExecutorConnection taskManagerConnection = createTaskExecutorConnection();
	final ResourceID resourceId = taskManagerConnection.getResourceID();

	final SlotID slotId1 = new SlotID(resourceId, 0);
	final SlotID slotId2 = new SlotID(resourceId, 1);

	final ResourceProfile resourceProfile = ResourceProfile.fromResources(1.0, 1);
	final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile);
	final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile);

	// updated status for slot 2: now carries a job ID and an allocation ID
	final SlotStatus newSlotStatus2 = new SlotStatus(slotId2, resourceProfile, jobId, allocationId);

	final SlotReport slotReport1 = new SlotReport(Arrays.asList(slotStatus1, slotStatus2));
	final SlotReport slotReport2 = new SlotReport(Arrays.asList(newSlotStatus2, slotStatus1));

	try (SlotManagerImpl slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) {
		// check that we don't have any slots registered
		assertEquals(0, slotManager.getNumberRegisteredSlots());

		slotManager.registerTaskManager(taskManagerConnection, slotReport1);

		TaskManagerSlot slot1 = slotManager.getSlot(slotId1);
		TaskManagerSlot slot2 = slotManager.getSlot(slotId2);

		assertEquals(2, slotManager.getNumberRegisteredSlots());

		assertEquals(TaskManagerSlot.State.FREE, slot1.getState());
		assertEquals(TaskManagerSlot.State.FREE, slot2.getState());

		assertTrue(slotManager.reportSlotStatus(taskManagerConnection.getInstanceID(), slotReport2));

		// the second report must not change the number of registered slots
		assertEquals(2, slotManager.getNumberRegisteredSlots());

		assertNotNull(slotManager.getSlot(slotId1));
		assertNotNull(slotManager.getSlot(slotId2));

		// slotId2 should have been allocated for allocationId
		assertEquals(allocationId, slotManager.getSlot(slotId2).getAllocationId());
	}
}