Java Code Examples for org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups#createUnregisteredTaskManagerMetricGroup()

The following examples show how to use org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups#createUnregisteredTaskManagerMetricGroup(). You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TestingTaskExecutor} around the supplied services.
 *
 * <p>Names that are not declared in this method ({@code rpc}, {@code configuration},
 * {@code TM_RESOURCE_SPEC}, {@code haServices}, {@code dummyBlobCacheService},
 * {@code testingFatalErrorHandler}) come from the enclosing scope.
 *
 * @param taskManagerServices services passed to the executor; its shuffle environment is
 *     also used to create the partition tracker
 * @param heartbeatServices heartbeat services passed to the executor
 * @return the newly constructed executor
 */
private TestingTaskExecutor createTestingTaskExecutor(TaskManagerServices taskManagerServices, HeartbeatServices heartbeatServices) {
	// configuration is derived from the shared settings and the loopback host address
	final TaskManagerConfiguration tmConfiguration = TaskManagerConfiguration.fromConfiguration(
		configuration,
		TM_RESOURCE_SPEC,
		InetAddress.getLoopbackAddress().getHostAddress());

	return new TestingTaskExecutor(
		rpc,
		tmConfiguration,
		haServices,
		taskManagerServices,
		ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES,
		heartbeatServices,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler,
		new TaskExecutorPartitionTrackerImpl(taskManagerServices.getShuffleEnvironment()),
		TaskManagerRunner.createBackPressureSampleService(configuration, rpc.getScheduledExecutor()));
}
 
Example 2
Source File: TaskExecutorSlotLifetimeTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TaskExecutor} with a single-slot task slot table and the given
 * unresolved location.
 *
 * @param configuration configuration used for the task manager configuration, the blob
 *     cache and the back pressure sample service
 * @param rpcService RPC service passed to the executor; also provides the scheduled executor
 * @param haServices high availability services passed to the executor
 * @param unresolvedTaskManagerLocation location set on the task manager services
 * @return the newly constructed executor
 * @throws IOException declared by the signature; propagated from the construction calls below
 */
private TaskExecutor createTaskExecutor(Configuration configuration, TestingRpcService rpcService, TestingHighAvailabilityServices haServices, LocalUnresolvedTaskManagerLocation unresolvedTaskManagerLocation) throws IOException {
	// resource spec is read from the configuration; the host is the loopback address
	final TaskManagerConfiguration tmConfiguration = TaskManagerConfiguration.fromConfiguration(
		configuration,
		TaskExecutorResourceUtils.resourceSpecFromConfigForLocalExecution(configuration),
		InetAddress.getLoopbackAddress().getHostAddress());

	return new TaskExecutor(
		rpcService,
		tmConfiguration,
		haServices,
		new TaskManagerServicesBuilder()
			.setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
			.setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
			.build(),
		ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES,
		new TestingHeartbeatServices(),
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		new BlobCacheService(configuration, new VoidBlobStore(), null),
		testingFatalErrorHandlerResource.getFatalErrorHandler(),
		new TestingTaskExecutorPartitionTracker(),
		TaskManagerRunner.createBackPressureSampleService(configuration, rpcService.getScheduledExecutor()));
}
 
Example 3
Source File: MetricQueryServiceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a metric dump contains serialized data for each registered metric kind and
 * that every serialized section is empty again after the metrics have been removed.
 */
@Test
public void testCreateDump() throws Exception {
	final MetricQueryService service = MetricQueryService.createMetricQueryService(rpcService, ResourceID.generate(), Long.MAX_VALUE);
	service.start();

	final Counter counter = new SimpleCounter();
	final Gauge<String> gauge = () -> "Hello";
	final Histogram histogram = new TestHistogram();
	final Meter meter = new TestMeter();

	final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

	// register one metric of each kind
	service.addMetric("counter", counter, metricGroup);
	service.addMetric("gauge", gauge, metricGroup);
	service.addMetric("histogram", histogram, metricGroup);
	service.addMetric("meter", meter, metricGroup);

	final MetricDumpSerialization.MetricSerializationResult populatedDump = service.queryMetrics(TIMEOUT).get();

	// every kind must have produced serialized bytes
	assertTrue(populatedDump.serializedCounters.length > 0);
	assertTrue(populatedDump.serializedGauges.length > 0);
	assertTrue(populatedDump.serializedHistograms.length > 0);
	assertTrue(populatedDump.serializedMeters.length > 0);

	// unregister everything again
	service.removeMetric(counter);
	service.removeMetric(gauge);
	service.removeMetric(histogram);
	service.removeMetric(meter);

	final MetricDumpSerialization.MetricSerializationResult clearedDump = service.queryMetrics(TIMEOUT).get();

	// nothing is left to serialize
	assertEquals(0, clearedDump.serializedCounters.length);
	assertEquals(0, clearedDump.serializedGauges.length);
	assertEquals(0, clearedDump.serializedHistograms.length);
	assertEquals(0, clearedDump.serializedMeters.length);
}
 
Example 4
Source File: TaskSubmissionTestEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TestingTaskExecutor} from the given services and configuration.
 *
 * <p>Names that are not declared in this method ({@code testingRpcService},
 * {@code haServices}, {@code heartbeatServices}, {@code blobCacheService},
 * {@code testingFatalErrorHandler}) come from the enclosing scope.
 *
 * @param taskManagerServices services passed to the executor
 * @param configuration configuration from which the task manager configuration is derived
 * @return the newly constructed executor
 */
@Nonnull
private TestingTaskExecutor createTaskExecutor(TaskManagerServices taskManagerServices, Configuration configuration) {
	final TaskManagerConfiguration tmConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);

	return new TestingTaskExecutor(
		testingRpcService,
		tmConfiguration,
		haServices,
		taskManagerServices,
		heartbeatServices,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		blobCacheService,
		testingFatalErrorHandler,
		new PartitionTable<>());
}
 
Example 5
Source File: TaskExecutorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TestingTaskExecutor} around the supplied services.
 *
 * <p>Names that are not declared in this method ({@code rpc}, {@code configuration},
 * {@code haServices}, {@code dummyBlobCacheService}, {@code testingFatalErrorHandler})
 * come from the enclosing scope.
 *
 * @param taskManagerServices services passed to the executor
 * @param heartbeatServices heartbeat services passed to the executor
 * @return the newly constructed executor
 */
private TestingTaskExecutor createTestingTaskExecutor(TaskManagerServices taskManagerServices, HeartbeatServices heartbeatServices) {
	final TaskManagerConfiguration tmConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);

	return new TestingTaskExecutor(
		rpc,
		tmConfiguration,
		haServices,
		taskManagerServices,
		heartbeatServices,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler,
		new PartitionTable<>());
}
 
Example 6
Source File: MetricQueryServiceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a metric dump contains serialized data for each registered metric kind and
 * that every serialized section is empty again after the metrics have been removed.
 */
@Test
public void testCreateDump() throws Exception {
	final MetricQueryService service = MetricQueryService.createMetricQueryService(rpcService, ResourceID.generate(), Long.MAX_VALUE);
	service.start();

	final Counter counter = new SimpleCounter();
	final Gauge<String> gauge = () -> "Hello";
	final Histogram histogram = new TestHistogram();
	final Meter meter = new TestMeter();

	final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

	// register one metric of each kind
	service.addMetric("counter", counter, metricGroup);
	service.addMetric("gauge", gauge, metricGroup);
	service.addMetric("histogram", histogram, metricGroup);
	service.addMetric("meter", meter, metricGroup);

	final MetricDumpSerialization.MetricSerializationResult populatedDump = service.queryMetrics(TIMEOUT).get();

	// every kind must have produced serialized bytes
	assertTrue(populatedDump.serializedCounters.length > 0);
	assertTrue(populatedDump.serializedGauges.length > 0);
	assertTrue(populatedDump.serializedHistograms.length > 0);
	assertTrue(populatedDump.serializedMeters.length > 0);

	// unregister everything again
	service.removeMetric(counter);
	service.removeMetric(gauge);
	service.removeMetric(histogram);
	service.removeMetric(meter);

	final MetricDumpSerialization.MetricSerializationResult clearedDump = service.queryMetrics(TIMEOUT).get();

	// nothing is left to serialize
	assertEquals(0, clearedDump.serializedCounters.length);
	assertEquals(0, clearedDump.serializedGauges.length);
	assertEquals(0, clearedDump.serializedHistograms.length);
	assertEquals(0, clearedDump.serializedMeters.length);
}
 
Example 7
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TaskExecutor} around the supplied task manager services.
 *
 * <p>Names that are not declared in this method ({@code rpc}, {@code configuration},
 * {@code haServices}, {@code HEARTBEAT_SERVICES}, {@code dummyBlobCacheService},
 * {@code testingFatalErrorHandler}) come from the enclosing scope.
 *
 * @param taskManagerServices services passed to the executor
 * @return the newly constructed executor
 */
@Nonnull
private TaskExecutor createTaskExecutor(TaskManagerServices taskManagerServices) {
	final TaskManagerConfiguration tmConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);

	return new TaskExecutor(
		rpc,
		tmConfiguration,
		haServices,
		taskManagerServices,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);
}
 
Example 8
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Expects a {@link RegistrationTimeoutException} to reach the fatal error handler when the
 * ResourceManager gateway accepts only the first registration and declines every later one,
 * with {@code TaskManagerOptions.REGISTRATION_TIMEOUT} set to "100 ms".
 */
@Test
public void testMaximumRegistrationDurationAfterConnectionLoss() throws Exception {
	// registration timeout under test
	configuration.setString(TaskManagerOptions.REGISTRATION_TIMEOUT, "100 ms");
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

	final long heartbeatInterval = 10L;
	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();
	// NOTE(review): the second HeartbeatServices argument is presumably the heartbeat timeout;
	// such a short value would explain the connection loss named in the test - confirm.
	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		TaskManagerConfiguration.fromConfiguration(configuration),
		haServices,
		taskManagerServices,
		new HeartbeatServices(heartbeatInterval, 10L),
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	taskExecutor.start();

	// completed with the resource id of the first registration attempt
	final CompletableFuture<ResourceID> registrationFuture = new CompletableFuture<>();
	// triggered as soon as any further registration attempt arrives
	final OneShotLatch secondRegistration = new OneShotLatch();
	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
		testingResourceManagerGateway.setRegisterTaskExecutorFunction(
			tuple -> {
				// complete(...) returns true only for the first caller, so only that one is accepted
				if (registrationFuture.complete(tuple.f1)) {
					return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(
						new InstanceID(),
						testingResourceManagerGateway.getOwnResourceId(),
						new ClusterInformation("localhost", 1234)));
				} else {
					secondRegistration.trigger();
					return CompletableFuture.completedFuture(new RegistrationResponse.Decline("Only the first registration should succeed."));
				}
			}
		);
		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

		// announce the gateway as ResourceManager leader
		resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), UUID.randomUUID());

		final ResourceID registrationResourceId = registrationFuture.get();

		assertThat(registrationResourceId, equalTo(taskManagerServices.getTaskManagerLocation().getResourceID()));

		// block until a second registration attempt has been seen (and declined)
		secondRegistration.await();

		// the fatal error handler must have received a registration timeout
		final Throwable error = testingFatalErrorHandler.getErrorFuture().get();
		assertThat(error, is(notNullValue()));
		assertThat(ExceptionUtils.stripExecutionException(error), instanceOf(RegistrationTimeoutException.class));

		// NOTE(review): presumably resets the handler so the expected error does not fail later checks - confirm
		testingFatalErrorHandler.clearError();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 9
Source File: MetricQueryServiceTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that gauges are left out of the dump when they exceed the configured size limit
 * while counters, meters and histograms are still reported, and that gauges are reported
 * again once all but one of them have been removed.
 */
@Test
public void testHandleOversizedMetricMessage() throws Exception {
	final long sizeLimit = 200L;
	final MetricQueryService service = MetricQueryService.createMetricQueryService(rpcService, ResourceID.generate(), sizeLimit);
	service.start();

	final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

	// create enough gauges for their combined values to exceed the size limit
	final String gaugeValue = "Hello";
	final long gaugeCount = sizeLimit / gaugeValue.length() + 1;
	final List<Tuple2<String, Gauge<String>>> namedGauges = LongStream.range(0, gaugeCount)
		.mapToObj(index -> Tuple2.of("gauge" + index, (Gauge<String>) () -> "Hello" + index))
		.collect(Collectors.toList());
	for (Tuple2<String, Gauge<String>> namedGauge : namedGauges) {
		service.addMetric(namedGauge.f0, namedGauge.f1, metricGroup);
	}

	service.addMetric("counter", new SimpleCounter(), metricGroup);
	service.addMetric("histogram", new TestHistogram(), metricGroup);
	service.addMetric("meter", new TestMeter(), metricGroup);

	final MetricDumpSerialization.MetricSerializationResult oversizedDump = service.queryMetrics(TIMEOUT).get();

	assertTrue(oversizedDump.serializedCounters.length > 0);
	assertEquals(1, oversizedDump.numCounters);
	assertTrue(oversizedDump.serializedMeters.length > 0);
	assertEquals(1, oversizedDump.numMeters);

	// the gauges exceeded the size limit, so they are excluded
	assertEquals(0, oversizedDump.serializedGauges.length);
	assertEquals(0, oversizedDump.numGauges);

	assertTrue(oversizedDump.serializedHistograms.length > 0);
	assertEquals(1, oversizedDump.numHistograms);

	// keep only the first gauge so that gauges are reported again if the remainder fits
	namedGauges.stream()
		.skip(1)
		.forEach(namedGauge -> service.removeMetric(namedGauge.f1));

	final MetricDumpSerialization.MetricSerializationResult fittingDump = service.queryMetrics(TIMEOUT).get();

	assertTrue(fittingDump.serializedCounters.length > 0);
	assertEquals(1, fittingDump.numCounters);
	assertTrue(fittingDump.serializedMeters.length > 0);
	assertEquals(1, fittingDump.numMeters);
	assertTrue(fittingDump.serializedGauges.length > 0);
	assertEquals(1, fittingDump.numGauges);
	assertTrue(fittingDump.serializedHistograms.length > 0);
	assertEquals(1, fittingDump.numHistograms);
}
 
Example 10
Source File: MetricQueryServiceTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that gauges are left out of the dump when they exceed the configured size limit
 * while counters, meters and histograms are still reported, and that gauges are reported
 * again once all but one of them have been removed.
 */
@Test
public void testHandleOversizedMetricMessage() throws Exception {
	final long sizeLimit = 200L;
	final MetricQueryService service = MetricQueryService.createMetricQueryService(rpcService, ResourceID.generate(), sizeLimit);
	service.start();

	final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

	// create enough gauges for their combined values to exceed the size limit
	final String gaugeValue = "Hello";
	final long gaugeCount = sizeLimit / gaugeValue.length() + 1;
	final List<Tuple2<String, Gauge<String>>> namedGauges = LongStream.range(0, gaugeCount)
		.mapToObj(index -> Tuple2.of("gauge" + index, (Gauge<String>) () -> "Hello" + index))
		.collect(Collectors.toList());
	for (Tuple2<String, Gauge<String>> namedGauge : namedGauges) {
		service.addMetric(namedGauge.f0, namedGauge.f1, metricGroup);
	}

	service.addMetric("counter", new SimpleCounter(), metricGroup);
	service.addMetric("histogram", new TestHistogram(), metricGroup);
	service.addMetric("meter", new TestMeter(), metricGroup);

	final MetricDumpSerialization.MetricSerializationResult oversizedDump = service.queryMetrics(TIMEOUT).get();

	assertTrue(oversizedDump.serializedCounters.length > 0);
	assertEquals(1, oversizedDump.numCounters);
	assertTrue(oversizedDump.serializedMeters.length > 0);
	assertEquals(1, oversizedDump.numMeters);

	// the gauges exceeded the size limit, so they are excluded
	assertEquals(0, oversizedDump.serializedGauges.length);
	assertEquals(0, oversizedDump.numGauges);

	assertTrue(oversizedDump.serializedHistograms.length > 0);
	assertEquals(1, oversizedDump.numHistograms);

	// keep only the first gauge so that gauges are reported again if the remainder fits
	namedGauges.stream()
		.skip(1)
		.forEach(namedGauge -> service.removeMetric(namedGauge.f1));

	final MetricDumpSerialization.MetricSerializationResult fittingDump = service.queryMetrics(TIMEOUT).get();

	assertTrue(fittingDump.serializedCounters.length > 0);
	assertEquals(1, fittingDump.numCounters);
	assertTrue(fittingDump.serializedMeters.length > 0);
	assertEquals(1, fittingDump.numMeters);
	assertTrue(fittingDump.serializedGauges.length > 0);
	assertEquals(1, fittingDump.numGauges);
	assertTrue(fittingDump.serializedHistograms.length > 0);
	assertEquals(1, fittingDump.numHistograms);
}
 
Example 11
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a TaskExecutor registers at the ResourceManager a second time after it
 * has been explicitly disconnected from it.
 */
@Test
public void testReconnectionAttemptIfExplicitlyDisconnected() throws Exception {
	final long heartbeatInterval = 1000L;
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		TaskManagerConfiguration.fromConfiguration(configuration),
		haServices,
		new TaskManagerServicesBuilder()
			.setTaskSlotTable(taskSlotTable)
			.setTaskManagerLocation(taskManagerLocation)
			.build(),
		new HeartbeatServices(heartbeatInterval, 1000L),
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	taskExecutor.start();

	try {
		final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
		final ClusterInformation clusterInformation = new ClusterInformation("foobar", 1234);

		// every registration attempt is answered with the same successful response
		final TaskExecutorRegistrationSuccess registrationSuccess = new TaskExecutorRegistrationSuccess(
			new InstanceID(),
			ResourceID.generate(),
			clusterInformation);
		final CompletableFuture<RegistrationResponse> registrationResponseFuture = CompletableFuture.completedFuture(registrationSuccess);

		// records the resource id of each registration attempt
		final BlockingQueue<ResourceID> registrationQueue = new ArrayBlockingQueue<>(1);

		testingResourceManagerGateway.setRegisterTaskExecutorFunction(registration -> {
			registrationQueue.offer(registration.f1);
			return registrationResponseFuture;
		});
		rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

		resourceManagerLeaderRetriever.notifyListener(
			testingResourceManagerGateway.getAddress(),
			testingResourceManagerGateway.getFencingToken().toUUID());

		final ResourceID firstRegistrationAttempt = registrationQueue.take();

		assertThat(firstRegistrationAttempt, equalTo(taskManagerLocation.getResourceID()));

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		// no further attempt may be pending before the disconnect
		assertThat(registrationQueue, is(empty()));

		taskExecutorGateway.disconnectResourceManager(new FlinkException("Test exception"));

		// the explicit disconnect must lead to another registration attempt
		final ResourceID secondRegistrationAttempt = registrationQueue.take();

		assertThat(secondRegistrationAttempt, equalTo(taskManagerLocation.getResourceID()));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 12
Source File: MetricQueryServiceTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks, through the actor-based query service, that a dump contains serialized data for
 * each added metric kind and that all serialized sections are empty after their removal.
 */
@Test
public void testCreateDump() throws Exception {
	final ActorSystem actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
	try {
		final ActorRef queryServiceActor = MetricQueryService.startMetricQueryService(actorSystem, null, Long.MAX_VALUE);
		final TestActorRef probeRef = TestActorRef.create(actorSystem, Props.create(TestActor.class));
		final TestActor probe = (TestActor) probeRef.underlyingActor();

		final Counter counter = new SimpleCounter();
		final Gauge<String> gauge = () -> "Hello";
		final Histogram histogram = new TestHistogram();
		final Meter meter = new TestMeter();

		final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

		// announce one metric of each kind, then request a dump
		MetricQueryService.notifyOfAddedMetric(queryServiceActor, counter, "counter", metricGroup);
		MetricQueryService.notifyOfAddedMetric(queryServiceActor, gauge, "gauge", metricGroup);
		MetricQueryService.notifyOfAddedMetric(queryServiceActor, histogram, "histogram", metricGroup);
		MetricQueryService.notifyOfAddedMetric(queryServiceActor, meter, "meter", metricGroup);
		queryServiceActor.tell(MetricQueryService.getCreateDump(), probeRef);

		probe.waitForResult();

		final MetricDumpSerialization.MetricSerializationResult populatedDump = probe.getSerializationResult();

		assertTrue(populatedDump.serializedCounters.length > 0);
		assertTrue(populatedDump.serializedGauges.length > 0);
		assertTrue(populatedDump.serializedHistograms.length > 0);
		assertTrue(populatedDump.serializedMeters.length > 0);

		// withdraw all metrics, then request another dump
		MetricQueryService.notifyOfRemovedMetric(queryServiceActor, counter);
		MetricQueryService.notifyOfRemovedMetric(queryServiceActor, gauge);
		MetricQueryService.notifyOfRemovedMetric(queryServiceActor, histogram);
		MetricQueryService.notifyOfRemovedMetric(queryServiceActor, meter);

		queryServiceActor.tell(MetricQueryService.getCreateDump(), probeRef);

		probe.waitForResult();

		final MetricDumpSerialization.MetricSerializationResult clearedDump = probe.getSerializationResult();

		assertEquals(0, clearedDump.serializedCounters.length);
		assertEquals(0, clearedDump.serializedGauges.length);
		assertEquals(0, clearedDump.serializedHistograms.length);
		assertEquals(0, clearedDump.serializedMeters.length);
	} finally {
		actorSystem.terminate();
	}
}
 
Example 13
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a job is removed from the JobLeaderService once a TaskExecutor has
 * no more slots assigned to this job.
 *
 * <p>The test requests a slot for {@code jobId}, checks that the JobLeaderService then
 * contains the job, frees the slot again and checks that the job is no longer contained.
 *
 * <p>See FLINK-8504
 */
@Test
public void testRemoveJobFromJobLeaderService() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(
		Collections.singleton(ResourceProfile.UNKNOWN),
		timerService);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = new TestingTaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
		// completed when the gateway receives a slot report
		final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>();
		resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
			initialSlotReport.complete(null);
			return CompletableFuture.completedFuture(Acknowledge.get());
		});
		final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken();

		rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID());

		// futures handed to the leader retrieval service registered for jobId;
		// the test waits on them below to observe the service being started and stopped
		final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>();
		final CompletableFuture<Void> stopFuture = new CompletableFuture<>();

		final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService(
			startFuture,
			stopFuture);
		haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);

		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

		final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
		final AllocationID allocationId = new AllocationID();

		// before any slot is requested, the job must be unknown to the JobLeaderService
		assertThat(startFuture.isDone(), is(false));
		final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService();
		assertThat(jobLeaderService.containsJob(jobId), is(false));

		// wait for the initial slot report
		initialSlotReport.get();

		taskExecutorGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			"foobar",
			resourceManagerId,
			timeout).get();

		// wait until the job leader retrieval service for jobId is started
		startFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(true));

		taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get();

		// wait until the job leader retrieval service for jobId is stopped, because the job should get removed
		stopFuture.get();
		assertThat(jobLeaderService.containsJob(jobId), is(false));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 14
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the heartbeat is stopped once the TaskExecutor detects that the RM is no longer leader.
 *
 * <p>The recording heartbeat services expose queues of monitored and unmonitored targets;
 * the test polls them to observe the RM being monitored and then unmonitored.
 *
 * <p>See FLINK-8462
 */
@Test
public void testRMHeartbeatStopWhenLeadershipRevoked() throws Exception {
	final long heartbeatInterval = 1L;
	final long heartbeatTimeout = 10000L;
	// upper bound (in milliseconds) for each poll on the recorded heartbeat targets
	final long pollTimeout = 1000L;
	final RecordingHeartbeatServices heartbeatServices = new RecordingHeartbeatServices(heartbeatInterval, heartbeatTimeout);
	final ResourceID rmResourceID = ResourceID.generate();

	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

	final String rmAddress = "rm";
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(
		ResourceManagerId.generate(),
		rmResourceID,
		rmAddress,
		rmAddress);

	rpc.registerGateway(rmAddress, rmGateway);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		heartbeatServices,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();

		final BlockingQueue<ResourceID> unmonitoredTargets = heartbeatServices.getUnmonitoredTargets();
		final BlockingQueue<ResourceID> monitoredTargets = heartbeatServices.getMonitoredTargets();

		// announce the RM as leader
		resourceManagerLeaderRetriever.notifyListener(rmAddress, rmGateway.getFencingToken().toUUID());

		// wait for TM registration by checking the registered heartbeat targets
		assertThat(
			monitoredTargets.poll(pollTimeout, TimeUnit.MILLISECONDS),
			equalTo(rmResourceID));

		// let RM lose leadership
		resourceManagerLeaderRetriever.notifyListener(null, null);

		// the heartbeat timeout (10000 ms) is much higher than the poll timeout (1000 ms), so it
		// should not have triggered; the RM must have been unmonitored due to the leadership loss
		assertThat(unmonitoredTargets.poll(pollTimeout, TimeUnit.MILLISECONDS), equalTo(rmResourceID));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 15
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * This test makes sure that duplicate "JobMaster gained leadership" messages are filtered out
 * by the TaskExecutor: the same message is delivered twice to the captured listener and the
 * job manager table is expected to receive exactly one {@code put} for the job.
 * See FLINK-7526.
 */
@Test
public void testFilterOutDuplicateJobMasterRegistrations() throws Exception {
	// time (in milliseconds) Mockito waits for the expected put(...) call
	final long verificationTimeout = 500L;
	final JobLeaderService jobLeaderService = mock(JobLeaderService.class);
	final HeartbeatServices heartbeatServicesMock = mock(HeartbeatServices.class, Mockito.RETURNS_MOCKS);

	final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
	when(jobMasterGateway.getHostname()).thenReturn("localhost");
	final JMTMRegistrationSuccess registrationMessage = new JMTMRegistrationSuccess(ResourceID.generate());
	// spy on a real table so that calls to put(...) can be verified
	final JobManagerTable jobManagerTableMock = spy(new JobManagerTable());

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setJobManagerTable(jobManagerTableMock)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(localStateStoresManager)
		.build();

	final TestingTaskExecutor taskExecutor = new TestingTaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		heartbeatServicesMock,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();
		taskExecutor.waitUntilStarted();

		// capture the listener that was passed to the mocked JobLeaderService on start
		ArgumentCaptor<JobLeaderListener> jobLeaderListenerArgumentCaptor = ArgumentCaptor.forClass(JobLeaderListener.class);

		verify(jobLeaderService).start(anyString(), any(RpcService.class), any(HighAvailabilityServices.class), jobLeaderListenerArgumentCaptor.capture());

		JobLeaderListener taskExecutorListener = jobLeaderListenerArgumentCaptor.getValue();

		taskExecutorListener.jobManagerGainedLeadership(jobId, jobMasterGateway, registrationMessage);

		// duplicate job manager gained leadership message
		taskExecutorListener.jobManagerGainedLeadership(jobId, jobMasterGateway, registrationMessage);

		ArgumentCaptor<JobManagerConnection> jobManagerConnectionArgumentCaptor = ArgumentCaptor.forClass(JobManagerConnection.class);

		// exactly one connection must have been stored for the job despite two messages
		verify(jobManagerTableMock, Mockito.timeout(verificationTimeout).times(1)).put(eq(jobId), jobManagerConnectionArgumentCaptor.capture());

		JobManagerConnection jobManagerConnection = jobManagerConnectionArgumentCaptor.getValue();

		assertEquals(jobMasterGateway, jobManagerConnection.getJobManagerGateway());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 16
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting
 * the job leader, it will offer all reserved slots to the JobManager.
 *
 * <p>The test requests one slot, announces the job leader and then checks that the slots
 * offered to the job master gateway carry exactly the requested allocation id.
 */
@Test
public void testJobLeaderDetection() throws Exception {
	final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
	final JobManagerTable jobManagerTable = new JobManagerTable();
	final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());

	final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
	// completed when the resource manager gateway receives a slot report
	CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
	resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
		initialSlotReportFuture.complete(null);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	// completed with a copy of the slots offered to the job master gateway
	final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>();
	final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
		.setOfferSlotsFunction((resourceID, slotOffers) -> {

			offeredSlotsFuture.complete(new ArrayList<>(slotOffers));
			// answer with all offered slots
			return CompletableFuture.completedFuture(slotOffers);
		})
		.build();

	rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
	rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

	final AllocationID allocationId = new AllocationID();
	final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);

	final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setJobManagerTable(jobManagerTable)
		.setJobLeaderService(jobLeaderService)
		.setTaskStateManager(localStateStoresManager)
		.build();

	TaskExecutor taskManager = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		taskManagerServices,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskManager.start();

		final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);

		// tell the task manager about the rm leader
		resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());

		// wait for the initial slot report
		initialSlotReportFuture.get();

		// request slots from the task manager under the given allocation id
		CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot(
			slotId,
			jobId,
			allocationId,
			jobMasterGateway.getAddress(),
			resourceManagerGateway.getFencingToken(),
			timeout);

		slotRequestAck.get();

		// now inform the task manager about the new job leader
		jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());

		// the slot requested above must be the only one offered to the job master
		final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get();
		final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList());
		assertThat(allocationIds, containsInAnyOrder(allocationId));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskManager, timeout);
	}
}
 
Example 17
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the task executor registers at the resource manager that is announced as leader,
 * and that it registers at a second resource manager once the first leader has been revoked
 * and the second one is announced.
 */
@Test
public void testTriggerRegistrationOnLeaderChange() throws Exception {
	// first resource manager
	final String firstRmAddress = "/resource/manager/address/one";
	final UUID firstLeaderId = UUID.randomUUID();
	final ResourceID firstRmResourceId = new ResourceID(firstRmAddress);

	// second resource manager
	final String secondRmAddress = "/resource/manager/address/two";
	final UUID secondLeaderId = UUID.randomUUID();
	final ResourceID secondRmResourceId = new ResourceID(secondRmAddress);

	// both mocked gateways acknowledge every registration attempt right away
	final CompletableFuture<RegistrationResponse> firstRegistrationResponse = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(new InstanceID(), firstRmResourceId, new ClusterInformation("localhost", 1234)));
	final CompletableFuture<RegistrationResponse> secondRegistrationResponse = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(new InstanceID(), secondRmResourceId, new ClusterInformation("localhost", 1234)));

	final ResourceManagerGateway firstRmGateway = mock(ResourceManagerGateway.class);
	final ResourceManagerGateway secondRmGateway = mock(ResourceManagerGateway.class);

	when(firstRmGateway.registerTaskExecutor(
			anyString(), any(ResourceID.class), anyInt(), any(HardwareDescription.class), any(Time.class)))
		.thenReturn(firstRegistrationResponse);
	when(secondRmGateway.registerTaskExecutor(
			anyString(), any(ResourceID.class), anyInt(), any(HardwareDescription.class), any(Time.class)))
		.thenReturn(secondRegistrationResponse);

	rpc.registerGateway(firstRmAddress, firstRmGateway);
	rpc.registerGateway(secondRmAddress, secondRmGateway);

	// the mocked slot table always answers with the same (empty-constructed) slot report
	final SlotReport emptyReport = new SlotReport();
	final TaskSlotTable slotTableMock = mock(TaskSlotTable.class);
	when(slotTableMock.createSlotReport(any(ResourceID.class))).thenReturn(emptyReport);

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(slotTableMock)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();
		final String ownAddress = taskExecutor.getAddress();

		// without a leader there must not be any resource manager connection
		assertNull(taskExecutor.getResourceManagerConnection());

		// announce the first leader and wait (bounded by the timeout) for the registration call
		resourceManagerLeaderRetriever.notifyListener(firstRmAddress, firstLeaderId);

		verify(firstRmGateway, Mockito.timeout(timeout.toMilliseconds())).registerTaskExecutor(
			eq(ownAddress), eq(taskManagerLocation.getResourceID()), anyInt(), any(HardwareDescription.class), any(Time.class));
		assertNotNull(taskExecutor.getResourceManagerConnection());

		// revoke the current leader
		resourceManagerLeaderRetriever.notifyListener(null, null);

		// announce the second leader and expect a registration at its gateway
		resourceManagerLeaderRetriever.notifyListener(secondRmAddress, secondLeaderId);

		verify(secondRmGateway, Mockito.timeout(timeout.toMilliseconds())).registerTaskExecutor(
			eq(ownAddress), eq(taskManagerLocation.getResourceID()), anyInt(), any(HardwareDescription.class), any(Time.class));
		assertNotNull(taskExecutor.getResourceManagerConnection());
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 18
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies which slot report reaches the resource manager at which point in time: the first
 * report queued in the testing slot table is expected with the initial slot report after
 * registration, the second one as payload of the heartbeat response.
 */
@Test
public void testHeartbeatSlotReporting() throws Exception {
	final String rmAddress = "rm";
	final UUID rmLeaderId = UUID.randomUUID();

	// the two reports the testing slot table is seeded with, in the order they are expected
	final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0);
	final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1);
	final SlotReport expectedInitialReport = new SlotReport(new SlotStatus(slotId, resourceProfile));
	final SlotReport expectedHeartbeatReport = new SlotReport(
		new SlotStatus(slotId, resourceProfile, new JobID(), new AllocationID()));

	// futures capturing what the task executor sends to the resource manager
	final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();
	final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>();
	final CompletableFuture<SlotReport> heartbeatSlotReportFuture = new CompletableFuture<>();

	// set up the testing resource manager gateway
	final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
	final ResourceID rmResourceId = rmGateway.getOwnResourceId();
	final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(
		new TaskExecutorRegistrationSuccess(new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234)));

	rmGateway.setRegisterTaskExecutorFunction(registration -> {
		// f1 carries the resource id of the registering task executor
		taskExecutorRegistrationFuture.complete(registration.f1);
		return registrationResponse;
	});

	rmGateway.setSendSlotReportFunction(report -> {
		// f2 carries the slot report itself
		initialSlotReportFuture.complete(report.f2);
		return CompletableFuture.completedFuture(Acknowledge.get());
	});

	rmGateway.setTaskExecutorHeartbeatConsumer(
		(ignoredResourceId, payload) -> heartbeatSlotReportFuture.complete(payload));

	rpc.registerGateway(rmAddress, rmGateway);

	final TestingTaskSlotTable taskSlotTable = new TestingTaskSlotTable(
		new ArrayDeque<>(Arrays.asList(expectedInitialReport, expectedHeartbeatReport)));

	final TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setTaskSlotTable(taskSlotTable)
		.setTaskStateManager(stateStoresManager)
		.build();

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		HEARTBEAT_SERVICES,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	try {
		taskExecutor.start();

		// announce the resource manager leader, which triggers the registration
		resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId);

		// the task executor must have registered itself and sent the first report
		assertThat(taskExecutorRegistrationFuture.get(), equalTo(taskManagerLocation.getResourceID()));
		assertThat(initialSlotReportFuture.get(), equalTo(expectedInitialReport));

		// request a heartbeat on behalf of the resource manager; the call is asynchronous
		final TaskExecutorGateway selfGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
		selfGateway.heartbeatFromResourceManager(rmResourceId);

		// block until the heartbeat response arrives; it must carry the second report
		assertThat(heartbeatSlotReportFuture.get(), equalTo(expectedHeartbeatReport));
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}
}
 
Example 19
Source File: TaskExecutorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that, once the task executor endpoint has been started and terminated, the memory
 * manager, the network environment and the I/O manager it was given all report being shut down.
 */
@Test
public void testShouldShutDownTaskManagerServicesInPostStop() throws Exception {
	final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

	final JobLeaderService leaderService = new JobLeaderService(
		taskManagerLocation,
		RetryingRegistrationConfiguration.defaultConfiguration());

	// real (non-mocked) services whose shutdown state is inspected at the end
	final IOManager ioManager = new IOManagerAsync(tmp.newFolder().getAbsolutePath());
	final MemoryManager memoryManager = new MemoryManager(4096, 1, 4096, MemoryType.HEAP, false);
	final NetworkEnvironment networkEnvironment = new NetworkEnvironment(1, 1, 0, 0, 2, 8, true);
	networkEnvironment.start();

	final TaskExecutorLocalStateStoresManager stateStoresManager = new TaskExecutorLocalStateStoresManager(
		false,
		ioManager.getSpillingDirectories(),
		Executors.directExecutor());

	final TaskManagerServices services = new TaskManagerServicesBuilder()
		.setTaskManagerLocation(taskManagerLocation)
		.setMemoryManager(memoryManager)
		.setIoManager(ioManager)
		.setNetworkEnvironment(networkEnvironment)
		.setTaskSlotTable(slotTable)
		.setJobLeaderService(leaderService)
		.setTaskStateManager(stateStoresManager)
		.build();

	// heartbeat interval and timeout are both one second
	final long heartbeatMillis = 1000L;
	final HeartbeatServices heartbeatServices = new HeartbeatServices(heartbeatMillis, heartbeatMillis);

	final TaskExecutor taskExecutor = new TaskExecutor(
		rpc,
		taskManagerConfiguration,
		haServices,
		services,
		heartbeatServices,
		UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
		null,
		dummyBlobCacheService,
		testingFatalErrorHandler);

	// start and immediately terminate the endpoint; termination happens even if start fails
	try {
		taskExecutor.start();
	} finally {
		RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
	}

	assertThat(memoryManager.isShutdown(), is(true));
	assertThat(networkEnvironment.isShutdown(), is(true));
	assertThat(ioManager.isProperlyShutDown(), is(true));
}
 
Example 20
Source File: MetricQueryServiceTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Registers more gauges than fit into the configured size limit and checks that the resulting
 * dump leaves the gauges out while still containing the counter, meter and histogram. Then
 * removes all gauges but one and checks that the remaining gauge shows up in the next dump.
 */
@Test
public void testHandleOversizedMetricMessage() throws Exception {
	final ActorSystem actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
	try {
		final long sizeLimit = 200L;
		final ActorRef queryService = MetricQueryService.startMetricQueryService(actorSystem, null, sizeLimit);
		final TestActorRef receiverRef = TestActorRef.create(actorSystem, Props.create(TestActor.class));
		final TestActor receiver = (TestActor) receiverRef.underlyingActor();

		final TaskManagerMetricGroup metricGroup = UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup();

		// create enough string gauges that their values alone add up to more than the limit
		final String gaugeValue = "Hello";
		final long gaugeCount = sizeLimit / gaugeValue.length() + 1;
		final List<Tuple2<String, Gauge<String>>> gauges = LongStream.range(0, gaugeCount)
			.mapToObj(index -> Tuple2.of("gauge" + index, (Gauge<String>) () -> "Hello" + index))
			.collect(Collectors.toList());
		for (Tuple2<String, Gauge<String>> namedGauge : gauges) {
			MetricQueryService.notifyOfAddedMetric(queryService, namedGauge.f1, namedGauge.f0, metricGroup);
		}

		// one metric of every other kind
		MetricQueryService.notifyOfAddedMetric(queryService, new SimpleCounter(), "counter", metricGroup);
		MetricQueryService.notifyOfAddedMetric(queryService, new TestHistogram(), "histogram", metricGroup);
		MetricQueryService.notifyOfAddedMetric(queryService, new TestMeter(), "meter", metricGroup);

		// first dump: all gauges still registered
		queryService.tell(MetricQueryService.getCreateDump(), receiverRef);
		receiver.waitForResult();

		final MetricDumpSerialization.MetricSerializationResult oversizedDump = receiver.getSerializationResult();

		assertTrue(oversizedDump.serializedCounters.length > 0);
		assertEquals(1, oversizedDump.numCounters);
		assertTrue(oversizedDump.serializedMeters.length > 0);
		assertEquals(1, oversizedDump.numMeters);

		// the gauges went over the size limit and are therefore left out
		assertEquals(0, oversizedDump.serializedGauges.length);
		assertEquals(0, oversizedDump.numGauges);

		assertTrue(oversizedDump.serializedHistograms.length > 0);
		assertEquals(1, oversizedDump.numHistograms);

		// keep only the first gauge so that the gauges fit again and should be reported
		gauges.stream()
			.skip(1)
			.forEach(namedGauge -> MetricQueryService.notifyOfRemovedMetric(queryService, namedGauge.f1));

		// second dump: a single gauge left
		queryService.tell(MetricQueryService.getCreateDump(), receiverRef);
		receiver.waitForResult();

		final MetricDumpSerialization.MetricSerializationResult recoveredDump = receiver.getSerializationResult();

		assertTrue(recoveredDump.serializedCounters.length > 0);
		assertEquals(1, recoveredDump.numCounters);
		assertTrue(recoveredDump.serializedMeters.length > 0);
		assertEquals(1, recoveredDump.numMeters);
		assertTrue(recoveredDump.serializedGauges.length > 0);
		assertEquals(1, recoveredDump.numGauges);
		assertTrue(recoveredDump.serializedHistograms.length > 0);
		assertEquals(1, recoveredDump.numHistograms);
	} finally {
		actorSystem.terminate();
	}
}