org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager Java Examples

The following examples show how to use org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JobManagerRunnerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the job's class loader is known to the library cache manager after the
 * {@link JobManagerRunner} has been started, and no longer known once the runner was closed.
 */
@Test
public void testLibraryCacheManagerRegistration() throws Exception {
	final String[] noParentFirstPatterns = new String[0];
	final BlobLibraryCacheManager cacheManager = new BlobLibraryCacheManager(
		VoidPermanentBlobService.INSTANCE,
		FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
		noParentFirstPatterns);
	final JobManagerRunner runner = createJobManagerRunner(cacheManager);

	try {
		runner.start();

		final JobID registeredJob = jobGraph.getJobID();

		// after start the class loader must be present ...
		assertThat(cacheManager.hasClassLoader(registeredJob), is(true));

		runner.close();

		// ... and after close it must be gone
		assertThat(cacheManager.hasClassLoader(registeredJob), is(false));
	} finally {
		// runs on every path, so the runner is also closed when an assertion above fails
		runner.close();
	}
}
 
Example #2
Source File: JobManagerRunnerTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the static fixtures used by the tests of this class: the library cache manager,
 * the job master service factory, a single-vertex job graph and a matching archived
 * execution graph in state {@code FINISHED}.
 */
@BeforeClass
public static void setupClass() {
	final String[] noParentFirstPatterns = new String[0];
	libraryCacheManager = new BlobLibraryCacheManager(
		FailingPermanentBlobService.INSTANCE,
		FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
		noParentFirstPatterns);

	defaultJobMasterServiceFactory = new TestingJobMasterServiceFactory();

	// job graph consisting of one vertex that runs the no-op invokable
	final JobVertex noOpVertex = new JobVertex("Test vertex");
	noOpVertex.setInvokableClass(NoOpInvokable.class);
	jobGraph = new JobGraph(noOpVertex);

	// archived graph carries the same job id as the job graph built above
	archivedExecutionGraph =
		new ArchivedExecutionGraphBuilder()
			.setJobID(jobGraph.getJobID())
			.setState(JobStatus.FINISHED)
			.build();
}
 
Example #3
Source File: JobManagerRunnerTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the static fixtures used by the tests of this class: the library cache manager,
 * the job master service factory, a single-vertex job graph and a matching archived
 * execution graph in state {@code FINISHED}.
 */
@BeforeClass
public static void setupClass() {
	final String[] noParentFirstPatterns = new String[0];
	libraryCacheManager = new BlobLibraryCacheManager(
		FailingPermanentBlobService.INSTANCE,
		FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
		noParentFirstPatterns);

	defaultJobMasterServiceFactory = new TestingJobMasterServiceFactory();

	// job graph consisting of one vertex that runs the no-op invokable
	final JobVertex noOpVertex = new JobVertex("Test vertex");
	noOpVertex.setInvokableClass(NoOpInvokable.class);
	jobGraph = new JobGraph(noOpVertex);

	// archived graph carries the same job id as the job graph built above
	archivedExecutionGraph =
		new ArchivedExecutionGraphBuilder()
			.setJobID(jobGraph.getJobID())
			.setState(JobStatus.FINISHED)
			.build();
}
 
Example #4
Source File: JobManagerRunnerTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the job's class loader is known to the library cache manager after the
 * {@link JobManagerRunner} has been started, and no longer known once the runner was closed.
 */
@Test
public void testLibraryCacheManagerRegistration() throws Exception {
	final String[] noParentFirstPatterns = new String[0];
	final BlobLibraryCacheManager cacheManager = new BlobLibraryCacheManager(
		VoidPermanentBlobService.INSTANCE,
		FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
		noParentFirstPatterns);
	final JobManagerRunner runner = createJobManagerRunner(cacheManager);

	try {
		runner.start();

		final JobID registeredJob = jobGraph.getJobID();

		// after start the class loader must be present ...
		assertThat(cacheManager.hasClassLoader(registeredJob), is(true));

		runner.close();

		// ... and after close it must be gone
		assertThat(cacheManager.hasClassLoader(registeredJob), is(false));
	} finally {
		// runs on every path, so the runner is also closed when an assertion above fails
		runner.close();
	}
}
 
Example #5
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link JobManagerConnection} for the given job and job master gateway.
 *
 * <p>All gateway-backed helpers (task manager actions, checkpoint responder, aggregate manager,
 * partition notifier and partition state checker) are created around {@code jobMasterGateway};
 * a fresh {@link BlobLibraryCacheManager} is created from the blob cache service and the task
 * manager configuration. Queryable state is registered for the job before the connection is returned.
 *
 * @param jobID id of the job, must not be null
 * @param resourceID resource id handed to the connection, must not be null
 * @param jobMasterGateway gateway to the job master, must not be null
 * @return the assembled connection
 */
private JobManagerConnection associateWithJobManager(
		JobID jobID,
		ResourceID resourceID,
		JobMasterGateway jobMasterGateway) {
	checkNotNull(jobID);
	checkNotNull(resourceID);
	checkNotNull(jobMasterGateway);

	final TaskManagerActions actions = new TaskManagerActionsImpl(jobMasterGateway);
	final CheckpointResponder responder = new RpcCheckpointResponder(jobMasterGateway);
	final GlobalAggregateManager globalAggregates = new RpcGlobalAggregateManager(jobMasterGateway);

	// class loading behaviour is taken from the task manager configuration
	final LibraryCacheManager libraries = new BlobLibraryCacheManager(
		blobCacheService.getPermanentBlobService(),
		taskManagerConfiguration.getClassLoaderResolveOrder(),
		taskManagerConfiguration.getAlwaysParentFirstLoaderPatterns());

	final ResultPartitionConsumableNotifier consumableNotifier =
		new RpcResultPartitionConsumableNotifier(
			jobMasterGateway,
			getRpcService().getExecutor(),
			taskManagerConfiguration.getTimeout());
	final PartitionProducerStateChecker producerStateChecker =
		new RpcPartitionStateChecker(jobMasterGateway);

	registerQueryableState(jobID, jobMasterGateway);

	return new JobManagerConnection(
		jobID,
		resourceID,
		jobMasterGateway,
		actions,
		responder,
		globalAggregates,
		libraries,
		consumableNotifier,
		producerStateChecker);
}
 
Example #6
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link JobManagerConnection} for the given job and job master gateway.
 *
 * <p>All gateway-backed helpers (task manager actions, checkpoint responder, aggregate manager,
 * partition notifier and partition state checker) are created around {@code jobMasterGateway};
 * a fresh {@link BlobLibraryCacheManager} is created from the blob cache service and the task
 * manager configuration. Queryable state is registered for the job before the connection is returned.
 *
 * @param jobID id of the job, must not be null
 * @param resourceID resource id handed to the connection, must not be null
 * @param jobMasterGateway gateway to the job master, must not be null
 * @return the assembled connection
 */
private JobManagerConnection associateWithJobManager(
		JobID jobID,
		ResourceID resourceID,
		JobMasterGateway jobMasterGateway) {
	checkNotNull(jobID);
	checkNotNull(resourceID);
	checkNotNull(jobMasterGateway);

	final TaskManagerActions actions = new TaskManagerActionsImpl(jobMasterGateway);
	final CheckpointResponder responder = new RpcCheckpointResponder(jobMasterGateway);
	final GlobalAggregateManager globalAggregates = new RpcGlobalAggregateManager(jobMasterGateway);

	// class loading behaviour is taken from the task manager configuration
	final LibraryCacheManager libraries = new BlobLibraryCacheManager(
		blobCacheService.getPermanentBlobService(),
		taskManagerConfiguration.getClassLoaderResolveOrder(),
		taskManagerConfiguration.getAlwaysParentFirstLoaderPatterns());

	final ResultPartitionConsumableNotifier consumableNotifier =
		new RpcResultPartitionConsumableNotifier(
			jobMasterGateway,
			getRpcService().getExecutor(),
			taskManagerConfiguration.getTimeout());
	final PartitionProducerStateChecker producerStateChecker =
		new RpcPartitionStateChecker(jobMasterGateway);

	registerQueryableState(jobID, jobMasterGateway);

	return new JobManagerConnection(
		jobID,
		resourceID,
		jobMasterGateway,
		actions,
		responder,
		globalAggregates,
		libraries,
		consumableNotifier,
		producerStateChecker);
}
 
Example #7
Source File: StreamTaskTerminationTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * FLINK-6833
 *
 * <p>Tests that a finished stream task cannot be failed by an asynchronous checkpointing operation after
 * the stream task has stopped running.
 *
 * <p>The test builds a {@code Task} that runs {@code BlockingStreamTask} with a
 * {@code BlockingStateBackend}, runs it asynchronously, triggers one checkpoint once the task is
 * running, and finally asserts that the task ended in {@code FINISHED} without a failure cause.
 */
@Test
public void testConcurrentAsyncCheckpointCannotFailFinishedStreamTask() throws Exception {
	final Configuration taskConfiguration = new Configuration();
	// StreamConfig is constructed around taskConfiguration, which is later handed to TaskInformation
	final StreamConfig streamConfig = new StreamConfig(taskConfiguration);
	final NoOpStreamOperator<Long> noOpStreamOperator = new NoOpStreamOperator<>();

	final StateBackend blockingStateBackend = new BlockingStateBackend();

	streamConfig.setStreamOperator(noOpStreamOperator);
	streamConfig.setOperatorID(new OperatorID());
	streamConfig.setStateBackend(blockingStateBackend);

	// id and timestamp of the single checkpoint triggered below
	final long checkpointId = 0L;
	final long checkpointTimestamp = 0L;

	final JobInformation jobInformation = new JobInformation(
		new JobID(),
		"Test Job",
		new SerializedValue<>(new ExecutionConfig()),
		new Configuration(),
		Collections.emptyList(),
		Collections.emptyList());

	final TaskInformation taskInformation = new TaskInformation(
		new JobVertexID(),
		"Test Task",
		1,
		1,
		BlockingStreamTask.class.getName(),
		taskConfiguration);

	final TaskManagerRuntimeInfo taskManagerRuntimeInfo = new TestingTaskManagerRuntimeInfo();

	// mocked network environment: returns a mocked KvState registry and a real event dispatcher
	TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
	final NetworkEnvironment networkEnv = mock(NetworkEnvironment.class);
	when(networkEnv.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
	when(networkEnv.getTaskEventDispatcher()).thenReturn(taskEventDispatcher);

	// blob service backed by mocked caches; also supplies the permanent blob service for the library cache manager
	BlobCacheService blobService =
		new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

	// NOTE(review): arguments are positional; their meaning is defined by the Task constructor, which is not visible here
	final Task task = new Task(
		jobInformation,
		taskInformation,
		new ExecutionAttemptID(),
		new AllocationID(),
		0,
		0,
		Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
		Collections.<InputGateDeploymentDescriptor>emptyList(),
		0,
		new MemoryManager(32L * 1024L, 1),
		new IOManagerAsync(),
		networkEnv,
		mock(BroadcastVariableManager.class),
		new TestTaskStateManager(),
		mock(TaskManagerActions.class),
		mock(InputSplitProvider.class),
		mock(CheckpointResponder.class),
		new TestGlobalAggregateManager(),
		blobService,
		new BlobLibraryCacheManager(
			blobService.getPermanentBlobService(),
			FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
			new String[0]),
		mock(FileCache.class),
		taskManagerRuntimeInfo,
		UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
		new NoOpResultPartitionConsumableNotifier(),
		mock(PartitionProducerStateChecker.class),
		Executors.directExecutor());

	// run the task on the testing executor so this thread can trigger the checkpoint meanwhile
	CompletableFuture<Void> taskRun = CompletableFuture.runAsync(
		() -> task.run(),
		TestingUtils.defaultExecutor());

	// wait until the stream task started running
	// NOTE(review): RUN_LATCH is declared outside this method; presumably released by BlockingStreamTask — confirm
	RUN_LATCH.await();

	// trigger a checkpoint
	task.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, CheckpointOptions.forCheckpointWithDefaultLocation());

	// wait until the task has completed execution
	taskRun.get();

	// check that no failure occurred
	if (task.getFailureCause() != null) {
		throw new Exception("Task failed", task.getFailureCause());
	}

	// check that we have entered the finished state
	assertEquals(ExecutionState.FINISHED, task.getExecutionState());
}
 
Example #8
Source File: TaskCheckpointingBehaviourTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code Task} that runs {@code TestStreamTask} with the given operator and state backend.
 *
 * <p>Collaborators that are not passed in are either mocks or simple test implementations.
 *
 * @param op stream operator written into the task's {@code StreamConfig}
 * @param backend state backend written into the task's {@code StreamConfig}
 * @param checkpointResponder responder passed to the {@code Task} constructor
 * @param failOnCheckpointErrors value passed to {@code ExecutionConfig#setFailTaskOnCheckpointError}
 * @return the newly constructed task
 * @throws IOException declared by the signature; NOTE(review): the throwing call is not identifiable from this snippet
 */
private static Task createTask(
	StreamOperator<?> op,
	StateBackend backend,
	CheckpointResponder checkpointResponder,
	boolean failOnCheckpointErrors) throws IOException {

	// the StreamConfig is constructed around taskConfig, which is handed to TaskInformation below
	Configuration taskConfig = new Configuration();
	StreamConfig cfg = new StreamConfig(taskConfig);
	cfg.setStreamOperator(op);
	cfg.setOperatorID(new OperatorID());
	cfg.setStateBackend(backend);

	ExecutionConfig executionConfig = new ExecutionConfig();
	executionConfig.setFailTaskOnCheckpointError(failOnCheckpointErrors);

	JobInformation jobInformation = new JobInformation(
			new JobID(),
			"test job name",
			new SerializedValue<>(executionConfig),
			new Configuration(),
			Collections.emptyList(),
			Collections.emptyList());

	// NOTE(review): the two int arguments (1, 11) are positional; confirm their meaning against the TaskInformation constructor
	TaskInformation taskInformation = new TaskInformation(
			new JobVertexID(),
			"test task name",
			1,
			11,
			TestStreamTask.class.getName(),
			taskConfig);

	// mocked network environment: returns a mocked KvState registry and a real event dispatcher
	TaskKvStateRegistry mockKvRegistry = mock(TaskKvStateRegistry.class);
	TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher();
	NetworkEnvironment network = mock(NetworkEnvironment.class);
	when(network.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mockKvRegistry);
	when(network.getTaskEventDispatcher()).thenReturn(taskEventDispatcher);

	// blob service backed by mocked caches; its permanent blob service is reused for the library cache manager and the file cache
	BlobCacheService blobService =
		new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

	// NOTE(review): arguments are positional; their meaning is defined by the Task constructor, which is not visible here
	return new Task(
			jobInformation,
			taskInformation,
			new ExecutionAttemptID(),
			new AllocationID(),
			0,
			0,
			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			0,
			mock(MemoryManager.class),
			mock(IOManager.class),
			network,
			mock(BroadcastVariableManager.class),
			new TestTaskStateManager(),
			mock(TaskManagerActions.class),
			mock(InputSplitProvider.class),
			checkpointResponder,
			new TestGlobalAggregateManager(),
			blobService,
			new BlobLibraryCacheManager(
				blobService.getPermanentBlobService(),
				FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
				new String[0]),
			new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() },
				blobService.getPermanentBlobService()),
			new TestingTaskManagerRuntimeInfo(),
			UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
			new NoOpResultPartitionConsumableNotifier(),
			mock(PartitionProducerStateChecker.class),
			Executors.directExecutor());
}
 
Example #9
Source File: JobManagerSharedServices.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link JobManagerSharedServices} from the given configuration.
 *
 * <p>This sets up the library cache manager on top of the blob server, a scheduled executor
 * named {@code "jobmanager-future"} sized by the number of CPU cores, the stack trace sample
 * coordinator and the back pressure stats tracker, and schedules the periodic clean-up of the
 * tracker's operator stats cache.
 *
 * @param config configuration to read class loading, timeout and back pressure options from; must not be null
 * @param blobServer blob server backing the library cache manager; must not be null
 * @return the assembled shared services
 * @throws IllegalConfigurationException if the timeout cannot be parsed; the parsing error is attached as cause
 * @throws Exception declared by the signature for failures of the called factories
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder =
		config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager =
		new BlobLibraryCacheManager(
			blobServer,
			FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
			alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// pass the original exception along so the unparsable value is not lost from the stack trace
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
			Hardware.getNumberCPUCores(),
			new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// the clean-up interval is used both as initial delay and as period, in milliseconds
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
 
Example #10
Source File: JvmExitOnFatalErrorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the test process: builds a {@code Task} that runs {@code OomInvokable} with
 * {@code TaskManagerOptions.KILL_ON_OUT_OF_MEMORY} enabled, starts the task thread and then
 * blocks the main thread forever.
 *
 * <p>Any {@code Throwable} raised while setting up or starting the task is printed to
 * {@code System.err} instead of being propagated.
 *
 * @param args command line arguments; not read by this method
 */
public static void main(String[] args) throws Exception {

			System.err.println("creating task");

			// we suppress process exits via errors here to not
			// have a test that exits accidentally due to a programming error
			try {
				final Configuration taskManagerConfig = new Configuration();
				taskManagerConfig.setBoolean(TaskManagerOptions.KILL_ON_OUT_OF_MEMORY, true);

				final JobID jid = new JobID();
				// allocationID goes to the local state store, slotAllocationId to the Task itself
				final AllocationID allocationID = new AllocationID();
				final JobVertexID jobVertexId = new JobVertexID();
				final ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
				final AllocationID slotAllocationId = new AllocationID();

				final SerializedValue<ExecutionConfig> execConfig = new SerializedValue<>(new ExecutionConfig());

				final JobInformation jobInformation = new JobInformation(
						jid, "Test Job", execConfig, new Configuration(),
						Collections.emptyList(), Collections.emptyList());

				final TaskInformation taskInformation = new TaskInformation(
						jobVertexId, "Test Task", 1, 1, OomInvokable.class.getName(), new Configuration());

				final MemoryManager memoryManager = new MemoryManager(1024 * 1024, 1);
				final IOManager ioManager = new IOManagerAsync();

				// mocked network environment: stubbed only for this job id / vertex id pair
				final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
				when(networkEnvironment.createKvStateTaskRegistry(jid, jobVertexId)).thenReturn(mock(TaskKvStateRegistry.class));
				TaskEventDispatcher taskEventDispatcher = mock(TaskEventDispatcher.class);
				when(networkEnvironment.getTaskEventDispatcher()).thenReturn(taskEventDispatcher);

				final TaskManagerRuntimeInfo tmInfo = TaskManagerConfiguration.fromConfiguration(taskManagerConfig);

				// shared by the local state store and the Task
				final Executor executor = Executors.newCachedThreadPool();

				BlobCacheService blobService =
					new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

				final TaskLocalStateStore localStateStore =
					new TaskLocalStateStoreImpl(
						jid,
						allocationID,
						jobVertexId,
						0,
						TestLocalRecoveryConfig.disabled(),
						executor);

				final TaskStateManager slotStateManager =
					new TaskStateManagerImpl(
						jid,
						executionAttemptID,
						localStateStore,
						null,
						mock(CheckpointResponder.class));

				Task task = new Task(
						jobInformation,
						taskInformation,
						executionAttemptID,
						slotAllocationId,
						0,       // subtaskIndex
						0,       // attemptNumber
						Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
						Collections.<InputGateDeploymentDescriptor>emptyList(),
						0,       // targetSlotNumber
						memoryManager,
						ioManager,
						networkEnvironment,
						new BroadcastVariableManager(),
						slotStateManager,
						new NoOpTaskManagerActions(),
						new NoOpInputSplitProvider(),
						new NoOpCheckpointResponder(),
						new TestGlobalAggregateManager(),
						blobService,
						new BlobLibraryCacheManager(
							blobService.getPermanentBlobService(),
							FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
							new String[0]),
						new FileCache(tmInfo.getTmpDirectories(), blobService.getPermanentBlobService()),
						tmInfo,
						UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
						new NoOpResultPartitionConsumableNotifier(),
						new NoOpPartitionProducerStateChecker(),
						executor);

				System.err.println("starting task thread");

				task.startTaskThread();
			}
			catch (Throwable t) {
				// report on stderr and fall through to parking the main thread
				System.err.println("ERROR STARTING TASK");
				t.printStackTrace();
			}

			System.err.println("parking the main thread");
			CommonTestUtils.blockForeverNonInterruptibly();
		}
 
Example #11
Source File: TaskTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a task fails when a required jar blob cannot be fetched.
 *
 * <p>A blob server is started with an empty blob store and a permanent blob cache is pointed at it.
 * The task is configured to require a freshly created blob key, so running the task must end in
 * {@code FAILED} with a failure cause whose message contains {@code "Failed to fetch BLOB"}, and
 * no invokable must have been created.
 *
 * <p>The blob server and the blob cache are closed when the test ends, also when an assertion fails.
 */
@Test
public void testExecutionFailsInBlobsMissing() throws Exception {
	final PermanentBlobKey missingKey = new PermanentBlobKey();

	final Configuration config = new Configuration();
	config.setString(BlobServerOptions.STORAGE_DIRECTORY,
		TEMPORARY_FOLDER.newFolder().getAbsolutePath());
	config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1L);

	// try-with-resources: the started server must not outlive the test
	try (BlobServer blobServer = new BlobServer(config, new VoidBlobStore())) {
		blobServer.start();
		final InetSocketAddress serverAddress = new InetSocketAddress("localhost", blobServer.getPort());

		// the cache is closed before the server it talks to
		try (PermanentBlobCache permanentBlobCache = new PermanentBlobCache(config, new VoidBlobStore(), serverAddress)) {
			final BlobLibraryCacheManager libraryCacheManager =
				new BlobLibraryCacheManager(
					permanentBlobCache,
					FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
					new String[0]);

			final Task task = createTaskBuilder()
				.setRequiredJarFileBlobKeys(Collections.singletonList(missingKey))
				.setLibraryCacheManager(libraryCacheManager)
				.build();

			// task should be new and perfect
			assertEquals(ExecutionState.CREATED, task.getExecutionState());
			assertFalse(task.isCanceledOrFailed());
			assertNull(task.getFailureCause());

			// should fail
			task.run();

			// verify final state
			assertEquals(ExecutionState.FAILED, task.getExecutionState());
			assertTrue(task.isCanceledOrFailed());
			assertNotNull(task.getFailureCause());
			assertNotNull(task.getFailureCause().getMessage());
			assertTrue(task.getFailureCause().getMessage().contains("Failed to fetch BLOB"));

			assertNull(task.getInvokable());
		}
	}
}
 
Example #12
Source File: TaskTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a task fails when a required jar blob cannot be fetched.
 *
 * <p>A blob server is started with an empty blob store and a permanent blob cache is pointed at it.
 * The task is configured to require a freshly created blob key, so running the task must end in
 * {@code FAILED} with a failure cause whose message contains {@code "Failed to fetch BLOB"}, and
 * no invokable must have been created.
 *
 * <p>The blob server and the blob cache are closed when the test ends, also when an assertion fails.
 */
@Test
public void testExecutionFailsInBlobsMissing() throws Exception {
	final PermanentBlobKey missingKey = new PermanentBlobKey();

	final Configuration config = new Configuration();
	config.setString(BlobServerOptions.STORAGE_DIRECTORY,
		TEMPORARY_FOLDER.newFolder().getAbsolutePath());
	config.setLong(BlobServerOptions.CLEANUP_INTERVAL, 1L);

	// try-with-resources: the started server must not outlive the test
	try (BlobServer blobServer = new BlobServer(config, new VoidBlobStore())) {
		blobServer.start();
		final InetSocketAddress serverAddress = new InetSocketAddress("localhost", blobServer.getPort());

		// the cache is closed before the server it talks to
		try (PermanentBlobCache permanentBlobCache = new PermanentBlobCache(config, new VoidBlobStore(), serverAddress)) {
			final BlobLibraryCacheManager libraryCacheManager =
				new BlobLibraryCacheManager(
					permanentBlobCache,
					FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
					new String[0]);

			final Task task = new TaskBuilder()
				.setRequiredJarFileBlobKeys(Collections.singletonList(missingKey))
				.setLibraryCacheManager(libraryCacheManager)
				.build();

			// task should be new and perfect
			assertEquals(ExecutionState.CREATED, task.getExecutionState());
			assertFalse(task.isCanceledOrFailed());
			assertNull(task.getFailureCause());

			// should fail
			task.run();

			// verify final state
			assertEquals(ExecutionState.FAILED, task.getExecutionState());
			assertTrue(task.isCanceledOrFailed());
			assertNotNull(task.getFailureCause());
			assertNotNull(task.getFailureCause().getMessage());
			assertTrue(task.getFailureCause().getMessage().contains("Failed to fetch BLOB"));

			assertNull(task.getInvokable());
		}
	}
}
 
Example #13
Source File: JobManagerSharedServices.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link JobManagerSharedServices} from the given configuration.
 *
 * <p>This sets up the library cache manager on top of the blob server, a scheduled executor
 * named {@code "jobmanager-future"} sized by the number of CPU cores, the stack trace sample
 * coordinator and the back pressure stats tracker, and schedules the periodic clean-up of the
 * tracker's operator stats cache.
 *
 * @param config configuration to read class loading, timeout and back pressure options from; must not be null
 * @param blobServer blob server backing the library cache manager; must not be null
 * @return the assembled shared services
 * @throws IllegalConfigurationException if the timeout cannot be parsed; the parsing error is attached as cause
 * @throws Exception declared by the signature for failures of the called factories
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder =
		config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager =
		new BlobLibraryCacheManager(
			blobServer,
			FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
			alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// pass the original exception along so the unparsable value is not lost from the stack trace
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
			Hardware.getNumberCPUCores(),
			new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// the clean-up interval is used both as initial delay and as period, in milliseconds
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
 
Example #14
Source File: JvmExitOnFatalErrorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the test process: builds a {@code Task} that runs {@code OomInvokable} with
 * {@code TaskManagerOptions.KILL_ON_OUT_OF_MEMORY} enabled, starts the task thread and then
 * blocks the main thread forever.
 *
 * <p>Any {@code Throwable} raised while setting up or starting the task is printed to
 * {@code System.err} instead of being propagated.
 *
 * @param args command line arguments; not read by this method
 */
public static void main(String[] args) throws Exception {

			System.err.println("creating task");

			// we suppress process exits via errors here to not
			// have a test that exits accidentally due to a programming error
			try {
				final Configuration taskManagerConfig = new Configuration();
				taskManagerConfig.setBoolean(TaskManagerOptions.KILL_ON_OUT_OF_MEMORY, true);

				final JobID jid = new JobID();
				// allocationID goes to the local state store, slotAllocationId to the Task itself
				final AllocationID allocationID = new AllocationID();
				final JobVertexID jobVertexId = new JobVertexID();
				final ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
				final AllocationID slotAllocationId = new AllocationID();

				final SerializedValue<ExecutionConfig> execConfig = new SerializedValue<>(new ExecutionConfig());

				final JobInformation jobInformation = new JobInformation(
						jid, "Test Job", execConfig, new Configuration(),
						Collections.emptyList(), Collections.emptyList());

				final TaskInformation taskInformation = new TaskInformation(
						jobVertexId, "Test Task", 1, 1, OomInvokable.class.getName(), new Configuration());

				final MemoryManager memoryManager = new MemoryManager(1024 * 1024, 1);
				final IOManager ioManager = new IOManagerAsync();

				// shuffle environment obtained from the builder without further customization
				final ShuffleEnvironment<?, ?> shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();

				final TaskManagerRuntimeInfo tmInfo = TaskManagerConfiguration.fromConfiguration(taskManagerConfig);

				// shared by the local state store and the Task
				final Executor executor = Executors.newCachedThreadPool();

				BlobCacheService blobService =
					new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

				final TaskLocalStateStore localStateStore =
					new TaskLocalStateStoreImpl(
						jid,
						allocationID,
						jobVertexId,
						0,
						TestLocalRecoveryConfig.disabled(),
						executor);

				final TaskStateManager slotStateManager =
					new TaskStateManagerImpl(
						jid,
						executionAttemptID,
						localStateStore,
						null,
						mock(CheckpointResponder.class));

				Task task = new Task(
						jobInformation,
						taskInformation,
						executionAttemptID,
						slotAllocationId,
						0,       // subtaskIndex
						0,       // attemptNumber
						Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
						Collections.<InputGateDeploymentDescriptor>emptyList(),
						0,       // targetSlotNumber
						memoryManager,
						ioManager,
						shuffleEnvironment,
						new KvStateService(new KvStateRegistry(), null, null),
						new BroadcastVariableManager(),
						new TaskEventDispatcher(),
						slotStateManager,
						new NoOpTaskManagerActions(),
						new NoOpInputSplitProvider(),
						new NoOpCheckpointResponder(),
						new TestGlobalAggregateManager(),
						blobService,
						new BlobLibraryCacheManager(
							blobService.getPermanentBlobService(),
							FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
							new String[0]),
						new FileCache(tmInfo.getTmpDirectories(), blobService.getPermanentBlobService()),
						tmInfo,
						UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
						new NoOpResultPartitionConsumableNotifier(),
						new NoOpPartitionProducerStateChecker(),
						executor);

				System.err.println("starting task thread");

				task.startTaskThread();
			}
			catch (Throwable t) {
				// report on stderr and fall through to parking the main thread
				System.err.println("ERROR STARTING TASK");
				t.printStackTrace();
			}

			System.err.println("parking the main thread");
			CommonTestUtils.blockForeverNonInterruptibly();
		}
 
Example #15
Source File: StreamTaskTerminationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * FLINK-6833
 *
 * <p>Tests that a finished stream task cannot be failed by an asynchronous checkpointing operation after
 * the stream task has stopped running.
 *
 * <p>The test builds a {@code Task} that runs {@code BlockingStreamTask} with a
 * {@code BlockingStateBackend}, runs it asynchronously, triggers one checkpoint once the task is
 * running, and finally asserts that the task ended in {@code FINISHED} without a failure cause.
 */
@Test
public void testConcurrentAsyncCheckpointCannotFailFinishedStreamTask() throws Exception {
	final Configuration taskConfiguration = new Configuration();
	// StreamConfig is constructed around taskConfiguration, which is later handed to TaskInformation
	final StreamConfig streamConfig = new StreamConfig(taskConfiguration);
	final NoOpStreamOperator<Long> noOpStreamOperator = new NoOpStreamOperator<>();

	final StateBackend blockingStateBackend = new BlockingStateBackend();

	streamConfig.setStreamOperator(noOpStreamOperator);
	streamConfig.setOperatorID(new OperatorID());
	streamConfig.setStateBackend(blockingStateBackend);

	// id and timestamp of the single checkpoint triggered below
	final long checkpointId = 0L;
	final long checkpointTimestamp = 0L;

	final JobInformation jobInformation = new JobInformation(
		new JobID(),
		"Test Job",
		new SerializedValue<>(new ExecutionConfig()),
		new Configuration(),
		Collections.emptyList(),
		Collections.emptyList());

	final TaskInformation taskInformation = new TaskInformation(
		new JobVertexID(),
		"Test Task",
		1,
		1,
		BlockingStreamTask.class.getName(),
		taskConfiguration);

	final TaskManagerRuntimeInfo taskManagerRuntimeInfo = new TestingTaskManagerRuntimeInfo();

	// shuffle environment obtained from the builder without further customization
	final ShuffleEnvironment<?, ?> shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();

	// blob service backed by mocked caches; also supplies the permanent blob service for the library cache manager
	BlobCacheService blobService =
		new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

	// NOTE(review): arguments are positional; their meaning is defined by the Task constructor, which is not visible here
	final Task task = new Task(
		jobInformation,
		taskInformation,
		new ExecutionAttemptID(),
		new AllocationID(),
		0,
		0,
		Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
		Collections.<InputGateDeploymentDescriptor>emptyList(),
		0,
		new MemoryManager(32L * 1024L, 1),
		new IOManagerAsync(),
		shuffleEnvironment,
		new KvStateService(new KvStateRegistry(), null, null),
		mock(BroadcastVariableManager.class),
		new TaskEventDispatcher(),
		new TestTaskStateManager(),
		mock(TaskManagerActions.class),
		mock(InputSplitProvider.class),
		mock(CheckpointResponder.class),
		new TestGlobalAggregateManager(),
		blobService,
		new BlobLibraryCacheManager(
			blobService.getPermanentBlobService(),
			FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
			new String[0]),
		mock(FileCache.class),
		taskManagerRuntimeInfo,
		UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
		new NoOpResultPartitionConsumableNotifier(),
		mock(PartitionProducerStateChecker.class),
		Executors.directExecutor());

	// run the task on the testing executor so this thread can trigger the checkpoint meanwhile
	CompletableFuture<Void> taskRun = CompletableFuture.runAsync(
		() -> task.run(),
		TestingUtils.defaultExecutor());

	// wait until the stream task started running
	// NOTE(review): RUN_LATCH is declared outside this method; presumably released by BlockingStreamTask — confirm
	RUN_LATCH.await();

	// trigger a checkpoint
	// NOTE(review): the meaning of the trailing boolean (false) is not visible here — confirm against Task#triggerCheckpointBarrier
	task.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, CheckpointOptions.forCheckpointWithDefaultLocation(), false);

	// wait until the task has completed execution
	taskRun.get();

	// check that no failure occurred
	if (task.getFailureCause() != null) {
		throw new Exception("Task failed", task.getFailureCause());
	}

	// check that we have entered the finished state
	assertEquals(ExecutionState.FINISHED, task.getExecutionState());
}
 
Example #16
Source File: TaskCheckpointingBehaviourTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code Task} that runs {@code TestStreamTask} with the given operator and state backend.
 *
 * <p>Collaborators that are not passed in are either mocks or simple test implementations.
 *
 * @param op stream operator written into the task's {@code StreamConfig}
 * @param backend state backend written into the task's {@code StreamConfig}
 * @param checkpointResponder responder passed to the {@code Task} constructor
 * @return the newly constructed task
 * @throws IOException declared by the signature; NOTE(review): the throwing call is not identifiable from this snippet
 */
private static Task createTask(
	StreamOperator<?> op,
	StateBackend backend,
	CheckpointResponder checkpointResponder) throws IOException {

	// the StreamConfig is constructed around taskConfig, which is handed to TaskInformation below
	Configuration taskConfig = new Configuration();
	StreamConfig cfg = new StreamConfig(taskConfig);
	cfg.setStreamOperator(op);
	cfg.setOperatorID(new OperatorID());
	cfg.setStateBackend(backend);

	// default execution config, serialized into the job information
	ExecutionConfig executionConfig = new ExecutionConfig();

	JobInformation jobInformation = new JobInformation(
			new JobID(),
			"test job name",
			new SerializedValue<>(executionConfig),
			new Configuration(),
			Collections.emptyList(),
			Collections.emptyList());

	// NOTE(review): the two int arguments (1, 11) are positional; confirm their meaning against the TaskInformation constructor
	TaskInformation taskInformation = new TaskInformation(
			new JobVertexID(),
			"test task name",
			1,
			11,
			TestStreamTask.class.getName(),
			taskConfig);

	// shuffle environment obtained from the builder without further customization
	ShuffleEnvironment<?, ?> shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();

	// blob service backed by mocked caches; its permanent blob service is reused for the library cache manager and the file cache
	BlobCacheService blobService =
		new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));

	// NOTE(review): arguments are positional; their meaning is defined by the Task constructor, which is not visible here
	return new Task(
			jobInformation,
			taskInformation,
			new ExecutionAttemptID(),
			new AllocationID(),
			0,
			0,
			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			0,
			mock(MemoryManager.class),
			mock(IOManager.class),
			shuffleEnvironment,
			new KvStateService(new KvStateRegistry(), null, null),
			mock(BroadcastVariableManager.class),
			new TaskEventDispatcher(),
			new TestTaskStateManager(),
			mock(TaskManagerActions.class),
			mock(InputSplitProvider.class),
			checkpointResponder,
			new TestGlobalAggregateManager(),
			blobService,
			new BlobLibraryCacheManager(
				blobService.getPermanentBlobService(),
				FlinkUserCodeClassLoaders.ResolveOrder.CHILD_FIRST,
				new String[0]),
			new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() },
				blobService.getPermanentBlobService()),
			new TestingTaskManagerRuntimeInfo(),
			UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(),
			new NoOpResultPartitionConsumableNotifier(),
			mock(PartitionProducerStateChecker.class),
			Executors.directExecutor());
}
 
Example #17
Source File: JobManagerSharedServices.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link JobManagerSharedServices} from the given configuration.
 *
 * <p>The method builds a {@link BlobLibraryCacheManager} on top of the given blob server,
 * a scheduled executor sized by the number of CPU cores, and the back pressure request
 * coordinator and stats tracker. It also schedules the periodic clean-up of the stats
 * tracker's operator stats cache on that executor.
 *
 * @param config configuration to read class loading, timeout and back pressure options from;
 *     checked with {@code checkNotNull}
 * @param blobServer blob server handed to the library cache manager and to the returned
 *     services; checked with {@code checkNotNull}
 * @param fatalErrorHandler handler passed to the class loader factory only if
 *     {@link CoreOptions#FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM} is enabled; otherwise
 *     {@code null} is passed instead
 * @return the assembled job manager shared services
 * @throws IllegalConfigurationException if reading the Akka timeout fails with a
 *     {@link NumberFormatException}; that exception is attached as the cause
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer,
		FatalErrorHandler fatalErrorHandler) {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder =
		config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	// the fatal error handler is only wired in when failing on metaspace OOMs is enabled
	final boolean failOnJvmMetaspaceOomError = config.getBoolean(CoreOptions.FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM);
	final BlobLibraryCacheManager libraryCacheManager =
		new BlobLibraryCacheManager(
			blobServer,
			BlobLibraryCacheManager.defaultClassLoaderFactory(
				FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
				alwaysParentFirstLoaderPatterns,
				failOnJvmMetaspaceOomError ? fatalErrorHandler : null));

	final Duration akkaTimeout;
	try {
		akkaTimeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// keep the original exception as the cause so the underlying parse failure is not lost
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
			Hardware.getNumberCPUCores(),
			new ExecutorThreadFactory("jobmanager-future"));

	final int numSamples = config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES);
	// held as long so that the multiplication below is carried out in long arithmetic
	final long delayBetweenSamples = config.getInteger(WebOptions.BACKPRESSURE_DELAY);
	final BackPressureRequestCoordinator coordinator = new BackPressureRequestCoordinator(
		futureExecutor,
		akkaTimeout.toMillis() + numSamples * delayBetweenSamples);

	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		coordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL));

	// the clean-up interval serves as both the initial delay and the period, in milliseconds
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		coordinator,
		backPressureStatsTracker,
		blobServer);
}
 
Example #18
Source File: TaskManagerServices.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the {@link TaskManagerServices} from the given configuration and collaborators.
 *
 * <p>While assembling the services, the shuffle environment and the KvState service
 * are started.
 *
 * @param taskManagerServicesConfiguration task manager configuration
 * @param permanentBlobService permanentBlobService used by the services
 * @param taskManagerMetricGroup metric group of the task manager
 * @param ioExecutor executor for async IO operations
 * @param fatalErrorHandler to handle class loading OOMs
 * @return task manager components
 * @throws Exception declared by the signature; which steps may fail is not visible here
 */
public static TaskManagerServices fromConfiguration(
		TaskManagerServicesConfiguration taskManagerServicesConfiguration,
		PermanentBlobService permanentBlobService,
		MetricGroup taskManagerMetricGroup,
		ExecutorService ioExecutor,
		FatalErrorHandler fatalErrorHandler) throws Exception {

	// run the pre-start checks on the temp directories before creating anything
	checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths());

	final TaskEventDispatcher eventDispatcher = new TaskEventDispatcher();

	// bring up the I/O manager, which creates some temp directories
	final IOManager asyncIoManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths());

	final ShuffleEnvironment<?, ?> shuffleEnv = createShuffleEnvironment(
		taskManagerServicesConfiguration,
		eventDispatcher,
		taskManagerMetricGroup,
		ioExecutor);
	final int listeningDataPort = shuffleEnv.start();

	final KvStateService kvState = KvStateService.fromConfiguration(taskManagerServicesConfiguration);
	kvState.start();

	// the task manager location exposes the listening port
	// iff the external data port is not explicitly defined
	final int exposedDataPort;
	if (taskManagerServicesConfiguration.getExternalDataPort() > 0) {
		exposedDataPort = taskManagerServicesConfiguration.getExternalDataPort();
	} else {
		exposedDataPort = listeningDataPort;
	}

	final UnresolvedTaskManagerLocation taskManagerLocation = new UnresolvedTaskManagerLocation(
		taskManagerServicesConfiguration.getResourceID(),
		taskManagerServicesConfiguration.getExternalAddress(),
		exposedDataPort);

	final BroadcastVariableManager broadcastVariables = new BroadcastVariableManager();

	final TaskSlotTable<Task> slotTable = createTaskSlotTable(
		taskManagerServicesConfiguration.getNumberOfSlots(),
		taskManagerServicesConfiguration.getTaskExecutorResourceSpec(),
		taskManagerServicesConfiguration.getTimerServiceShutdownTimeout(),
		taskManagerServicesConfiguration.getPageSize());

	final JobTable jobs = DefaultJobTable.create();

	final JobLeaderService leaderService = new DefaultJobLeaderService(
		taskManagerLocation,
		taskManagerServicesConfiguration.getRetryingRegistrationConfiguration());

	// every configured local recovery root gets the local state sub directory appended
	final String[] localStateRoots = taskManagerServicesConfiguration.getLocalRecoveryStateRootDirectories();
	final File[] localStateRootDirs = new File[localStateRoots.length];
	int nextSlot = 0;
	for (String localStateRoot : localStateRoots) {
		localStateRootDirs[nextSlot++] = new File(localStateRoot, LOCAL_STATE_SUB_DIRECTORY_ROOT);
	}

	final TaskExecutorLocalStateStoresManager localStateStoresManager = new TaskExecutorLocalStateStoresManager(
		taskManagerServicesConfiguration.isLocalRecoveryEnabled(),
		localStateRootDirs,
		ioExecutor);

	// the fatal error handler is only handed over when failing on metaspace OOMs is enabled
	final FatalErrorHandler classLoadingErrorHandler;
	if (taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM)) {
		classLoadingErrorHandler = fatalErrorHandler;
	} else {
		classLoadingErrorHandler = null;
	}

	final LibraryCacheManager libraryCache = new BlobLibraryCacheManager(
		permanentBlobService,
		BlobLibraryCacheManager.defaultClassLoaderFactory(
			taskManagerServicesConfiguration.getClassLoaderResolveOrder(),
			taskManagerServicesConfiguration.getAlwaysParentFirstLoaderPatterns(),
			classLoadingErrorHandler));

	return new TaskManagerServices(
		taskManagerLocation,
		taskManagerServicesConfiguration.getManagedMemorySize().getBytes(),
		asyncIoManager,
		shuffleEnv,
		kvState,
		broadcastVariables,
		slotTable,
		jobs,
		leaderService,
		localStateStoresManager,
		eventDispatcher,
		ioExecutor,
		libraryCache);
}