org.apache.flink.runtime.jobmaster.JobMasterGateway Java Examples

The following examples show how to use org.apache.flink.runtime.jobmaster.JobMasterGateway. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ResourceManager.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Closes the connection to the job manager registered for the given job. This method should
 * be called by the framework once it detects that a currently registered job manager has failed.
 *
 * <p>If no job manager is registered for the job, only a debug message is logged.
 *
 * @param jobId identifying the job whose leader shall be disconnected.
 * @param cause the exception that caused the JobManager to fail; it is passed on to the job manager.
 */
protected void closeJobManagerConnection(JobID jobId, Exception cause) {
	final JobManagerRegistration registration = jobManagerRegistrations.remove(jobId);

	if (registration == null) {
		log.debug("There was no registered job manager for job {}.", jobId);
		return;
	}

	final ResourceID resourceId = registration.getJobManagerResourceID();
	final JobMasterGateway gateway = registration.getJobManagerGateway();
	final JobMasterId masterId = registration.getJobMasterId();

	log.info("Disconnect job manager {}@{} for job {} from the resource manager.",
		masterId,
		gateway.getAddress(),
		jobId);

	// drop the heartbeat target and the resource id mapping before notifying the job manager
	jobManagerHeartbeatManager.unmonitorTarget(resourceId);
	jmResourceIdRegistrations.remove(resourceId);

	// tell the job manager about the disconnect
	gateway.disconnectResourceManager(getFencingToken(), cause);
}
 
Example #2
Source File: Dispatcher.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Issues the given query against the {@link JobMasterGateway} of every job that currently has
 * an entry in {@code jobManagerRunnerFutures}.
 *
 * <p>Each returned future carries the query result wrapped in an {@link Optional}. A failure
 * while obtaining the gateway or while running the query is not propagated; the corresponding
 * future then completes with an empty {@link Optional}.
 *
 * @param queryFunction request to run against each job master gateway
 * @param <T> type of the requested information
 * @return one future per job, in the iteration order of the job id key set
 */
@Nonnull
private <T> List<CompletableFuture<Optional<T>>> queryJobMastersForInformation(Function<JobMasterGateway, CompletableFuture<T>> queryFunction) {
	final List<CompletableFuture<Optional<T>>> results = new ArrayList<>(jobManagerRunnerFutures.size());

	for (JobID jobId : jobManagerRunnerFutures.keySet()) {
		results.add(
			getJobMasterGatewayFuture(jobId)
				.thenCompose(queryFunction)
				// the throwable is intentionally ignored: a failed query yields an empty Optional
				.handle((T value, Throwable failure) -> Optional.ofNullable(value)));
	}

	return results;
}
 
Example #3
Source File: Dispatcher.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the status of the given job from its job master. If that request fails, the
 * status is looked up in the {@code archivedExecutionGraphStore} instead; if the store has no
 * entry either, the returned future fails with the original (unwrapped) failure.
 *
 * @param jobId id of the job whose status is requested
 * @param timeout timeout passed on to the job master request
 * @return future with the job status
 */
@Override
public CompletableFuture<JobStatus> requestJobStatus(JobID jobId, Time timeout) {
	return getJobMasterGatewayFuture(jobId)
		.thenCompose((JobMasterGateway gateway) -> gateway.requestJobStatus(timeout))
		.exceptionally(
			(Throwable failure) -> {
				final JobDetails archivedDetails = archivedExecutionGraphStore.getAvailableJobDetails(jobId);

				// check whether it is a completed job
				if (archivedDetails != null) {
					return archivedDetails.getStatus();
				}

				throw new CompletionException(ExceptionUtils.stripCompletionException(failure));
			});
}
 
Example #4
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Detaches this task executor from the given job manager connection: removes the job's
 * KvState listener, resets the job's KvState location oracle, notifies the job manager about
 * the disconnect and shuts down the connection's library cache manager.
 *
 * @param jobManagerConnection connection to give up; must not be {@code null}
 * @param cause reason for the disconnect, passed on to the job manager
 * @throws IOException declared by the original signature
 */
private void disassociateFromJobManager(JobManagerConnection jobManagerConnection, Exception cause) throws IOException {
	checkNotNull(jobManagerConnection);

	// the registry may be absent, in which case there is no listener to remove
	final KvStateRegistry registry = networkEnvironment.getKvStateRegistry();
	if (registry != null) {
		registry.unregisterListener(jobManagerConnection.getJobID());
	}

	// same for the client proxy: only reset the oracle when a proxy exists
	final KvStateClientProxy clientProxy = networkEnvironment.getKvStateProxy();
	if (clientProxy != null) {
		clientProxy.updateKvStateLocationOracle(jobManagerConnection.getJobID(), null);
	}

	jobManagerConnection.getJobManagerGateway().disconnectTaskManager(getResourceID(), cause);
	jobManagerConnection.getLibraryCacheManager().shutdown();
}
 
Example #5
Source File: Dispatcher.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a {@link ClusterOverview} from the resource overview reported by the resource
 * manager, the status of every job answered by a job master and the jobs overview held by the
 * {@code archivedExecutionGraphStore}.
 *
 * @param timeout timeout passed on to the resource manager and job master requests
 * @return future with the combined cluster overview
 */
@Override
public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) {
	final CompletableFuture<ResourceOverview> resourceOverviewFuture = runResourceManagerCommand(
		gateway -> gateway.requestResourceOverview(timeout));

	final List<CompletableFuture<Optional<JobStatus>>> statusQueries = queryJobMastersForInformation(
		(JobMasterGateway gateway) -> gateway.requestJobStatus(timeout));

	final CompletableFuture<Collection<Optional<JobStatus>>> optionalStatusesFuture = FutureUtils.combineAll(statusQueries);
	final CompletableFuture<Collection<JobStatus>> runningStatusesFuture = optionalStatusesFuture.thenApply(this::flattenOptionalCollection);

	// read eagerly, i.e. when the request is made rather than when the futures complete
	final JobsOverview archivedOverview = archivedExecutionGraphStore.getStoredJobsOverview();

	return runningStatusesFuture.thenCombine(
		resourceOverviewFuture,
		(Collection<JobStatus> runningStatuses, ResourceOverview resourceOverview) ->
			new ClusterOverview(
				resourceOverview,
				JobsOverview.create(runningStatuses).combine(archivedOverview)));
}
 
Example #6
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Walks over the slots listed in the report and, for each one that the local
 * {@code taskSlotTable} does not consider allocated for the reported job and allocation id,
 * asks the job master to fail that slot.
 *
 * @param allocatedSlotReport report of the slots to check
 * @param jobMasterGateway gateway that receives the {@code failSlot} calls
 */
private void failNoLongerAllocatedSlots(AllocatedSlotReport allocatedSlotReport, JobMasterGateway jobMasterGateway) {
	for (AllocatedSlotInfo slotInfo : allocatedSlotReport.getAllocatedSlotInfos()) {
		final AllocationID allocationId = slotInfo.getAllocationId();

		final boolean stillAllocated = taskSlotTable.isAllocated(
			slotInfo.getSlotIndex(),
			allocatedSlotReport.getJobId(),
			allocationId);

		if (stillAllocated) {
			// both sides agree on this slot, nothing to report
			continue;
		}

		jobMasterGateway.failSlot(
			getResourceID(),
			allocationId,
			new FlinkException(
				String.format(
					"Slot %s on TaskExecutor %s is not allocated by job %s.",
					slotInfo.getSlotIndex(),
					getResourceID(),
					allocatedSlotReport.getJobId())));
	}
}
 
Example #7
Source File: DefaultJobLeaderService.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the registration. The first seven arguments are passed unchanged to the superclass
 * constructor; the two task manager arguments are stored in this instance.
 *
 * @param taskManagerRpcAddress stored as is, without a null check
 * @param unresolvedTaskManagerLocation must not be {@code null}
 */
JobManagerRetryingRegistration(
		Logger log,
		RpcService rpcService,
		String targetName,
		Class<JobMasterGateway> targetType,
		String targetAddress,
		JobMasterId jobMasterId,
		RetryingRegistrationConfiguration retryingRegistrationConfiguration,
		String taskManagerRpcAddress,
		UnresolvedTaskManagerLocation unresolvedTaskManagerLocation) {
	super(log, rpcService, targetName, targetType, targetAddress, jobMasterId, retryingRegistrationConfiguration);

	this.taskManagerRpcAddress = taskManagerRpcAddress;
	this.unresolvedTaskManagerLocation = Preconditions.checkNotNull(unresolvedTaskManagerLocation);
}
 
Example #8
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a {@link ClusterOverview} from the resource overview reported by the resource
 * manager, the status of every job answered by a job master and the jobs overview held by the
 * {@code archivedExecutionGraphStore}.
 *
 * @param timeout timeout passed on to the resource manager and job master requests
 * @return future with the combined cluster overview
 */
@Override
public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) {
	final CompletableFuture<ResourceOverview> resourceOverviewFuture = runResourceManagerCommand(
		gateway -> gateway.requestResourceOverview(timeout));

	final List<CompletableFuture<Optional<JobStatus>>> statusQueries = queryJobMastersForInformation(
		(JobMasterGateway gateway) -> gateway.requestJobStatus(timeout));

	final CompletableFuture<Collection<Optional<JobStatus>>> optionalStatusesFuture = FutureUtils.combineAll(statusQueries);
	final CompletableFuture<Collection<JobStatus>> runningStatusesFuture = optionalStatusesFuture.thenApply(this::flattenOptionalCollection);

	// read eagerly, i.e. when the request is made rather than when the futures complete
	final JobsOverview archivedOverview = archivedExecutionGraphStore.getStoredJobsOverview();

	return runningStatusesFuture.thenCombine(
		resourceOverviewFuture,
		(Collection<JobStatus> runningStatuses, ResourceOverview resourceOverview) ->
			new ClusterOverview(
				resourceOverview,
				JobsOverview.create(runningStatuses).combine(archivedOverview)));
}
 
Example #9
Source File: JobLeaderService.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Attempts to reconnect the current rpc connection. Nothing is done (apart from a debug
 * message) when the listener is stopped, when there is no connection yet, when the connection
 * is not yet connected, or when {@code tryReconnect()} reports failure.
 */
public void reconnect() {
	if (stopped) {
		LOG.debug("Cannot reconnect because the JobManagerLeaderListener has already been stopped.");
		return;
	}

	// read the field once so that all checks below refer to the same connection object
	final RegisteredRpcConnection<JobMasterId, JobMasterGateway, JMTMRegistrationSuccess> connection = rpcConnection;

	if (connection == null) {
		LOG.debug("Cannot reconnect to an unknown JobMaster.");
		return;
	}

	if (!connection.isConnected()) {
		LOG.debug("Ongoing registration to JobMaster {}.", connection.getTargetAddress());
		return;
	}

	if (!connection.tryReconnect()) {
		LOG.debug("Could not reconnect to the JobMaster {}.", connection.getTargetAddress());
		return;
	}

	// double check for concurrent stop operation
	if (stopped) {
		connection.close();
	}
}
 
Example #10
Source File: JobManagerConnection.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a connection object holding all components that belong to one job manager.
 * Every argument is required: each one is passed through {@code Preconditions.checkNotNull}
 * before it is stored.
 */
public JobManagerConnection(
		JobID jobID,
		ResourceID resourceID,
		JobMasterGateway jobMasterGateway,
		TaskManagerActions taskManagerActions,
		CheckpointResponder checkpointResponder,
		GlobalAggregateManager aggregateManager,
		LibraryCacheManager libraryCacheManager,
		ResultPartitionConsumableNotifier resultPartitionConsumableNotifier,
		PartitionProducerStateChecker partitionStateChecker) {

	// identity of the job and of the job manager
	this.jobID = Preconditions.checkNotNull(jobID);
	this.resourceID = Preconditions.checkNotNull(resourceID);

	// components used to interact with the job manager
	this.jobMasterGateway = Preconditions.checkNotNull(jobMasterGateway);
	this.taskManagerActions = Preconditions.checkNotNull(taskManagerActions);
	this.checkpointResponder = Preconditions.checkNotNull(checkpointResponder);
	this.aggregateManager = Preconditions.checkNotNull(aggregateManager);
	this.libraryCacheManager = Preconditions.checkNotNull(libraryCacheManager);
	this.resultPartitionConsumableNotifier = Preconditions.checkNotNull(resultPartitionConsumableNotifier);
	this.partitionStateChecker = Preconditions.checkNotNull(partitionStateChecker);
}
 
Example #11
Source File: TaskSubmissionTestEnvironment.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Schedules, via {@code mainThreadExecutable.runAsync}, the connection of the job identified
 * by {@code jobId} to the given job master gateway. The job is looked up in (or added to) the
 * job table and connected with test implementations of the remaining services; the partition
 * producer state checker always answers {@code ExecutionState.RUNNING}.
 */
static void registerJobMasterConnection(
		JobTable jobTable,
		JobID jobId,
		RpcService testingRpcService,
		JobMasterGateway jobMasterGateway,
		TaskManagerActions taskManagerActions,
		Time timeout,
		MainThreadExecutable mainThreadExecutable) {
	mainThreadExecutable.runAsync(() -> {
		final JobTable.Job job = jobTable.getOrCreateJob(jobId, () -> TestingJobServices.newBuilder().build());

		// created in the same order in which the original call evaluated its arguments
		final ResourceID resourceId = ResourceID.generate();
		final TestCheckpointResponder checkpointResponder = new TestCheckpointResponder();
		final TestGlobalAggregateManager aggregateManager = new TestGlobalAggregateManager();
		final RpcResultPartitionConsumableNotifier consumableNotifier =
			new RpcResultPartitionConsumableNotifier(jobMasterGateway, testingRpcService.getExecutor(), timeout);

		job.connect(
			resourceId,
			jobMasterGateway,
			taskManagerActions,
			checkpointResponder,
			aggregateManager,
			consumableNotifier,
			TestingPartitionProducerStateChecker.newBuilder()
				.setPartitionProducerStateFunction(
					(jobID, intermediateDataSetID, resultPartitionID) -> CompletableFuture.completedFuture(ExecutionState.RUNNING))
				.build());
	});
}
 
Example #12
Source File: ResourceManager.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Closes the connection to the job manager registered for the given job. This method should
 * be called by the framework once it detects that a currently registered job manager has failed.
 *
 * <p>If no job manager is registered for the job, only a debug message is logged.
 *
 * @param jobId identifying the job whose leader shall be disconnected.
 * @param cause the exception that caused the JobManager to fail; it is passed on to the job manager.
 */
protected void closeJobManagerConnection(JobID jobId, Exception cause) {
	final JobManagerRegistration registration = jobManagerRegistrations.remove(jobId);

	if (registration == null) {
		log.debug("There was no registered job manager for job {}.", jobId);
		return;
	}

	final ResourceID resourceId = registration.getJobManagerResourceID();
	final JobMasterGateway gateway = registration.getJobManagerGateway();
	final JobMasterId masterId = registration.getJobMasterId();

	log.info("Disconnect job manager {}@{} for job {} from the resource manager.",
		masterId,
		gateway.getAddress(),
		jobId);

	// drop the heartbeat target and the resource id mapping before notifying the job manager
	jobManagerHeartbeatManager.unmonitorTarget(resourceId);
	jmResourceIdRegistrations.remove(resourceId);

	// tell the job manager about the disconnect
	gateway.disconnectResourceManager(getFencingToken(), cause);
}
 
Example #13
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a {@link ClusterOverview} from the resource overview reported by the resource
 * manager, the status of every job answered by a job master and the jobs overview held by the
 * {@code archivedExecutionGraphStore}.
 *
 * @param timeout timeout passed on to the resource manager and job master requests
 * @return future with the combined cluster overview
 */
@Override
public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) {
	final CompletableFuture<ResourceOverview> resourceOverviewFuture = runResourceManagerCommand(
		gateway -> gateway.requestResourceOverview(timeout));

	final List<CompletableFuture<Optional<JobStatus>>> statusQueries = queryJobMastersForInformation(
		(JobMasterGateway gateway) -> gateway.requestJobStatus(timeout));

	final CompletableFuture<Collection<Optional<JobStatus>>> optionalStatusesFuture = FutureUtils.combineAll(statusQueries);
	final CompletableFuture<Collection<JobStatus>> runningStatusesFuture = optionalStatusesFuture.thenApply(this::flattenOptionalCollection);

	// read eagerly, i.e. when the request is made rather than when the futures complete
	final JobsOverview archivedOverview = archivedExecutionGraphStore.getStoredJobsOverview();

	return runningStatusesFuture.thenCombine(
		resourceOverviewFuture,
		(Collection<JobStatus> runningStatuses, ResourceOverview resourceOverview) ->
			new ClusterOverview(
				resourceOverview,
				JobsOverview.create(runningStatuses).combine(archivedOverview)));
}
 
Example #14
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a future with the {@link JobMasterGateway} of the given job.
 *
 * <p>The future fails with a {@code FlinkJobNotFoundException} when the job has no entry in
 * {@code jobManagerRunnerFutures}, either at call time or when the gateway becomes available.
 *
 * @param jobId id of the job whose gateway is requested
 * @return future with the job master gateway
 */
private CompletableFuture<JobMasterGateway> getJobMasterGatewayFuture(JobID jobId) {
	final CompletableFuture<JobManagerRunner> runnerFuture = jobManagerRunnerFutures.get(jobId);

	if (runnerFuture == null) {
		return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId));
	}

	final CompletableFuture<JobMasterGateway> gatewayFuture = runnerFuture.thenCompose(JobManagerRunner::getLeaderGatewayFuture);

	// the re-check is executed with the main thread executor
	return gatewayFuture.thenApplyAsync(
		(JobMasterGateway gateway) -> {
			// check whether the retrieved JobMasterGateway belongs still to a running JobMaster
			if (!jobManagerRunnerFutures.containsKey(jobId)) {
				throw new CompletionException(new FlinkJobNotFoundException(jobId));
			}

			return gateway;
		},
		getMainThreadExecutor());
}
 
Example #15
Source File: TaskExecutor.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Wires the queryable state components of {@code kvStateService} to the given job master:
 * registers a {@code RpcKvStateRegistryListener} for the job and hands the gateway to the
 * KvState client proxy. Each step is skipped when the component it needs is absent.
 *
 * @param jobId job the registration is made for
 * @param jobMasterGateway gateway of the job's job master
 */
private void registerQueryableState(JobID jobId, JobMasterGateway jobMasterGateway) {
	final KvStateServer server = kvStateService.getKvStateServer();
	final KvStateRegistry registry = kvStateService.getKvStateRegistry();

	// the listener needs the server address, so both the server and the registry must exist
	if (server != null && registry != null) {
		final RpcKvStateRegistryListener listener =
			new RpcKvStateRegistryListener(jobMasterGateway, server.getServerAddress());
		registry.registerListener(jobId, listener);
	}

	final KvStateClientProxy clientProxy = kvStateService.getKvStateClientProxy();
	if (clientProxy != null) {
		clientProxy.updateKvStateLocationOracle(jobId, jobMasterGateway);
	}
}
 
Example #16
Source File: TaskExecutor.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Walks over the slots listed in the report and, for each one that the local
 * {@code taskSlotTable} does not consider allocated for the reported job and allocation id,
 * asks the job master to fail that slot.
 *
 * @param allocatedSlotReport report of the slots to check
 * @param jobMasterGateway gateway that receives the {@code failSlot} calls
 */
private void failNoLongerAllocatedSlots(AllocatedSlotReport allocatedSlotReport, JobMasterGateway jobMasterGateway) {
	for (AllocatedSlotInfo slotInfo : allocatedSlotReport.getAllocatedSlotInfos()) {
		final AllocationID allocationId = slotInfo.getAllocationId();

		final boolean stillAllocated = taskSlotTable.isAllocated(
			slotInfo.getSlotIndex(),
			allocatedSlotReport.getJobId(),
			allocationId);

		if (stillAllocated) {
			// both sides agree on this slot, nothing to report
			continue;
		}

		jobMasterGateway.failSlot(
			getResourceID(),
			allocationId,
			new FlinkException(
				String.format(
					"Slot %s on TaskExecutor %s is not allocated by job %s.",
					slotInfo.getSlotIndex(),
					getResourceID(),
					allocatedSlotReport.getJobId())));
	}
}
 
Example #17
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the execution graph of the given job from its job master. If that request fails,
 * the graph is looked up in the {@code archivedExecutionGraphStore} instead; if the store has
 * no entry either, the returned future fails with the original (unwrapped) failure.
 *
 * @param jobId id of the requested job
 * @param timeout timeout passed on to the job master request
 * @return future with the archived execution graph
 */
@Override
public CompletableFuture<ArchivedExecutionGraph> requestJob(JobID jobId, Time timeout) {
	return getJobMasterGatewayFuture(jobId)
		.thenCompose((JobMasterGateway gateway) -> gateway.requestJob(timeout))
		.exceptionally(
			(Throwable failure) -> {
				final ArchivedExecutionGraph archivedGraph = archivedExecutionGraphStore.get(jobId);

				// check whether it is a completed job
				if (archivedGraph != null) {
					return archivedGraph;
				}

				throw new CompletionException(ExceptionUtils.stripCompletionException(failure));
			});
}
 
Example #18
Source File: JobLeaderService.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Attempts to reconnect the current rpc connection. Nothing is done (apart from a debug
 * message) when the listener is stopped, when there is no connection yet, when the connection
 * is not yet connected, or when {@code tryReconnect()} reports failure.
 */
public void reconnect() {
	if (stopped) {
		LOG.debug("Cannot reconnect because the JobManagerLeaderListener has already been stopped.");
		return;
	}

	// read the field once so that all checks below refer to the same connection object
	final RegisteredRpcConnection<JobMasterId, JobMasterGateway, JMTMRegistrationSuccess> connection = rpcConnection;

	if (connection == null) {
		LOG.debug("Cannot reconnect to an unknown JobMaster.");
		return;
	}

	if (!connection.isConnected()) {
		LOG.debug("Ongoing registration to JobMaster {}.", connection.getTargetAddress());
		return;
	}

	if (!connection.tryReconnect()) {
		LOG.debug("Could not reconnect to the JobMaster {}.", connection.getTargetAddress());
		return;
	}

	// double check for concurrent stop operation
	if (stopped) {
		connection.close();
	}
}
 
Example #19
Source File: JobLeaderService.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the registration. The first seven arguments are passed unchanged to the superclass
 * constructor; the two task manager arguments are stored in this instance.
 *
 * @param taskManagerRpcAddress stored as is, without a null check
 * @param taskManagerLocation must not be {@code null}
 */
JobManagerRetryingRegistration(
		Logger log,
		RpcService rpcService,
		String targetName,
		Class<JobMasterGateway> targetType,
		String targetAddress,
		JobMasterId jobMasterId,
		RetryingRegistrationConfiguration retryingRegistrationConfiguration,
		String taskManagerRpcAddress,
		TaskManagerLocation taskManagerLocation) {
	super(log, rpcService, targetName, targetType, targetAddress, jobMasterId, retryingRegistrationConfiguration);

	this.taskManagerRpcAddress = taskManagerRpcAddress;
	this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation);
}
 
Example #20
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the status of the given job from its job master. If that request fails, the
 * status is looked up in the {@code archivedExecutionGraphStore} instead; if the store has no
 * entry either, the returned future fails with the original (unwrapped) failure.
 *
 * @param jobId id of the job whose status is requested
 * @param timeout timeout passed on to the job master request
 * @return future with the job status
 */
@Override
public CompletableFuture<JobStatus> requestJobStatus(JobID jobId, Time timeout) {
	return getJobMasterGatewayFuture(jobId)
		.thenCompose((JobMasterGateway gateway) -> gateway.requestJobStatus(timeout))
		.exceptionally(
			(Throwable failure) -> {
				final JobDetails archivedDetails = archivedExecutionGraphStore.getAvailableJobDetails(jobId);

				// check whether it is a completed job
				if (archivedDetails != null) {
					return archivedDetails.getStatus();
				}

				throw new CompletionException(ExceptionUtils.stripCompletionException(failure));
			});
}
 
Example #21
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the details of all jobs: those answered by a job master, followed by those
 * available from the {@code archivedExecutionGraphStore}.
 *
 * @param timeout timeout passed on to the job master requests
 * @return future with the combined job details
 */
@Override
public CompletableFuture<MultipleJobsDetails> requestMultipleJobDetails(Time timeout) {
	final List<CompletableFuture<Optional<JobDetails>>> detailQueries = queryJobMastersForInformation(
		(JobMasterGateway gateway) -> gateway.requestJobDetails(timeout));

	final CompletableFuture<Collection<Optional<JobDetails>>> optionalDetailsFuture = FutureUtils.combineAll(detailQueries);
	final CompletableFuture<Collection<JobDetails>> runningDetailsFuture = optionalDetailsFuture.thenApply(this::flattenOptionalCollection);

	// read eagerly, i.e. when the request is made rather than when the futures complete
	final Collection<JobDetails> archivedDetails = archivedExecutionGraphStore.getAvailableJobDetails();

	return runningDetailsFuture.thenApply(
		(Collection<JobDetails> runningDetails) -> {
			final Collection<JobDetails> merged = new ArrayList<>(archivedDetails.size() + runningDetails.size());

			// running jobs first, then the archived ones
			merged.addAll(runningDetails);
			merged.addAll(archivedDetails);

			return new MultipleJobsDetails(merged);
		});
}
 
Example #22
Source File: JobManagerConnection.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a connection object holding all components that belong to one job manager.
 * Every argument is required: each one is passed through {@code Preconditions.checkNotNull}
 * before it is stored.
 */
public JobManagerConnection(
		JobID jobID,
		ResourceID resourceID,
		JobMasterGateway jobMasterGateway,
		TaskManagerActions taskManagerActions,
		CheckpointResponder checkpointResponder,
		GlobalAggregateManager aggregateManager,
		LibraryCacheManager libraryCacheManager,
		ResultPartitionConsumableNotifier resultPartitionConsumableNotifier,
		PartitionProducerStateChecker partitionStateChecker) {

	// identity of the job and of the job manager
	this.jobID = Preconditions.checkNotNull(jobID);
	this.resourceID = Preconditions.checkNotNull(resourceID);

	// components used to interact with the job manager
	this.jobMasterGateway = Preconditions.checkNotNull(jobMasterGateway);
	this.taskManagerActions = Preconditions.checkNotNull(taskManagerActions);
	this.checkpointResponder = Preconditions.checkNotNull(checkpointResponder);
	this.aggregateManager = Preconditions.checkNotNull(aggregateManager);
	this.libraryCacheManager = Preconditions.checkNotNull(libraryCacheManager);
	this.resultPartitionConsumableNotifier = Preconditions.checkNotNull(resultPartitionConsumableNotifier);
	this.partitionStateChecker = Preconditions.checkNotNull(partitionStateChecker);
}
 
Example #23
Source File: TaskSubmissionTestEnvironment.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link JobManagerConnection} for tests. The library cache manager, the partition
 * producer state checker and the checkpoint responder are mocks: the cache manager hands out
 * the system class loader and the state checker always answers {@code ExecutionState.RUNNING}.
 */
static JobManagerConnection createJobManagerConnection(
		JobID jobId,
		JobMasterGateway jobMasterGateway,
		RpcService testingRpcService,
		TaskManagerActions taskManagerActions,
		Time timeout) {
	final LibraryCacheManager cacheManagerMock = mock(LibraryCacheManager.class);
	when(cacheManagerMock.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader());

	final PartitionProducerStateChecker stateCheckerMock = mock(PartitionProducerStateChecker.class);
	when(stateCheckerMock.requestPartitionProducerState(any(), any(), any()))
		.thenReturn(CompletableFuture.completedFuture(ExecutionState.RUNNING));

	// created in the same order in which the original constructor call evaluated its arguments
	final ResourceID resourceId = ResourceID.generate();
	final CheckpointResponder checkpointResponderMock = mock(CheckpointResponder.class);
	final TestGlobalAggregateManager aggregateManager = new TestGlobalAggregateManager();
	final RpcResultPartitionConsumableNotifier consumableNotifier =
		new RpcResultPartitionConsumableNotifier(jobMasterGateway, testingRpcService.getExecutor(), timeout);

	return new JobManagerConnection(
		jobId,
		resourceId,
		jobMasterGateway,
		taskManagerActions,
		checkpointResponderMock,
		aggregateManager,
		cacheManagerMock,
		consumableNotifier,
		stateCheckerMock);
}
 
Example #24
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the details of all jobs: those answered by a job master, followed by those
 * available from the {@code archivedExecutionGraphStore}.
 *
 * @param timeout timeout passed on to the job master requests
 * @return future with the combined job details
 */
@Override
public CompletableFuture<MultipleJobsDetails> requestMultipleJobDetails(Time timeout) {
	final List<CompletableFuture<Optional<JobDetails>>> detailQueries = queryJobMastersForInformation(
		(JobMasterGateway gateway) -> gateway.requestJobDetails(timeout));

	final CompletableFuture<Collection<Optional<JobDetails>>> optionalDetailsFuture = FutureUtils.combineAll(detailQueries);
	final CompletableFuture<Collection<JobDetails>> runningDetailsFuture = optionalDetailsFuture.thenApply(this::flattenOptionalCollection);

	// read eagerly, i.e. when the request is made rather than when the futures complete
	final Collection<JobDetails> archivedDetails = archivedExecutionGraphStore.getAvailableJobDetails();

	return runningDetailsFuture.thenApply(
		(Collection<JobDetails> runningDetails) -> {
			final Collection<JobDetails> merged = new ArrayList<>(archivedDetails.size() + runningDetails.size());

			// running jobs first, then the archived ones
			merged.addAll(runningDetails);
			merged.addAll(archivedDetails);

			return new MultipleJobsDetails(merged);
		});
}
 
Example #25
Source File: ResourceManager.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Closes the connection to the job manager registered for the given job. This method should
 * be called by the framework once it detects that a currently registered job manager has failed.
 *
 * <p>If no job manager is registered for the job, only a debug message is logged.
 *
 * @param jobId identifying the job whose leader shall be disconnected.
 * @param cause the exception that caused the JobManager to fail; it is passed on to the job manager.
 */
protected void closeJobManagerConnection(JobID jobId, Exception cause) {
	final JobManagerRegistration registration = jobManagerRegistrations.remove(jobId);

	if (registration == null) {
		log.debug("There was no registered job manager for job {}.", jobId);
		return;
	}

	final ResourceID resourceId = registration.getJobManagerResourceID();
	final JobMasterGateway gateway = registration.getJobManagerGateway();
	final JobMasterId masterId = registration.getJobMasterId();

	log.info("Disconnect job manager {}@{} for job {} from the resource manager.",
		masterId,
		gateway.getAddress(),
		jobId);

	// drop the heartbeat target and the resource id mapping before notifying the job manager
	jobManagerHeartbeatManager.unmonitorTarget(resourceId);
	jmResourceIdRegistrations.remove(resourceId);

	// tell the job manager about the disconnect
	gateway.disconnectResourceManager(getFencingToken(), cause);
}
 
Example #26
Source File: Dispatcher.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a future with the {@link JobMasterGateway} of the given job.
 *
 * <p>The future fails with a {@code FlinkJobNotFoundException} when the job has no entry in
 * {@code jobManagerRunnerFutures}, either at call time or when the gateway becomes available.
 *
 * @param jobId id of the job whose gateway is requested
 * @return future with the job master gateway
 */
private CompletableFuture<JobMasterGateway> getJobMasterGatewayFuture(JobID jobId) {
	final CompletableFuture<JobManagerRunner> runnerFuture = jobManagerRunnerFutures.get(jobId);

	if (runnerFuture == null) {
		return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId));
	}

	final CompletableFuture<JobMasterGateway> gatewayFuture = runnerFuture.thenCompose(JobManagerRunner::getJobMasterGateway);

	// the re-check is executed with the main thread executor
	return gatewayFuture.thenApplyAsync(
		(JobMasterGateway gateway) -> {
			// check whether the retrieved JobMasterGateway belongs still to a running JobMaster
			if (!jobManagerRunnerFutures.containsKey(jobId)) {
				throw new CompletionException(new FlinkJobNotFoundException(jobId));
			}

			return gateway;
		},
		getMainThreadExecutor());
}
 
Example #27
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Offers all slots that are allocated for the given job to the job's job master. Nothing is
 * offered when there is no job manager connection for the job or when the
 * {@code taskSlotTable} reports no allocated slots for it. The job master's answer is handled
 * with the main thread executor.
 *
 * @param jobId job whose allocated slots are offered
 */
private void offerSlotsToJobManager(final JobID jobId) {
	final JobManagerConnection connection = jobManagerTable.get(jobId);

	if (connection == null) {
		log.debug("There is no job manager connection to the leader of job {}.", jobId);
		return;
	}

	if (!taskSlotTable.hasAllocatedSlots(jobId)) {
		log.debug("There are no unassigned slots for the job {}.", jobId);
		return;
	}

	log.info("Offer reserved slots to the leader of job {}.", jobId);

	final JobMasterGateway gateway = connection.getJobManagerGateway();
	final Iterator<TaskSlot> allocatedSlots = taskSlotTable.getAllocatedSlots(jobId);
	final JobMasterId jobMasterId = connection.getJobMasterId();

	// turn every allocated slot into an offer
	final Collection<SlotOffer> offers = new HashSet<>(2);
	allocatedSlots.forEachRemaining(slot -> offers.add(slot.generateSlotOffer()));

	final CompletableFuture<Collection<SlotOffer>> acceptedOffersFuture = gateway.offerSlots(
		getResourceID(),
		offers,
		taskManagerConfiguration.getTimeout());

	acceptedOffersFuture.whenCompleteAsync(
		handleAcceptedSlotOffers(jobId, gateway, jobMasterId, offers),
		getMainThreadExecutor());
}
 
Example #28
Source File: Dispatcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards a savepoint request to the job master of the given job.
 *
 * @param jobId id of the job to take the savepoint for
 * @param targetDirectory passed on unchanged to the job master
 * @param cancelJob passed on unchanged to the job master
 * @param timeout passed on unchanged to the job master
 * @return the future returned by the job master's {@code triggerSavepoint} call
 */
@Override
public CompletableFuture<String> triggerSavepoint(
		final JobID jobId,
		final String targetDirectory,
		final boolean cancelJob,
		final Time timeout) {
	return getJobMasterGatewayFuture(jobId).thenCompose(
		(JobMasterGateway gateway) -> gateway.triggerSavepoint(targetDirectory, cancelJob, timeout));
}
 
Example #29
Source File: Dispatcher.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards a savepoint request to the job master of the given job.
 *
 * @param jobId id of the job to take the savepoint for
 * @param targetDirectory passed on unchanged to the job master
 * @param cancelJob passed on unchanged to the job master
 * @param timeout passed on unchanged to the job master
 * @return the future returned by the job master's {@code triggerSavepoint} call
 */
@Override
public CompletableFuture<String> triggerSavepoint(
		final JobID jobId,
		final String targetDirectory,
		final boolean cancelJob,
		final Time timeout) {
	return getJobMasterGatewayFuture(jobId).thenCompose(
		(JobMasterGateway gateway) -> gateway.triggerSavepoint(targetDirectory, cancelJob, timeout));
}
 
Example #30
Source File: TaskExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the task with the given execution attempt id from the {@code taskSlotTable} and
 * reports its execution state to the job master. A task that is not yet in a terminal state
 * is failed externally first. If no such task exists, an error is logged and nothing is sent.
 *
 * @param jobMasterGateway gateway that receives the task execution state
 * @param executionAttemptID id of the task to unregister
 */
private void unregisterTaskAndNotifyFinalState(
		final JobMasterGateway jobMasterGateway,
		final ExecutionAttemptID executionAttemptID) {

	final Task removedTask = taskSlotTable.removeTask(executionAttemptID);

	if (removedTask == null) {
		log.error("Cannot find task with ID {} to unregister.", executionAttemptID);
		return;
	}

	if (!removedTask.getExecutionState().isTerminal()) {
		try {
			removedTask.failExternally(new IllegalStateException("Task is being remove from TaskManager."));
		} catch (Exception e) {
			// a failure here must not prevent the state update below
			log.error("Could not properly fail task.", e);
		}
	}

	log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.",
		removedTask.getExecutionState(), removedTask.getTaskInfo().getTaskName(), removedTask.getExecutionId());

	final AccumulatorSnapshot accumulators = removedTask.getAccumulatorRegistry().getSnapshot();

	final TaskExecutionState reportedState = new TaskExecutionState(
		removedTask.getJobID(),
		removedTask.getExecutionId(),
		removedTask.getExecutionState(),
		removedTask.getFailureCause(),
		accumulators,
		removedTask.getMetricGroup().getIOMetricGroup().createSnapshot());

	updateTaskExecutionState(jobMasterGateway, reportedState);
}