org.apache.flink.runtime.executiongraph.ExecutionAttemptID Java Examples

The following examples show how to use org.apache.flink.runtime.executiongraph.ExecutionAttemptID. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileCache.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Removes the given execution attempt from the set of reference holders registered for the job.
 *
 * <p>Nothing happens when the job has no holders registered. When removing the attempt leaves
 * the set empty, a {@code DeleteProcess} for the job is scheduled on the executor service with
 * a delay of {@code cleanupInterval} milliseconds.
 *
 * @param jobId ID of the job whose holder set is updated; null-checked
 * @param executionId execution attempt that no longer references the job
 */
public void releaseJob(JobID jobId, ExecutionAttemptID executionId) {
	checkNotNull(jobId);

	synchronized (lock) {
		final Set<ExecutionAttemptID> holders = jobRefHolders.get(jobId);
		final boolean hasHolders = holders != null && !holders.isEmpty();

		if (hasHolders) {
			holders.remove(executionId);

			// the last holder is gone: schedule the delayed clean-up for this job
			if (holders.isEmpty()) {
				executorService.schedule(new DeleteProcess(jobId), cleanupInterval, TimeUnit.MILLISECONDS);
			}
		}
	}
}
 
Example #2
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a task can be submitted to the TaskManager once a slot has been allocated for it.
 */
@Test(timeout = 10000L)
public void testTaskSubmission() throws Exception {
	final ExecutionAttemptID attemptId = new ExecutionAttemptID();
	final TaskDeploymentDescriptor descriptor =
		createTestTaskDeploymentDescriptor("test task", attemptId, TaskExecutorTest.TestInvokable.class);

	// handed to the action listener registered for the RUNNING state below
	final CompletableFuture<Void> runningFuture = new CompletableFuture<>();

	try (TaskSubmissionTestEnvironment environment =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(1)
			.addTaskManagerActionListener(attemptId, ExecutionState.RUNNING, runningFuture)
			.build()) {
		final TaskExecutorGateway gateway = environment.getTaskExecutorGateway();
		final TaskSlotTable slotTable = environment.getTaskSlotTable();

		slotTable.allocateSlot(0, jobId, descriptor.getAllocationId(), Time.seconds(60));
		gateway.submitTask(descriptor, environment.getJobMasterId(), timeout).get();

		// blocks until the future is completed; the test timeout bounds the wait
		runningFuture.get();
	}
}
 
Example #3
Source File: StackTraceSampleCoordinatorTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/** Verifies that collecting stack traces for an already cancelled sample does not throw. */
@Test
public void testCollectStackTraceForCanceledSample() throws Exception {
	final ExecutionVertex[] sampledVertices = new ExecutionVertex[] {
		mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true),
	};

	final CompletableFuture<StackTraceSample> pendingSample =
		coord.triggerStackTraceSample(sampledVertices, 1, Time.milliseconds(100L), 0);
	Assert.assertFalse(pendingSample.isDone());

	coord.cancelStackTraceSample(0, null);
	Assert.assertTrue(pendingSample.isDone());

	// A collect that arrives after the cancellation must be tolerated.
	final ExecutionAttemptID lateAttemptId = sampledVertices[0].getCurrentExecutionAttempt().getAttemptId();
	coord.collectStackTraces(0, lateAttemptId, new ArrayList<StackTraceElement[]>());
}
 
Example #4
Source File: CheckpointCoordinatorTestingUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Convenience overload that forwards to the longer {@code mockExecutionVertex} variant,
 * passing {@code null} for that variant's additional fourth argument.
 */
static ExecutionVertex mockExecutionVertex(
		ExecutionAttemptID attemptID,
		JobVertexID jobVertexID,
		List<OperatorID> jobVertexIDs,
		int parallelism,
		int maxParallelism,
		ExecutionState state,
		ExecutionState... successiveStates) {

	return mockExecutionVertex(
		attemptID, jobVertexID, jobVertexIDs, null, parallelism, maxParallelism, state, successiveStates);
}
 
Example #5
Source File: PendingCheckpointTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link PendingCheckpoint} backed by a fresh folder created from {@code tmpFolder}.
 *
 * <p>The same directory is passed for all three path arguments of the storage location, and the
 * tasks to acknowledge are a copy of {@code ACK_TASKS}.
 *
 * @param props checkpoint properties handed to the pending checkpoint
 * @param executor executor handed to the pending checkpoint
 * @throws IOException declared because of the temporary folder creation
 */
private PendingCheckpoint createPendingCheckpoint(CheckpointProperties props, Executor executor) throws IOException {
	final Path baseDir = new Path(tmpFolder.newFolder().toURI());

	final FsCheckpointStorageLocation storageLocation = new FsCheckpointStorageLocation(
		LocalFileSystem.getSharedInstance(),
		baseDir,
		baseDir,
		baseDir,
		CheckpointStorageLocationReference.getDefault(),
		1024,
		4096);

	// copy, so the shared ACK_TASKS map is not the instance handed to the checkpoint
	final Map<ExecutionAttemptID, ExecutionVertex> tasksToAcknowledge = new HashMap<>(ACK_TASKS);

	return new PendingCheckpoint(new JobID(), 0, 1, tasksToAcknowledge, props, storageLocation, executor);
}
 
Example #6
Source File: SimpleAckingTaskManagerGateway.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Forwards all arguments of the trigger-checkpoint call, unchanged and in order, to the
 * configured {@code checkpointConsumer}.
 */
@Override
public void triggerCheckpoint(
		ExecutionAttemptID executionAttemptID,
		JobID jobId,
		long checkpointId,
		long timestamp,
		CheckpointOptions checkpointOptions,
		boolean advanceToEndOfEventTime) {

	checkpointConsumer.accept(
		executionAttemptID, jobId, checkpointId, timestamp, checkpointOptions, advanceToEndOfEventTime);
}
 
Example #7
Source File: TaskExecutionStateTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that two {@link TaskExecutionState} instances built from the same job ID, execution
 * attempt ID, state and error are equal and produce the same hash code.
 *
 * <p>Unexpected exceptions are left to propagate so that the test runner reports them with
 * their full stack trace, instead of printing the trace and failing with the bare message.
 *
 * @throws Exception if constructing the states fails unexpectedly
 */
@Test
public void testEqualsHashCode() throws Exception {
	final JobID jid = new JobID();
	final ExecutionAttemptID executionId = new ExecutionAttemptID();
	final ExecutionState state = ExecutionState.RUNNING;
	final Throwable error = new RuntimeException("some test error message");

	final TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
	final TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);

	assertEquals(s1.hashCode(), s2.hashCode());
	assertEquals(s1, s2);
}
 
Example #8
Source File: TaskExecutionStateTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that two {@link TaskExecutionState} instances built from the same job ID, execution
 * attempt ID, state and error are equal and produce the same hash code.
 *
 * <p>Unexpected exceptions are left to propagate so that the test runner reports them with
 * their full stack trace, instead of printing the trace and failing with the bare message.
 *
 * @throws Exception if constructing the states fails unexpectedly
 */
@Test
public void testEqualsHashCode() throws Exception {
	final JobID jid = new JobID();
	final ExecutionAttemptID executionId = new ExecutionAttemptID();
	final ExecutionState state = ExecutionState.RUNNING;
	final Throwable error = new RuntimeException("some test error message");

	final TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
	final TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);

	assertEquals(s1.hashCode(), s2.hashCode());
	assertEquals(s1, s2);
}
 
Example #9
Source File: LazyFromSourcesSchedulingStrategyTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a consumable-partition notification schedules the downstream vertices that are
 * connected through a {@link ResultPartitionType#PIPELINED} result partition.
 */
@Test
public void testPipelinedPartitionConsumable() {
	final TestingSchedulingTopology topology = new TestingSchedulingTopology();

	final List<TestingSchedulingExecutionVertex> upstream =
		topology.addExecutionVertices().withParallelism(2).finish();
	final List<TestingSchedulingExecutionVertex> downstream =
		topology.addExecutionVertices().withParallelism(2).finish();
	topology.connectAllToAll(upstream, downstream).withResultPartitionType(PIPELINED).finish();

	final LazyFromSourcesSchedulingStrategy strategy = startScheduling(topology);

	final TestingSchedulingExecutionVertex firstProducer = upstream.get(0);
	final SchedulingResultPartition firstPartition =
		firstProducer.getProducedResultPartitions().iterator().next();

	// report the producer as running, then announce its partition as consumable
	strategy.onExecutionStateChange(firstProducer.getId(), ExecutionState.RUNNING);
	final ResultPartitionID consumablePartition =
		new ResultPartitionID(firstPartition.getId(), new ExecutionAttemptID());
	strategy.onPartitionConsumable(firstProducer.getId(), consumablePartition);

	assertThat(testingSchedulerOperation, hasScheduledVertices(downstream));
}
 
Example #10
Source File: RpcTaskManagerGateway.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Delegates the stack trace sample request, with all arguments unchanged, to the wrapped
 * {@code taskExecutorGateway} and returns its future.
 */
@Override
public CompletableFuture<StackTraceSampleResponse> requestStackTraceSample(
		ExecutionAttemptID executionAttemptID,
		int sampleId,
		int numSamples,
		Time delayBetweenSamples,
		int maxStackTraceDepth,
		Time timeout) {

	return taskExecutorGateway.requestStackTraceSample(
		executionAttemptID, sampleId, numSamples, delayBetweenSamples, maxStackTraceDepth, timeout);
}
 
Example #11
Source File: TestingTaskExecutorGateway.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the testing gateway from its address, its hostname and the callbacks it is configured with.
 *
 * <p>The address, the hostname and the first five callbacks are null-checked; the remaining
 * callbacks are stored exactly as passed.
 *
 * <p>NOTE(review): it is not visible from here whether callers may legitimately pass
 * {@code null} for the unchecked callbacks - confirm before adding further null checks.
 */
TestingTaskExecutorGateway(
		String address,
		String hostname,
		BiConsumer<ResourceID, AllocatedSlotReport> heartbeatJobManagerConsumer,
		BiConsumer<JobID, Throwable> disconnectJobManagerConsumer,
		BiFunction<TaskDeploymentDescriptor, JobMasterId, CompletableFuture<Acknowledge>> submitTaskConsumer,
		Function<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>, CompletableFuture<Acknowledge>> requestSlotFunction,
		BiFunction<AllocationID, Throwable, CompletableFuture<Acknowledge>> freeSlotFunction,
		Consumer<ResourceID> heartbeatResourceManagerConsumer,
		Consumer<Exception> disconnectResourceManagerConsumer,
		Function<ExecutionAttemptID, CompletableFuture<Acknowledge>> cancelTaskFunction,
		Supplier<CompletableFuture<Boolean>> canBeReleasedSupplier,
		BiConsumer<JobID, Collection<ResultPartitionID>> releasePartitionsConsumer) {
	// null-checked arguments
	this.address = Preconditions.checkNotNull(address);
	this.hostname = Preconditions.checkNotNull(hostname);
	this.heartbeatJobManagerConsumer = Preconditions.checkNotNull(heartbeatJobManagerConsumer);
	this.disconnectJobManagerConsumer = Preconditions.checkNotNull(disconnectJobManagerConsumer);
	this.submitTaskConsumer = Preconditions.checkNotNull(submitTaskConsumer);
	this.requestSlotFunction = Preconditions.checkNotNull(requestSlotFunction);
	this.freeSlotFunction = Preconditions.checkNotNull(freeSlotFunction);
	// stored without a null check
	this.heartbeatResourceManagerConsumer = heartbeatResourceManagerConsumer;
	this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer;
	this.cancelTaskFunction = cancelTaskFunction;
	this.canBeReleasedSupplier = canBeReleasedSupplier;
	this.releasePartitionsConsumer = releasePartitionsConsumer;
}
 
Example #12
Source File: TestingTaskExecutorGateway.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the testing gateway from its address, its hostname and the callbacks it is configured with.
 *
 * <p>The address, the hostname and the first five callbacks are null-checked; the remaining
 * callbacks are stored exactly as passed.
 *
 * <p>NOTE(review): it is not visible from here whether callers may legitimately pass
 * {@code null} for the unchecked callbacks - confirm before adding further null checks.
 */
TestingTaskExecutorGateway(
		String address,
		String hostname,
		BiConsumer<ResourceID, AllocatedSlotReport> heartbeatJobManagerConsumer,
		BiConsumer<JobID, Throwable> disconnectJobManagerConsumer,
		BiFunction<TaskDeploymentDescriptor, JobMasterId, CompletableFuture<Acknowledge>> submitTaskConsumer,
		Function<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>, CompletableFuture<Acknowledge>> requestSlotFunction,
		BiFunction<AllocationID, Throwable, CompletableFuture<Acknowledge>> freeSlotFunction,
		Consumer<ResourceID> heartbeatResourceManagerConsumer,
		Consumer<Exception> disconnectResourceManagerConsumer,
		Function<ExecutionAttemptID, CompletableFuture<Acknowledge>> cancelTaskFunction,
		Supplier<Boolean> canBeReleasedSupplier) {
	// null-checked arguments
	this.address = Preconditions.checkNotNull(address);
	this.hostname = Preconditions.checkNotNull(hostname);
	this.heartbeatJobManagerConsumer = Preconditions.checkNotNull(heartbeatJobManagerConsumer);
	this.disconnectJobManagerConsumer = Preconditions.checkNotNull(disconnectJobManagerConsumer);
	this.submitTaskConsumer = Preconditions.checkNotNull(submitTaskConsumer);
	this.requestSlotFunction = Preconditions.checkNotNull(requestSlotFunction);
	this.freeSlotFunction = Preconditions.checkNotNull(freeSlotFunction);
	// stored without a null check
	this.heartbeatResourceManagerConsumer = heartbeatResourceManagerConsumer;
	this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer;
	this.cancelTaskFunction = cancelTaskFunction;
	this.canBeReleasedSupplier = canBeReleasedSupplier;
}
 
Example #13
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Notifies the task registered under the given execution attempt that a checkpoint completed.
 *
 * @param executionAttemptID attempt whose task is looked up in the task slot table
 * @param checkpointId ID of the completed checkpoint, passed on to the task
 * @param checkpointTimestamp timestamp of the checkpoint; only used for logging here
 * @return a completed acknowledge future when the task is found, otherwise a future completed
 *     exceptionally with a {@code CheckpointException}
 */
@Override
public CompletableFuture<Acknowledge> confirmCheckpoint(
		ExecutionAttemptID executionAttemptID,
		long checkpointId,
		long checkpointTimestamp) {
	log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

	final Task task = taskSlotTable.getTask(executionAttemptID);

	if (task == null) {
		// unknown task: report the problem through the returned future
		final String message = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';

		log.debug(message);
		return FutureUtils.completedExceptionally(new CheckpointException(message));
	}

	task.notifyCheckpointComplete(checkpointId);
	return CompletableFuture.completedFuture(Acknowledge.get());
}
 
Example #14
Source File: StackTraceSample.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a stack trace sample.
 *
 * <p>The arguments are validated with {@code checkArgument}: the sample ID and the start time
 * must not be negative, and the end time must not lie before the start time. The given map is
 * stored behind an unmodifiable view.
 *
 * @param sampleId          ID of the sample.
 * @param startTime         Time stamp, when the sample was triggered.
 * @param endTime           Time stamp, when all stack traces were
 *                          collected at the JobManager.
 * @param stackTracesByTask Map of stack traces by execution ID.
 */
public StackTraceSample(
		int sampleId,
		long startTime,
		long endTime,
		Map<ExecutionAttemptID, List<StackTraceElement[]>> stackTracesByTask) {

	checkArgument(0 <= sampleId, "Negative sample ID");
	checkArgument(0 <= startTime, "Negative start time");
	checkArgument(startTime <= endTime, "End time before start time");

	this.sampleId = sampleId;
	this.startTime = startTime;
	this.endTime = endTime;
	// read-only view; the map itself is not copied
	this.stackTracesByTask = Collections.unmodifiableMap(stackTracesByTask);
}
 
Example #15
Source File: TaskExecutor.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Requests a stack trace sample for the task registered under the given execution attempt.
 *
 * <p>The sampling is delegated to {@code stackTraceSampleService}; its result is wrapped into
 * a {@code StackTraceSampleResponse} carrying the sample ID and the execution attempt ID.
 *
 * @return the future response, or a future completed exceptionally with an
 *     {@link IllegalStateException} when no task is registered under the attempt ID
 */
@Override
public CompletableFuture<StackTraceSampleResponse> requestStackTraceSample(
		final ExecutionAttemptID executionAttemptId,
		final int sampleId,
		final int numSamples,
		final Time delayBetweenSamples,
		final int maxStackTraceDepth,
		final Time timeout) {

	final Task task = taskSlotTable.getTask(executionAttemptId);
	if (task == null) {
		final String message = String.format(
			"Cannot sample task %s. Task is not known to the task manager.", executionAttemptId);
		return FutureUtils.completedExceptionally(new IllegalStateException(message));
	}

	return stackTraceSampleService
		.requestStackTraceSample(
			TaskStackTraceSampleableTaskAdapter.fromTask(task),
			numSamples,
			delayBetweenSamples,
			maxStackTraceDepth)
		.thenApply(traces -> new StackTraceSampleResponse(sampleId, executionAttemptId, traces));
}
 
Example #16
Source File: CheckpointCoordinator.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience overload that forwards to the four-argument variant, passing {@code null} for
 * that variant's third argument.
 */
private void failPendingCheckpointDueToTaskFailure(
		final PendingCheckpoint pendingCheckpoint,
		final CheckpointFailureReason reason,
		final ExecutionAttemptID executionAttemptID) {
	failPendingCheckpointDueToTaskFailure(
		pendingCheckpoint,
		reason,
		null,
		executionAttemptID);
}
 
Example #17
Source File: ActorTaskManagerGateway.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Sends a {@code NotifyCheckpointComplete} message for the given checkpoint through the
 * {@code actorGateway}, after null-checking the execution attempt ID and the job ID.
 */
@Override
public void notifyCheckpointComplete(
		ExecutionAttemptID executionAttemptID,
		JobID jobId,
		long checkpointId,
		long timestamp) {

	Preconditions.checkNotNull(executionAttemptID);
	Preconditions.checkNotNull(jobId);

	final NotifyCheckpointComplete notification =
		new NotifyCheckpointComplete(jobId, executionAttemptID, checkpointId, timestamp);
	actorGateway.tell(notification);
}
 
Example #18
Source File: SchedulerBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the string form of the resource location assigned to the execution registered under
 * the given attempt ID, or {@code "Unknown location"} when the execution or its location is
 * absent.
 */
private String retrieveTaskManagerLocation(ExecutionAttemptID executionAttemptID) {
	final Execution registeredExecution = executionGraph.getRegisteredExecutions().get(executionAttemptID);

	return Optional.ofNullable(registeredExecution)
		.map(Execution::getAssignedResourceLocation)
		.map(TaskManagerLocation::toString)
		.orElse("Unknown location");
}
 
Example #19
Source File: TaskCheckpointingBehaviourTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Records a declined checkpoint by triggering {@code declinedLatch}; the arguments themselves
 * are not inspected.
 */
@Override
public void declineCheckpoint(
		JobID jobID,
		ExecutionAttemptID executionAttemptID,
		long checkpointId,
		Throwable cause) {
	declinedLatch.trigger();
}
 
Example #20
Source File: NetworkEnvironment.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Unregisters the given task from the network environment.
 *
 * <p>Under the environment lock, and only while the environment is not shut down, this
 * releases the partitions produced by a cancelled or failed task, unregisters every produced
 * partition from the task event dispatcher and destroys its buffer pool, and finally releases
 * the resources of all input gates of the task.
 *
 * @param task the task to unregister
 */
public void unregisterTask(Task task) {
	LOG.debug("Unregister task {} from network environment (state: {}).",
			task.getTaskInfo().getTaskNameWithSubtasks(), task.getExecutionState());

	final ExecutionAttemptID executionId = task.getExecutionId();

	synchronized (lock) {
		// no need to do anything when we are not operational
		if (isShutdown) {
			return;
		}

		if (task.isCanceledOrFailed()) {
			resultPartitionManager.releasePartitionsProducedBy(executionId, task.getFailureCause());
		}

		for (ResultPartition producedPartition : task.getProducedPartitions()) {
			taskEventDispatcher.unregisterPartition(producedPartition.getPartitionId());
			producedPartition.destroyBufferPool();
		}

		final SingleInputGate[] gates = task.getAllInputGates();
		if (gates == null) {
			return;
		}

		for (SingleInputGate inputGate : gates) {
			if (inputGate == null) {
				continue;
			}

			// a failure on one gate must not prevent the release of the remaining gates
			try {
				inputGate.releaseAllResources();
			}
			catch (IOException e) {
				LOG.error("Error during release of reader resources: " + e.getMessage(), e);
			}
		}
	}
}
 
Example #21
Source File: TaskExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the task registered under the given execution attempt from the task slot table and
 * sends its final execution state through the given JobMaster gateway.
 *
 * <p>A removed task that is not yet in a terminal state is failed externally first; a failure
 * of that call is logged and does not prevent the state update. When no task is registered
 * under the attempt ID, an error is logged and nothing is sent.
 *
 * @param jobMasterGateway gateway that receives the final task execution state
 * @param executionAttemptID attempt whose task is removed
 */
private void unregisterTaskAndNotifyFinalState(
		final JobMasterGateway jobMasterGateway,
		final ExecutionAttemptID executionAttemptID) {

	final Task task = taskSlotTable.removeTask(executionAttemptID);
	if (task == null) {
		log.error("Cannot find task with ID {} to unregister.", executionAttemptID);
		return;
	}

	if (!task.getExecutionState().isTerminal()) {
		try {
			task.failExternally(new IllegalStateException("Task is being removed from TaskManager."));
		} catch (Exception e) {
			log.error("Could not properly fail task.", e);
		}
	}

	log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.",
		task.getExecutionState(), task.getTaskInfo().getTaskName(), task.getExecutionId());

	final AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot();

	updateTaskExecutionState(
			jobMasterGateway,
			new TaskExecutionState(
				task.getJobID(),
				task.getExecutionId(),
				task.getExecutionState(),
				task.getFailureCause(),
				accumulatorSnapshot,
				task.getMetricGroup().getIOMetricGroup().createSnapshot()));
}
 
Example #22
Source File: TestCheckpointResponder.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a decline report: job ID, execution attempt ID and checkpoint ID go to the super
 * constructor, the cause is kept in this report.
 */
public DeclineReport(
		JobID jobID,
		ExecutionAttemptID executionAttemptID,
		long checkpointId,
		Throwable cause) {
	super(jobID, executionAttemptID, checkpointId);

	this.cause = cause;
}
 
Example #23
Source File: StackTraceSampleResponse.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a response for the given sample.
 *
 * @param sampleId ID of the sample this response belongs to
 * @param executionAttemptID execution attempt the samples were taken from; null-checked
 * @param samples the collected stack traces; null-checked
 */
public StackTraceSampleResponse(int sampleId, ExecutionAttemptID executionAttemptID, List<StackTraceElement[]> samples) {
	this.sampleId = sampleId;

	this.executionAttemptID = Preconditions.checkNotNull(executionAttemptID);
	this.samples = Preconditions.checkNotNull(samples);
}
 
Example #24
Source File: StackTraceSampleCoordinatorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/** Verifies that collecting stack traces for an execution attempt outside the sample fails. */
@Test(expected = IllegalArgumentException.class)
public void testCollectStackTraceForUnknownTask() throws Exception {
	final ExecutionVertex[] sampledVertices = new ExecutionVertex[] {
		mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true),
	};

	coord.triggerStackTraceSample(sampledVertices, 1, Time.milliseconds(100L), 0);

	// a fresh attempt ID that differs from the one of the sampled vertex
	final ExecutionAttemptID unknownAttemptId = new ExecutionAttemptID();
	coord.collectStackTraces(0, unknownAttemptId, new ArrayList<StackTraceElement[]>());
}
 
Example #25
Source File: SingleInputGateTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code numberOfGates} input gates through the given shuffle environment and returns
 * them keyed by their {@code InputGateID}.
 *
 * <p>All gates share one array of {@code numberOfLocalChannels} shuffle descriptors and one
 * freshly created consumer execution attempt ID.
 *
 * @param network shuffle environment used to create the gates
 * @param numberOfGates number of gates to create
 * @param numberOfLocalChannels number of channel descriptors per gate
 * @return the created gates by gate ID
 */
private static Map<InputGateID, SingleInputGate> createInputGateWithLocalChannels(
		NettyShuffleEnvironment network,
		int numberOfGates,
		@SuppressWarnings("SameParameterValue") int numberOfLocalChannels) {
	final ShuffleDescriptor[] shuffleDescriptors = new NettyShuffleDescriptor[numberOfLocalChannels];
	for (int channel = 0; channel < numberOfLocalChannels; channel++) {
		shuffleDescriptors[channel] =
			createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
	}

	final IntermediateDataSetID[] dataSetIds = new IntermediateDataSetID[numberOfGates];
	final InputGateDeploymentDescriptor[] gateDescriptors = new InputGateDeploymentDescriptor[numberOfGates];
	for (int gate = 0; gate < numberOfGates; gate++) {
		dataSetIds[gate] = new IntermediateDataSetID();
		gateDescriptors[gate] = new InputGateDeploymentDescriptor(
			dataSetIds[gate], ResultPartitionType.PIPELINED, 0, shuffleDescriptors);
	}

	final ExecutionAttemptID consumerID = new ExecutionAttemptID();
	final SingleInputGate[] createdGates = network
		.createInputGates(
			network.createShuffleIOOwnerContext("", consumerID, new UnregisteredMetricsGroup()),
			SingleInputGateBuilder.NO_OP_PRODUCER_CHECKER,
			Arrays.asList(gateDescriptors))
		.toArray(new SingleInputGate[0]);

	// the i-th created gate is keyed by the i-th data set ID
	final Map<InputGateID, SingleInputGate> gatesById = new HashMap<>();
	for (int gate = 0; gate < numberOfGates; gate++) {
		gatesById.put(new InputGateID(dataSetIds[gate], consumerID), createdGates[gate]);
	}
	return gatesById;
}
 
Example #26
Source File: TestCheckpointResponder.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an acknowledge report: job ID, execution attempt ID and checkpoint ID go to the
 * super constructor, the checkpoint metrics and the subtask state are kept in this report.
 */
public AcknowledgeReport(
		JobID jobID,
		ExecutionAttemptID executionAttemptID,
		long checkpointId,
		CheckpointMetrics checkpointMetrics,
		TaskStateSnapshot subtaskState) {
	super(jobID, executionAttemptID, checkpointId);

	this.checkpointMetrics = checkpointMetrics;
	this.subtaskState = subtaskState;
}
 
Example #27
Source File: SchedulerBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Hands an operator event sent by a task to the coordinator registered for the operator.
 *
 * @param taskExecutionId execution attempt of the sending task
 * @param operatorId operator whose coordinator should receive the event
 * @param evt the event to deliver
 * @throws FlinkException if the task is unknown or not running (as a
 *     {@code TaskNotRunningException}), or if no coordinator is registered for the operator
 */
@Override
public void deliverOperatorEventToCoordinator(
		final ExecutionAttemptID taskExecutionId,
		final OperatorID operatorId,
		final OperatorEvent evt) throws FlinkException {

	// Failure semantics (as per the javadocs of the method):
	// An event for a non-running task or a non-existing operator coordinator is answered with
	// an exception. Once task and coordinator both exist, the call is assumed to be valid and
	// any exception bubbling up from the coordinator has to cause a job failure.

	final Execution execution = executionGraph.getRegisteredExecutions().get(taskExecutionId);
	final boolean taskIsRunning = execution != null && execution.getState() == ExecutionState.RUNNING;
	if (!taskIsRunning) {
		// Common during cancellation, or when the task failed while the event was still being
		// dispatched asynchronously on the TM side. Ignoring the event should be fine in those
		// expected situations, but to be on the safe side the TM is told that the event could
		// not be delivered.
		throw new TaskNotRunningException("Task is not known or in state running on the JobManager.");
	}

	final OperatorCoordinatorHolder coordinatorHolder = coordinatorMap.get(operatorId);
	if (coordinatorHolder == null) {
		throw new FlinkException("No coordinator registered for operator " + operatorId);
	}

	try {
		coordinatorHolder.handleEventFromOperator(execution.getParallelSubtaskIndex(), evt);
	} catch (Throwable t) {
		ExceptionUtils.rethrowIfFatalErrorOrOOM(t);
		handleGlobalFailure(t);
	}
}
 
Example #28
Source File: TestingTaskExecutorGateway.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the testing gateway from its address, its hostname and the callbacks it is configured with.
 *
 * <p>The address, the hostname and the first five callbacks are null-checked; the remaining
 * callbacks are stored exactly as passed.
 *
 * <p>NOTE(review): it is not visible from here whether callers may legitimately pass
 * {@code null} for the unchecked callbacks - confirm before adding further null checks.
 */
TestingTaskExecutorGateway(
		String address,
		String hostname,
		BiConsumer<ResourceID, AllocatedSlotReport> heartbeatJobManagerConsumer,
		BiConsumer<JobID, Throwable> disconnectJobManagerConsumer,
		BiFunction<TaskDeploymentDescriptor, JobMasterId, CompletableFuture<Acknowledge>> submitTaskConsumer,
		Function<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>, CompletableFuture<Acknowledge>> requestSlotFunction,
		BiFunction<AllocationID, Throwable, CompletableFuture<Acknowledge>> freeSlotFunction,
		Consumer<ResourceID> heartbeatResourceManagerConsumer,
		Consumer<Exception> disconnectResourceManagerConsumer,
		Function<ExecutionAttemptID, CompletableFuture<Acknowledge>> cancelTaskFunction,
		Supplier<CompletableFuture<Boolean>> canBeReleasedSupplier,
		TriConsumer<JobID, Set<ResultPartitionID>, Set<ResultPartitionID>> releaseOrPromotePartitionsConsumer,
		Consumer<Collection<IntermediateDataSetID>> releaseClusterPartitionsConsumer,
		TriFunction<ExecutionAttemptID, OperatorID, SerializedValue<OperatorEvent>, CompletableFuture<Acknowledge>> operatorEventHandler,
		Supplier<CompletableFuture<ThreadDumpInfo>> requestThreadDumpSupplier) {

	// null-checked arguments
	this.address = Preconditions.checkNotNull(address);
	this.hostname = Preconditions.checkNotNull(hostname);
	this.heartbeatJobManagerConsumer = Preconditions.checkNotNull(heartbeatJobManagerConsumer);
	this.disconnectJobManagerConsumer = Preconditions.checkNotNull(disconnectJobManagerConsumer);
	this.submitTaskConsumer = Preconditions.checkNotNull(submitTaskConsumer);
	this.requestSlotFunction = Preconditions.checkNotNull(requestSlotFunction);
	this.freeSlotFunction = Preconditions.checkNotNull(freeSlotFunction);
	// stored without a null check
	this.heartbeatResourceManagerConsumer = heartbeatResourceManagerConsumer;
	this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer;
	this.cancelTaskFunction = cancelTaskFunction;
	this.canBeReleasedSupplier = canBeReleasedSupplier;
	this.releaseOrPromotePartitionsConsumer = releaseOrPromotePartitionsConsumer;
	this.releaseClusterPartitionsConsumer = releaseClusterPartitionsConsumer;
	this.operatorEventHandler = operatorEventHandler;
	this.requestThreadDumpSupplier = requestThreadDumpSupplier;
}
 
Example #29
Source File: TaskExecutorSubmissionTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that repeated remote {@link PartitionNotFoundException}s eventually fail the receiving task.
 */
@Test(timeout = 10000L)
public void testRemotePartitionNotFound() throws Exception {
	final int port = NetUtils.getAvailablePort();

	final Configuration configuration = new Configuration();
	configuration.setInteger(NettyShuffleEnvironmentOptions.DATA_PORT, port);
	configuration.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
	configuration.setInteger(NettyShuffleEnvironmentOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);

	// The partition is declared as remote, although it points at the data port of this very TM.
	final NettyShuffleDescriptor remoteDescriptor =
		NettyShuffleDescriptorBuilder.newBuilder().setDataPort(port).buildRemote();
	final TaskDeploymentDescriptor receiver = createReceiver(remoteDescriptor);
	final ExecutionAttemptID attemptId = receiver.getExecutionAttemptId();

	final CompletableFuture<Void> runningFuture = new CompletableFuture<>();
	final CompletableFuture<Void> failedFuture = new CompletableFuture<>();

	try (TaskSubmissionTestEnvironment environment =
		new TaskSubmissionTestEnvironment.Builder(jobId)
			.setSlotSize(2)
			.addTaskManagerActionListener(attemptId, ExecutionState.RUNNING, runningFuture)
			.addTaskManagerActionListener(attemptId, ExecutionState.FAILED, failedFuture)
			.setConfiguration(configuration)
			.setLocalCommunication(false)
			.useRealNonMockShuffleEnvironment()
			.build()) {
		final TaskExecutorGateway gateway = environment.getTaskExecutorGateway();
		final TaskSlotTable slotTable = environment.getTaskSlotTable();

		slotTable.allocateSlot(0, jobId, receiver.getAllocationId(), Time.seconds(60));
		gateway.submitTask(receiver, environment.getJobMasterId(), timeout).get();

		// wait for the task to start and then to fail
		runningFuture.get();
		failedFuture.get();

		assertThat(slotTable.getTask(attemptId).getFailureCause(), instanceOf(PartitionNotFoundException.class));
	}
}
 
Example #30
Source File: UpdateSchedulerNgOnInternalFailuresListener.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Reports the failure of the given execution attempt to the scheduler as a
 * {@code TaskExecutionState} in state {@code FAILED} carrying the given throwable.
 */
@Override
public void notifyTaskFailure(final ExecutionAttemptID attemptId, final Throwable t) {
	final TaskExecutionState failedState =
		new TaskExecutionState(jobId, attemptId, ExecutionState.FAILED, t);

	schedulerNg.updateTaskExecutionState(failedState);
}