org.apache.flink.runtime.instance.ActorGateway Java Examples

The following examples show how to use org.apache.flink.runtime.instance.ActorGateway. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DFCusterClient.java    From df_data_service with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the JobManager for the current cluster status and blocks until the reply arrives.
 *
 * @return the status reply received from the JobManager
 * @throws RuntimeException if the gateway lookup, the request or the wait fails, or if the
 *         reply is not a {@link GetClusterStatusResponse}
 */
@Override
public GetClusterStatusResponse getClusterStatus() {
    try {
        final ActorGateway jmGateway = getJobManagerGateway();
        final Future<Object> pendingReply = jmGateway.ask(GetClusterStatus.getInstance(), timeout);
        final Object reply = Await.result(pendingReply, timeout);

        if (!(reply instanceof GetClusterStatusResponse)) {
            // Raised inside the try block: the catch below wraps it like any other failure.
            throw new RuntimeException("Received the wrong reply " + reply + " from cluster.");
        }
        return (GetClusterStatusResponse) reply;
    } catch (Exception e) {
        throw new RuntimeException("Couldn't retrieve the Cluster status.", e);
    }
}
 
Example #2
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a cancel request for the given job to the JobManager and waits for the answer.
 *
 * @param jobId the job id
 * @throws Exception if the JobManager gateway cannot be obtained, waiting for the reply fails,
 *                   or the JobManager answers with a cancellation failure
 * @throws IllegalStateException if the reply is neither a cancellation success nor a failure
 */
public void cancel(JobID jobId) throws Exception {
	final ActorGateway jobManager = getJobManagerGateway();
	final Future<Object> pendingReply = jobManager.ask(new JobManagerMessages.CancelJob(jobId), timeout);
	final Object reply = Await.result(pendingReply, timeout);

	if (reply instanceof JobManagerMessages.CancellationSuccess) {
		// cancellation acknowledged, nothing else to do
		return;
	}

	if (reply instanceof JobManagerMessages.CancellationFailure) {
		final JobManagerMessages.CancellationFailure failure = (JobManagerMessages.CancellationFailure) reply;
		throw new Exception("Canceling the job with ID " + jobId + " failed.", failure.cause());
	}

	throw new IllegalStateException("Unexpected response: " + reply);
}
 
Example #3
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Requests the {@link JobStatus} of the job with the given {@link JobID}.
 *
 * <p>The gateway lookup happens synchronously; the status request itself is asynchronous.
 *
 * @param jobId id of the job whose status is requested
 * @return future that yields the job status; it fails with a {@link CompletionException}
 *         wrapping an {@link IllegalStateException} if the JobManager does not know the job
 *         or answers with an unexpected message type
 * @throws RuntimeException if the JobManager gateway cannot be retrieved
 */
public CompletableFuture<JobStatus> getJobStatus(JobID jobId) {
	final ActorGateway jobManager;
	try {
		jobManager = getJobManagerGateway();
	} catch (FlinkException e) {
		// Message previously read "JobManage gateway" (typo).
		throw new RuntimeException("Could not retrieve JobManager gateway.", e);
	}

	final Future<Object> response = jobManager.ask(JobManagerMessages.getRequestJobStatus(jobId), timeout);

	return FutureUtils.toJava(response).thenApply(responseMessage -> {
		if (responseMessage instanceof JobManagerMessages.CurrentJobStatus) {
			return ((JobManagerMessages.CurrentJobStatus) responseMessage).status();
		}

		if (responseMessage instanceof JobManagerMessages.JobNotFound) {
			throw new CompletionException(
				new IllegalStateException("Could not find job with JobId " + jobId));
		}

		throw new CompletionException(
			new IllegalStateException("Unknown JobManager response of type " + responseMessage.getClass()));
	});
}
 
Example #4
Source File: TaskInputSplitProviderTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Requests an input split through a {@link TaskInputSplitProvider} that talks to a
 * {@code NullInputSplitGateway} and checks that no split is handed out.
 */
@Test
public void testRequestNextInputSplitWithInvalidExecutionID() throws InputSplitProviderException {
	final FiniteDuration timeout = new FiniteDuration(10, TimeUnit.SECONDS);
	final ActorGateway gateway = new NullInputSplitGateway();

	// Fresh, random identifiers for job, vertex and execution attempt.
	final TaskInputSplitProvider provider = new TaskInputSplitProvider(
		gateway,
		new JobID(),
		new JobVertexID(),
		new ExecutionAttemptID(),
		timeout);

	// NOTE(review): presumably the stub gateway answers with a null split - confirm against
	// NullInputSplitGateway.
	final InputSplit receivedSplit = provider.getNextInputSplit(getClass().getClassLoader());

	assertTrue(receivedSplit == null);
}
 
Example #5
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the JobManager to cancel the given job together with a savepoint and waits for the answer.
 *
 * @param jobId the job id
 * @param savepointDirectory directory the savepoint should be written to; may be {@code null}
 * @return path where the savepoint is located, as reported by the JobManager
 * @throws Exception if the JobManager gateway cannot be obtained, waiting for the reply fails,
 *                   or the JobManager answers with a cancellation failure
 * @throws IllegalStateException if the reply is neither a cancellation success nor a failure
 */
public String cancelWithSavepoint(JobID jobId, @Nullable String savepointDirectory) throws Exception {
	final ActorGateway jobManager = getJobManagerGateway();
	final Future<Object> pendingReply = jobManager.ask(
		new JobManagerMessages.CancelJobWithSavepoint(jobId, savepointDirectory),
		timeout);
	final Object reply = Await.result(pendingReply, timeout);

	if (reply instanceof JobManagerMessages.CancellationSuccess) {
		return ((JobManagerMessages.CancellationSuccess) reply).savepointPath();
	}

	if (reply instanceof JobManagerMessages.CancellationFailure) {
		final JobManagerMessages.CancellationFailure failure = (JobManagerMessages.CancellationFailure) reply;
		throw new Exception("Cancel & savepoint for the job with ID " + jobId + " failed.", failure.cause());
	}

	throw new IllegalStateException("Unexpected response: " + reply);
}
 
Example #6
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Stops a program on the Flink cluster whose job-manager is configured in this client's
 * configuration. Stopping works only for streaming programs. Be aware that the program might
 * continue to run for a while after the stop command was sent, because all operators still
 * need to finish processing once the sources have stopped emitting data.
 *
 * @param jobId
 *            the job ID of the streaming program to stop
 * @throws Exception
 *             If the job ID is invalid (i.e., is unknown or refers to a batch job) or if sending
 *             the stop signal failed. That might be due to an I/O problem, i.e., the job-manager
 *             is unreachable.
 * @throws IllegalStateException
 *             If the reply is neither a stopping success nor a stopping failure.
 */
public void stop(final JobID jobId) throws Exception {
	final ActorGateway jobManager = getJobManagerGateway();
	final Object reply = Await.result(
		jobManager.ask(new JobManagerMessages.StopJob(jobId), timeout),
		timeout);

	if (reply instanceof JobManagerMessages.StoppingSuccess) {
		// stop acknowledged, nothing else to do
		return;
	}

	if (reply instanceof JobManagerMessages.StoppingFailure) {
		final JobManagerMessages.StoppingFailure failure = (JobManagerMessages.StoppingFailure) reply;
		throw new Exception("Stopping the job with ID " + jobId + " failed.", failure.cause());
	}

	throw new IllegalStateException("Unexpected response: " + reply);
}
 
Example #7
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Triggers a savepoint for the job identified by the job id. The savepoint will be written to the
 * given savepoint directory, or
 * {@link org.apache.flink.configuration.CheckpointingOptions#SAVEPOINT_DIRECTORY} if it is null.
 *
 * <p>The request is sent with a fixed ask timeout of one hour.
 *
 * @param jobId job id
 * @param savepointDirectory directory the savepoint should be written to
 * @return future of the path where the savepoint is located; it fails with a
 *         {@link CompletionException} if the JobManager reports a failure or answers with an
 *         unexpected message type
 * @throws FlinkException if no connection to the cluster could be established
 */
public CompletableFuture<String> triggerSavepoint(JobID jobId, @Nullable String savepointDirectory) throws FlinkException {
	final ActorGateway jobManager = getJobManagerGateway();

	final Object triggerMsg = new JobManagerMessages.TriggerSavepoint(jobId, Option.<String>apply(savepointDirectory));
	final FiniteDuration savepointTimeout = new FiniteDuration(1, TimeUnit.HOURS);

	return FutureUtils.toJava(jobManager.ask(triggerMsg, savepointTimeout)).thenApply(reply -> {
		if (reply instanceof JobManagerMessages.TriggerSavepointSuccess) {
			return ((JobManagerMessages.TriggerSavepointSuccess) reply).savepointPath();
		}

		if (reply instanceof JobManagerMessages.TriggerSavepointFailure) {
			throw new CompletionException(((JobManagerMessages.TriggerSavepointFailure) reply).cause());
		}

		throw new CompletionException(
			new IllegalStateException("Unknown JobManager response of type " + reply.getClass()));
	});
}
 
Example #8
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a {@code FatalError} message to a freshly created TaskManager and expects the
 * TaskManager actor to terminate.
 */
@Test
public void testTerminationOnFatalError() {
	// no jobmanager: the leader retriever registered here is never given a leader in this test
	final SettableLeaderRetrievalService leaderRetriever = new SettableLeaderRetrievalService();
	highAvailabilityServices.setJobMasterLeaderRetriever(
		HighAvailabilityServices.DEFAULT_JOB_ID,
		leaderRetriever);

	new JavaTestKit(system) {{
		final ActorGateway taskManager = TestingUtils.createTaskManager(
			system, highAvailabilityServices, new Configuration(), true, false);

		try {
			// Register for the termination notification before provoking the error.
			watch(taskManager.actor());

			final FatalError fatalError =
				new FatalError("test fatal error", new Exception("something super bad"));
			taskManager.tell(fatalError);

			expectTerminated(d, taskManager.actor());
		} finally {
			// Make sure the actor is gone even if the expectation above failed.
			taskManager.tell(Kill.getInstance());
		}
	}};
}
 
Example #9
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the currently running and finished jobs on the cluster.
 *
 * <p>The job details returned by the JobManager are converted one by one into
 * {@link JobStatusMessage}s carrying job id, name, status and start time.
 *
 * @return future collection of running and finished jobs; it fails with a
 *         {@link CompletionException} if the JobManager answers with an unexpected message type
 * @throws Exception if no connection to the cluster could be established
 */
public CompletableFuture<Collection<JobStatusMessage>> listJobs() throws Exception {
	final ActorGateway jobManager = getJobManagerGateway();
	final Future<Object> pendingReply = jobManager.ask(new RequestJobDetails(true, false), timeout);

	return FutureUtils.toJava(pendingReply).thenApply(reply -> {
		if (!(reply instanceof MultipleJobsDetails)) {
			throw new CompletionException(
				new IllegalStateException("Unknown JobManager response of type " + reply.getClass()));
		}

		final Collection<JobDetails> allDetails = ((MultipleJobsDetails) reply).getJobs();
		final Collection<JobStatusMessage> statusMessages = new ArrayList<>(allDetails.size());
		for (JobDetails job : allDetails) {
			statusMessages.add(
				new JobStatusMessage(job.getJobId(), job.getJobName(), job.getStatus(), job.getStartTime()));
		}
		return statusMessages;
	});
}
 
Example #10
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Requests and returns the accumulators for the given job identifier. Accumulators can be
 * requested while a job is running or after it has finished.
 *
 * @param jobID The job identifier of a job.
 * @param loader The class loader for deserializing the accumulator results.
 * @return A Map containing the accumulator's name and its value.
 * @throws Exception if the request cannot be sent, waiting for the reply fails, the JobManager
 *                   reports an error (its cause is rethrown as is), or the reply has an
 *                   unexpected type
 */
public Map<String, OptionalFailure<Object>> getAccumulators(JobID jobID, ClassLoader loader) throws Exception {
	final ActorGateway jobManagerGateway = getJobManagerGateway();

	final Future<Object> pendingReply;
	try {
		pendingReply = jobManagerGateway.ask(new RequestAccumulatorResults(jobID), timeout);
	} catch (Exception e) {
		throw new Exception("Failed to query the job manager gateway for accumulators.", e);
	}

	final Object reply = Await.result(pendingReply, timeout);

	if (reply instanceof AccumulatorResultsFound) {
		final Map<String, SerializedValue<OptionalFailure<Object>>> serialized =
			((AccumulatorResultsFound) reply).result();
		return AccumulatorHelper.deserializeAccumulators(serialized, loader);
	}

	if (reply instanceof AccumulatorResultsErroneous) {
		throw ((AccumulatorResultsErroneous) reply).cause();
	}

	throw new Exception("Failed to fetch accumulators for the job " + jobID + ".");
}
 
Example #11
Source File: JobDeployerTest.java    From AthenaX with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a {@code JobDeployer} against fully mocked YARN/Flink collaborators and verifies
 * that the job graph is submitted in detached mode through the deployed cluster client.
 */
@Test
public void testDeployerWithIsolatedConfiguration() throws Exception {
  // Collaborators handed to the deployer under test.
  YarnClusterConfiguration yarnClusterConf = mock(YarnClusterConfiguration.class);
  doReturn(new YarnConfiguration()).when(yarnClusterConf).conf();
  ScheduledExecutorService scheduler = mock(ScheduledExecutorService.class);
  Configuration flinkConfiguration = new Configuration();
  YarnClient yarnClient = mock(YarnClient.class);
  JobDeployer deployer = new JobDeployer(yarnClusterConf, yarnClient, scheduler, flinkConfiguration);

  // The cluster descriptor hands out a mocked cluster client on deploy().
  AthenaXYarnClusterDescriptor descriptor = mock(AthenaXYarnClusterDescriptor.class);
  YarnClusterClient deployedClient = mock(YarnClusterClient.class);
  doReturn(deployedClient).when(descriptor).deploy();

  // Every ask() on the JobManager gateway completes immediately with null.
  ActorGateway jobManagerGateway = mock(ActorGateway.class);
  doReturn(jobManagerGateway).when(deployedClient).getJobManagerGateway();
  doReturn(Future$.MODULE$.successful(null)).when(jobManagerGateway).ask(any(), any());

  JobGraph graph = mock(JobGraph.class);
  doReturn(JobID.generate()).when(graph).getJobID();

  deployer.start(descriptor, graph);

  verify(deployedClient).runDetached(graph, null);
}
 
Example #12
Source File: LeaderRetrievalUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the announced leader address to an {@link ActorGateway} and completes the gateway
 * promise with it. Announcements with a null or empty address, and announcements arriving
 * after the promise has been completed, are ignored. A failed resolution is only logged.
 *
 * @param leaderAddress address of the announced leader; may be null or empty
 * @param leaderSessionID session id attached to the created gateway
 */
@Override
public void notifyLeaderAddress(final String leaderAddress, final UUID leaderSessionID) {
	final boolean addressMissing = leaderAddress == null || leaderAddress.equals("");
	if (addressMissing || futureActorGateway.isCompleted()) {
		return;
	}

	// Wraps the resolved actor reference together with the announced session id.
	final Mapper<ActorRef, ActorGateway> toGateway = new Mapper<ActorRef, ActorGateway>() {
		public ActorGateway apply(ActorRef ref) {
			return new AkkaActorGateway(ref, leaderSessionID);
		}
	};

	// Completes the promise on success; a failure is logged and otherwise dropped.
	final OnComplete<ActorGateway> completionHandler = new OnComplete<ActorGateway>() {
		@Override
		public void onComplete(Throwable failure, ActorGateway success) throws Throwable {
			if (failure != null) {
				LOG.debug("Could not retrieve the leader for address " + leaderAddress + ".", failure);
				return;
			}
			completePromise(success);
		}
	};

	AkkaUtils.getActorRefFuture(leaderAddress, actorSystem, timeout)
		.map(toGateway, actorSystem.dispatcher())
		.onComplete(completionHandler, actorSystem.dispatcher());
}
 
Example #13
Source File: LeaderRetrievalUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Retrieves the current leader gateway using the given {@link LeaderRetrievalService}. If the
 * current leader could not be retrieved after the given timeout, then a
 * {@link LeaderRetrievalException} is thrown.
 *
 * <p>The retrieval service is started for the lookup and stopped again before this method
 * returns, regardless of the outcome; a failure to stop it is logged as a warning only.
 *
 * @param leaderRetrievalService {@link LeaderRetrievalService} which is used for the leader retrieval
 * @param actorSystem ActorSystem which is used for the {@link LeaderRetrievalListener} implementation
 * @param timeout Timeout value for the retrieval call
 * @return The current leader gateway
 * @throws LeaderRetrievalException If the actor gateway could not be retrieved or the timeout has been exceeded
 */
public static ActorGateway retrieveLeaderGateway(
		LeaderRetrievalService leaderRetrievalService,
		ActorSystem actorSystem,
		FiniteDuration timeout)
	throws LeaderRetrievalException {
	final LeaderGatewayListener gatewayListener = new LeaderGatewayListener(actorSystem, timeout);

	try {
		leaderRetrievalService.start(gatewayListener);
		return Await.result(gatewayListener.getActorGatewayFuture(), timeout);
	} catch (Exception e) {
		throw new LeaderRetrievalException("Could not retrieve the leader gateway.", e);
	} finally {
		// Best effort: a failing stop must not mask the lookup result or its exception.
		try {
			leaderRetrievalService.stop();
		} catch (Exception fe) {
			LOG.warn("Could not stop the leader retrieval service.", fe);
		}
	}
}
 
Example #14
Source File: LocalStreamEnvironmentWithAsyncExecution.java    From flink-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a cancel request for the given job to the leader gateway and waits for the answer.
 *
 * <p>NOTE(review): earlier documentation stated that this should be called even if
 * {@code isRunning()} returns false so that the LocalFlinkMiniCluster gets terminated; the
 * code visible here only cancels the job - confirm where the cluster is shut down.
 *
 * @param jobID id of the job to cancel
 * @throws Exception if the leader gateway cannot be obtained or waiting for the reply fails
 * @throws RuntimeException if the cancellation is reported as failed or the reply has an
 *         unexpected type
 */
public void stop(JobID jobID) throws Exception {
    // Try to cancel the job.
    final ActorGateway leader = _exec.getLeaderGateway(_exec.timeout());
    final Future<Object> pendingReply = leader.ask(
            new JobManagerMessages.CancelJob(jobID), _exec.timeout());
    final Object reply = Await.result(pendingReply, _exec.timeout());

    if (reply instanceof CancellationSuccess) {
        // All good.
        return;
    }

    if (reply instanceof CancellationFailure) {
        throw new RuntimeException("Failure cancelling job", ((CancellationFailure) reply).cause());
    }

    throw new RuntimeException("Unexpected result of cancelling job: " + reply);
}
 
Example #15
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the {@link ActorGateway} of the current job manager leader using
 * the {@link LeaderRetrievalService}.
 *
 * @return ActorGateway of the current job manager leader
 * @throws FlinkException if the leader gateway could not be retrieved
 */
public ActorGateway getJobManagerGateway() throws FlinkException {
	log.debug("Looking up JobManager");

	try {
		final LeaderRetrievalService leaderRetriever =
			highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID);

		return LeaderRetrievalUtils.retrieveLeaderGateway(
			leaderRetriever,
			actorSystemLoader.get(),
			lookupTimeout);
	} catch (LeaderRetrievalException lre) {
		throw new FlinkException(
			"Could not connect to the leading JobManager. Please check that the " + "JobManager is running.",
			lre);
	}
}
 
Example #16
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tells the JobManager to finish the sessions (jobs) defined by the given IDs.
 *
 * <p>One fire-and-forget message is sent per non-null id; null entries are skipped.
 *
 * @param jobIds The IDs that identify the sessions.
 * @throws IllegalArgumentException if {@code jobIds} is null
 * @throws Exception if the JobManager gateway cannot be obtained
 */
public void endSessions(List<JobID> jobIds) throws Exception {
	if (jobIds == null) {
		throw new IllegalArgumentException("The JobIDs must not be null");
	}

	final ActorGateway jobManagerGateway = getJobManagerGateway();

	for (JobID sessionId : jobIds) {
		if (sessionId == null) {
			continue;
		}

		log.info("Telling job manager to end the session {}.", sessionId);
		jobManagerGateway.tell(new JobManagerMessages.RemoveCachedJob(sessionId));
	}
}
 
Example #17
Source File: LeaderRetrievalUtils.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Completes the gateway promise with the given gateway unless it has been completed before.
 * The check and the completion happen while holding {@code lock}.
 *
 * @param gateway gateway to complete the promise with
 */
private void completePromise(ActorGateway gateway) {
	synchronized (lock) {
		if (futureActorGateway.isCompleted()) {
			return;
		}
		futureActorGateway.success(gateway);
	}
}
 
Example #18
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Asks the JobManager to dispose the savepoint stored at the given path.
 *
 * @param savepointPath path of the savepoint to dispose
 * @return future that yields an {@link Acknowledge} on success; it fails with a
 *         {@link CompletionException} if the JobManager reports a failure or answers with an
 *         unexpected message type
 * @throws FlinkException if the JobManager gateway cannot be obtained
 */
public CompletableFuture<Acknowledge> disposeSavepoint(String savepointPath) throws FlinkException {
	final ActorGateway jobManager = getJobManagerGateway();

	final Object disposeMsg = new JobManagerMessages.DisposeSavepoint(savepointPath);
	final CompletableFuture<Object> replyFuture = FutureUtils.toJava(jobManager.ask(disposeMsg, timeout));

	return replyFuture.thenApply(reply -> {
		if (reply instanceof JobManagerMessages.DisposeSavepointSuccess$) {
			return Acknowledge.get();
		}

		if (!(reply instanceof JobManagerMessages.DisposeSavepointFailure)) {
			throw new CompletionException(
				new FlinkRuntimeException("Unknown response type " + reply.getClass().getSimpleName() + '.'));
		}

		final Throwable failureCause = ((JobManagerMessages.DisposeSavepointFailure) reply).cause();
		if (!(failureCause instanceof ClassNotFoundException)) {
			throw new CompletionException(failureCause);
		}

		// A missing class gets a more helpful message; the reported exception's own cause is kept.
		throw new CompletionException(
			new ClassNotFoundException("Savepoint disposal failed, because of a " +
				"missing class. This is most likely caused by a custom state " +
				"instance, which cannot be disposed without the user code class " +
				"loader. Please provide the program jar with which you have created " +
				"the savepoint via -j <JAR> for disposal.",
				failureCause.getCause()));
	});
}
 
Example #19
Source File: ActorGatewayKvStateRegistryListener.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a listener holding the given JobManager gateway and KvState server address.
 *
 * @param jobManager gateway to the JobManager; must not be null
 * @param kvStateServerAddress address of the KvState server; must not be null
 */
public ActorGatewayKvStateRegistryListener(
	ActorGateway jobManager,
	InetSocketAddress kvStateServerAddress) {

	// Validate first, in declaration order, then assign.
	Preconditions.checkNotNull(jobManager, "JobManager");
	Preconditions.checkNotNull(kvStateServerAddress, "ServerAddress");

	this.jobManager = jobManager;
	this.kvStateServerAddress = kvStateServerAddress;
}
 
Example #20
Source File: TaskInputSplitProvider.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an input split provider from the given gateway, identifiers and timeout.
 * All arguments must be non-null.
 *
 * @param jobManager gateway to the JobManager
 * @param jobID id of the job
 * @param vertexID id of the job vertex
 * @param executionID id of the execution attempt
 * @param timeout timeout stored for later use by this provider
 */
public TaskInputSplitProvider(
	ActorGateway jobManager,
	JobID jobID,
	JobVertexID vertexID,
	ExecutionAttemptID executionID,
	FiniteDuration timeout) {

	// Validate first, in declaration order, then assign.
	Preconditions.checkNotNull(jobManager);
	Preconditions.checkNotNull(jobID);
	Preconditions.checkNotNull(vertexID);
	Preconditions.checkNotNull(executionID);
	Preconditions.checkNotNull(timeout);

	this.jobManager = jobManager;
	this.jobID = jobID;
	this.vertexID = vertexID;
	this.executionID = executionID;
	this.timeout = timeout;
}
 
Example #21
Source File: ActorGatewayResultPartitionConsumableNotifier.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a notifier from the given execution context, JobManager gateway and message timeout.
 * All arguments must be non-null.
 *
 * @param executionContext execution context stored by this notifier
 * @param jobManager gateway to the JobManager
 * @param jobManagerMessageTimeout timeout stored for messages to the JobManager
 */
public ActorGatewayResultPartitionConsumableNotifier(
	ExecutionContext executionContext,
	ActorGateway jobManager,
	FiniteDuration jobManagerMessageTimeout) {

	// Validate first, in declaration order, then assign.
	Preconditions.checkNotNull(executionContext);
	Preconditions.checkNotNull(jobManager);
	Preconditions.checkNotNull(jobManagerMessageTimeout);

	this.executionContext = executionContext;
	this.jobManager = jobManager;
	this.jobManagerMessageTimeout = jobManagerMessageTimeout;
}
 
Example #22
Source File: LocalStreamEnvironmentWithAsyncExecution.java    From flink-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Returns whether the job with the given id is currently running.
 *
 * <p>The job status is requested from the leader gateway. A job counts as running as long as
 * its status is not a globally terminal state; a job unknown to the JobManager counts as not
 * running.
 *
 * @param jobID id of the job to check
 * @return true if the job is known and not in a globally terminal state, false otherwise
 * @throws Exception if the leader gateway cannot be obtained or waiting for the reply fails
 * @throws RuntimeException if the reply is neither a job status nor a job-not-found message
 */
public boolean isRunning(JobID jobID) throws Exception {
    ActorGateway leader = _exec.getLeaderGateway(_exec.timeout());
    Future<Object> response = leader.ask(new JobManagerMessages.RequestJobStatus(jobID),
            _exec.timeout());
    Object result = Await.result(response, _exec.timeout());

    if (result instanceof CurrentJobStatus) {
        JobStatus jobStatus = ((CurrentJobStatus) result).status();
        return !jobStatus.isGloballyTerminalState();
    }

    // Inspect the awaited reply, not the future: checking the future never matched, so an
    // unknown job ended up in the "unexpected response" exception below.
    if (result instanceof JobNotFound) {
        return false;
    }

    throw new RuntimeException("Unexpected response to job status: " + result);
}
 
Example #23
Source File: ActorGatewayKvStateLocationOracle.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
public ActorGatewayKvStateLocationOracle(
		ActorGateway jobManagerActorGateway,
		Time timeout) {
	this.jobManagerActorGateway = Preconditions.checkNotNull(jobManagerActorGateway);

	Preconditions.checkNotNull(timeout);
	this.timeout = FiniteDuration.apply(timeout.toMilliseconds(), TimeUnit.MILLISECONDS);
}
 
Example #24
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskManager sends a proper exception back to the sender if the stop task
 * message fails.
 *
 * <p>A task running {@code BlockingNoOpInvokable} is submitted, the test waits until it is
 * reported as running, and then a {@code StopTask} message is sent for it. Awaiting the reply
 * is expected to throw an {@link UnsupportedOperationException}.
 */
@Test
public void testStopTaskFailure() throws Exception {
	ActorGateway jobManager = null;
	ActorGateway taskManager = null;

	try {
		final ExecutionAttemptID executionAttemptId = new ExecutionAttemptID();

		// Start a simple JobManager actor and announce it as the leader.
		ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, LEADER_SESSION_ID));
		jobManager = new AkkaActorGateway(jm, LEADER_SESSION_ID);

		highAvailabilityServices.setJobMasterLeaderRetriever(
			HighAvailabilityServices.DEFAULT_JOB_ID,
			new StandaloneLeaderRetrievalService(jobManager.path(), jobManager.leaderSessionID()));

		taskManager = TestingUtils.createTaskManager(
			system,
			highAvailabilityServices,
			new Configuration(),
			true,
			true);

		TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(
			new JobID(),
			"test job",
			new JobVertexID(),
			executionAttemptId,
			new SerializedValue<>(new ExecutionConfig()),
			"test task",
			1,
			0,
			1,
			0,
			new Configuration(),
			new Configuration(),
			BlockingNoOpInvokable.class.getName(),
			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			Collections.emptyList(),
			Collections.emptyList(),
			0);

		Future<Object> submitResponse = taskManager.ask(new SubmitTask(tdd), timeout);

		// Block until the submission has been answered.
		Await.result(submitResponse, timeout);

		final Future<Object> taskRunning = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(executionAttemptId), timeout);

		// Only send the stop message once the task is reported as running.
		Await.result(taskRunning, timeout);

		Future<Object> stopResponse = taskManager.ask(new StopTask(executionAttemptId), timeout);

		try {
			Await.result(stopResponse, timeout);

			fail("The stop task message should have failed.");
		} catch (UnsupportedOperationException e) {
			// expected
			// NOTE(review): presumably because the invokable used here cannot be stopped - confirm.
		}
	} finally {
		// Always shut both actors down, whether or not the test passed.
		TestingUtils.stopActor(jobManager);
		TestingUtils.stopActor(taskManager);
	}
}
 
Example #25
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskManager sends a proper exception back to the sender if the update task
 * input partitions message fails.
 *
 * <p>A task running {@code BlockingNoOpInvokable} is submitted and then an
 * {@code UpdateTaskSinglePartitionInfo} message with freshly created data set and partition
 * ids is sent for it. Awaiting the reply is expected to throw an exception.
 */
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
	ActorGateway jobManager = null;
	ActorGateway taskManager = null;

	try {

		final ExecutionAttemptID executionAttemptId = new ExecutionAttemptID();

		// Start a simple JobManager actor and announce it as the leader.
		ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, LEADER_SESSION_ID));
		jobManager = new AkkaActorGateway(jm, LEADER_SESSION_ID);

		highAvailabilityServices.setJobMasterLeaderRetriever(
			HighAvailabilityServices.DEFAULT_JOB_ID,
			new StandaloneLeaderRetrievalService(jobManager.path(), jobManager.leaderSessionID()));

		taskManager = TestingUtils.createTaskManager(
			system,
			highAvailabilityServices,
			new Configuration(),
			true,
			true);

		TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(
			new JobID(),
			"test job",
			new JobVertexID(),
			executionAttemptId,
			new SerializedValue<>(new ExecutionConfig()),
			"test task",
			1,
			0,
			1,
			0,
			new Configuration(),
			new Configuration(),
			BlockingNoOpInvokable.class.getName(),
			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			Collections.emptyList(),
			Collections.emptyList(),
			0);

		Future<Object> submitResponse = taskManager.ask(new SubmitTask(tdd), timeout);

		// Block until the submission has been answered.
		Await.result(submitResponse, timeout);

		// The update refers to a data set id and partition id created on the spot.
		Future<Object> partitionUpdateResponse = taskManager.ask(
			new TaskMessages.UpdateTaskSinglePartitionInfo(
				executionAttemptId,
				new IntermediateDataSetID(),
				new InputChannelDeploymentDescriptor(new ResultPartitionID(), ResultPartitionLocation.createLocal())),
			timeout);

		try {
			Await.result(partitionUpdateResponse, timeout);

			fail("The update task input partitions message should have failed.");
		} catch (Exception e) {
			// expected
		}
	} finally {
		// Always shut both actors down, whether or not the test passed.
		TestingUtils.stopActor(jobManager);
		TestingUtils.stopActor(taskManager);
	}
}
 
Example #26
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskManager sends a proper exception back to the sender if the trigger stack
 * trace message fails.
 *
 * <p>No task is submitted in this test; the {@code TriggerStackTraceSample} message refers to
 * a freshly created execution attempt id. Awaiting the reply is expected to throw an
 * {@link IllegalStateException}.
 */
@Test
public void testStackTraceSampleFailure() throws Exception {
	ActorGateway jobManager = null;
	ActorGateway taskManager = null;

	try {

		// Start a simple JobManager actor and announce it as the leader.
		ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, LEADER_SESSION_ID));
		jobManager = new AkkaActorGateway(jm, LEADER_SESSION_ID);

		highAvailabilityServices.setJobMasterLeaderRetriever(
			HighAvailabilityServices.DEFAULT_JOB_ID,
			new StandaloneLeaderRetrievalService(jobManager.path(), jobManager.leaderSessionID()));

		taskManager = TestingUtils.createTaskManager(
			system,
			highAvailabilityServices,
			new Configuration(),
			true,
			true);

		// Request a sample for an execution attempt id that was never submitted.
		Future<Object> stackTraceResponse = taskManager.ask(
			new TriggerStackTraceSample(
				0,
				new ExecutionAttemptID(),
				0,
				Time.milliseconds(1L),
				0),
			timeout);

		try {
			Await.result(stackTraceResponse, timeout);

			fail("The trigger stack trace message should have failed.");
		} catch (IllegalStateException e) {
			// expected
		}
	} finally {
		// Always shut both actors down, whether or not the test passed.
		TestingUtils.stopActor(jobManager);
		TestingUtils.stopActor(taskManager);
	}
}
 
Example #27
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the TaskManager sends a proper exception back to the sender if the submit task
 * message fails.
 *
 * <p>The deployment descriptor is built with an invalid number of key groups (0), so awaiting
 * the reply to the {@code SubmitTask} message is expected to throw an
 * {@link IllegalArgumentException}.
 */
@Test
public void testSubmitTaskFailure() throws Exception {
	ActorGateway jobManager = null;
	ActorGateway taskManager = null;

	try {

		// Start a simple JobManager actor and announce it as the leader.
		ActorRef jm = system.actorOf(Props.create(SimpleJobManager.class, LEADER_SESSION_ID));
		jobManager = new AkkaActorGateway(jm, LEADER_SESSION_ID);

		highAvailabilityServices.setJobMasterLeaderRetriever(
			HighAvailabilityServices.DEFAULT_JOB_ID,
			new StandaloneLeaderRetrievalService(jobManager.path(), jobManager.leaderSessionID()));

		taskManager = TestingUtils.createTaskManager(
			system,
			highAvailabilityServices,
			new Configuration(),
			true,
			true);

		TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(
			new JobID(),
			"test job",
			new JobVertexID(),
			new ExecutionAttemptID(),
			new SerializedValue<>(new ExecutionConfig()),
			"test task",
			0, // this will make the submission fail because the number of key groups must be >= 1
			0,
			1,
			0,
			new Configuration(),
			new Configuration(),
			"Foobar",
			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			Collections.emptyList(),
			Collections.emptyList(),
			0);

		Future<Object> submitResponse = taskManager.ask(new SubmitTask(tdd), timeout);

		try {
			Await.result(submitResponse, timeout);

			fail("The submit task message should have failed.");
		} catch (IllegalArgumentException e) {
			// expected
		}
	} finally {
		// Always shut both actors down, whether or not the test passed.
		TestingUtils.stopActor(jobManager);
		TestingUtils.stopActor(taskManager);
	}
}
 
Example #28
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a task gets failed when its schedule or update consumers call fails.
 *
 * <p>IMPORTANT: The future awaited at the end of this test is only completed by the invokable's
 * cancel method, so that method must actually be called. To achieve this, consumer tasks are
 * not deployed eagerly and the invokable is required to fill one memory segment. Since the
 * result partition is pipelined, the completed segment triggers the scheduling of the
 * downstream operator. After filling the segment the invokable blocks, and the task is then
 * failed because of the failed schedule or update consumers call.
 */
@Test(timeout = 10000L)
public void testFailingScheduleOrUpdateConsumersMessage() throws Exception {
	new JavaTestKit(system) {{
		// One memory buffer has to be filled before the schedule or update consumers message
		// is sent to the downstream operators, so shrink the segment to the smallest size
		// possible.
		final Configuration tmConfig = new Configuration();
		tmConfig.setString(TaskManagerOptions.MEMORY_SEGMENT_SIZE, "4096");

		final JobID jobId = new JobID();
		final JobVertexID vertexId = new JobVertexID();
		final ExecutionAttemptID attemptId = new ExecutionAttemptID();
		final SerializedValue<ExecutionConfig> serializedExecutionConfig =
			new SerializedValue<>(new ExecutionConfig());

		// The single pipelined result partition of the task under test.
		final ResultPartitionDeploymentDescriptor pipelinedPartition =
			new ResultPartitionDeploymentDescriptor(
				new IntermediateDataSetID(),
				new IntermediateResultPartitionID(),
				ResultPartitionType.PIPELINED,
				1,
				1,
				true);

		final TaskDeploymentDescriptor deployment = createTaskDeploymentDescriptor(
			jobId,
			"TestJob",
			vertexId,
			attemptId,
			serializedExecutionConfig,
			"TestTask",
			1,
			0,
			1,
			0,
			new Configuration(),
			new Configuration(),
			TestInvokableRecordCancel.class.getName(),
			Collections.singletonList(pipelinedPartition),
			Collections.<InputGateDeploymentDescriptor>emptyList(),
			new ArrayList<>(),
			Collections.emptyList(),
			0);

		// Register the FailingScheduleOrUpdateConsumersJobManager actor as the job master leader.
		final Props failingJobManagerProps =
			Props.create(FailingScheduleOrUpdateConsumersJobManager.class, LEADER_SESSION_ID);
		final ActorRef failingJobManagerRef = system.actorOf(failingJobManagerProps, "jobmanager");
		final ActorGateway jobManagerGateway =
			new AkkaActorGateway(failingJobManagerRef, LEADER_SESSION_ID);

		highAvailabilityServices.setJobMasterLeaderRetriever(
			HighAvailabilityServices.DEFAULT_JOB_ID,
			new StandaloneLeaderRetrievalService(
				jobManagerGateway.path(),
				jobManagerGateway.leaderSessionID()));

		final ActorGateway taskManagerGateway = TestingUtils.createTaskManager(
			system,
			highAvailabilityServices,
			tmConfig,
			true,
			true);

		try {
			TestInvokableRecordCancel.resetGotCanceledFuture();

			// Submit the task and wait for the response to the submission.
			Await.result(taskManagerGateway.ask(new SubmitTask(deployment), timeout), timeout);

			// Blocks until the invokable reports whether it got canceled; the @Test timeout
			// bounds this wait.
			assertEquals(true, TestInvokableRecordCancel.gotCanceled().get());
		} finally {
			TestingUtils.stopActor(taskManagerGateway);
			TestingUtils.stopActor(jobManagerGateway);
		}
	}};
}
 
Example #29
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Requests the TaskManager log from a TaskManager whose configured log path does not exist and
 * checks that the request fails with the expected "log files are unavailable" message.
 */
@Test
public void testLogNotFoundHandling() throws Exception {
	new JavaTestKit(system){{
		// Both gateways start out as null so the finally block can run no matter how far
		// the setup got.
		ActorGateway jmGateway = null;
		ActorGateway tmGateway = null;

		try {
			// A JobManager is required so that the BlobService is also started.
			final ActorRef jobManagerRef = system.actorOf(
				Props.create(
					new SimplePartitionStateLookupJobManagerCreator(
						LEADER_SESSION_ID,
						getTestActor())));
			jmGateway = new AkkaActorGateway(jobManagerRef, LEADER_SESSION_ID);

			final Configuration tmConfig = new Configuration();
			tmConfig.setInteger(TaskManagerOptions.DATA_PORT, NetUtils.getAvailablePort());
			tmConfig.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
			tmConfig.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);
			// Point the log path at a location that does not exist.
			tmConfig.setString(ConfigConstants.TASK_MANAGER_LOG_PATH_KEY, "/i/dont/exist");

			highAvailabilityServices.setJobMasterLeaderRetriever(
				HighAvailabilityServices.DEFAULT_JOB_ID,
				new StandaloneLeaderRetrievalService(
					jmGateway.path(),
					jmGateway.leaderSessionID()));

			tmGateway = TestingUtils.createTaskManager(
				system,
				highAvailabilityServices,
				tmConfig,
				false,
				true);

			// ---------------------------------------------------------------------------------

			// The anonymous class below can only capture a (effectively) final variable.
			final ActorGateway logRequestTarget = tmGateway;

			new Within(d) {
				@Override
				protected void run() {
					final String expectedPrefix =
						"TaskManager log files are unavailable. Log file could not be found at";

					Future<Object> logResponse = logRequestTarget.ask(
						TaskManagerMessages.getRequestTaskManagerLog(),
						timeout);

					try {
						Await.result(logResponse, timeout);
						// The request must not succeed; the AssertionError raised here is not
						// an Exception and therefore bypasses the catch block below.
						Assert.fail();
					} catch (Exception e) {
						Assert.assertTrue(e.getMessage().startsWith(expectedPrefix));
					}
				}
			};
		} finally {
			TestingUtils.stopActor(tmGateway);
			TestingUtils.stopActor(jmGateway);
		}
	}};
}
 
Example #30
Source File: TaskManagerTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that repeated local {@link PartitionNotFoundException}s ultimately fail the receiver.
 *
 * <p>A receiver task is submitted whose only input channel points to a local partition that
 * was never registered. The test then waits for the {@link TaskExecutionState} delivered to
 * the test actor and checks that it is {@code FAILED} with a
 * {@link PartitionNotFoundException} as the error.
 *
 * @throws Exception if the setup or the communication with the actors fails; such exceptions
 *                   propagate unchanged so that the test report contains their full stack trace
 */
@Test
public void testLocalPartitionNotFound() throws Exception {

	new JavaTestKit(system){{

		// Initialized to null so that the finally block can run regardless of how far the
		// setup got.
		ActorGateway jobManager = null;
		ActorGateway taskManager = null;

		final ActorGateway testActorGateway = new AkkaActorGateway(
			getTestActor(),
			LEADER_SESSION_ID);

		try {
			final IntermediateDataSetID resultId = new IntermediateDataSetID();

			// Create the JM
			ActorRef jm = system.actorOf(Props.create(
				new SimplePartitionStateLookupJobManagerCreator(LEADER_SESSION_ID, getTestActor())));

			jobManager = new AkkaActorGateway(jm, LEADER_SESSION_ID);

			highAvailabilityServices.setJobMasterLeaderRetriever(
				HighAvailabilityServices.DEFAULT_JOB_ID,
				new StandaloneLeaderRetrievalService(jobManager.path(), jobManager.leaderSessionID()));

			final Configuration config = new Configuration();
			config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_INITIAL, 100);
			config.setInteger(TaskManagerOptions.NETWORK_REQUEST_BACKOFF_MAX, 200);

			taskManager = TestingUtils.createTaskManager(
				system,
				highAvailabilityServices,
				config,
				true,
				true);

			// ---------------------------------------------------------------------------------

			// The anonymous Within class below can only capture a (effectively) final variable.
			final ActorGateway tm = taskManager;

			final JobID jid = new JobID();
			final JobVertexID vid = new JobVertexID();
			final ExecutionAttemptID eid = new ExecutionAttemptID();

			final ResultPartitionID partitionId = new ResultPartitionID();

			// Local location (on the same TM though) for the partition
			final ResultPartitionLocation loc = ResultPartitionLocation.createLocal();

			final InputChannelDeploymentDescriptor[] icdd =
				new InputChannelDeploymentDescriptor[] {
					new InputChannelDeploymentDescriptor(partitionId, loc)};

			final InputGateDeploymentDescriptor igdd =
				new InputGateDeploymentDescriptor(resultId, ResultPartitionType.PIPELINED, 0, icdd);

			final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(
				jid, "TestJob", vid, eid,
				new SerializedValue<>(new ExecutionConfig()),
				"Receiver", 1, 0, 1, 0,
				new Configuration(), new Configuration(),
				Tasks.AgnosticReceiver.class.getName(),
				Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
				Collections.singletonList(igdd),
				Collections.emptyList(),
				Collections.emptyList(), 0);

			new Within(new FiniteDuration(120, TimeUnit.SECONDS)) {
				@Override
				protected void run() {
					// Submit the task
					tm.tell(new SubmitTask(tdd), testActorGateway);
					expectMsgClass(Acknowledge.get().getClass());

					// Wait to be notified about the final execution state by the mock JM
					TaskExecutionState msg = expectMsgClass(TaskExecutionState.class);

					// The task should fail after repeated requests
					// (JUnit expects the expected value first, then the actual one).
					assertEquals(ExecutionState.FAILED, msg.getExecutionState());

					Throwable error = msg.getError(getClass().getClassLoader());
					if (error == null) {
						// Guard against a FAILED state without an attached error, which would
						// otherwise surface as an unrelated NullPointerException below.
						fail("Wrong exception: the task failed without reporting an error.");
					}
					if (error.getClass() != PartitionNotFoundException.class) {
						// Attach the unexpected error as cause so its stack trace ends up in
						// the test report rather than only on stderr.
						throw new AssertionError("Wrong exception: " + error.getMessage(), error);
					}
				}
			};
		}
		finally {
			// No catch block: unexpected exceptions propagate with their original stack trace.
			TestingUtils.stopActor(taskManager);
			TestingUtils.stopActor(jobManager);
		}
	}};
}