Java Code Examples for org.apache.flink.util.ExceptionUtils#findThrowable()

The following examples show how to use org.apache.flink.util.ExceptionUtils#findThrowable(). The source file, originating project, and license are noted above each example.
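Before looking at the project code, here is a minimal, self-contained sketch (not taken from any of the projects below; it only assumes flink-core on the classpath) of the two overloads the examples rely on: findThrowable(Throwable, Class) returns the first throwable of the given type in the cause chain, and findThrowable(Throwable, Predicate) returns the first throwable matching a predicate. Both return an empty Optional when nothing matches.

import java.io.IOException;
import java.util.Optional;

import org.apache.flink.util.ExceptionUtils;

public class FindThrowableSketch {

	public static void main(String[] args) {
		// a typical wrapped failure: the interesting exception sits deep in the cause chain
		Throwable chain = new RuntimeException("wrapper",
			new IOException("max retry timeout exceeded"));

		// overload 1: search the cause chain for a specific exception type
		Optional<IOException> byType = ExceptionUtils.findThrowable(chain, IOException.class);

		// overload 2: search the cause chain with an arbitrary predicate
		Optional<Throwable> byPredicate = ExceptionUtils.findThrowable(chain,
			t -> t.getMessage() != null && t.getMessage().contains("max retry timeout"));

		System.out.println(byType.isPresent());      // true
		System.out.println(byPredicate.isPresent()); // true
	}
}
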
Example 1
Source File: RetryRequestFailureHandler.java    From flink-learning with Apache License 2.0
@Override
public void onFailure(ActionRequest actionRequest, Throwable throwable, int i, RequestIndexer requestIndexer) throws Throwable {
    if (ExceptionUtils.findThrowable(throwable, EsRejectedExecutionException.class).isPresent()) {
        // Elasticsearch rejected the request because its bulk queue was full: re-queue it.
        requestIndexer.add(new ActionRequest[]{actionRequest});
        return;
    }
    if (ExceptionUtils.findThrowable(throwable, SocketTimeoutException.class).isPresent()) {
        // Ignore transient socket timeouts; the failed request is dropped.
        return;
    }
    Optional<IOException> exp = ExceptionUtils.findThrowable(throwable, IOException.class);
    if (exp.isPresent()) {
        IOException ioExp = exp.get();
        // Detect the Elasticsearch REST client's 'max retry timeout' failure by message.
        if (ioExp.getMessage() != null && ioExp.getMessage().contains("max retry timeout")) {
            log.error(ioExp.getMessage());
            return;
        }
    }
    // Anything else is unrecoverable for this request: rethrow to fail the sink.
    throw throwable;
}
 
Example 2
Source File: AbstractAsynchronousOperationHandlersTest.java    From flink with Apache License 2.0
/**
 * Tests that querying an unknown trigger id will return an exceptionally completed
 * future.
 */
@Test
public void testUnknownTriggerId() throws Exception {
	final TestingRestfulGateway testingRestfulGateway = new TestingRestfulGateway.Builder().build();

	try {
		testingStatusHandler.handleRequest(
			statusOperationRequest(new TriggerId()),
			testingRestfulGateway).get();

		fail("This should have failed with a RestHandlerException.");
	} catch (ExecutionException ee) {
		final Optional<RestHandlerException> optionalRestHandlerException = ExceptionUtils.findThrowable(ee, RestHandlerException.class);

		assertThat(optionalRestHandlerException.isPresent(), is(true));

		final RestHandlerException restHandlerException = optionalRestHandlerException.get();

		assertThat(restHandlerException.getMessage(), containsString("Operation not found"));
		assertThat(restHandlerException.getHttpResponseStatus(), is(HttpResponseStatus.NOT_FOUND));
	}
}
 
Example 3
Source File: JobRetrievalITCase.java    From flink with Apache License 2.0
@Test
public void testNonExistingJobRetrieval() throws Exception {
	final JobID jobID = new JobID();

	try {
		client.requestJobResult(jobID).get();
		fail();
	} catch (Exception exception) {
		Optional<Throwable> expectedCause = ExceptionUtils.findThrowable(exception,
			candidate -> candidate.getMessage() != null && candidate.getMessage().contains("Could not find Flink job"));
		if (!expectedCause.isPresent()) {
			throw exception;
		}
	}
}
 
Example 4
Source File: RestartPipelinedRegionFailoverStrategy.java    From flink with Apache License 2.0
/**
 * Returns a set of IDs corresponding to the set of vertices that should be restarted.
 * In this strategy, all task vertices in 'involved' regions are proposed to be restarted.
 * The 'involved' regions are calculated with the rules below:
 * 1. The region containing the failed task is always involved
 * 2. If an input result partition of an involved region is not available, i.e. Missing or Corrupted,
 *    the region containing the partition producer task is involved
 * 3. If a region is involved, all of its consumer regions are involved
 *
 * @param executionVertexId ID of the failed task
 * @param cause cause of the failure
 * @return set of IDs of vertices to restart
 */
@Override
public Set<ExecutionVertexID> getTasksNeedingRestart(ExecutionVertexID executionVertexId, Throwable cause) {
	LOG.info("Calculating tasks to restart to recover the failed task {}.", executionVertexId);

	final SchedulingPipelinedRegion failedRegion = topology.getPipelinedRegionOfVertex(executionVertexId);
	if (failedRegion == null) {
		// TODO: show the task name in the log
		throw new IllegalStateException("Can not find the failover region for task " + executionVertexId, cause);
	}

	// if the failure cause is data consumption error, mark the corresponding data partition to be failed,
	// so that the failover process will try to recover it
	Optional<PartitionException> dataConsumptionException = ExceptionUtils.findThrowable(
		cause, PartitionException.class);
	if (dataConsumptionException.isPresent()) {
		resultPartitionAvailabilityChecker.markResultPartitionFailed(
			dataConsumptionException.get().getPartitionId().getPartitionId());
	}

	// calculate the tasks to restart based on the result of regions to restart
	Set<ExecutionVertexID> tasksToRestart = new HashSet<>();
	for (SchedulingPipelinedRegion region : getRegionsToRestart(failedRegion)) {
		region.getVertices().forEach(vertex -> tasksToRestart.add(vertex.getId()));
	}

	// the previous failed partition will be recovered. remove its failed state from the checker
	if (dataConsumptionException.isPresent()) {
		resultPartitionAvailabilityChecker.removeResultPartitionFromFailedState(
			dataConsumptionException.get().getPartitionId().getPartitionId());
	}

	LOG.info("{} tasks should be restarted to recover the failed task {}. ", tasksToRestart.size(), executionVertexId);
	return tasksToRestart;
}
 
Example 5
Source File: JobSubmitHandlerTest.java    From flink with Apache License 2.0
@Test
public void testRejectionOnCountMismatch() throws Exception {
	final Path jobGraphFile = TEMPORARY_FOLDER.newFile().toPath();
	try (ObjectOutputStream objectOut = new ObjectOutputStream(Files.newOutputStream(jobGraphFile))) {
		objectOut.writeObject(new JobGraph("testjob"));
	}
	final Path countExceedingFile = TEMPORARY_FOLDER.newFile().toPath();

	TestingDispatcherGateway.Builder builder = new TestingDispatcherGateway.Builder();
	builder
		.setBlobServerPort(blobServer.getPort())
		.setSubmitFunction(jobGraph -> CompletableFuture.completedFuture(Acknowledge.get()))
		.setHostname("localhost");
	DispatcherGateway mockGateway = builder.build();

	JobSubmitHandler handler = new JobSubmitHandler(
		() -> CompletableFuture.completedFuture(mockGateway),
		RpcUtils.INF_TIMEOUT,
		Collections.emptyMap(),
		TestingUtils.defaultExecutor(),
		configuration);

	JobSubmitRequestBody request = new JobSubmitRequestBody(jobGraphFile.getFileName().toString(), Collections.emptyList(), Collections.emptyList());

	try {
		handler.handleRequest(new HandlerRequest<>(request, EmptyMessageParameters.getInstance(), Collections.emptyMap(), Collections.emptyMap(), Arrays.asList(jobGraphFile.toFile(), countExceedingFile.toFile())), mockGateway)
			.get();
		fail("Expected the submission to be rejected because of the file count mismatch.");
	} catch (Exception e) {
		// verify that the rejection actually stems from the file count mismatch
		assertTrue(ExceptionUtils.findThrowable(e, candidate -> candidate instanceof RestHandlerException && candidate.getMessage().contains("count")).isPresent());
	}
}
 
Example 6
Source File: RestartPipelinedRegionStrategy.java    From flink with Apache License 2.0
/**
 * Returns a set of IDs corresponding to the set of vertices that should be restarted.
 * In this strategy, all task vertices in 'involved' regions are proposed to be restarted.
 * The 'involved' regions are calculated with the rules below:
 * 1. The region containing the failed task is always involved
 * 2. If an input result partition of an involved region is not available, i.e. Missing or Corrupted,
 *    the region containing the partition producer task is involved
 * 3. If a region is involved, all of its consumer regions are involved
 *
 * @param executionVertexId ID of the failed task
 * @param cause cause of the failure
 * @return set of IDs of vertices to restart
 */
@Override
public Set<ExecutionVertexID> getTasksNeedingRestart(ExecutionVertexID executionVertexId, Throwable cause) {
	LOG.info("Calculating tasks to restart to recover the failed task {}.", executionVertexId);

	final FailoverRegion failedRegion = vertexToRegionMap.get(executionVertexId);
	if (failedRegion == null) {
		// TODO: show the task name in the log
		throw new IllegalStateException("Can not find the failover region for task " + executionVertexId, cause);
	}

	// if the failure cause is data consumption error, mark the corresponding data partition to be failed,
	// so that the failover process will try to recover it
	Optional<PartitionException> dataConsumptionException = ExceptionUtils.findThrowable(
		cause, PartitionException.class);
	if (dataConsumptionException.isPresent()) {
		resultPartitionAvailabilityChecker.markResultPartitionFailed(
			dataConsumptionException.get().getPartitionId().getPartitionId());
	}

	// calculate the tasks to restart based on the result of regions to restart
	Set<ExecutionVertexID> tasksToRestart = new HashSet<>();
	for (FailoverRegion region : getRegionsToRestart(failedRegion)) {
		tasksToRestart.addAll(region.getAllExecutionVertexIDs());
	}

	// the previous failed partition will be recovered. remove its failed state from the checker
	if (dataConsumptionException.isPresent()) {
		resultPartitionAvailabilityChecker.removeResultPartitionFromFailedState(
			dataConsumptionException.get().getPartitionId().getPartitionId());
	}

	LOG.info("{} tasks should be restarted to recover the failed task {}. ", tasksToRestart.size(), executionVertexId);
	return tasksToRestart;
}
 
Example 7
Source File: StandaloneApplicationClusterConfigurationParserFactoryTest.java    From flink with Apache License 2.0
@Test
public void testInvalidJobIdThrows() {
	final String invalidJobId = "0xINVALID";
	final String[] args = {"--configDir", confDirPath, "--job-classname", "foobar", "--job-id", invalidJobId};

	try {
		commandLineParser.parse(args);
		fail("Did not throw expected FlinkParseException");
	} catch (FlinkParseException e) {
		Optional<IllegalArgumentException> cause = ExceptionUtils.findThrowable(e, IllegalArgumentException.class);
		assertTrue(cause.isPresent());
		assertThat(cause.get().getMessage(), containsString(invalidJobId));
	}
}
 
Example 8
Source File: KafkaConsumerTestBase.java    From flink with Apache License 2.0
/**
 * Test that ensures the KafkaConsumer fails properly if the topic doesn't exist
 * and a wrong broker was specified.
 *
 * @throws Exception
 */
public void runFailOnNoBrokerTest() throws Exception {
	try {
		Properties properties = new Properties();

		StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
		see.getConfig().disableSysoutLogging();
		see.setRestartStrategy(RestartStrategies.noRestart());
		see.setParallelism(1);

		// use wrong ports for the consumers
		properties.setProperty("bootstrap.servers", "localhost:80");
		properties.setProperty("group.id", "test");
		properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast
		properties.setProperty("socket.timeout.ms", "3000");
		properties.setProperty("session.timeout.ms", "2000");
		properties.setProperty("fetch.max.wait.ms", "2000");
		properties.setProperty("heartbeat.interval.ms", "1000");
		properties.putAll(secureProps);
		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
		DataStream<String> stream = see.addSource(source);
		stream.print();
		see.execute("No broker test");
	} catch (JobExecutionException jee) {
		if (kafkaServer.getVersion().equals("0.9") ||
			kafkaServer.getVersion().equals("0.10") ||
			kafkaServer.getVersion().equals("0.11") ||
			kafkaServer.getVersion().equals("2.0")) {
			final Optional<TimeoutException> optionalTimeoutException = ExceptionUtils.findThrowable(jee, TimeoutException.class);
			assertTrue(optionalTimeoutException.isPresent());

			final TimeoutException timeoutException = optionalTimeoutException.get();
			assertEquals("Timeout expired while fetching topic metadata", timeoutException.getMessage());
		} else {
			final Optional<Throwable> optionalThrowable = ExceptionUtils.findThrowableWithMessage(jee, "Unable to retrieve any partitions");
			assertTrue(optionalThrowable.isPresent());
			assertTrue(optionalThrowable.get() instanceof RuntimeException);
		}
	}
}
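
Besides the predicate overload, this example uses ExceptionUtils#findThrowableWithMessage, which searches the cause chain for a throwable whose message contains the given string. A minimal sketch of the two lookups side by side, assuming the same imports as in the sketch above (the wrapped exception here is illustrative, not taken from the test):

Throwable chain = new RuntimeException("job failed",
	new java.util.concurrent.TimeoutException("Timeout expired while fetching topic metadata"));

// match by concrete type anywhere in the cause chain
Optional<java.util.concurrent.TimeoutException> byType =
	ExceptionUtils.findThrowable(chain, java.util.concurrent.TimeoutException.class);

// match by message substring anywhere in the cause chain; returns Optional<Throwable>
Optional<Throwable> byMessage =
	ExceptionUtils.findThrowableWithMessage(chain, "Timeout expired");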
 
Example 9
Source File: CheckpointCoordinatorTriggeringTest.java    From flink with Apache License 2.0
@Test
public void testStopPeriodicScheduler() throws Exception {
	// set up the coordinator and validate the initial state
	CheckpointCoordinator checkpointCoordinator = createCheckpointCoordinator();

	final CompletableFuture<CompletedCheckpoint> onCompletionPromise1 =
		triggerPeriodicCheckpoint(checkpointCoordinator);
	manuallyTriggeredScheduledExecutor.triggerAll();
	try {
		onCompletionPromise1.get();
		fail("The triggerCheckpoint call expected an exception");
	} catch (ExecutionException e) {
		final Optional<CheckpointException> checkpointExceptionOptional =
			ExceptionUtils.findThrowable(e, CheckpointException.class);
		assertTrue(checkpointExceptionOptional.isPresent());
		assertEquals(CheckpointFailureReason.PERIODIC_SCHEDULER_SHUTDOWN,
			checkpointExceptionOptional.get().getCheckpointFailureReason());
	}

	// Not periodic
	final CompletableFuture<CompletedCheckpoint> onCompletionPromise2 = checkpointCoordinator.triggerCheckpoint(
		CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
		null,
		false,
		false);
	manuallyTriggeredScheduledExecutor.triggerAll();
	assertFalse(onCompletionPromise2.isCompletedExceptionally());
}
 
Example 10
Source File: ClassLoaderITCase.java    From flink with Apache License 2.0
@Test
public void testCheckpointedStreamingClassloaderJobWithCustomClassLoader() throws ProgramInvocationException {
	// checkpointed streaming job with custom classes for the checkpoint (FLINK-2543)
	// the test also ensures that user specific exceptions are serializable between JobManager <--> JobClient.
	PackagedProgram streamingCheckpointedProg = PackagedProgram.newBuilder()
		.setJarFile(new File(STREAMING_CHECKPOINTED_PROG_JAR_FILE))
		.build();

	TestStreamEnvironment.setAsContext(
		miniClusterResource.getMiniCluster(),
		parallelism,
		Collections.singleton(new Path(STREAMING_CHECKPOINTED_PROG_JAR_FILE)),
		Collections.emptyList());

	try {
		streamingCheckpointedProg.invokeInteractiveModeForExecution();
	} catch (Exception e) {
		// The program should terminate with a 'SuccessException':
		// the exception class is contained in the user-jar, but is not present on the Maven classpath,
		// so the deserialization of the exception should fail here.
		Optional<Throwable> exception = ExceptionUtils.findThrowable(e,
			candidate -> candidate.getClass().getName().equals("org.apache.flink.test.classloading.jar.CheckpointedStreamingProgram$SuccessException"));

		if (!exception.isPresent()) {
			// if we reach this point, either we failed due to another exception or the user-specific
			// exception was not serialized between JobManager and JobClient.
			throw e;
		}

		try {
			Class.forName(exception.get().getClass().getName());
			fail("Deserialization of user exception should have failed.");
		} catch (ClassNotFoundException expected) {
			// expected
		}
	}
}
 
Example 11
Source File: RestClusterClientTest.java    From flink with Apache License 2.0
@Test
public void testSubmitJobAndWaitForExecutionResult() throws Exception {
	final TestJobExecutionResultHandler testJobExecutionResultHandler =
		new TestJobExecutionResultHandler(
			new RestHandlerException("should trigger retry", HttpResponseStatus.SERVICE_UNAVAILABLE),
			JobExecutionResultResponseBody.inProgress(),
			JobExecutionResultResponseBody.created(new JobResult.Builder()
				.applicationStatus(ApplicationStatus.SUCCEEDED)
				.jobId(jobId)
				.netRuntime(Long.MAX_VALUE)
				.accumulatorResults(Collections.singletonMap("testName", new SerializedValue<>(OptionalFailure.of(1.0))))
				.build()),
			JobExecutionResultResponseBody.created(new JobResult.Builder()
				.applicationStatus(ApplicationStatus.FAILED)
				.jobId(jobId)
				.netRuntime(Long.MAX_VALUE)
				.serializedThrowable(new SerializedThrowable(new RuntimeException("expected")))
				.build()));

	// fail first HTTP polling attempt, which should not be a problem because of the retries
	final AtomicBoolean firstPollFailed = new AtomicBoolean();
	failHttpRequest = (messageHeaders, messageParameters, requestBody) ->
		messageHeaders instanceof JobExecutionResultHeaders && !firstPollFailed.getAndSet(true);

	try (TestRestServerEndpoint restServerEndpoint = createRestServerEndpoint(
		testJobExecutionResultHandler,
		new TestJobSubmitHandler())) {
		RestClusterClient<?> restClusterClient = createRestClusterClient(restServerEndpoint.getServerAddress().getPort());

		try {
			JobExecutionResult jobExecutionResult;

			jobExecutionResult = ClientUtils.submitJobAndWaitForResult(restClusterClient, jobGraph, ClassLoader.getSystemClassLoader());
			assertThat(jobExecutionResult.getJobID(), equalTo(jobId));
			assertThat(jobExecutionResult.getNetRuntime(), equalTo(Long.MAX_VALUE));
			assertThat(
				jobExecutionResult.getAllAccumulatorResults(),
				equalTo(Collections.singletonMap("testName", 1.0)));

			try {
				ClientUtils.submitJobAndWaitForResult(restClusterClient, jobGraph, ClassLoader.getSystemClassLoader());
				fail("Expected exception not thrown.");
			} catch (final ProgramInvocationException e) {
				final Optional<RuntimeException> cause = ExceptionUtils.findThrowable(e, RuntimeException.class);

				assertThat(cause.isPresent(), is(true));
				assertThat(cause.get().getMessage(), equalTo("expected"));
			}
		} finally {
			restClusterClient.close();
		}
	}
}
 
Example 12
Source File: SavepointITCase.java    From flink with Apache License 2.0
@Test
public void testSubmitWithUnknownSavepointPath() throws Exception {
	// Config
	int numTaskManagers = 1;
	int numSlotsPerTaskManager = 1;
	int parallelism = numTaskManagers * numSlotsPerTaskManager;

	final Configuration config = new Configuration();
	config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());

	MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(
		new MiniClusterResourceConfiguration.Builder()
			.setConfiguration(config)
			.setNumberTaskManagers(numTaskManagers)
			.setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
			.build());
	cluster.before();
	ClusterClient<?> client = cluster.getClusterClient();

	try {

		// High value to ensure timeouts if restarted.
		int numberOfRetries = 1000;
		// Submit the job
		// Long delay to ensure that the test times out if the job
		// manager tries to restart the job.
		final JobGraph jobGraph = createJobGraph(parallelism, numberOfRetries, 3600000);

		// Set non-existing savepoint path
		jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath("unknown path"));
		assertEquals("unknown path", jobGraph.getSavepointRestoreSettings().getRestorePath());

		LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");

		try {
			client.setDetached(false);
			client.submitJob(jobGraph, SavepointITCase.class.getClassLoader());
		} catch (Exception e) {
			Optional<JobExecutionException> expectedJobExecutionException = ExceptionUtils.findThrowable(e, JobExecutionException.class);
			Optional<FileNotFoundException> expectedFileNotFoundException = ExceptionUtils.findThrowable(e, FileNotFoundException.class);
			if (!(expectedJobExecutionException.isPresent() && expectedFileNotFoundException.isPresent())) {
				throw e;
			}
		}
	} finally {
		cluster.after();
	}
}
 
Example 13
Source File: SavepointITCase.java    From flink with Apache License 2.0
@Test
public void testSubmitWithUnknownSavepointPath() throws Exception {
	// Config
	int numTaskManagers = 1;
	int numSlotsPerTaskManager = 1;
	int parallelism = numTaskManagers * numSlotsPerTaskManager;

	final Configuration config = new Configuration();
	config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());

	MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(
		new MiniClusterResourceConfiguration.Builder()
			.setConfiguration(config)
			.setNumberTaskManagers(numTaskManagers)
			.setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
			.build());
	cluster.before();
	ClusterClient<?> client = cluster.getClusterClient();

	try {

		// High value to ensure timeouts if restarted.
		int numberOfRetries = 1000;
		// Submit the job
		// Long delay to ensure that the test times out if the job
		// manager tries to restart the job.
		final JobGraph jobGraph = createJobGraph(parallelism, numberOfRetries, 3600000);

		// Set non-existing savepoint path
		jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath("unknown path"));
		assertEquals("unknown path", jobGraph.getSavepointRestoreSettings().getRestorePath());

		LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");

		try {
			ClientUtils.submitJobAndWaitForResult(client, jobGraph, SavepointITCase.class.getClassLoader());
		} catch (Exception e) {
			Optional<JobExecutionException> expectedJobExecutionException = ExceptionUtils.findThrowable(e, JobExecutionException.class);
			Optional<FileNotFoundException> expectedFileNotFoundException = ExceptionUtils.findThrowable(e, FileNotFoundException.class);
			if (!(expectedJobExecutionException.isPresent() && expectedFileNotFoundException.isPresent())) {
				throw e;
			}
		}
	} finally {
		cluster.after();
	}
}
 
Example 14
Source File: CheckpointCoordinatorTriggeringTest.java    From flink with Apache License 2.0
@Test
public void testTriggerCheckpointSnapshotMasterHookFailed() throws Exception {
	// create some mock Execution vertices that receive the checkpoint trigger messages
	final ExecutionAttemptID attemptID = new ExecutionAttemptID();
	final AtomicInteger taskManagerCheckpointTriggeredTimes = new AtomicInteger(0);
	final SimpleAckingTaskManagerGateway.CheckpointConsumer checkpointConsumer =
		(executionAttemptID,
		jobId, checkpointId,
		timestamp,
		checkpointOptions,
		advanceToEndOfEventTime) -> taskManagerCheckpointTriggeredTimes.incrementAndGet();
	ExecutionVertex vertex = mockExecutionVertex(attemptID, checkpointConsumer);

	// set up the coordinator and validate the initial state
	CheckpointCoordinator checkpointCoordinator = createCheckpointCoordinator(vertex);

	final CompletableFuture<String> masterHookCheckpointFuture = new CompletableFuture<>();
	checkpointCoordinator.addMasterHook(new TestingMasterHook(masterHookCheckpointFuture));
	checkpointCoordinator.startCheckpointScheduler();
	final CompletableFuture<CompletedCheckpoint> onCompletionPromise =
		triggerPeriodicCheckpoint(checkpointCoordinator);

	// checkpoint trigger will not finish since master hook checkpoint is not finished yet
	manuallyTriggeredScheduledExecutor.triggerAll();
	assertTrue(checkpointCoordinator.isTriggering());

	// continue triggering
	masterHookCheckpointFuture.completeExceptionally(new Exception("by design"));

	manuallyTriggeredScheduledExecutor.triggerAll();
	assertFalse(checkpointCoordinator.isTriggering());

	try {
		onCompletionPromise.get();
		fail("Should not reach here");
	} catch (ExecutionException e) {
		final Optional<CheckpointException> checkpointExceptionOptional =
			ExceptionUtils.findThrowable(e, CheckpointException.class);
		assertTrue(checkpointExceptionOptional.isPresent());
		assertEquals(CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
			checkpointExceptionOptional.get().getCheckpointFailureReason());
	}
	// it doesn't really trigger task manager to do checkpoint
	assertEquals(0, taskManagerCheckpointTriggeredTimes.get());
	assertEquals(0, checkpointCoordinator.getTriggerRequestQueue().size());
}
 