Java Code Examples for org.apache.flink.client.ClientUtils#submitJob()

The following examples show how to use org.apache.flink.client.ClientUtils#submitJob(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: RestClusterClientTest.java From flink with Apache License 2.0

6 votes

/**
 * Tests that we can submit a jobGraph in detached mode.
 *
 * <p>The REST server endpoint and the cluster client are both managed by one
 * try-with-resources statement. The client is therefore closed before the endpoint
 * (same order as before), and an exception thrown while closing can no longer replace
 * an assertion failure raised inside the body.
 *
 * @throws Exception if the endpoint or client cannot be created, or the submission fails
 */
@Test
public void testDetachedJobSubmission() throws Exception {

	final TestJobSubmitHandler testJobSubmitHandler = new TestJobSubmitHandler();

	try (TestRestServerEndpoint restServerEndpoint = createRestServerEndpoint(testJobSubmitHandler);
			RestClusterClient<?> restClusterClient = createRestClusterClient(restServerEndpoint.getServerAddress().getPort())) {

		final JobSubmissionResult jobSubmissionResult = ClientUtils.submitJob(restClusterClient, jobGraph);

		// if the detached mode didn't work, then we would not reach this point because the execution result
		// retrieval would have failed.
		assertThat(jobSubmissionResult, is(instanceOf(DetachedJobExecutionResult.class)));
		assertThat(jobSubmissionResult.getJobID(), is(jobId));
	}

}

Example 2

Source File: AbstractOperatorRestoreTestBase.java From flink with Apache License 2.0

6 votes

/**
 * Submits a job created in {@code RESTORE} mode that starts from the given savepoint and
 * asserts that it is reported as {@code FINISHED} before the deadline runs out.
 *
 * @param clusterClient client used to submit the job and to poll its status
 * @param deadline upper bound for the status polling
 * @param savepointPath savepoint the job is restored from
 * @throws Exception if the submission fails or the status cannot be obtained in time
 */
private void restoreJob(ClusterClient<?> clusterClient, Deadline deadline, String savepointPath) throws Exception {
	final JobGraph restoreGraph = createJobGraph(ExecutionMode.RESTORE);
	final SavepointRestoreSettings restoreSettings =
		SavepointRestoreSettings.forPath(savepointPath, allowNonRestoredState);
	restoreGraph.setSavepointRestoreSettings(restoreSettings);

	assertNotNull("Job doesn't have a JobID.", restoreGraph.getJobID());

	ClientUtils.submitJob(clusterClient, restoreGraph);

	// re-query the job status every 50 ms until it is FINISHED or the deadline expires
	final CompletableFuture<JobStatus> finishedFuture = FutureUtils.retrySuccessfulWithDelay(
		() -> clusterClient.getJobStatus(restoreGraph.getJobID()),
		Time.milliseconds(50),
		deadline,
		(polledStatus) -> polledStatus == JobStatus.FINISHED,
		TestingUtils.defaultScheduledExecutor());

	final long remainingMillis = deadline.timeLeft().toMillis();
	final JobStatus terminalStatus = finishedFuture.get(remainingMillis, TimeUnit.MILLISECONDS);
	assertEquals(JobStatus.FINISHED, terminalStatus);
}

Example 3

Source File: SavepointITCase.java From flink with Apache License 2.0

6 votes

/**
 * Starts a cluster obtained from the factory, submits a newly created job graph, waits on
 * the {@code StatefulCounter} progress latch and then cancels the job with a savepoint.
 *
 * <p>The cluster is torn down and the counter is reset again in all cases once the cluster
 * has been started.
 *
 * @param clusterFactory supplies the mini cluster resource to run on
 * @param parallelism parallelism used for the job graph and the counter reset
 * @return the value the cancel-with-savepoint future completes with
 * @throws Exception if any step fails
 */
private String submitJobAndTakeSavepoint(MiniClusterResourceFactory clusterFactory, int parallelism) throws Exception {
	final JobGraph graph = createJobGraph(parallelism, 0, 1000);
	final JobID submittedJobId = graph.getJobID();
	StatefulCounter.resetForTest(parallelism);

	final MiniClusterWithClientResource miniCluster = clusterFactory.get();
	miniCluster.before();
	final ClusterClient<?> clusterClient = miniCluster.getClusterClient();

	try {
		ClientUtils.submitJob(clusterClient, graph);

		// wait for the progress latch before taking the savepoint
		StatefulCounter.getProgressLatch().await();

		return clusterClient.cancelWithSavepoint(submittedJobId, null).get();
	} finally {
		miniCluster.after();
		StatefulCounter.resetForTest(parallelism);
	}
}

Example 4

Source File: CancelingTestBase.java From flink with Apache License 2.0

5 votes

/**
 * Submits the job built from the given plan, waits until it is {@code RUNNING}, sleeps for
 * {@code msecsTillCanceling} milliseconds, cancels the job and then waits until it is
 * reported as {@code CANCELED}.
 *
 * @param plan plan that is turned into the job graph
 * @param msecsTillCanceling time to let the job run before cancelling, in milliseconds
 * @param maxTimeTillCanceled time allowed for the cancellation to take effect, in milliseconds
 * @throws Exception if submission, a status query or the cancellation fails
 */
protected void runAndCancelJob(Plan plan, final int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
	final JobGraph graph = getJobGraph(plan);

	final long statusTimeoutMillis = AkkaUtils.getTimeoutAsTime(configuration).toMilliseconds();

	final ClusterClient<?> clusterClient = CLUSTER.getClusterClient();
	final JobSubmissionResult submission = ClientUtils.submitJob(clusterClient, graph);

	// the job gets two minutes to reach RUNNING; its status is polled every 50 ms
	final Deadline runningDeadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();

	JobStatus statusBeforeCancel;
	for (statusBeforeCancel = clusterClient.getJobStatus(submission.getJobID()).get(statusTimeoutMillis, TimeUnit.MILLISECONDS);
			statusBeforeCancel != JobStatus.RUNNING && runningDeadline.hasTimeLeft();
			statusBeforeCancel = clusterClient.getJobStatus(submission.getJobID()).get(statusTimeoutMillis, TimeUnit.MILLISECONDS)) {
		Thread.sleep(50);
	}
	if (statusBeforeCancel != JobStatus.RUNNING) {
		Assert.fail("Job not in state RUNNING.");
	}

	Thread.sleep(msecsTillCanceling);

	clusterClient.cancel(submission.getJobID()).get();

	// same polling scheme, now bounded by the caller-supplied cancellation budget
	final Deadline canceledDeadline = new FiniteDuration(maxTimeTillCanceled, TimeUnit.MILLISECONDS).fromNow();

	JobStatus statusAfterCancel;
	for (statusAfterCancel = clusterClient.getJobStatus(submission.getJobID()).get(statusTimeoutMillis, TimeUnit.MILLISECONDS);
			statusAfterCancel != JobStatus.CANCELED && canceledDeadline.hasTimeLeft();
			statusAfterCancel = clusterClient.getJobStatus(submission.getJobID()).get(statusTimeoutMillis, TimeUnit.MILLISECONDS)) {
		Thread.sleep(50);
	}
	if (statusAfterCancel != JobStatus.CANCELED) {
		Assert.fail("Failed to cancel job with ID " + submission.getJobID() + '.');
	}
}

Example 5

Source File: AbstractQueryableStateTestBase.java From flink with Apache License 2.0

5 votes

/**
 * Tests a simple queryable value state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds after each subtask index is queried with
 * value numElements (the latest element updated the state).
 */
@Test
public void testValueState() throws Exception {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setStateBackend(stateBackend);
	environment.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	final DataStream<Tuple2<Integer, Long>> elements = environment.addSource(new TestAscendingValueSource(numElements));

	// descriptor of the value state that is exposed for queries
	final ValueStateDescriptor<Tuple2<Integer, Long>> descriptor = new ValueStateDescriptor<>("any", elements.getType());

	// key every tuple by its first field
	final KeySelector<Tuple2<Integer, Long>, Integer> byFirstField = new KeySelector<Tuple2<Integer, Long>, Integer>() {
		private static final long serialVersionUID = 7662520075515707428L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return value.f0;
		}
	};
	elements.keyBy(byFirstField).asQueryableState("hakuna", descriptor);

	try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, environment)) {

		final JobID jobId = job.getJobId();
		final JobGraph jobGraph = job.getJobGraph();

		ClientUtils.submitJob(clusterClient, jobGraph);
		executeValueQuery(deadline, client, jobId, "hakuna", descriptor, numElements);
	}
}

Example 6

Source File: ClientTest.java From flink with Apache License 2.0

5 votes

/**
 * This test verifies correct job submission messaging logic and plan translation calls.
 *
 * <p>The cluster client is opened in a try-with-resources statement so that it is closed
 * even when translation, submission or the assertion fails.
 *
 * @throws Exception if the plan cannot be translated or the job cannot be submitted
 */
@Test
public void shouldSubmitToJobClient() throws Exception {
	try (ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), MINI_CLUSTER_RESOURCE.getMiniCluster())) {
		JobGraph jobGraph = FlinkPipelineTranslationUtil.getJobGraph(
				plan,
				new Configuration(),
				1);

		// the job is submitted without any user jars or extra classpath entries
		jobGraph.addJars(Collections.emptyList());
		jobGraph.setClasspaths(Collections.emptyList());

		JobSubmissionResult result = ClientUtils.submitJob(clusterClient, jobGraph);
		assertNotNull(result);
	}
}

Example 7

Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0

5 votes

/**
 * Submits the given job, waits until {@code SavepointSource} reports that it is finished,
 * triggers a savepoint into a directory obtained from {@code getTempDirPath} and finally
 * cancels the job.
 *
 * <p>An interruption while waiting is propagated to the caller. Restoring the interrupt flag
 * and continuing to loop would make every following {@code Thread.sleep} throw immediately,
 * which turns the wait into a busy spin until the deadline expires.
 *
 * @param jobGraph the job to run
 * @return the savepoint path the trigger-savepoint future completes with
 * @throws Exception if submission, waiting, the savepoint or the cancellation fails
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
	SavepointSource.initializeForTest();

	ClusterClient<?> client = miniClusterResource.getClusterClient();
	JobID jobId = jobGraph.getJobID();

	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

	String dirPath = getTempDirPath(new AbstractID().toHexString());

	try {
		JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph);

		// poll until the source signals completion; give up once the deadline has passed
		while (!SavepointSource.isFinished()) {
			if (!deadline.hasTimeLeft()) {
				Assert.fail("Failed to initialize state within deadline");
			}
			Thread.sleep(2L);
		}

		CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
		return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	} finally {
		client.cancel(jobId).get();
	}
}

Example 8

Source File: ResumeCheckpointManuallyITCase.java From flink with Apache License 2.0

5 votes

/**
 * Runs a job until an externalized checkpoint exists, cancels it and returns the path of
 * that checkpoint.
 *
 * @param backend state backend the job graph is created with
 * @param checkpointDir directory that is watched for the externalized checkpoint
 * @param externalCheckpoint checkpoint handed to {@code getJobGraph}; may be {@code null}
 * @param client client used to submit and cancel the job
 * @return the externalized checkpoint path resolved for the job
 * @throws Exception if any step fails
 */
private static String runJobAndGetExternalizedCheckpoint(StateBackend backend, File checkpointDir, @Nullable String externalCheckpoint, ClusterClient<?> client) throws Exception {
	final JobGraph graph = getJobGraph(backend, externalCheckpoint);

	// one count per parallel source instance
	NotifyingInfiniteTupleSource.countDownLatch = new CountDownLatch(PARALLELISM);

	ClientUtils.submitJob(client, graph);

	// block until every source has started
	NotifyingInfiniteTupleSource.countDownLatch.await();

	waitUntilExternalizedCheckpointCreated(checkpointDir, graph.getJobID());

	// stop the job and make sure it is really gone before looking up the checkpoint
	client.cancel(graph.getJobID()).get();
	waitUntilCanceled(graph.getJobID(), client);

	return getExternalizedCheckpointCheckpointPath(checkpointDir, graph.getJobID());
}

Example 9

Source File: SavepointITCase.java From flink with Apache License 2.0

5 votes

/**
 * Submits a job graph without any checkpointing settings and expects that triggering a
 * savepoint for it fails. The resulting {@code ExecutionException} must contain an
 * {@code IllegalStateException}, the job id and the text "is not a streaming job".
 */
@Test
public void testTriggerSavepointWithCheckpointingDisabled() throws Exception {
	// cluster layout: one task manager with a single slot
	final int numTaskManagers = 1;
	final int numSlotsPerTaskManager = 1;

	final Configuration configuration = new Configuration();

	final MiniClusterResourceConfiguration resourceConfiguration = new MiniClusterResourceConfiguration.Builder()
		.setConfiguration(configuration)
		.setNumberTaskManagers(numTaskManagers)
		.setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
		.build();
	final MiniClusterWithClientResource miniCluster = new MiniClusterWithClientResource(resourceConfiguration);
	miniCluster.before();
	final ClusterClient<?> clusterClient = miniCluster.getClusterClient();

	final JobVertex blockingVertex = new JobVertex("Blocking vertex");
	blockingVertex.setInvokableClass(BlockingNoOpInvokable.class);
	blockingVertex.setParallelism(1);

	final JobGraph jobGraph = new JobGraph(blockingVertex);

	try {
		ClientUtils.submitJob(clusterClient, jobGraph);

		clusterClient.triggerSavepoint(jobGraph.getJobID(), null).get();

		// the savepoint must not succeed
		fail();
	} catch (ExecutionException expected) {
		assertTrue(ExceptionUtils.findThrowable(expected, IllegalStateException.class).isPresent());
		assertTrue(ExceptionUtils.findThrowableWithMessage(expected, jobGraph.getJobID().toString()).isPresent());
		assertTrue(ExceptionUtils.findThrowableWithMessage(expected, "is not a streaming job").isPresent());
	} finally {
		miniCluster.after();
	}
}

Example 10

Source File: NotifyCheckpointAbortedITCase.java From flink with Apache License 2.0

5 votes

/**
 * Verify operators would be notified as checkpoint aborted.
 *
 * <p>The job would run with at least two checkpoints. The 1st checkpoint would fail due to add checkpoint to store,
 * and the 2nd checkpoint would decline by async checkpoint phase of 'DeclineSink'.
 *
 * <p>The job graph looks like:
 * NormalSource --> keyBy --> NormalMap --> DeclineSink
 */
@Test(timeout = TEST_TIMEOUT)
public void testNotifyCheckpointAborted() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
	environment.getCheckpointConfig().enableUnalignedCheckpoints(unalignedCheckpointEnabled);
	environment.getCheckpointConfig().setTolerableCheckpointFailureNumber(1);
	environment.disableOperatorChaining();
	environment.setParallelism(1);

	environment.setStateBackend(new DeclineSinkFailingStateBackend(checkpointPath));

	// NormalSource --> keyBy --> NormalMap --> DeclineSink
	environment.addSource(new NormalSource()).name("NormalSource")
		.keyBy((KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0)
		.transform("NormalMap", TypeInformation.of(Integer.class), new NormalMap())
		.transform(DECLINE_SINK_NAME, TypeInformation.of(Object.class), new DeclineSink());

	final ClusterClient<?> clusterClient = cluster.getClusterClient();
	final JobGraph submittedGraph = environment.getStreamGraph().getJobGraph();

	ClientUtils.submitJob(clusterClient, submittedGraph);

	// first round: wait on the add-checkpoint latch, then trigger the abort latch
	TestingCompletedCheckpointStore.addCheckpointLatch.await();
	TestingCompletedCheckpointStore.abortCheckpointLatch.trigger();

	verifyAllOperatorsNotifyAborted();
	resetAllOperatorsNotifyAbortedLatches();
	verifyAllOperatorsNotifyAbortedTimes(1);

	// second round: release the sink's wait latch and expect a second abort notification
	DeclineSink.waitLatch.trigger();
	verifyAllOperatorsNotifyAborted();
	verifyAllOperatorsNotifyAbortedTimes(2);

	clusterClient.cancel(submittedGraph.getJobID()).get();
}

Example 11

Source File: AbstractQueryableStateTestBase.java From flink with Apache License 2.0

5 votes

/**
 * Tests a simple queryable value state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds after each subtask index is queried with
 * value numElements (the latest element updated the state).
 *
 * <p>This is the same as the simple value state test, but uses the API shortcut.
 */
@Test
public void testValueStateShortcut() throws Exception {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setStateBackend(stateBackend);
	environment.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	final DataStream<Tuple2<Integer, Long>> elements = environment.addSource(new TestAscendingValueSource(numElements));

	// key every tuple by its first field
	final KeySelector<Tuple2<Integer, Long>, Integer> byFirstField = new KeySelector<Tuple2<Integer, Long>, Integer>() {
		private static final long serialVersionUID = 9168901838808830068L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return value.f0;
		}
	};

	// shortcut: only the name is given, the descriptor is taken from the resulting stream
	final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableStream =
			elements.keyBy(byFirstField).asQueryableState("matata");

	@SuppressWarnings("unchecked")
	final ValueStateDescriptor<Tuple2<Integer, Long>> descriptor =
			(ValueStateDescriptor<Tuple2<Integer, Long>>) queryableStream.getStateDescriptor();

	try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, environment)) {

		final JobID jobId = job.getJobId();
		final JobGraph jobGraph = job.getJobGraph();

		ClientUtils.submitJob(clusterClient, jobGraph);
		executeValueQuery(deadline, client, jobId, "matata", descriptor, numElements);
	}
}

Example 12

Source File: SavepointITCase.java From flink with Apache License 2.0

4 votes

/**
 * Restores a job from the given savepoint on a cluster obtained from the factory, waits
 * until state has been restored and some progress was made, cancels the job and then
 * disposes the savepoint, asserting that the savepoint path no longer exists.
 *
 * <p>The status-polling future is awaited, so the savepoint is only disposed after the job
 * has been reported as {@code CANCELED}. Without the blocking {@code get()} the retry
 * future would simply be discarded and the method would not wait at all.
 *
 * @param savepointPath savepoint to restore from and to dispose afterwards
 * @param clusterFactory supplies the mini cluster resource to run on
 * @param parallelism parallelism used for the job graph and the counter reset
 * @throws Exception if any step fails, including the job not reaching CANCELED in time
 */
private void restoreJobAndVerifyState(
	String savepointPath,
	MiniClusterResourceFactory clusterFactory,
	int parallelism) throws Exception {
	final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
	jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));
	final JobID jobId = jobGraph.getJobID();
	StatefulCounter.resetForTest(parallelism);

	MiniClusterWithClientResource cluster = clusterFactory.get();
	cluster.before();
	ClusterClient<?> client = cluster.getClusterClient();

	try {
		ClientUtils.submitJob(client, jobGraph);

		// Await state is restored
		StatefulCounter.getRestoreLatch().await();

		// Await some progress after restore
		StatefulCounter.getProgressLatch().await();

		client.cancel(jobId).get();

		// Block until the job is actually CANCELED (polled every 50 ms, at most 30 s)
		FutureUtils.retrySuccessfulWithDelay(
			() -> client.getJobStatus(jobId),
			Time.milliseconds(50),
			Deadline.now().plus(Duration.ofSeconds(30)),
			status -> status == JobStatus.CANCELED,
			TestingUtils.defaultScheduledExecutor()
		).get();

		client.disposeSavepoint(savepointPath)
			.get();

		assertFalse("Savepoint not properly cleaned up.", new File(savepointPath).exists());
	} finally {
		cluster.after();
		StatefulCounter.resetForTest(parallelism);
	}
}

Example 13

Source File: SavepointMigrationTestBase.java From flink with Apache License 2.0

4 votes

@SafeVarargs
protected final void restoreAndExecute(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));

	ClusterClient<?> client = miniClusterResource.getClusterClient();

	// Submit the job
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();

	jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

	JobSubmissionResult jobSubmissionResult = ClientUtils.submitJob(client, jobGraph);

	boolean done = false;
	while (deadLine.hasTimeLeft()) {

		// try and get a job result, this will fail if the job already failed. Use this
		// to get out of this loop
		JobID jobId = jobSubmissionResult.getJobID();

		try {
			CompletableFuture<JobStatus> jobStatusFuture = client.getJobStatus(jobSubmissionResult.getJobID());

			JobStatus jobStatus = jobStatusFuture.get(5, TimeUnit.SECONDS);

			assertNotEquals(JobStatus.FAILED, jobStatus);
		} catch (Exception e) {
			fail("Could not connect to job: " + e);
		}

		Thread.sleep(100);
		Map<String, Object> accumulators = client.getAccumulators(jobId).get();

		boolean allDone = true;
		for (Tuple2<String, Integer> acc : expectedAccumulators) {
			Object numFinished = accumulators.get(acc.f0);
			if (numFinished == null) {
				allDone = false;
				break;
			}
			if (!numFinished.equals(acc.f1)) {
				allDone = false;
				break;
			}
		}

		if (allDone) {
			done = true;
			break;
		}
	}

	if (!done) {
		fail("Did not see the expected accumulator results within time limit.");
	}
}

Example 14

Source File: SavepointMigrationTestBase.java From flink with Apache License 2.0

4 votes

@SafeVarargs
protected final void executeAndSavepoint(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));

	ClusterClient<?> client = miniClusterResource.getClusterClient();

	// Submit the job
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();

	JobSubmissionResult jobSubmissionResult = ClientUtils.submitJob(client, jobGraph);

	LOG.info("Submitted job {} and waiting...", jobSubmissionResult.getJobID());

	boolean done = false;
	while (deadLine.hasTimeLeft()) {
		Thread.sleep(100);
		Map<String, Object> accumulators = client.getAccumulators(jobSubmissionResult.getJobID()).get();

		boolean allDone = true;
		for (Tuple2<String, Integer> acc : expectedAccumulators) {
			Object accumOpt = accumulators.get(acc.f0);
			if (accumOpt == null) {
				allDone = false;
				break;
			}

			Integer numFinished = (Integer) accumOpt;
			if (!numFinished.equals(acc.f1)) {
				allDone = false;
				break;
			}
		}
		if (allDone) {
			done = true;
			break;
		}
	}

	if (!done) {
		fail("Did not see the expected accumulator results within time limit.");
	}

	LOG.info("Triggering savepoint.");

	CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobSubmissionResult.getJobID(), null);

	String jobmanagerSavepointPath = savepointPathFuture.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

	File jobManagerSavepoint = new File(new URI(jobmanagerSavepointPath).getPath());
	// savepoints were changed to be directories in Flink 1.3
	if (jobManagerSavepoint.isDirectory()) {
		FileUtils.moveDirectory(jobManagerSavepoint, new File(savepointPath));
	} else {
		FileUtils.moveFile(jobManagerSavepoint, new File(savepointPath));
	}
}

Example 15

Source File: JMXJobManagerMetricTest.java From flink with Apache License 2.0

4 votes

/**
 * Tests that metrics registered on the JobManager are actually accessible via JMX.
 */
@Test
public void testJobManagerJMXMetricAccess() throws Exception {
	final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(2));

	try {
		final JobVertex sourceVertex = new JobVertex("Source");
		sourceVertex.setInvokableClass(BlockingInvokable.class);

		final CheckpointCoordinatorConfiguration coordinatorConfiguration = new CheckpointCoordinatorConfiguration(
			500,
			500,
			50,
			5,
			CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
			true,
			false,
			false,
			0);
		final JobCheckpointingSettings checkpointingSettings = new JobCheckpointingSettings(
			Collections.<JobVertexID>emptyList(),
			Collections.<JobVertexID>emptyList(),
			Collections.<JobVertexID>emptyList(),
			coordinatorConfiguration,
			null);

		final JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
		jobGraph.setSnapshotSettings(checkpointingSettings);

		final ClusterClient<?> clusterClient = MINI_CLUSTER_RESOURCE.getClusterClient();
		ClientUtils.submitJob(clusterClient, jobGraph);

		// poll every 10 ms until the job is RUNNING or the deadline expires
		FutureUtils.retrySuccessfulWithDelay(
			() -> clusterClient.getJobStatus(jobGraph.getJobID()),
			Time.milliseconds(10),
			deadline,
			status -> status == JobStatus.RUNNING,
			TestingUtils.defaultScheduledExecutor()
		).get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

		// exactly one lastCheckpointSize MBean is expected for the job, with value -1
		final MBeanServer platformServer = ManagementFactory.getPlatformMBeanServer();
		final ObjectName metricPattern = new ObjectName("org.apache.flink.jobmanager.job.lastCheckpointSize:job_name=TestingJob,*");
		final Set<ObjectName> matchingNames = platformServer.queryNames(metricPattern, null);
		Assert.assertEquals(1, matchingNames.size());
		assertEquals(-1L, platformServer.getAttribute(matchingNames.iterator().next(), "Value"));

		BlockingInvokable.unblock();
	} finally {
		BlockingInvokable.unblock();
	}
}

Example 16

Source File: WebFrontendITCase.java From flink with Apache License 2.0

4 votes

/**
 * Submits a blocking job, requests {@code /jobs/<jobId>/yarn-cancel} from the REST port and
 * checks the HTTP response, then waits until no job is reported as running any more.
 */
@Test
public void testCancelYarn() throws Exception {
	// this only works if there is no active job at this point
	assertTrue(getRunningJobs(CLUSTER.getClusterClient()).isEmpty());

	// a single blocking vertex with parallelism 2
	final JobVertex blockingVertex = new JobVertex("Sender");
	blockingVertex.setParallelism(2);
	blockingVertex.setInvokableClass(BlockingInvokable.class);

	final JobGraph graph = new JobGraph("Stoppable streaming test job", blockingVertex);
	final JobID jobId = graph.getJobID();

	final ClusterClient<?> submittingClient = CLUSTER.getClusterClient();
	ClientUtils.submitJob(submittingClient, graph);

	// wait for job to show up
	while (getRunningJobs(CLUSTER.getClusterClient()).isEmpty()) {
		Thread.sleep(10);
	}

	// wait for tasks to be properly running
	BlockingInvokable.latch.await();

	final Duration httpBudget = Duration.ofMinutes(2);
	final LocalTime httpDeadline = LocalTime.now().plus(httpBudget);

	try (HttpTestClient httpClient = new HttpTestClient("localhost", getRestPort())) {
		// issue the cancel request through the web endpoint
		httpClient.sendGetRequest("/jobs/" + jobId + "/yarn-cancel", getTimeLeft(httpDeadline));

		final HttpTestClient.SimpleHttpResponse cancelResponse = httpClient.getNextResponse(getTimeLeft(httpDeadline));

		assertEquals(HttpResponseStatus.ACCEPTED, cancelResponse.getStatus());
		assertEquals("application/json; charset=UTF-8", cancelResponse.getType());
		assertEquals("{}", cancelResponse.getContent());
	}

	// wait for cancellation to finish
	while (!getRunningJobs(CLUSTER.getClusterClient()).isEmpty()) {
		Thread.sleep(20);
	}

	BlockingInvokable.reset();
}

Example 17

Source File: RescalingITCase.java From flink with Apache License 2.0

4 votes

/**
 * Tests that a job cannot be restarted from a savepoint with a different parallelism if the
 * rescaled operator has non-partitioned state.
 *
 * @throws Exception if a step fails with anything other than the tolerated
 *     {@code JobExecutionException} caused by an {@code IllegalStateException}
 */
@Test
public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception {
	final int initialParallelism = numSlots / 2;
	final int rescaledParallelism = numSlots;
	final int maxParallelism = 13;

	final Duration timeout = Duration.ofMinutes(3);
	final Deadline deadline = Deadline.now().plus(timeout);

	final ClusterClient<?> clusterClient = cluster.getClusterClient();

	try {
		final JobGraph initialGraph = createJobGraphWithOperatorState(initialParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		final JobID jobID = initialGraph.getJobID();

		ClientUtils.submitJob(clusterClient, initialGraph);

		// wait until the operator is started
		StateSourceBase.workStartedLatch.await();

		final CompletableFuture<String> savepointFuture = clusterClient.triggerSavepoint(jobID, null);
		final String savepointPath = savepointFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);

		clusterClient.cancel(jobID).get();

		// wait until the cancelled job is no longer listed as running
		while (!getRunningJobs(clusterClient).isEmpty()) {
			Thread.sleep(50);
		}

		// resubmit with the other parallelism, restoring from the savepoint just taken
		final JobGraph rescaledGraph = createJobGraphWithOperatorState(rescaledParallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED);
		rescaledGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

		ClientUtils.submitJobAndWaitForResult(clusterClient, rescaledGraph, RescalingITCase.class.getClassLoader());
	} catch (JobExecutionException exception) {
		// an IllegalStateException wrapped in a JobExecutionException is expected, because the
		// job containing non-partitioned state is being rescaled; anything else is rethrown
		if (!(exception.getCause() instanceof IllegalStateException)) {
			throw exception;
		}
	}
}

Example 18

Source File: AbstractOperatorRestoreTestBase.java From flink with Apache License 2.0

4 votes

/**
 * Submits a job in {@code MIGRATE} mode that is restored from the bundled migration savepoint,
 * waits until it is RUNNING, cancels it with a savepoint and waits until it is CANCELED.
 *
 * @param clusterClient client used to submit, query and cancel the job
 * @param deadline upper bound for all waiting done here
 * @return path of the savepoint taken while cancelling
 * @throws Throwable if the savepoint resource is missing or any step fails
 */
private String migrateJob(ClusterClient<?> clusterClient, Deadline deadline) throws Throwable {

	final String resourceName = "operatorstate/" + getMigrationSavepointName();
	final URL migrationSavepoint = AbstractOperatorRestoreTestBase.class.getClassLoader().getResource(resourceName);
	if (migrationSavepoint == null) {
		throw new IllegalArgumentException("Savepoint file does not exist.");
	}

	final JobGraph migrationGraph = createJobGraph(ExecutionMode.MIGRATE);
	migrationGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(migrationSavepoint.getFile()));

	assertNotNull(migrationGraph.getJobID());

	ClientUtils.submitJob(clusterClient, migrationGraph);

	// poll every 50 ms until the job is RUNNING
	final CompletableFuture<JobStatus> runningFuture = FutureUtils.retrySuccessfulWithDelay(
		() -> clusterClient.getJobStatus(migrationGraph.getJobID()),
		Time.milliseconds(50),
		deadline,
		(polledStatus) -> polledStatus == JobStatus.RUNNING,
		TestingUtils.defaultScheduledExecutor());
	final JobStatus statusAfterStart = runningFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	assertEquals(JobStatus.RUNNING, statusAfterStart);

	// Trigger savepoint
	final File savepointDirectory = tmpFolder.newFolder();
	String takenSavepointPath = null;

	// FLINK-6918: Retry cancel with savepoint message in case that StreamTasks were not running
	// TODO: The retry logic should be removed once the StreamTask lifecycle has been fixed (see FLINK-4714)
	while (deadline.hasTimeLeft() && takenSavepointPath == null) {
		try {
			takenSavepointPath = clusterClient.cancelWithSavepoint(
				migrationGraph.getJobID(),
				savepointDirectory.getAbsolutePath()).get();
		} catch (Exception e) {
			// only failures matching the tolerated pattern are retried
			final String stackTraceText = ExceptionUtils.stringifyException(e);
			if (!PATTERN_CANCEL_WITH_SAVEPOINT_TOLERATED_EXCEPTIONS.matcher(stackTraceText).find()) {
				throw e;
			}
		}
	}

	assertNotNull("Could not take savepoint.", takenSavepointPath);

	// poll every 50 ms until the job is CANCELED
	final CompletableFuture<JobStatus> canceledFuture = FutureUtils.retrySuccessfulWithDelay(
		() -> clusterClient.getJobStatus(migrationGraph.getJobID()),
		Time.milliseconds(50),
		deadline,
		(polledStatus) -> polledStatus == JobStatus.CANCELED,
		TestingUtils.defaultScheduledExecutor());
	final JobStatus statusAfterCancel = canceledFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
	assertEquals(JobStatus.CANCELED, statusAfterCancel);

	return takenSavepointPath;
}

Example 19

Source File: AbstractQueryableStateTestBase.java From flink with Apache License 2.0

4 votes

/**
 * Similar tests as {@link #testValueState()} but before submitting the
 * job, we already issue one request which fails.
 */
@Test
public void testQueryNonStartedJobState() throws Exception {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setStateBackend(stateBackend);
	environment.setParallelism(maxParallelism);
	// Very important, because clusterClient is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	final DataStream<Tuple2<Integer, Long>> elements = environment.addSource(new TestAscendingValueSource(numElements));

	// value state descriptor with a null default value
	final ValueStateDescriptor<Tuple2<Integer, Long>> descriptor = new ValueStateDescriptor<>(
		"any", elements.getType(), null);

	// key every tuple by its first field
	final KeySelector<Tuple2<Integer, Long>, Integer> byFirstField = new KeySelector<Tuple2<Integer, Long>, Integer>() {

		private static final long serialVersionUID = 7480503339992214681L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return value.f0;
		}
	};
	final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableStream =
			elements.keyBy(byFirstField).asQueryableState("hakuna", descriptor);

	try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, environment)) {

		final JobID jobId = job.getJobId();
		final JobGraph jobGraph = job.getJobGraph();

		// query once before the job has been submitted; the returned future is not inspected
		client.getKvState(
				jobId,
				queryableStream.getQueryableStateName(),
				0,
				BasicTypeInfo.INT_TYPE_INFO,
				descriptor);

		ClientUtils.submitJob(clusterClient, jobGraph);
		executeValueQuery(deadline, client, jobId, "hakuna", descriptor, numElements);
	}
}

Example 20

Source File: AbstractQueryableStateTestBase.java From flink with Apache License 2.0

4 votes

/**
 * Tests simple value state queryable state instance with a default value
 * set. Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements)
 * tuples, the key is mapped to 1 but key 0 is queried which should throw
 * a {@link UnknownKeyOrNamespaceException} exception.
 *
 * @throws UnknownKeyOrNamespaceException thrown due to querying a non-existent key
 */
@Test(expected = UnknownKeyOrNamespaceException.class)
public void testValueStateDefault() throws Throwable {
	final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
	final long numElements = 1024L;

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setStateBackend(stateBackend);
	environment.setParallelism(maxParallelism);
	// Very important, because cluster is shared between tests and we
	// don't explicitly check that all slots are available before
	// submitting.
	environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

	final DataStream<Tuple2<Integer, Long>> elements = environment.addSource(new TestAscendingValueSource(numElements));

	final ValueStateDescriptor<Tuple2<Integer, Long>> descriptor = new ValueStateDescriptor<>(
			"any", elements.getType(), Tuple2.of(0, 1337L));

	// only expose key "1"
	final KeySelector<Tuple2<Integer, Long>, Integer> constantKey = new KeySelector<Tuple2<Integer, Long>, Integer>() {
		private static final long serialVersionUID = 4509274556892655887L;

		@Override
		public Integer getKey(Tuple2<Integer, Long> value) {
			return 1;
		}
	};
	final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableStream =
			elements.keyBy(constantKey).asQueryableState("hakuna", descriptor);

	try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, environment)) {

		final JobID jobId = job.getJobId();
		final JobGraph jobGraph = job.getJobGraph();

		ClientUtils.submitJob(clusterClient, jobGraph);

		// query key 0, which no element is ever mapped to
		final int queriedKey = 0;
		final CompletableFuture<ValueState<Tuple2<Integer, Long>>> stateFuture = getKvState(
				deadline,
				client,
				jobId,
				queryableStream.getQueryableStateName(),
				queriedKey,
				BasicTypeInfo.INT_TYPE_INFO,
				descriptor,
				true,
				executor);

		final long remainingMillis = deadline.timeLeft().toMillis();
		try {
			stateFuture.get(remainingMillis, TimeUnit.MILLISECONDS);
		} catch (ExecutionException | CompletionException wrapped) {
			// get() on a completedExceptionally future wraps the
			// exception in an ExecutionException.
			throw wrapped.getCause();
		}
	}
}