Java Code Examples for org.apache.flink.api.common.time.Deadline#hasTimeLeft()

The following examples show how to use org.apache.flink.api.common.time.Deadline#hasTimeLeft(). The examples are drawn from open-source projects; the source file and originating project are noted above each one.
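All of the examples below share the same bounded-polling pattern: create a Deadline, then repeatedly check a condition, sleeping between attempts, for as long as hasTimeLeft() returns true. The minimal sketch below distills that pattern; the class name, helper name, condition, timeout, and retry interval are illustrative placeholders rather than part of any particular example.

import java.time.Duration;
import java.util.function.BooleanSupplier;

import org.apache.flink.api.common.time.Deadline;

public final class DeadlinePollingSketch {

	/**
	 * Polls {@code condition} until it returns true or {@code timeout} elapses.
	 *
	 * @return {@code true} if the condition was met before the deadline expired
	 */
	public static boolean pollUntil(
			BooleanSupplier condition,
			Duration timeout,
			long retryIntervalMillis) throws InterruptedException {

		final Deadline deadline = Deadline.fromNow(timeout);

		while (deadline.hasTimeLeft() && !condition.getAsBoolean()) {
			// sleep for the retry interval, but never past the deadline
			final long timeLeftMillis = Math.max(0, deadline.timeLeft().toMillis());
			Thread.sleep(Math.min(retryIntervalMillis, timeLeftMillis));
		}

		return condition.getAsBoolean();
	}
}
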
Example 1
Source File: SystemProcessingTimeService.java    From flink with Apache License 2.0
@Override
public boolean shutdownServiceUninterruptible(long timeoutMs) {

	final Deadline deadline = Deadline.fromNow(Duration.ofMillis(timeoutMs));

	boolean shutdownComplete = false;
	boolean receivedInterrupt = false;

	do {
		try {
			// wait for a reasonable time for all pending timer threads to finish
			shutdownComplete = shutdownAndAwaitPending(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
		} catch (InterruptedException iex) {
			receivedInterrupt = true;
			LOG.trace("Intercepted attempt to interrupt timer service shutdown.", iex);
		}
	} while (deadline.hasTimeLeft() && !shutdownComplete);

	if (receivedInterrupt) {
		Thread.currentThread().interrupt();
	}

	return shutdownComplete;
}
 
Example 2
Source File: ExecutionGraphTestUtils.java    From Flink-CEPplus with Apache License 2.0
/**
 * Waits until all executions fulfill the given predicate.
 *
 * @param executionGraph for which to check the executions
 * @param executionPredicate predicate which is to be fulfilled
 * @param maxWaitMillis timeout for the wait operation
 * @throws TimeoutException if the executions did not reach the target state in time
 */
public static void waitForAllExecutionsPredicate(
		ExecutionGraph executionGraph,
		Predicate<AccessExecution> executionPredicate,
		long maxWaitMillis) throws TimeoutException {
	final Predicate<AccessExecutionGraph> allExecutionsPredicate = allExecutionsPredicate(executionPredicate);
	final Deadline deadline = Deadline.fromNow(Duration.ofMillis(maxWaitMillis));
	boolean predicateResult;

	do {
		predicateResult = allExecutionsPredicate.test(executionGraph);

		if (!predicateResult) {
			try {
				Thread.sleep(2L);
			} catch (InterruptedException ignored) {
				Thread.currentThread().interrupt();
			}
		}
	} while (!predicateResult && deadline.hasTimeLeft());

	if (!predicateResult) {
		throw new TimeoutException("Not all executions fulfilled the predicate in time.");
	}
}
 
Example 3
Source File: AutoClosableProcess.java    From flink with Apache License 2.0
public void runBlockingWithRetry(final int maxRetries, final Duration attemptTimeout, final Duration globalTimeout) throws IOException {
	int retries = 0;
	final Deadline globalDeadline = Deadline.fromNow(globalTimeout);

	while (true) {
		try {
			runBlocking(attemptTimeout);
			break;
		} catch (Exception e) {
			if (++retries > maxRetries || !globalDeadline.hasTimeLeft()) {
				String errMsg = String.format(
					"Process (%s) exceeded timeout (%s) or number of retries (%s).",
					Arrays.toString(commands), globalTimeout.toMillis(), maxRetries);
				throw new IOException(errMsg, e);
			}
		}
	}
}
 
Example 4
Source File: HadoopRecoverableFsDataOutputStream.java    From flink with Apache License 2.0
/**
 * Called when resuming execution after a failure; waits until the lease
 * on the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * @param fs The file system the file belongs to; must be a DistributedFileSystem.
 * @param path The path to the file we want to resume writing to.
 * @return {@code true} if the file was closed (i.e. the lease was revoked) within the timeout, {@code false} otherwise.
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
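	// re-check the close state every 500 ms until the file is closed or the lease deadline expires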
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 5
Source File: AbstractQueryableStateTestBase.java    From flink with Apache License 2.0
/**
 * Retries a query for the state of every key between 0 and {@link #maxParallelism} until
 * <tt>expected</tt> equals the value of the result tuple's second field.
 */
private void executeValueQuery(
		final Deadline deadline,
		final QueryableStateClient client,
		final JobID jobId,
		final String queryableStateName,
		final ValueStateDescriptor<Tuple2<Integer, Long>> stateDescriptor,
		final long expected) throws Exception {

	for (int key = 0; key < maxParallelism; key++) {
		boolean success = false;
		while (deadline.hasTimeLeft() && !success) {
			CompletableFuture<ValueState<Tuple2<Integer, Long>>> future = getKvState(
					deadline,
					client,
					jobId,
					queryableStateName,
					key,
					BasicTypeInfo.INT_TYPE_INFO,
					stateDescriptor,
					false,
					executor);

			Tuple2<Integer, Long> value = future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS).value();

			assertEquals("Key mismatch", key, value.f0.intValue());
			if (expected == value.f1) {
				success = true;
			} else {
				// Retry
				Thread.sleep(RETRY_TIMEOUT);
			}
		}

		assertTrue("Did not succeed query", success);
	}
}
 
Example 6
Source File: RecordEmitterTest.java    From flink with Apache License 2.0
@Test
public void test() throws Exception {

	TestRecordEmitter emitter = new TestRecordEmitter();

	final TimestampedValue<String> one = new TimestampedValue<>("one", 1);
	final TimestampedValue<String> two = new TimestampedValue<>("two", 2);
	final TimestampedValue<String> five = new TimestampedValue<>("five", 5);
	final TimestampedValue<String> ten = new TimestampedValue<>("ten", 10);

	final RecordEmitter.RecordQueue<TimestampedValue> queue0 = emitter.getQueue(0);
	final RecordEmitter.RecordQueue<TimestampedValue> queue1 = emitter.getQueue(1);

	queue0.put(one);
	queue0.put(five);
	queue0.put(ten);

	queue1.put(two);

	ExecutorService executor = Executors.newSingleThreadExecutor();
	executor.submit(emitter);

	Deadline dl = Deadline.fromNow(Duration.ofSeconds(10));
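	// wait until all four records have been emitted, but give up after 10 seconds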
	while (emitter.results.size() != 4 && dl.hasTimeLeft()) {
		Thread.sleep(10);
	}
	emitter.stop();
	executor.shutdownNow();

	Assert.assertThat(emitter.results, Matchers.contains(one, five, two, ten));
}
 
Example 7
Source File: FlinkPortableClientEntryPoint.java    From beam with Apache License 2.0
private void startJobService() throws Exception {
  jobInvokerFactory = new DetachedJobInvokerFactory();
  jobServer =
      FlinkJobServerDriver.fromConfig(
          FlinkJobServerDriver.parseArgs(
              new String[] {"--job-port=" + jobPort, "--artifact-port=0", "--expansion-port=0"}),
          jobInvokerFactory);
  jobServerThread = new Thread(jobServer);
  jobServerThread.start();

  Deadline deadline = Deadline.fromNow(JOB_SERVICE_STARTUP_TIMEOUT);
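  // check every 500 ms until the job server publishes its URL or the startup timeout expires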
  while (jobServer.getJobServerUrl() == null && deadline.hasTimeLeft()) {
    try {
      Thread.sleep(500);
    } catch (InterruptedException interruptEx) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(interruptEx);
    }
  }

  if (!jobServerThread.isAlive()) {
    throw new IllegalStateException("Job service thread is not alive");
  }

  if (jobServer.getJobServerUrl() == null) {
    String msg = String.format("Timeout of %s waiting for job service to start.", deadline);
    throw new TimeoutException(msg);
  }
}
 
Example 8
Source File: CommonTestUtils.java    From flink with Apache License 2.0
public static void waitUntilCondition(SupplierWithException<Boolean, Exception> condition, Deadline timeout, long retryIntervalMillis) throws Exception {
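	// re-check the condition at the retry interval, but never sleep past the deadline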
	while (timeout.hasTimeLeft() && !condition.get()) {
		final long timeLeft = Math.max(0, timeout.timeLeft().toMillis());
		Thread.sleep(Math.min(retryIntervalMillis, timeLeft));
	}

	if (!timeout.hasTimeLeft()) {
		throw new TimeoutException("Condition was not met in given timeout.");
	}
}
 
Example 9
Source File: YarnTestBase.java    From flink with Apache License 2.0
@Override
public void close() throws Exception {
	Deadline deadline = Deadline.now().plus(Duration.ofSeconds(10));

	boolean isAnyJobRunning = yarnClient.getApplications().stream()
		.anyMatch(YarnTestBase::isApplicationRunning);

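	// re-check the running applications every 500 ms until none remain or the deadline expires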
	while (deadline.hasTimeLeft() && isAnyJobRunning) {
		try {
			Thread.sleep(500);
		} catch (InterruptedException e) {
			Assert.fail("Should not happen");
		}
		isAnyJobRunning = yarnClient.getApplications().stream()
			.anyMatch(YarnTestBase::isApplicationRunning);
	}

	if (isAnyJobRunning) {
		final List<String> runningApps = yarnClient.getApplications().stream()
			.filter(YarnTestBase::isApplicationRunning)
			.map(app -> "App " + app.getApplicationId() + " is in state " + app.getYarnApplicationState() + '.')
			.collect(Collectors.toList());
		if (!runningApps.isEmpty()) {
			Assert.fail("There is at least one application on the cluster that is not finished." + runningApps);
		}
	}
}
 
Example 10
Source File: SQLClientKafkaITCase.java    From flink with Apache License 2.0
private void checkCsvResultFile() throws Exception {
	boolean success = false;
	final Deadline deadline = Deadline.fromNow(Duration.ofSeconds(120));
	while (deadline.hasTimeLeft()) {
		if (Files.exists(result)) {
			List<String> lines = readCsvResultFiles(result);
			if (lines.size() == 4) {
				success = true;
				assertThat(
					lines.toArray(new String[0]),
					arrayContainingInAnyOrder(
						"2018-03-12 08:00:00.000,Alice,This was a warning.,2,Success constant folding.",
						"2018-03-12 09:00:00.000,Bob,This was another warning.,1,Success constant folding.",
						"2018-03-12 09:00:00.000,Steve,This was another info.,2,Success constant folding.",
						"2018-03-12 09:00:00.000,Alice,This was a info.,1,Success constant folding."
					)
				);
				break;
			} else {
				LOG.info("The target CSV {} does not contain enough records, current {} records, left time: {}s",
					result, lines.size(), deadline.timeLeft().getSeconds());
			}
		} else {
			LOG.info("The target CSV {} does not exist now", result);
		}
		Thread.sleep(500);
	}
	Assert.assertTrue("Did not get expected results before timeout.", success);
}
 
Example 11
Source File: FlinkKinesisConsumerTest.java    From flink with Apache License 2.0
private void awaitRecordCount(ConcurrentLinkedQueue<? extends Object> queue, int count) throws Exception {
	Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
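	// poll every 10 ms until the queue holds the expected number of records or the deadline expires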
	while (deadline.hasTimeLeft() && queue.size() < count) {
		Thread.sleep(10);
	}
}
 
Example 12
Source File: ZooKeeperLeaderElectionTest.java    From flink with Apache License 2.0
/**
 * Repeatedly tests reelection of the remaining available LeaderContenders. After a contender
 * has been elected leader, it is removed. This forces the ZooKeeperLeaderElectionService
 * to elect a new leader.
 */
@Test
public void testZooKeeperReelection() throws Exception {
	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5L));

	int num = 10;

	ZooKeeperLeaderElectionService[] leaderElectionService = new ZooKeeperLeaderElectionService[num];
	TestingContender[] contenders = new TestingContender[num];
	ZooKeeperLeaderRetrievalService leaderRetrievalService = null;

	TestingListener listener = new TestingListener();

	try {
		leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(client, configuration);

		LOG.debug("Start leader retrieval service for the TestingListener.");

		leaderRetrievalService.start(listener);

		for (int i = 0; i < num; i++) {
			leaderElectionService[i] = ZooKeeperUtils.createLeaderElectionService(client, configuration);
			contenders[i] = new TestingContender(createAddress(i), leaderElectionService[i]);

			LOG.debug("Start leader election service for contender #{}.", i);

			leaderElectionService[i].start(contenders[i]);
		}

		String pattern = TEST_URL + "_" + "(\\d+)";
		Pattern regex = Pattern.compile(pattern);

		int numberSeenLeaders = 0;

		while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
			LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
			String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());

			Matcher m = regex.matcher(address);

			if (m.find()) {
				int index = Integer.parseInt(m.group(1));

				TestingContender contender = contenders[index];

				// check that the retrieval service has retrieved the correct leader
				if (address.equals(createAddress(index)) && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
					// kill the election service of the leader
					LOG.debug("Stop leader election service of contender #{}.", numberSeenLeaders);
					leaderElectionService[index].stop();
					leaderElectionService[index] = null;

					numberSeenLeaders++;
				}
			} else {
				fail("Did not find the leader's index.");
			}
		}

		assertFalse("Did not complete the leader reelection in time.", deadline.isOverdue());
		assertEquals(num, numberSeenLeaders);

	} finally {
		if (leaderRetrievalService != null) {
			leaderRetrievalService.stop();
		}

		for (ZooKeeperLeaderElectionService electionService : leaderElectionService) {
			if (electionService != null) {
				electionService.stop();
			}
		}
	}
}
 
Example 13
Source File: NetworkBufferPool.java    From flink with Apache License 2.0
@Override
public List<MemorySegment> requestMemorySegments() throws IOException {
	synchronized (factoryLock) {
		if (isDestroyed) {
			throw new IllegalStateException("Network buffer pool has already been destroyed.");
		}

		tryRedistributeBuffers();
	}

	final List<MemorySegment> segments = new ArrayList<>(numberOfSegmentsToRequest);
	try {
		final Deadline deadline = Deadline.fromNow(requestSegmentsTimeout);
		while (true) {
			if (isDestroyed) {
				throw new IllegalStateException("Buffer pool is destroyed.");
			}

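			// poll with a bounded timeout so the destroyed flag and the deadline are re-checked periodically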
			final MemorySegment segment = availableMemorySegments.poll(2, TimeUnit.SECONDS);
			if (segment != null) {
				segments.add(segment);
			}

			if (segments.size() >= numberOfSegmentsToRequest) {
				break;
			}

			if (!deadline.hasTimeLeft()) {
				throw new IOException(String.format("Timeout triggered when requesting exclusive buffers: %s, " +
								"or you may increase the timeout which is %dms by setting the key '%s'.",
						getConfigDescription(),
						requestSegmentsTimeout.toMillis(),
						NettyShuffleEnvironmentOptions.NETWORK_EXCLUSIVE_BUFFERS_REQUEST_TIMEOUT_MILLISECONDS.key()));
			}
		}
	} catch (Throwable e) {
		try {
			recycleMemorySegments(segments, numberOfSegmentsToRequest);
		} catch (IOException inner) {
			e.addSuppressed(inner);
		}
		ExceptionUtils.rethrowIOException(e);
	}

	return segments;
}
 
Example 14
Source File: ZooKeeperLeaderElectionTest.java    From flink with Apache License 2.0
/**
 * Repeatedly tests reelection of the remaining available LeaderContenders. After a contender
 * has been elected leader, it is removed. This forces the ZooKeeperLeaderElectionService
 * to elect a new leader.
 */
@Test
public void testZooKeeperReelection() throws Exception {
	Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5L));

	int num = 10;

	ZooKeeperLeaderElectionService[] leaderElectionService = new ZooKeeperLeaderElectionService[num];
	TestingContender[] contenders = new TestingContender[num];
	ZooKeeperLeaderRetrievalService leaderRetrievalService = null;

	TestingListener listener = new TestingListener();

	try {
		leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(client, configuration);

		LOG.debug("Start leader retrieval service for the TestingListener.");

		leaderRetrievalService.start(listener);

		for (int i = 0; i < num; i++) {
			leaderElectionService[i] = ZooKeeperUtils.createLeaderElectionService(client, configuration);
			contenders[i] = new TestingContender(TEST_URL + "_" + i, leaderElectionService[i]);

			LOG.debug("Start leader election service for contender #{}.", i);

			leaderElectionService[i].start(contenders[i]);
		}

		String pattern = TEST_URL + "_" + "(\\d+)";
		Pattern regex = Pattern.compile(pattern);

		int numberSeenLeaders = 0;

		while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
			LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
			String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());

			Matcher m = regex.matcher(address);

			if (m.find()) {
				int index = Integer.parseInt(m.group(1));

				TestingContender contender = contenders[index];

				// check that the retrieval service has retrieved the correct leader
				if (address.equals(contender.getAddress()) && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
					// kill the election service of the leader
					LOG.debug("Stop leader election service of contender #{}.", numberSeenLeaders);
					leaderElectionService[index].stop();
					leaderElectionService[index] = null;

					numberSeenLeaders++;
				}
			} else {
				fail("Did not find the leader's index.");
			}
		}

		assertFalse("Did not complete the leader reelection in time.", deadline.isOverdue());
		assertEquals(num, numberSeenLeaders);

	} finally {
		if (leaderRetrievalService != null) {
			leaderRetrievalService.stop();
		}

		for (ZooKeeperLeaderElectionService electionService : leaderElectionService) {
			if (electionService != null) {
				electionService.stop();
			}
		}
	}
}
 
Example 15
Source File: SavepointMigrationTestBase.java    From flink with Apache License 2.0
@SafeVarargs
protected final void restoreAndExecute(
		StreamExecutionEnvironment env,
		String savepointPath,
		Tuple2<String, Integer>... expectedAccumulators) throws Exception {

	final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));

	ClusterClient<?> client = miniClusterResource.getClusterClient();

	// Submit the job
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();

	jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

	JobSubmissionResult jobSubmissionResult = ClientUtils.submitJob(client, jobGraph);

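	// poll the job's accumulators until all expected values are present or the deadline expires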
	boolean done = false;
	while (deadLine.hasTimeLeft()) {

		// try to get the job status; this fails if the job has already failed, which
		// lets us break out of this loop
		JobID jobId = jobSubmissionResult.getJobID();

		try {
			CompletableFuture<JobStatus> jobStatusFuture = client.getJobStatus(jobSubmissionResult.getJobID());

			JobStatus jobStatus = jobStatusFuture.get(5, TimeUnit.SECONDS);

			assertNotEquals(JobStatus.FAILED, jobStatus);
		} catch (Exception e) {
			fail("Could not connect to job: " + e);
		}

		Thread.sleep(100);
		Map<String, Object> accumulators = client.getAccumulators(jobId).get();

		boolean allDone = true;
		for (Tuple2<String, Integer> acc : expectedAccumulators) {
			Object numFinished = accumulators.get(acc.f0);
			if (numFinished == null) {
				allDone = false;
				break;
			}
			if (!numFinished.equals(acc.f1)) {
				allDone = false;
				break;
			}
		}

		if (allDone) {
			done = true;
			break;
		}
	}

	if (!done) {
		fail("Did not see the expected accumulator results within time limit.");
	}
}
 
Example 16
Source File: AbstractOperatorRestoreTestBase.java    From flink with Apache License 2.0
private String migrateJob(ClusterClient<?> clusterClient, Deadline deadline) throws Throwable {

		URL savepointResource = AbstractOperatorRestoreTestBase.class.getClassLoader().getResource("operatorstate/" + getMigrationSavepointName());
		if (savepointResource == null) {
			throw new IllegalArgumentException("Savepoint file does not exist.");
		}
		JobGraph jobToMigrate = createJobGraph(ExecutionMode.MIGRATE);
		jobToMigrate.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointResource.getFile()));

		assertNotNull(jobToMigrate.getJobID());

		ClientUtils.submitJob(clusterClient, jobToMigrate);

		CompletableFuture<JobStatus> jobRunningFuture = FutureUtils.retrySuccessfulWithDelay(
			() -> clusterClient.getJobStatus(jobToMigrate.getJobID()),
			Time.milliseconds(50),
			deadline,
			(jobStatus) -> jobStatus == JobStatus.RUNNING,
			TestingUtils.defaultScheduledExecutor());
		assertEquals(
			JobStatus.RUNNING,
			jobRunningFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));

		// Trigger savepoint
		File targetDirectory = tmpFolder.newFolder();
		String savepointPath = null;

		// FLINK-6918: Retry cancel with savepoint message in case that StreamTasks were not running
		// TODO: The retry logic should be removed once the StreamTask lifecycle has been fixed (see FLINK-4714)
		while (deadline.hasTimeLeft() && savepointPath == null) {
			try {
				savepointPath = clusterClient.cancelWithSavepoint(
					jobToMigrate.getJobID(),
					targetDirectory.getAbsolutePath()).get();
			} catch (Exception e) {
				String exceptionString = ExceptionUtils.stringifyException(e);
				if (!PATTERN_CANCEL_WITH_SAVEPOINT_TOLERATED_EXCEPTIONS.matcher(exceptionString).find()) {
					throw e;
				}
			}
		}

		assertNotNull("Could not take savepoint.", savepointPath);

		CompletableFuture<JobStatus> jobCanceledFuture = FutureUtils.retrySuccessfulWithDelay(
			() -> clusterClient.getJobStatus(jobToMigrate.getJobID()),
			Time.milliseconds(50),
			deadline,
			(jobStatus) -> jobStatus == JobStatus.CANCELED,
			TestingUtils.defaultScheduledExecutor());
		assertEquals(
			JobStatus.CANCELED,
			jobCanceledFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));

		return savepointPath;
	}