org.apache.flink.runtime.util.LeaderRetrievalUtils Java Examples

The following examples show how to use org.apache.flink.runtime.util.LeaderRetrievalUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskManagerRunner.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the address the TaskManager uses for communication, based on the address
 * that {@code LeaderRetrievalUtils.findConnectingAddress} reports for the
 * ResourceManager leader retriever.
 *
 * @param configuration configuration supplying the lookup timeout and the host bind policy
 * @param haServices high availability services providing the ResourceManager leader retriever
 * @return the host address when the bind policy is {@code HostBindPolicy.IP}, the host name otherwise
 * @throws LeaderRetrievalException declared for the address lookup
 */
private static String determineTaskManagerBindAddressByConnectingToResourceManager(
		final Configuration configuration,
		final HighAvailabilityServices haServices) throws LeaderRetrievalException {

	final Time lookupTimeout = Time.milliseconds(AkkaUtils.getLookupTimeout(configuration).toMillis());
	final InetAddress resolvedAddress = LeaderRetrievalUtils.findConnectingAddress(
		haServices.getResourceManagerLeaderRetriever(), lookupTimeout);

	// Resolve both textual forms once; they feed the log line and the return value.
	final String hostName = resolvedAddress.getHostName();
	final String hostAddress = resolvedAddress.getHostAddress();
	LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.", hostName, hostAddress);

	final HostBindPolicy bindPolicy = HostBindPolicy.fromString(configuration.getString(TaskManagerOptions.HOST_BIND_POLICY));
	if (bindPolicy == HostBindPolicy.IP) {
		return hostAddress;
	}
	return hostName;
}
 
Example #2
Source File: HighAvailabilityServicesUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link HighAvailabilityServices} that correspond to the high availability
 * mode read from the given configuration.
 *
 * @param config configuration from which the mode and the service settings are read
 * @param executor executor handed to the created services
 * @return embedded services for {@code NONE}, ZooKeeper services for {@code ZOOKEEPER},
 *         or custom services for {@code FACTORY_CLASS}
 * @throws Exception if the mode is not one of the handled values, or if service creation fails
 */
public static HighAvailabilityServices createAvailableOrEmbeddedServices(
	Configuration config,
	Executor executor) throws Exception {
	final HighAvailabilityMode haMode = LeaderRetrievalUtils.getRecoveryMode(config);

	switch (haMode) {
		case NONE: {
			return new EmbeddedHaServices(executor);
		}
		case ZOOKEEPER: {
			// The blob store is created before the Curator framework is started.
			final BlobStoreService blobStore = BlobUtils.createBlobStoreFromConfig(config);
			return new ZooKeeperHaServices(ZooKeeperUtils.startCuratorFramework(config), executor, config, blobStore);
		}
		case FACTORY_CLASS: {
			return createCustomHAServices(config, executor);
		}
		default: {
			throw new Exception("High availability mode " + haMode + " is not supported.");
		}
	}
}
 
Example #3
Source File: LeaderChangeClusterComponentsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Revokes and re-grants the ResourceManager leadership and checks that the task
 * executors are connected again afterwards.
 *
 * @throws Exception if waiting for connections or changing the leadership fails
 */
@Test
public void testTaskExecutorsReconnectToClusterWithLeadershipChange() throws Exception {
	final Deadline reconnectDeadline = Deadline.fromNow(TESTING_TIMEOUT);

	// Initial state: all task executors are connected.
	waitUntilTaskExecutorsHaveConnected(NUM_TMS, reconnectDeadline);

	// Force a leadership change of the ResourceManager.
	highAvailabilityServices.revokeResourceManagerLeadership().get();
	highAvailabilityServices.grantResourceManagerLeadership();

	// wait for the ResourceManager to confirm the leadership
	assertThat(
		LeaderRetrievalUtils
			.retrieveLeaderConnectionInfo(highAvailabilityServices.getResourceManagerLeaderRetriever(), TESTING_TIMEOUT)
			.getLeaderSessionId(),
		is(notNullValue()));

	waitUntilTaskExecutorsHaveConnected(NUM_TMS, reconnectDeadline);
}
 
Example #4
Source File: TaskManagerRunner.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the TaskManager's communication address from the address that
 * {@code LeaderRetrievalUtils.findConnectingAddress} returns for the
 * ResourceManager leader retriever.
 *
 * @param configuration configuration supplying the lookup timeout and the host bind policy
 * @param haServices high availability services providing the ResourceManager leader retriever
 * @return the host address when the bind policy is {@code HostBindPolicy.IP}, the host name otherwise
 * @throws LeaderRetrievalException declared for the address lookup
 */
private static String determineTaskManagerBindAddressByConnectingToResourceManager(
		final Configuration configuration,
		final HighAvailabilityServices haServices) throws LeaderRetrievalException {

	final Duration timeout = AkkaUtils.getLookupTimeout(configuration);
	final InetAddress connectingAddress = LeaderRetrievalUtils.findConnectingAddress(
		haServices.getResourceManagerLeaderRetriever(), timeout);

	// Both representations are needed for the log line; one of them is returned.
	final String name = connectingAddress.getHostName();
	final String ip = connectingAddress.getHostAddress();
	LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.", name, ip);

	final HostBindPolicy policy = HostBindPolicy.fromString(configuration.getString(TaskManagerOptions.HOST_BIND_POLICY));
	if (policy == HostBindPolicy.IP) {
		return ip;
	}
	return name;
}
 
Example #5
Source File: TaskManagerRunner.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the TaskManager bind address from the address obtained through
 * {@code LeaderRetrievalUtils.findConnectingAddress} for the ResourceManager
 * leader retriever.
 *
 * @param configuration configuration supplying the lookup timeout and the host bind policy
 * @param haServices high availability services providing the ResourceManager leader retriever
 * @return the host address when the bind policy is {@code HostBindPolicy.IP}, the host name otherwise
 * @throws LeaderRetrievalException declared for the address lookup
 */
private static String determineTaskManagerBindAddressByConnectingToResourceManager(
		final Configuration configuration,
		final HighAvailabilityServices haServices) throws LeaderRetrievalException {

	// The configured lookup timeout is converted to a Time value via milliseconds.
	final Time timeout = Time.milliseconds(AkkaUtils.getLookupTimeout(configuration).toMillis());
	final InetAddress address = LeaderRetrievalUtils.findConnectingAddress(
		haServices.getResourceManagerLeaderRetriever(), timeout);

	final String resolvedHostName = address.getHostName();
	final String resolvedIp = address.getHostAddress();
	LOG.info("TaskManager will use hostname/address '{}' ({}) for communication.", resolvedHostName, resolvedIp);

	final HostBindPolicy configuredPolicy = HostBindPolicy.fromString(configuration.getString(TaskManagerOptions.HOST_BIND_POLICY));
	if (configuredPolicy == HostBindPolicy.IP) {
		return resolvedIp;
	}
	return resolvedHostName;
}
 
Example #6
Source File: ProcessFailureCancelingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the current {@link Dispatcher} leader and connects to it, which waits until
 * the dispatcher has set its fencing token.
 *
 * @param rpcService RPC service used to open the connection
 * @param haServices high availability services providing the dispatcher leader retriever
 * @return the connected {@link DispatcherGateway}
 * @throws Exception if the leader lookup or the connection fails
 */
static DispatcherGateway retrieveDispatcherGateway(RpcService rpcService, HighAvailabilityServices haServices) throws Exception {
	// The leader lookup uses a fixed ten second timeout.
	final LeaderConnectionInfo dispatcherLeader = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		haServices.getDispatcherLeaderRetriever(),
		Time.seconds(10L));

	final String dispatcherAddress = dispatcherLeader.getAddress();
	final DispatcherId fencingToken = DispatcherId.fromUuid(dispatcherLeader.getLeaderSessionID());

	return rpcService.connect(dispatcherAddress, fencingToken, DispatcherGateway.class).get();
}
 
Example #7
Source File: ZooKeeperLeaderRetrievalTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the connecting-address lookup and records either its result or the failure.
 */
@Override
public void run() {
	try {
		result = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, timeout);
	} catch (Exception failure) {
		// Kept in the exception field instead of being propagated out of run().
		exception = failure;
	}
}
 
Example #8
Source File: ZooKeeperLeaderRetrievalTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code LeaderRetrievalUtils.findConnectingAddress} stops searching for a
 * connecting address when no leader address has been specified, and that it then
 * returns {@code InetAddress.getLocalHost()}.
 *
 * @throws Exception if the lookup fails unexpectedly
 */
@Test
public void testTimeoutOfFindConnectingAddress() throws Exception {
	final Duration lookupTimeout = Duration.ofSeconds(1L);
	final LeaderRetrievalService jobManagerRetriever =
		highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID);

	final InetAddress connectingAddress = LeaderRetrievalUtils.findConnectingAddress(jobManagerRetriever, lookupTimeout);
	final InetAddress localHost = InetAddress.getLocalHost();

	assertEquals(localHost, connectingAddress);
}
 
Example #9
Source File: ProcessFailureCancelingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the current {@link Dispatcher} leader and connects to it, which waits until
 * the dispatcher has set its fencing token.
 *
 * @param rpcService RPC service used to open the connection
 * @param haServices high availability services providing the dispatcher leader retriever
 * @return the connected {@link DispatcherGateway}
 * @throws Exception if the leader lookup or the connection fails
 */
static DispatcherGateway retrieveDispatcherGateway(RpcService rpcService, HighAvailabilityServices haServices) throws Exception {
	// The leader lookup uses a fixed ten second timeout.
	final LeaderConnectionInfo dispatcherLeader = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		haServices.getDispatcherLeaderRetriever(), Duration.ofSeconds(10L));

	final String leaderAddress = dispatcherLeader.getAddress();
	final DispatcherId dispatcherId = DispatcherId.fromUuid(dispatcherLeader.getLeaderSessionId());

	return rpcService.connect(leaderAddress, dispatcherId, DispatcherGateway.class).get();
}
 
Example #10
Source File: ZooKeeperLeaderRetrievalTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Performs the connecting-address lookup, storing the outcome in the
 * {@code result} or {@code exception} field.
 */
@Override
public void run() {
	try {
		result = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, timeout);
	} catch (Exception lookupFailure) {
		// Recorded rather than rethrown from run().
		exception = lookupFailure;
	}
}
 
Example #11
Source File: ZooKeeperLeaderRetrievalTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code LeaderRetrievalUtils.findConnectingAddress} stops searching for a
 * connecting address when no leader address has been specified, and that it then
 * returns {@code InetAddress.getLocalHost()}.
 *
 * @throws Exception if the lookup fails unexpectedly
 */
@Test
public void testTimeoutOfFindConnectingAddress() throws Exception {
	final FiniteDuration oneSecond = new FiniteDuration(1L, TimeUnit.SECONDS);
	final LeaderRetrievalService retriever =
		highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID);

	final InetAddress found = LeaderRetrievalUtils.findConnectingAddress(retriever, oneSecond);
	final InetAddress expected = InetAddress.getLocalHost();

	assertEquals(expected, found);
}
 
Example #12
Source File: LeaderChangeClusterComponentsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Revokes and re-grants the ResourceManager leadership and checks that the task
 * executors are connected again afterwards.
 *
 * @throws Exception if waiting for connections or changing the leadership fails
 */
@Test
public void testTaskExecutorsReconnectToClusterWithLeadershipChange() throws Exception {
	final Deadline deadline = Deadline.fromNow(TESTING_TIMEOUT);
	waitUntilTaskExecutorsHaveConnected(NUM_TMS, deadline);
	highAvailabilityServices.revokeResourceManagerLeadership().get();
	highAvailabilityServices.grantResourceManagerLeadership();

	// wait for the ResourceManager to confirm the leadership
	// The timeout is converted via milliseconds: toMinutes() truncates to whole minutes,
	// so a TESTING_TIMEOUT below one minute would otherwise turn into a zero timeout.
	assertThat(
		LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
			highAvailabilityServices.getResourceManagerLeaderRetriever(),
			Time.milliseconds(TESTING_TIMEOUT.toMillis())).getLeaderSessionID(),
		is(notNullValue()));

	waitUntilTaskExecutorsHaveConnected(NUM_TMS, deadline);
}
 
Example #13
Source File: ProcessFailureCancelingITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the current {@link Dispatcher} leader and connects to it, which waits until
 * the dispatcher has set its fencing token.
 *
 * @param rpcService RPC service used to open the connection
 * @param haServices high availability services providing the dispatcher leader retriever
 * @return the connected {@link DispatcherGateway}
 * @throws Exception if the leader lookup or the connection fails
 */
static DispatcherGateway retrieveDispatcherGateway(RpcService rpcService, HighAvailabilityServices haServices) throws Exception {
	// The leader lookup uses a fixed ten second timeout.
	final LeaderConnectionInfo leaderInfo = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		haServices.getDispatcherLeaderRetriever(),
		Time.seconds(10L));

	final String address = leaderInfo.getAddress();
	final DispatcherId sessionToken = DispatcherId.fromUuid(leaderInfo.getLeaderSessionID());

	return rpcService.connect(address, sessionToken, DispatcherGateway.class).get();
}
 
Example #14
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Retrieves the {@link ActorGateway} of the current job manager leader through the
 * {@link LeaderRetrievalService} for the default job id.
 *
 * @return ActorGateway of the current job manager leader
 * @throws FlinkException if the leader retrieval fails with a {@code LeaderRetrievalException}
 */
public ActorGateway getJobManagerGateway() throws FlinkException {
	log.debug("Looking up JobManager");

	try {
		return LeaderRetrievalUtils.retrieveLeaderGateway(
			highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
			actorSystemLoader.get(),
			lookupTimeout);
	} catch (LeaderRetrievalException retrievalFailure) {
		// Wrap the retrieval failure, keeping it as the cause.
		final String message = "Could not connect to the leading JobManager. Please check that the " +
			"JobManager is running.";
		throw new FlinkException(message, retrievalFailure);
	}
}
 
Example #15
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the ActorSystem, starting it first if it has not been loaded yet.
 *
 * @return ActorSystem
 * @throws FlinkException if the own address could not be determined or the ActorSystem could not be started
 */
@Override
public ActorSystem get() throws FlinkException {
	if (isLoaded()) {
		return actorSystem;
	}

	// start actor system
	log.info("Starting client actor system.");

	final InetAddress localAddress;
	try {
		localAddress = LeaderRetrievalUtils.findConnectingAddress(
			highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
			timeout);
	} catch (LeaderRetrievalException retrievalFailure) {
		final String message = "Could not find out our own hostname by connecting to the " +
			"leading JobManager. Please make sure that the Flink cluster has been started.";
		throw new FlinkException(message, retrievalFailure);
	}

	try {
		// Port 0 is passed together with the canonical host name of the address found above.
		actorSystem = BootstrapTools.startActorSystem(configuration, localAddress.getCanonicalHostName(), 0, log);
	} catch (Exception startFailure) {
		throw new FlinkException("Could not start the ActorSystem lazily.", startFailure);
	}

	return actorSystem;
}
 
Example #16
Source File: ZooKeeperLeaderRetrievalTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Executes the connecting-address lookup and keeps its result, or the exception it threw.
 */
@Override
public void run() {
	try {
		result = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, timeout);
	} catch (Exception caught) {
		// Stored in the exception field; nothing is thrown out of run().
		exception = caught;
	}
}
 
Example #17
Source File: ZooKeeperLeaderRetrievalTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code LeaderRetrievalUtils.findConnectingAddress} stops searching for a
 * connecting address when no leader address has been specified, and that it then
 * returns {@code InetAddress.getLocalHost()}.
 *
 * @throws Exception if the lookup fails unexpectedly
 */
@Test
public void testTimeoutOfFindConnectingAddress() throws Exception {
	final FiniteDuration lookupTimeout = new FiniteDuration(1L, TimeUnit.SECONDS);
	final LeaderRetrievalService jobManagerRetriever =
		highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID);

	final InetAddress actualAddress = LeaderRetrievalUtils.findConnectingAddress(jobManagerRetriever, lookupTimeout);
	final InetAddress localHostAddress = InetAddress.getLocalHost();

	assertEquals(localHostAddress, actualAddress);
}
 
Example #18
Source File: LeaderChangeClusterComponentsTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Revokes and re-grants the ResourceManager leadership and checks that the task
 * executors are connected again afterwards.
 *
 * @throws Exception if waiting for connections or changing the leadership fails
 */
@Test
public void testTaskExecutorsReconnectToClusterWithLeadershipChange() throws Exception {
	final Deadline deadline = Deadline.fromNow(TESTING_TIMEOUT);
	waitUntilTaskExecutorsHaveConnected(NUM_TMS, deadline);
	highAvailabilityServices.revokeResourceManagerLeadership().get();
	highAvailabilityServices.grantResourceManagerLeadership();

	// wait for the ResourceManager to confirm the leadership
	// Going through milliseconds avoids the truncation of toMinutes(), which would
	// reduce any TESTING_TIMEOUT shorter than one minute to a zero timeout.
	assertThat(
		LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
			highAvailabilityServices.getResourceManagerLeaderRetriever(),
			Time.milliseconds(TESTING_TIMEOUT.toMillis())).getLeaderSessionID(),
		is(notNullValue()));

	waitUntilTaskExecutorsHaveConnected(NUM_TMS, deadline);
}
 
Example #19
Source File: MiniClusterClient.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Retrieves the leader connection info using the dispatcher leader retriever.
 *
 * @return the connection info reported for the dispatcher leader
 * @throws LeaderRetrievalException declared for the leader lookup
 */
@Override
public LeaderConnectionInfo getClusterConnectionInfo() throws LeaderRetrievalException {
	final LeaderConnectionInfo dispatcherLeaderInfo = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		highAvailabilityServices.getDispatcherLeaderRetriever(), timeout);
	return dispatcherLeaderInfo;
}
 
Example #20
Source File: RestClusterClient.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the leader connection info via the dispatcher leader retriever.
 *
 * @return the connection info reported for the dispatcher leader
 * @throws LeaderRetrievalException declared for the leader lookup
 */
@Override
public LeaderConnectionInfo getClusterConnectionInfo() throws LeaderRetrievalException {
	final LeaderConnectionInfo connectionInfo = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		highAvailabilityServices.getDispatcherLeaderRetriever(), timeout);
	return connectionInfo;
}
 
Example #21
Source File: HighAvailabilityServicesUtils.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link HighAvailabilityServices} for the high availability mode read
 * from the given configuration.
 *
 * <p>For {@code NONE}, standalone services are built from the RPC URLs of the job manager,
 * resource manager and dispatcher plus a REST URL assembled from {@code RestOptions}.
 * For {@code ZOOKEEPER}, ZooKeeper-backed services are created. For {@code FACTORY_CLASS},
 * creation is delegated to {@code createCustomHAServices}.
 *
 * @param configuration configuration from which the mode, addresses and ports are read
 * @param executor executor handed to the ZooKeeper and custom services
 * @param addressResolution address resolution setting passed on when building the RPC URLs
 * @return the created high availability services
 * @throws Exception if the mode is not one of the handled values, or if service creation fails
 */
public static HighAvailabilityServices createHighAvailabilityServices(
	Configuration configuration,
	Executor executor,
	AddressResolution addressResolution) throws Exception {

	final HighAvailabilityMode haMode = LeaderRetrievalUtils.getRecoveryMode(configuration);

	switch (haMode) {
		case NONE: {
			final Tuple2<String, Integer> jobManagerEndpoint = getJobManagerAddress(configuration);

			// All three RPC URLs share the same host and port and differ only in the endpoint name.
			final String jobManagerUrl = AkkaRpcServiceUtils.getRpcUrl(
				jobManagerEndpoint.f0, jobManagerEndpoint.f1, JobMaster.JOB_MANAGER_NAME, addressResolution, configuration);
			final String resourceManagerUrl = AkkaRpcServiceUtils.getRpcUrl(
				jobManagerEndpoint.f0, jobManagerEndpoint.f1, ResourceManager.RESOURCE_MANAGER_NAME, addressResolution, configuration);
			final String dispatcherUrl = AkkaRpcServiceUtils.getRpcUrl(
				jobManagerEndpoint.f0, jobManagerEndpoint.f1, Dispatcher.DISPATCHER_NAME, addressResolution, configuration);

			// The REST address is mandatory; a missing value fails in checkNotNull.
			final String restAddress = checkNotNull(
				configuration.getString(RestOptions.ADDRESS),
				"%s must be set",
				RestOptions.ADDRESS.key());
			final int restPort = configuration.getInteger(RestOptions.PORT);
			final String restScheme = SSLUtils.isRestSSLEnabled(configuration) ? "https://" : "http://";
			final String restUrl = String.format("%s%s:%s", restScheme, restAddress, restPort);

			return new StandaloneHaServices(resourceManagerUrl, dispatcherUrl, jobManagerUrl, restUrl);
		}
		case ZOOKEEPER: {
			// The blob store is created before the Curator framework is started.
			final BlobStoreService blobStore = BlobUtils.createBlobStoreFromConfig(configuration);
			return new ZooKeeperHaServices(
				ZooKeeperUtils.startCuratorFramework(configuration), executor, configuration, blobStore);
		}
		case FACTORY_CLASS: {
			return createCustomHAServices(configuration, executor);
		}
		default: {
			throw new Exception("Recovery mode " + haMode + " is not supported.");
		}
	}
}
 
Example #22
Source File: EmbeddedHaServicesTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Starts the given retrieval and election services, confirms the granted leadership and
 * asserts that the retrieval listener observes the confirmed address and session id.
 *
 * @param leaderElectionService election service started with a testing contender
 * @param leaderRetrievalService retrieval service started with a connection info listener
 * @throws Exception if waiting on one of the futures fails
 */
private void runLeaderRetrievalTest(LeaderElectionService leaderElectionService, LeaderRetrievalService leaderRetrievalService) throws Exception {
	final LeaderRetrievalUtils.LeaderConnectionInfoListener connectionInfoListener =
		new LeaderRetrievalUtils.LeaderConnectionInfoListener();
	final TestingLeaderContender contender = new TestingLeaderContender();

	// The retrieval service is started before the election service.
	leaderRetrievalService.start(connectionInfoListener);
	leaderElectionService.start(contender);

	final UUID grantedSessionId = contender.getLeaderSessionFuture().get();
	leaderElectionService.confirmLeadership(grantedSessionId, ADDRESS);

	final LeaderConnectionInfo retrievedInfo = connectionInfoListener.getLeaderConnectionInfoFuture().get();

	assertThat(retrievedInfo.getAddress(), is(ADDRESS));
	assertThat(retrievedInfo.getLeaderSessionId(), is(grantedSessionId));
}
 
Example #23
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Gets the current cluster connection info (may change in case of a HA setup), looked up
 * through the job manager leader retriever for the default job id.
 *
 * @return The connection info to the leader component of the cluster
 * @throws LeaderRetrievalException if the leader could not be retrieved
 */
public LeaderConnectionInfo getClusterConnectionInfo() throws LeaderRetrievalException {
	final LeaderConnectionInfo jobManagerLeaderInfo = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID),
		timeout);
	return jobManagerLeaderInfo;
}
 
Example #24
Source File: ClusterClient.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Gets the current cluster connection info (may change in case of a HA setup), looked up
 * through the dispatcher leader retriever.
 *
 * @return The connection info to the leader component of the cluster
 * @throws LeaderRetrievalException if the leader could not be retrieved
 */
public LeaderConnectionInfo getClusterConnectionInfo() throws LeaderRetrievalException {
	final LeaderConnectionInfo dispatcherLeaderInfo = LeaderRetrievalUtils.retrieveLeaderConnectionInfo(
		highAvailabilityServices.getDispatcherLeaderRetriever(),
		timeout);
	return dispatcherLeaderInfo;
}