Java Code Examples for org.apache.helix.HelixManager#connect()

The following examples show how to use org.apache.helix.HelixManager#connect(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ClusterIntegrationTestUtils.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * A utility method that creates a partial instance structure in ZK.
 *
 * <p>The manager is connected and immediately disconnected so that the Helix instance set up is
 * created; the ERRORS, HISTORY and STATUSUPDATES znodes of that instance are then deleted to
 * simulate a partially set up instance.
 *
 * @param helixManager manager whose cluster name and instance name identify the znodes to delete
 * @param zkConnectString ZooKeeper connect string used for the direct znode deletions
 */
public static void createPartialInstanceStructure(HelixManager helixManager, String zkConnectString) {
  //Connect and disconnect the helixManager to create a Helix Instance set up.
  try {
    helixManager.connect();
    helixManager.disconnect();
  } catch (Exception e) {
    // Keep the cause so the test report shows why the connection failed.
    Assert.fail("Failed to connect to ZK", e);
  }

  String clusterName = helixManager.getClusterName();
  String instanceName = helixManager.getInstanceName();

  //Delete ERRORS/HISTORY/STATUSUPDATES znodes under INSTANCES to simulate partial instance set up.
  ZkClient zkClient = new ZkClient(zkConnectString);
  try {
    zkClient.delete(PropertyPathBuilder.instanceError(clusterName, instanceName));
    zkClient.delete(PropertyPathBuilder.instanceHistory(clusterName, instanceName));
    zkClient.delete(PropertyPathBuilder.instanceStatusUpdate(clusterName, instanceName));
  } finally {
    // Always release the ZooKeeper connection opened for the deletions.
    zkClient.close();
  }
}
 
Example 2
Source File: GobblinTaskRunnerTest.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Starts the {@code TaskAssignmentAfterConnectionRetry} suite and checks, through a spectator
 * manager, that the job is started and that the sleeping task reaches the running state.
 *
 * @throws Exception if the cluster cannot be started or the manager cannot connect
 */
@Test (groups = {"disabledOnTravis"})
public void testTaskAssignmentAfterHelixConnectionRetry()
    throws Exception {
  Config jobConfigOverrides = ClusterIntegrationTestUtils.buildSleepingJob(JOB_ID, TASK_STATE_FILE);
  this.suite = new TaskAssignmentAfterConnectionRetry(jobConfigOverrides);

  String zkConnectString = suite.getManagerConfig().getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
  String clusterName = suite.getManagerConfig().getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
  //A test manager instance for observing the state of the cluster
  HelixManager helixManager = HelixManagerFactory.getZKHelixManager(clusterName, "TestManager", InstanceType.SPECTATOR, zkConnectString);

  suite.startCluster();

  helixManager.connect();
  try {
    //Ensure that Helix has created a workflow
    AssertWithBackoff.create().maxSleepMs(1000).backoffFactor(1).
        assertTrue(ClusterIntegrationTest.isTaskStarted(helixManager, JOB_ID), "Waiting for the job to start...");

    //Ensure that the SleepingTask is running
    AssertWithBackoff.create().maxSleepMs(100).timeoutMs(2000).backoffFactor(1).
        assertTrue(ClusterIntegrationTest.isTaskRunning(TASK_STATE_FILE),"Waiting for the task to enter running state");
  } finally {
    // Disconnect the observing manager even when one of the assertions above fails.
    helixManager.disconnect();
  }
}
 
Example 3
Source File: TestResourceGroupEndtoEnd.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a ZK-backed participant manager, registers the mock "OnlineOffline" state model
 * factory on its state machine engine and connects it.
 *
 * @return the connected manager
 * @throws IllegalArgumentException if the configured cluster manager type is not "zk"
 *         (compared case-insensitively)
 * @throws Exception if the manager cannot be created or connected
 */
public HelixManager start() throws Exception {
  // Only the zk cluster manager is supported; reject anything else up front.
  if (!_clusterMangerType.equalsIgnoreCase("zk")) {
    throw new IllegalArgumentException("Unsupported cluster manager type:" + _clusterMangerType);
  }

  HelixManager participant = HelixManagerFactory.getZKHelixManager(
      _clusterName, _instanceName, InstanceType.PARTICIPANT, _zkConnectString);

  MockOnlineOfflineStateModelFactory onlineOfflineFactory = new MockOnlineOfflineStateModelFactory(
      _transDelayInMs, _resourceName, _resourceTag, _instanceName);
  // The factory is registered before connecting.
  participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline", onlineOfflineFactory);

  participant.connect();
  return participant;
}
 
Example 4
Source File: TestControllerHistory.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the controller leader history record: it starts with a single entry and holds ten
 * entries after the controller has been restarted thirteen times.
 *
 * @throws Exception if the administrator manager cannot connect or a controller restart fails
 */
@Test()
public void testControllerLeaderHistory() throws Exception {
  HelixManager manager = HelixManagerFactory
      .getZKHelixManager(CLUSTER_NAME, "admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
  manager.connect();
  try {
    PropertyKey.Builder keyBuilder = new PropertyKey.Builder(CLUSTER_NAME);
    PropertyKey propertyKey = keyBuilder.controllerLeaderHistory();
    ControllerHistory controllerHistory = manager.getHelixDataAccessor().getProperty(propertyKey);
    Assert.assertNotNull(controllerHistory);
    List<String> list = controllerHistory.getRecord().getListField("HISTORY");
    Assert.assertEquals(list.size(), 1);

    // Restart the controller repeatedly under a new name each time.
    for (int i = 0; i <= 12; i++) {
      _controller.syncStop();
      _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "Controller-" + i);
      _controller.syncStart();
    }

    controllerHistory = manager.getHelixDataAccessor().getProperty(propertyKey);
    Assert.assertNotNull(controllerHistory);
    list = controllerHistory.getRecord().getListField("HISTORY");
    // NOTE(review): presumably the history list is capped at 10 entries — confirm.
    Assert.assertEquals(list.size(), 10);
  } finally {
    // Disconnect the administrator connection even when an assertion fails.
    manager.disconnect();
  }
}
 
Example 5
Source File: ServiceDiscovery.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a service by connecting a participant manager named after {@code serviceId} whose
 * live instance info carries the host, port and service name from {@code serviceMetadata}.
 * The connected manager is stored in {@code serviceMap} and the cache is refreshed.
 *
 * @param serviceId id used both as the instance name and as the id of the metadata record
 * @param serviceMetadata source of the HOST, PORT and SERVICE_NAME fields
 * @return always {@code true}
 * @throws Exception if the manager cannot be created or connected
 */
public boolean register(final String serviceId, final ServiceMetadata serviceMetadata)
    throws Exception {
  HelixManager participant = HelixManagerFactory.getZKHelixManager(
      cluster, serviceId, InstanceType.PARTICIPANT, zkAddress);

  participant.setLiveInstanceInfoProvider(new LiveInstanceInfoProvider() {
    @Override
    public ZNRecord getAdditionalLiveInstanceInfo() {
      // Flatten the service metadata into simple fields of a ZNRecord.
      ZNRecord metadataRecord = new ZNRecord(serviceId);
      metadataRecord.setSimpleField("HOST", serviceMetadata.getHost());
      metadataRecord.setSimpleField("PORT", String.valueOf(serviceMetadata.getPort()));
      metadataRecord.setSimpleField("SERVICE_NAME", serviceMetadata.getServiceName());
      return metadataRecord;
    }
  });

  participant.connect();
  serviceMap.put(serviceId, participant);
  refreshCache();
  return true;
}
 
Example 6
Source File: PerfBenchmarkDriver.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Register and connect to Helix cluster as Spectator role.
 *
 * @param instanceId instance name to use for the spectator
 * @return the connected manager
 * @throws RuntimeException wrapping the original failure if the connection cannot be established
 */
private HelixManager registerAndConnectAsHelixSpectator(String instanceId) {
  HelixManager helixManager =
      HelixManagerFactory.getZKHelixManager(_clusterName, instanceId, InstanceType.SPECTATOR, _zkAddress);

  try {
    helixManager.connect();
    return helixManager;
  } catch (Exception e) {
    String errorMsg =
        String.format("Exception when connecting the instance %s as Spectator role to Helix.", instanceId);
    LOGGER.error(errorMsg, e);
    // Chain the cause so callers that only see the rethrown exception keep the root failure.
    throw new RuntimeException(errorMsg, e);
  }
}
 
Example 7
Source File: MockSpectatorProcess.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Connects a spectator manager to the cluster and registers the routing table provider as an
 * external view change listener. Failures are printed and not propagated; a manager that
 * connected before the failure is disconnected again so it is not left behind.
 */
public void start() {
  HelixManager manager = null;
  try {
    manager =
        HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.SPECTATOR,
            zkConnectString);

    manager.connect();
    manager.addExternalViewChangeListener(_routingTableProvider);
  } catch (Exception e) {
    e.printStackTrace();
    // The manager is local to this method; if it connected before the failure nobody else
    // could ever disconnect it, so release it here.
    if (manager != null && manager.isConnected()) {
      manager.disconnect();
    }
  }
}
 
Example 8
Source File: TestZeroReplicaAvoidance.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Starts half of the participants, creates one delayed-rebalance resource per test state model,
 * then starts the remaining participants with a transition delay and checks both the cluster
 * verifier and the {@code _testSuccess} flag.
 *
 * @throws Exception if the spectator manager cannot connect or a participant fails to start
 */
@Test
public void testDelayedRebalancer() throws Exception {
  System.out.println("START testDelayedRebalancer at " + new Date(System.currentTimeMillis()));
  HelixManager manager =
      HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, null, InstanceType.SPECTATOR, ZK_ADDR);
  manager.connect();
  try {
    manager.addExternalViewChangeListener(this);
    manager.addIdealStateChangeListener(this);
    enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);

    // Start half number of nodes.
    int i = 0;
    for (; i < NUM_NODE / 2; i++) {
      _participants.get(i).syncStart();
    }

    int replica = 3;
    int partition = 30;
    for (String stateModel : TestStateModels) {
      String db = "Test-DB-" + stateModel;
      createResourceWithDelayedRebalance(CLUSTER_NAME, db, stateModel, partition, replica, replica,
          0);
    }
    Assert.assertTrue(_clusterVerifier.verifyByPolling(50000L, 100L));

    _startListen = true;
    DelayedTransition.setDelay(5);

    // add the other half of nodes.
    for (; i < NUM_NODE; i++) {
      _participants.get(i).syncStart();
    }
    Assert.assertTrue(_clusterVerifier.verify(70000L));
    Assert.assertTrue(_testSuccess);
  } finally {
    // Release the spectator connection even when an assertion above fails.
    if (manager.isConnected()) {
      manager.disconnect();
    }
  }
  System.out.println("END testDelayedRebalancer at " + new Date(System.currentTimeMillis()));
}
 
Example 9
Source File: TestCorrectnessOnConnectivityLoss.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a spectator's routing table keeps answering after the ZK server is stopped:
 * one ONLINE and no OFFLINE instance is expected for "resource0".
 *
 * @throws Exception if a manager cannot connect or the test is interrupted while sleeping
 */
@SuppressWarnings("deprecation")
@Test
public void testSpectator() throws Exception {
  Map<String, Integer> stateReachedCounts = Maps.newHashMap();
  HelixManager participant =
      HelixManagerFactory.getZKHelixManager(_clusterName, "localhost_12918",
          InstanceType.PARTICIPANT, ZK_ADDR);
  participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline",
      new MyStateModelFactory(stateReachedCounts));
  participant.connect();

  RoutingTableProvider routingTableProvider = new RoutingTableProvider();
  // Declared outside the try block so the finally block can release it.
  HelixManager spectator = null;
  try {
    spectator = HelixManagerFactory
        .getZKHelixManager(_clusterName, "spectator", InstanceType.SPECTATOR, ZK_ADDR);
    spectator.connect();
    spectator.addConfigChangeListener(routingTableProvider);
    spectator.addExternalViewChangeListener(routingTableProvider);
    Thread.sleep(1000);

    // Now let's stop the ZK server; this should do nothing
    TestHelper.stopZkServer(_zkServer);
    Thread.sleep(1000);

    // Verify routing table still works
    Assert.assertEquals(routingTableProvider.getInstances("resource0", "ONLINE").size(), 1);
    Assert.assertEquals(routingTableProvider.getInstances("resource0", "OFFLINE").size(), 0);
  } finally {
    routingTableProvider.shutdown();
    try {
      // The spectator is guarded the same way as the participant below.
      if (spectator != null && spectator.isConnected()) {
        spectator.disconnect();
      }
    } finally {
      if (participant.isConnected()) {
        participant.disconnect();
      }
    }
  }
}
 
Example 10
Source File: TestCorrectnessOnConnectivityLoss.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a participant records exactly one transition to ONLINE and that the recorded
 * counts do not change after the ZK server is stopped.
 *
 * @throws Exception if the participant cannot connect or the test is interrupted while sleeping
 */
@Test
public void testParticipant() throws Exception {
  Map<String, Integer> stateReachedCounts = Maps.newHashMap();
  HelixManager participant =
      HelixManagerFactory.getZKHelixManager(_clusterName, "localhost_12918",
          InstanceType.PARTICIPANT, ZK_ADDR);
  participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline",
      new MyStateModelFactory(stateReachedCounts));
  participant.connect();

  try {
    Thread.sleep(1000);

    // Ensure that the external view coalesces
    boolean result =
        ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR,
            _clusterName));
    Assert.assertTrue(result);

    // Ensure that there was only one state transition
    Assert.assertEquals(stateReachedCounts.size(), 1);
    Assert.assertTrue(stateReachedCounts.containsKey("ONLINE"));
    Assert.assertEquals(stateReachedCounts.get("ONLINE").intValue(), 1);

    // Now let's stop the ZK server; this should do nothing
    TestHelper.stopZkServer(_zkServer);
    Thread.sleep(1000);

    // Verify no change
    Assert.assertEquals(stateReachedCounts.size(), 1);
    Assert.assertTrue(stateReachedCounts.containsKey("ONLINE"));
    Assert.assertEquals(stateReachedCounts.get("ONLINE").intValue(), 1);
  } finally {
    // Release the participant even when one of the assertions above fails.
    if (participant.isConnected()) {
      participant.disconnect();
    }
  }
}
 
Example 11
Source File: TestBatchAddJobs.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a task with its own connected administrator manager and a {@link TaskDriver} built
 * on top of it.
 *
 * @param zkAddress ZooKeeper address the administrator manager connects to
 * @param index number embedded in the job name prefix ("JOB_" + index + "#")
 * @throws Exception if the manager cannot be created or connected
 */
public SubmitJobTask(String zkAddress, int index) throws Exception {
  HelixManager adminManager = HelixManagerFactory.getZKHelixManager(
      CLUSTER_NAME, "Administrator", InstanceType.ADMINISTRATOR, zkAddress);
  adminManager.connect();

  _jobPrefixName = "JOB_" + index + "#";
  _driver = new TaskDriver(adminManager);
}
 
Example 12
Source File: DummyParticipant.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point: joins the given cluster as a participant with a "MasterSlave" state model
 * factory registered, then blocks the main thread.
 *
 * @param args zkAddress, clusterName and instanceName, in that order
 */
public static void main(String[] args) {
  if (args.length < 3) {
    System.err.println("USAGE: DummyParticipant zkAddress clusterName instanceName");
    System.exit(1);
  }

  final String zkAddress = args[0];
  final String cluster = args[1];
  final String instance = args[2];

  HelixManager participant = null;
  try {
    participant = HelixManagerFactory.getZKHelixManager(
        cluster, instance, InstanceType.PARTICIPANT, zkAddress);

    // Register the state model factory before connecting.
    participant.getStateMachineEngine()
        .registerStateModelFactory("MasterSlave", new DummyMSModelFactory());

    participant.connect();

    // Joining the current thread blocks until it is interrupted, keeping the process alive.
    Thread.currentThread().join();
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    if (participant != null) {
      participant.disconnect();
    }
  }
}
 
Example 13
Source File: WorkflowsResource.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a POST carrying a yaml workflow: the name of the first form parameter is parsed as
 * the workflow and started through a {@link TaskDriver} on a short-lived administrator
 * connection. Any failure is written to the response as a JSON error and logged.
 *
 * @param entity request entity containing the form with the yaml workflow
 * @return always {@code null}; the result is set on the response
 */
@Override
public Representation post(Representation entity) {
  try {
    final String cluster = (String) getRequest().getAttributes().get("clusterName");
    final Form submittedForm = new Form(entity);

    // Get the workflow and submit it
    if (submittedForm.size() < 1) {
      throw new HelixException("yaml workflow is required!");
    }
    final Parameter firstParameter = submittedForm.get(0);
    final String workflowYaml = firstParameter.getName();
    if (workflowYaml == null) {
      throw new HelixException("yaml workflow is required!");
    }

    final String zkServerAddress =
        (String) getContext().getAttributes().get(RestAdminApplication.ZKSERVERADDRESS);
    final HelixManager adminManager = HelixManagerFactory.getZKHelixManager(
        cluster, null, InstanceType.ADMINISTRATOR, zkServerAddress);
    adminManager.connect();
    try {
      Workflow parsedWorkflow = Workflow.parse(workflowYaml);
      new TaskDriver(adminManager).start(parsedWorkflow);
    } finally {
      // The connection is only needed for the submission itself.
      adminManager.disconnect();
    }

    getResponse().setEntity(getHostedEntitiesRepresentation(cluster));
    getResponse().setStatus(Status.SUCCESS_OK);
  } catch (Exception e) {
    // The error is reported in the response body; the status is still SUCCESS_OK.
    getResponse().setEntity(ClusterRepresentationUtil.getErrorAsJsonStringFromException(e),
        MediaType.APPLICATION_JSON);
    getResponse().setStatus(Status.SUCCESS_OK);
    LOG.error("Error in posting " + entity, e);
  }
  return null;
}
 
Example 14
Source File: MetricCollectorHAControllerTest.java    From ambari-metrics with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the HA controller, adds a second participant instance, rebalances the
 * aggregator resource and checks that both partitions are ONLINE after re-assignment.
 *
 * @throws Exception if the controller or the second participant cannot be set up
 */
@Test(timeout = 180000)
public void testHAControllerDistributedAggregation() throws Exception {
  MetricCollectorHAController haController = new MetricCollectorHAController(configuration);
  haController.initializeHAController();

  // Declared outside the try block so the finally block can release it.
  HelixManager manager2 = null;
  try {
    // Wait for task assignment
    Thread.sleep(10000);

    Assert.assertTrue(haController.isInitialized());
    Assert.assertEquals(1, haController.getLiveInstanceHostNames().size());
    Assert.assertTrue(haController.getAggregationTaskRunner().performsClusterAggregation());
    Assert.assertTrue(haController.getAggregationTaskRunner().performsHostAggregation());

    // Add new instance
    InstanceConfig instanceConfig2 = new InstanceConfig("h2_12001");
    haController.admin.addInstance(CLUSTER_NAME, instanceConfig2);
    manager2 = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME,
      instanceConfig2.getInstanceName(),
      InstanceType.PARTICIPANT, haController.zkConnectUrl);
    manager2.getStateMachineEngine().registerStateModelFactory(DEFAULT_STATE_MODEL,
      new OnlineOfflineStateModelFactory(instanceConfig2.getInstanceName(),
        new AggregationTaskRunner(instanceConfig2.getInstanceName(), "", CLUSTER_NAME)));
    manager2.connect();
    haController.admin.rebalance(CLUSTER_NAME, METRIC_AGGREGATORS, 1);

    // Wait on re-assignment of partitions
    Thread.sleep(10000);
    Assert.assertEquals(2, haController.getLiveInstanceHostNames().size());

    ExternalView view = haController.admin.getResourceExternalView(CLUSTER_NAME, METRIC_AGGREGATORS);

    Map<String, String> partitionInstanceMap = new HashMap<>();

    for (String partition : view.getPartitionSet()) {
      Map<String, String> states = view.getStateMap(partition);
      // (instance, state) pairs
      for (Map.Entry<String, String> stateEntry : states.entrySet()) {
        partitionInstanceMap.put(partition, stateEntry.getKey());
        Assert.assertEquals("ONLINE", stateEntry.getValue());
      }
    }
    // Re-assigned partitions
    Assert.assertEquals(2, partitionInstanceMap.size());
  } finally {
    try {
      // Release the second participant, which would otherwise stay connected.
      if (manager2 != null && manager2.isConnected()) {
        manager2.disconnect();
      }
    } finally {
      // Null guards keep a cleanup NPE from masking the original failure.
      if (haController.getAggregationTaskRunner() != null) {
        haController.getAggregationTaskRunner().stop();
      }
      if (haController.manager != null) {
        haController.manager.disconnect();
      }
    }
  }
}
 
Example 15
Source File: TestParticipantManager.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Sets up a one-node cluster, connects a participant and a controller, verifies their manager
 * metrics and the best possible external view, then disconnects both and checks that the
 * live instance and controller leader nodes are gone.
 *
 * @throws Exception if the cluster set up or a manager connection fails
 */
@Test
public void simpleIntegrationTest() throws Exception {
  int n = 1;

  TestHelper.setupCluster(clusterName, ZK_ADDR, 12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      4, // partitions per resource
      n, // number of nodes
      1, // replicas
      "MasterSlave", true); // do rebalance

  HelixManager participant =
      new ZKHelixManager(clusterName, "localhost_12918", InstanceType.PARTICIPANT, ZK_ADDR);
  participant.getStateMachineEngine().registerStateModelFactory("MasterSlave",
      new MockMSModelFactory());
  participant.connect();

  // Declared outside the try block so the finally block can release it.
  HelixManager controller = null;
  try {
    controller =
        new ZKHelixManager(clusterName, "controller_0", InstanceType.CONTROLLER, ZK_ADDR);
    controller.connect();

    verifyHelixManagerMetrics(InstanceType.PARTICIPANT, MonitorLevel.DEFAULT,
        participant.getInstanceName());
    verifyHelixManagerMetrics(InstanceType.CONTROLLER, MonitorLevel.DEFAULT,
        controller.getInstanceName());

    BestPossibleExternalViewVerifier verifier =
        new BestPossibleExternalViewVerifier.Builder(clusterName).setZkClient(_gZkClient)
            .setZkAddr(ZK_ADDR).build();
    Assert.assertTrue(verifier.verifyByPolling());
  } finally {
    // cleanup: runs on failure too, so neither manager stays connected after a failed check
    try {
      if (controller != null) {
        controller.disconnect();
      }
    } finally {
      participant.disconnect();
    }
  }

  // verify all live-instances and leader nodes are gone
  ZKHelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
  PropertyKey.Builder keyBuilder = accessor.keyBuilder();
  Assert.assertNull(accessor.getProperty(keyBuilder.liveInstance("localhost_12918")));
  Assert.assertNull(accessor.getProperty(keyBuilder.controllerLeader()));
}
 
Example 16
Source File: TestResourceAccessor.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a setup where the health API can be tested: a disabled ideal state built from
 * {@code idealStateParams} is stored for the resource, and an external view holding the given
 * replica states is written through a temporary administrator connection.
 *
 * @param clusterName cluster to create (if missing) and to write the ideal state and external
 *        view into
 * @param resourceName resource the ideal state and external view belong to
 * @param idealStateParams string values for the keys "MinActiveReplicas", "StateModelDefRef",
 *        "MaxPartitionsPerInstance", "Replicas" and "NumPartitions"
 * @param partitionReplicaStates maps partitionName to its replicas' states
 * @throws Exception if the administrator manager cannot connect
 */
private void createDummyMapping(String clusterName, String resourceName,
    Map<String, String> idealStateParams, Map<String, List<String>> partitionReplicaStates)
    throws Exception {
  IdealState idealState = new IdealState(resourceName);
  idealState.setMinActiveReplicas(Integer.parseInt(idealStateParams.get("MinActiveReplicas"))); // 2
  idealState.setStateModelDefRef(idealStateParams.get("StateModelDefRef")); // MasterSlave
  // Parsed once and reused by the partition loop below.
  int maxPartitionsPerInstance =
      Integer.parseInt(idealStateParams.get("MaxPartitionsPerInstance")); // 3
  idealState.setMaxPartitionsPerInstance(maxPartitionsPerInstance);
  idealState.setReplicas(idealStateParams.get("Replicas")); // 3
  idealState.setNumPartitions(Integer.parseInt(idealStateParams.get("NumPartitions"))); // 3
  idealState.enable(false);

  Map<String, List<String>> partitionNames = new LinkedHashMap<>();
  List<String> dummyPrefList = new ArrayList<>();

  // NOTE(review): every partition maps to this one shared list, so each ends up with one
  // ANY_INSTANCE entry per partition — confirm this is intended.
  for (int i = 0; i < maxPartitionsPerInstance; i++) {
    dummyPrefList.add(ANY_INSTANCE);
    partitionNames.put("p" + i, dummyPrefList);
  }
  idealState.getRecord().getListFields().putAll(partitionNames);

  if (!_gSetupTool.getClusterManagementTool().getClusters().contains(clusterName)) {
    _gSetupTool.getClusterManagementTool().addCluster(clusterName);
  }
  _gSetupTool.getClusterManagementTool().setResourceIdealState(clusterName, resourceName,
      idealState);

  // Set ExternalView's replica states for a given parameter map
  ExternalView externalView = new ExternalView(resourceName);

  Map<String, Map<String, String>> mappingCurrent = new LinkedHashMap<>();

  List<String> partitionReplicaStatesList = new ArrayList<>(partitionReplicaStates.keySet());
  for (int k = 0; k < partitionReplicaStatesList.size(); k++) {
    Map<String, String> replicaStatesForPartition = new LinkedHashMap<>();
    List<String> replicaStateList = partitionReplicaStates.get(partitionReplicaStatesList.get(k));
    for (int i = 0; i < replicaStateList.size(); i++) {
      replicaStatesForPartition.put("r" + i, replicaStateList.get(i));
    }
    mappingCurrent.put("p" + k, replicaStatesForPartition);
  }

  externalView.getRecord().getMapFields().putAll(mappingCurrent);

  HelixManager helixManager = HelixManagerFactory.getZKHelixManager(clusterName, "p1",
      InstanceType.ADMINISTRATOR, ZK_ADDR);
  helixManager.connect();
  try {
    HelixDataAccessor helixDataAccessor = helixManager.getHelixDataAccessor();
    helixDataAccessor.setProperty(helixDataAccessor.keyBuilder().externalView(resourceName),
        externalView);
  } finally {
    // The manager is only needed for this one write; disconnect it so repeated calls do not
    // accumulate connections.
    helixManager.disconnect();
  }
  System.out.println("End test :" + TestHelper.getTestMethodName());
}
 
Example 17
Source File: TestZKLiveInstanceData.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a live instance change listener on an administrator manager is notified when a
 * participant joins, when the participant's live instance node is updated, and when the
 * participant disconnects.
 *
 * @throws Exception if a manager cannot connect or the test is interrupted while waiting
 */
@Test
public void testDataChange() throws Exception {
  // Create an admin and add LiveInstanceChange listener to it
  HelixManager adminManager =
      HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.ADMINISTRATOR,
          ZK_ADDR);
  adminManager.connect();

  // Declared outside the try block so the finally block can release it.
  HelixManager manager = null;
  try {
    final BlockingQueue<List<LiveInstance>> changeList =
        new LinkedBlockingQueue<List<LiveInstance>>();

    adminManager.addLiveInstanceChangeListener(new LiveInstanceChangeListener() {
      @Override
      public void onLiveInstanceChange(List<LiveInstance> liveInstances,
          NotificationContext changeContext) {
        // The queue is basically unbounded, so shouldn't throw exception when calling
        // "add".
        changeList.add(deepCopy(liveInstances));
      }
    });

    // Check the initial condition
    List<LiveInstance> instances = changeList.poll(1, TimeUnit.SECONDS);
    Assert.assertNotNull(instances, "Expecting a list of live instance");
    Assert.assertTrue(instances.isEmpty(), "Expecting an empty list of live instance");
    // Join as participant, should trigger a live instance change event
    manager =
        HelixManagerFactory.getZKHelixManager(clusterName, "localhost_54321",
            InstanceType.PARTICIPANT, ZK_ADDR);
    manager.connect();
    instances = changeList.poll(1, TimeUnit.SECONDS);
    Assert.assertNotNull(instances, "Expecting a list of live instance");
    Assert.assertEquals(instances.size(), 1, "Expecting one live instance");
    Assert.assertEquals(instances.get(0).getInstanceName(), manager.getInstanceName());
    // Update data in the live instance node, should trigger another live instance change
    // event
    HelixDataAccessor helixDataAccessor = manager.getHelixDataAccessor();
    PropertyKey propertyKey =
        helixDataAccessor.keyBuilder().liveInstance(manager.getInstanceName());
    LiveInstance instance = helixDataAccessor.getProperty(propertyKey);

    Map<String, String> map = new TreeMap<String, String>();
    map.put("k1", "v1");
    instance.getRecord().setMapField("test", map);
    Assert.assertTrue(helixDataAccessor.updateProperty(propertyKey, instance),
        "Failed to update live instance node");

    instances = changeList.poll(1, TimeUnit.SECONDS);
    Assert.assertNotNull(instances, "Expecting a list of live instance");
    Assert.assertEquals(instances.get(0).getRecord().getMapField("test"), map, "Wrong map data.");
    // Disconnecting here is part of the scenario: it should produce an empty live instance list.
    manager.disconnect();
    Thread.sleep(1000); // wait for callback finish

    instances = changeList.poll(1, TimeUnit.SECONDS);
    Assert.assertNotNull(instances, "Expecting a list of live instance");
    Assert.assertTrue(instances.isEmpty(), "Expecting an empty list of live instance");
  } finally {
    try {
      // Only reached with a connected participant when an assertion failed before the
      // in-scenario disconnect above.
      if (manager != null && manager.isConnected()) {
        manager.disconnect();
      }
    } finally {
      adminManager.disconnect();
    }
  }
}
 
Example 18
Source File: TestZeroReplicaAvoidance.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Starts half of the participants, creates one WAGED-rebalance resource per test state model,
 * then starts the remaining participants with a transition delay and checks both the cluster
 * verifier and the {@code _testSuccess} flag.
 *
 * @throws Exception if the spectator manager cannot connect or a participant fails to start
 */
@Test
public void testWagedRebalancer() throws Exception {
  System.out.println("START testWagedRebalancer at " + new Date(System.currentTimeMillis()));
  HelixManager manager =
      HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, null, InstanceType.SPECTATOR, ZK_ADDR);
  manager.connect();
  try {
    manager.addExternalViewChangeListener(this);
    manager.addIdealStateChangeListener(this);
    enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);

    // Start half number of nodes.
    int i = 0;
    for (; i < NUM_NODE / 2; i++) {
      _participants.get(i).syncStart();
    }

    int replica = 3;
    int partition = 30;
    for (String stateModel : TestStateModels) {
      String db = "Test-DB-" + stateModel;
      createResourceWithWagedRebalance(CLUSTER_NAME, db, stateModel, partition, replica, replica);
    }
    // TODO remove this sleep after fix https://github.com/apache/helix/issues/526
    Thread.sleep(1000);
    Assert.assertTrue(_clusterVerifier.verifyByPolling(50000L, 100L));

    _startListen = true;
    DelayedTransition.setDelay(5);

    // add the other half of nodes.
    for (; i < NUM_NODE; i++) {
      _participants.get(i).syncStart();
    }
    Assert.assertTrue(_clusterVerifier.verify(70000L));
    Assert.assertTrue(_testSuccess);
  } finally {
    // Release the spectator connection even when an assertion above fails.
    if (manager.isConnected()) {
      manager.disconnect();
    }
  }
  System.out.println("END testWagedRebalancer at " + new Date(System.currentTimeMillis()));
}
 
Example 19
Source File: ClusterIntegrationTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Starts a long-sleeping job, stops it from a separate thread once the task is running, shuts
 * the cluster down and verifies the output files.
 *
 * @throws Exception if the cluster cannot be started or the cancelling task fails
 */
@Test
void testJobShouldGetCancelled() throws Exception {
  // Cancellation usually needs long time to successfully be executed, therefore setting the sleeping time to 100.
  Config jobConfigOverrides = ClusterIntegrationTestUtils.buildSleepingJob(IntegrationJobCancelSuite.JOB_ID,
      IntegrationJobCancelSuite.TASK_STATE_FILE)
      .withValue(SleepingTask.SLEEP_TIME_IN_SECONDS, ConfigValueFactory.fromAnyRef(100));
  this.suite = new IntegrationJobCancelSuite(jobConfigOverrides);
  HelixManager helixManager = getHelixManager();
  suite.startCluster();
  helixManager.connect();

  ExecutorService executor = Executors.newSingleThreadExecutor();
  try {
    Runnable cancelAfterTaskInit = () -> {
      try {
        TaskDriver taskDriver = new TaskDriver(helixManager);
        // The actual cancellation needs to be executed in separated thread to make the cancel of helix is not blocked by
        // SleepingTask's thread in its own thread.
        // Issue the cancel after ensuring the workflow is created and the SleepingTask is running
        AssertWithBackoff.create().maxSleepMs(1000).backoffFactor(1).
            assertTrue(isTaskStarted(helixManager, IntegrationJobCancelSuite.JOB_ID), "Waiting for the job to start...");

        AssertWithBackoff.create().maxSleepMs(100).timeoutMs(2000).backoffFactor(1).
            assertTrue(isTaskRunning(IntegrationJobCancelSuite.TASK_STATE_FILE),
                "Waiting for the task to enter running state");

        log.info("Stopping the job");
        taskDriver.stop(IntegrationJobCancelSuite.JOB_ID);
        suite.shutdownCluster();
      } catch (Exception e) {
        // Chain the cause so futureTask.get() reports what actually went wrong.
        throw new RuntimeException("Failure in canceling tasks", e);
      }
    };

    FutureTask<String> futureTask = new FutureTask<String>( cancelAfterTaskInit, "cancelled");
    executor.submit(futureTask);

    AssertWithBackoff assertWithBackoff = AssertWithBackoff.create().backoffFactor(1).maxSleepMs(1000).timeoutMs(500000);
    assertWithBackoff.assertTrue(new Predicate<Void>() {
      @Override
      public boolean apply(Void input) {
        return futureTask.isDone();
      }
    }, "waiting for future to complete");

    Assert.assertEquals(futureTask.get(), "cancelled");
  } finally {
    // Without a shutdown the executor's worker thread lingers after the test.
    executor.shutdown();
  }
  suite.waitForAndVerifyOutputFiles();
}
 
Example 20
Source File: DatacenterInitializer.java    From ambry with Apache License 2.0 4 votes vote down vote up
/**
 * Perform initialization for a helix-managed datacenter of servers.
 *
 * <p>Steps, in order: pick the Helix manager (the shared {@code localManager} for the local
 * datacenter, otherwise a newly connected spectator), create the configured cluster change
 * handler, create a current-state based routing table provider, register the handler for
 * routing table, data node config, ideal state and live instance changes, wait for the initial
 * routing table notification if the snapshot is still empty, and finally disconnect a non-local
 * manager when cross-colo listening is disabled.
 *
 * @return the {@link DcInfo} for the datacenter.
 * @throws IllegalArgumentException if the configured cluster change handler type is neither
 *         {@code SimpleClusterChangeHandler} nor {@code DynamicClusterChangeHandler}
 * @throws Exception if something went wrong during startup
 */
private DcInfo initializeHelixDatacenter() throws Exception {
  // For now, the first ZK endpoint (if there are more than one endpoints) will be adopted by default for initialization.
  // Note that, Ambry currently doesn't support multiple spectators, because there should be only one source of truth.
  String zkConnectStr = dcZkInfo.getZkConnectStrs().get(0);
  HelixManager manager;
  // The local datacenter reuses the manager supplied as localManager; every other datacenter
  // gets its own spectator connection.
  if (dcZkInfo.getDcName().equals(clusterMapConfig.clusterMapDatacenterName)) {
    manager = Objects.requireNonNull(localManager, "localManager should have been set");
  } else {
    manager = helixFactory.getZKHelixManager(clusterMapConfig.clusterMapClusterName, selfInstanceName,
        InstanceType.SPECTATOR, zkConnectStr);
    logger.info("Connecting to Helix manager at {}", zkConnectStr);
    manager.connect();
    logger.info("Established connection to Helix manager at {}", zkConnectStr);
  }
  HelixClusterChangeHandler clusterChangeHandler;
  // The handler implementation is selected by comparing the configured type with the simple
  // class names of the two supported handlers.
  String clusterChangeHandlerType = clusterMapConfig.clusterMapClusterChangeHandlerType;
  if (clusterChangeHandlerType.equals(SimpleClusterChangeHandler.class.getSimpleName())) {
    clusterChangeHandler =
        new SimpleClusterChangeHandler(clusterMapConfig, dcName, selfInstanceName, partitionOverrideInfoMap,
            partitionMap, partitionNameToAmbryPartition, ambryPartitionToAmbryReplicas, helixClusterManagerCallback,
            helixClusterManagerMetrics, this::onInitializationFailure, sealedStateChangeCounter);
  } else if (clusterChangeHandlerType.equals(DynamicClusterChangeHandler.class.getSimpleName())) {
    clusterChangeHandler =
        new DynamicClusterChangeHandler(clusterMapConfig, dcName, selfInstanceName, partitionOverrideInfoMap,
            helixClusterManagerCallback, clusterChangeHandlerCallback, helixClusterManagerMetrics,
            this::onInitializationFailure, sealedStateChangeCounter);
  } else {
    throw new IllegalArgumentException("Unsupported cluster change handler type: " + clusterChangeHandlerType);
  }
  // Create RoutingTableProvider of each DC to keep track of partition(replicas) state. Here, we use current
  // state based RoutingTableProvider to remove dependency on Helix's pipeline and reduce notification latency.
  logger.info("Creating routing table provider associated with Helix manager at {}", zkConnectStr);
  RoutingTableProvider routingTableProvider = new RoutingTableProvider(manager, PropertyType.CURRENTSTATES);
  logger.info("Routing table provider is created in {}", dcName);
  routingTableProvider.addRoutingTableChangeListener(clusterChangeHandler, null);
  logger.info("Registered routing table change listeners in {}", dcName);

  // The initial instance config change notification is required to populate the static cluster
  // information, and only after that is complete do we want the live instance change notification to
  // come in. We do not need to do anything extra to ensure this, however, since Helix provides the initial
  // notification for a change from within the same thread that adds the listener, in the context of the add
  // call. Therefore, when the call to add a listener returns, the initial notification will have been
  // received and handled.
  DataNodeConfigSource dataNodeConfigSource = new InstanceConfigToDataNodeConfigAdapter(manager, clusterMapConfig);
  dataNodeConfigSource.addDataNodeConfigChangeListener(clusterChangeHandler);
  logger.info("Registered instance config change listeners for Helix manager at {}", zkConnectStr);
  manager.addIdealStateChangeListener(clusterChangeHandler);
  logger.info("Registered ideal state change listeners for Helix manager at {}", zkConnectStr);
  // Now register listeners to get notified on live instance change in every datacenter.
  manager.addLiveInstanceChangeListener(clusterChangeHandler);
  logger.info("Registered live instance change listeners for Helix manager at {}", zkConnectStr);

  // in case initial event occurs before adding routing table listener, here we explicitly set snapshot in
  // ClusterChangeHandler. The reason is, if listener missed initial event, snapshot inside routing table
  // provider should be already populated.
  clusterChangeHandler.setRoutingTableSnapshot(routingTableProvider.getRoutingTableSnapshot());
  // the initial routing table change should populate the instanceConfigs. If it's empty that means initial
  // change didn't come and thread should wait on the init latch to ensure routing table snapshot is non-empty
  if (clusterChangeHandler.getRoutingTableSnapshot().getInstanceConfigs().isEmpty()) {
    // Periodic refresh in routing table provider is enabled by default. In worst case, routerUpdater should
    // trigger routing table change within 5 minutes
    logger.info("Routing table snapshot in {} is currently empty. Waiting for initial notification.", dcName);
    clusterChangeHandler.waitForInitNotification();
  }

  // When cross-colo listening is disabled, a manager other than localManager is only kept for
  // the initialization above and is disconnected here. The reference comparison keeps the
  // shared local manager connected.
  if (!clusterMapConfig.clustermapListenCrossColo && manager != localManager) {
    manager.disconnect();
    logger.info("Stopped listening to cross colo ZK server {}", zkConnectStr);
  }

  return new HelixDcInfo(dcName, dcZkInfo, manager, clusterChangeHandler);
}