Example 1
6 votes vote down vote up
/**
 * Builds a {@link ZkNodeProps} descriptor for every replica in the cluster state whose
 * node name equals {@code source}.
 *
 * <p>The scan walks every collection in {@code state.getCollectionsMap()}, every slice of
 * each collection and every replica of each slice. Each descriptor records the collection
 * name, shard name, core name, replica name, replica type, whether the replica equals the
 * slice's leader, and the node name.
 *
 * <p>NOTE(review): in this excerpt the statement that adds {@code props} to
 * {@code sourceReplicas} and all closing braces are missing -- presumably lost during
 * extraction; verify against the upstream source.
 *
 * @param source node name to match; {@code equals} is invoked on it, so it must not be null
 * @param state  cluster state whose collections are scanned
 * @return the {@code sourceReplicas} list
 */
static List<ZkNodeProps> getReplicasOfNode(String source, ClusterState state) {
  List<ZkNodeProps> sourceReplicas = new ArrayList<>();
  for (Map.Entry<String, DocCollection> e : state.getCollectionsMap().entrySet()) {
    for (Slice slice : e.getValue().getSlices()) {
      for (Replica replica : slice.getReplicas()) {
        // Only replicas hosted on the requested node are described.
        if (source.equals(replica.getNodeName())) {
          ZkNodeProps props = new ZkNodeProps(
              COLLECTION_PROP, e.getKey(),
              SHARD_ID_PROP, slice.getName(),
              ZkStateReader.CORE_NAME_PROP, replica.getCoreName(),
              ZkStateReader.REPLICA_PROP, replica.getName(),
              ZkStateReader.REPLICA_TYPE, replica.getType().name(),
              // Leader flag is stored as the string "true"/"false".
              ZkStateReader.LEADER_PROP, String.valueOf(replica.equals(slice.getLeader())),
              CoreAdminParams.NODE, source);
  return sourceReplicas;
Example 2
6 votes vote down vote up
/**
 * Looks up the name of the replica in {@code collection} whose node-name property equals
 * {@code forNodeName} and whose core-name property equals {@code forCoreName}.
 *
 * <p>NOTE(review): {@code nodeName.equals(...)} and {@code core.equals(...)} are invoked on
 * the values returned by {@code replica.getStr(...)}; if either property can be absent this
 * would throw a NullPointerException -- confirm that both properties are always set.
 * Closing braces are missing from this excerpt.
 *
 * @param collection  collection to search; may be null, in which case null is returned
 * @param forNodeName node name to match
 * @param forCoreName core name to match
 * @return {@code replica.getName()} of the first matching replica, or null when the
 *         collection (or its slices) is null or no replica matches
 */
public static String getAssignedCoreNodeName(DocCollection collection, String forNodeName, String forCoreName) {
  // A null collection is treated the same as "no slices".
  Collection<Slice> slices = collection != null ? collection.getSlices() : null;
  if (slices != null) {
    for (Slice slice : slices) {
      for (Replica replica : slice.getReplicas()) {
        String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP);
        String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);

        if (nodeName.equals(forNodeName) && core.equals(forCoreName)) {
          return replica.getName();
  return null;
Example 3
5 votes vote down vote up
/**
 * Creates a ReplicaInfo from a {@link Replica} plus an optional map of extra values.
 *
 * <p>Core name, type and node are copied from the replica; collection and shard come from
 * the arguments. The leader flag starts from the replica's {@code LEADER_PROP} (default
 * false) and, when {@code vals} is non-null, is replaced by whether the string form of
 * {@code vals}' {@code LEADER_PROP} entry (falling back to the replica's value) equals "true".
 *
 * <p>NOTE(review): the first line of this excerpt is mangled ("{ = r.getName();") -- the
 * assignment target appears to have been stripped during extraction, as have the closing
 * braces; verify against the upstream source.
 *
 * @param coll  collection name stored in {@code this.collection}
 * @param shard shard name stored in {@code this.shard}
 * @param r     replica supplying name, core, type, node and the default leader flag
 * @param vals  optional extra values; may be null
 */
public ReplicaInfo(String coll, String shard, Replica r, Map<String, Object> vals) { = r.getName();
  this.core = r.getCoreName();
  this.collection = coll;
  this.shard = shard;
  this.type = r.getType();
  this.node = r.getNodeName();
  boolean maybeLeader = r.getBool(LEADER_PROP, false);
  if (vals != null) {
    // The map value may be of any type, so it is compared via its string form.
    maybeLeader = "true".equals(String.valueOf(vals.getOrDefault(LEADER_PROP, maybeLeader)));
  this.isLeader = maybeLeader;
Example 4
5 votes vote down vote up
/**
 * Compares the current cluster state of {@code collectionName} against the expected layout
 * and reports the first mismatch found.
 *
 * <p>Checks, in order: the collection exists, the number of slices equals
 * {@code numShardsNumReplicaList.get(0)}, every replica runs on a node listed in
 * {@code nodesAllowedToRunShards} (when that list is non-null), and the total replica count
 * equals {@code get(0) * get(1)}.
 *
 * <p>NOTE(review): closing braces are missing from this excerpt, so the exact nesting is
 * inferred from the statement order only.
 *
 * @param collectionName          collection to inspect
 * @param numShardsNumReplicaList index 0 = expected slice count, index 1 = expected replicas per slice
 * @param nodesAllowedToRunShards permitted node names, or null to skip the node check
 * @return null when all expectations hold, otherwise a human-readable description of the mismatch
 */
private String checkCollectionExpectations(String collectionName, List<Integer> numShardsNumReplicaList, List<String> nodesAllowedToRunShards) {
    ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
    int expectedSlices = numShardsNumReplicaList.get(0);
    // The Math.min thing is here, because we expect replication-factor to be reduced to if there are not enough live nodes to spread all shards of a collection over different nodes
    // NOTE(review): no Math.min call is present below; the comment above looks stale -- confirm.
    int expectedShardsPerSlice = numShardsNumReplicaList.get(1);
    int expectedTotalShards = expectedSlices * expectedShardsPerSlice;

//      Map<String,DocCollection> collections = clusterState
//          .getCollectionStates();
      if (clusterState.hasCollection(collectionName)) {
        Map<String,Slice> slices = clusterState.getCollection(collectionName).getSlicesMap();
        // did we find expectedSlices slices/shards?
      if (slices.size() != expectedSlices) {
        return "Found new collection " + collectionName + ", but mismatch on number of slices. Expected: " + expectedSlices + ", actual: " + slices.size();
      // Count replicas across all slices while checking node placement.
      int totalShards = 0;
      for (String sliceName : slices.keySet()) {
        for (Replica replica : slices.get(sliceName).getReplicas()) {
          if (nodesAllowedToRunShards != null && !nodesAllowedToRunShards.contains(replica.getStr(ZkStateReader.NODE_NAME_PROP))) {
            return "Shard " + replica.getName() + " created on node " + replica.getNodeName() + " not allowed to run shards for the created collection " + collectionName;
        totalShards += slices.get(sliceName).getReplicas().size();
      if (totalShards != expectedTotalShards) {
        return "Found new collection " + collectionName + " with correct number of slices, but mismatch on number of shards. Expected: " + expectedTotalShards + ", actual: " + totalShards;
      return null;
    } else {
      return "Could not find new collection " + collectionName;
Example 5
5 votes vote down vote up
/**
 * Repeatedly scans the cluster state for a replica of {@code collectionName} whose node-name
 * property equals {@code msgNodeName} and whose core-name property equals {@code msgCore},
 * making up to 320 attempts.
 *
 * <p>NOTE(review): the {@code try} body and the {@code catch} body are empty in this
 * excerpt and closing braces are missing -- presumably a wait between attempts and the
 * interrupt handling were stripped during extraction; verify against the upstream source.
 * Also, {@code nodeName.equals(...)} / {@code core.equals(...)} would throw a
 * NullPointerException if {@code getStr} returns null -- confirm both properties are always set.
 *
 * @param collectionName collection to search
 * @param msgNodeName    node name to match
 * @param msgCore        core name to match
 * @return {@code replica.getName()} of the first matching replica
 * @throws SolrException with {@code ErrorCode.SERVER_ERROR} when no match is found within the retry budget
 */
String waitForCoreNodeName(String collectionName, String msgNodeName, String msgCore) {
  int retryCount = 320;
  while (retryCount-- > 0) {
    // The collection may not exist yet, hence the null-tolerant lookup.
    final DocCollection docCollection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
    if (docCollection != null && docCollection.getSlicesMap() != null) {
      Map<String,Slice> slicesMap = docCollection.getSlicesMap();
      for (Slice slice : slicesMap.values()) {
        for (Replica replica : slice.getReplicas()) {
          // TODO: for really large clusters, we could 'index' on this

          String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP);
          String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);

          if (nodeName.equals(msgNodeName) && core.equals(msgCore)) {
            return replica.getName();
    try {
    } catch (InterruptedException e) {
  throw new SolrException(ErrorCode.SERVER_ERROR, "Could not find coreNodeName");
Example 6
5 votes vote down vote up
/**
 * Builds a {@link ZkNodeProps} descriptor (collection, shard, core name, replica name and
 * node name) for every replica of {@code slice}.
 *
 * <p>NOTE(review): in this excerpt the statement that adds {@code props} to
 * {@code sourceReplicas} and the closing braces are missing -- presumably lost during
 * extraction; verify against the upstream source.
 *
 * @param collectionName collection name recorded in each descriptor
 * @param slice          slice whose replicas are described
 * @return the {@code sourceReplicas} list
 */
private List<ZkNodeProps> getReplicasForSlice(String collectionName, Slice slice) {
  List<ZkNodeProps> sourceReplicas = new ArrayList<>();
  for (Replica replica : slice.getReplicas()) {
    ZkNodeProps props = new ZkNodeProps(
        COLLECTION_PROP, collectionName,
        SHARD_ID_PROP, slice.getName(),
        ZkStateReader.CORE_NAME_PROP, replica.getCoreName(),
        ZkStateReader.REPLICA_PROP, replica.getName(),
        CoreAdminParams.NODE, replica.getNodeName());
  return sourceReplicas;
Example 7
5 votes vote down vote up
/**
 * Polls the cluster state (up to 300 iterations) until, for every slice of
 * {@code collectionName}, each replica's "leader" flag agrees with its
 * "property.preferredleader" flag and no slice has more than one leader or more than one
 * preferred leader. Returns as soon as an iteration produces no failure message.
 *
 * <p>NOTE(review): closing braces, any wait between iterations and whatever happens after
 * the loop is exhausted are not present in this excerpt; verify against the upstream source.
 *
 * @param client         client whose ZkStateReader supplies the cluster state
 * @param collectionName collection to verify
 * @throws InterruptedException declared by the signature; no throwing call is visible in this excerpt
 * @throws KeeperException      declared by the signature; no throwing call is visible in this excerpt
 */
private void verifyLeaderAssignment(CloudSolrClient client, String collectionName)
    throws InterruptedException, KeeperException {
  String lastFailMsg = "";
  for (int idx = 0; idx < 300; ++idx) { // Keep trying while Overseer writes the ZK state for up to 30 seconds.
    // Reset per attempt so only the latest pass decides success.
    lastFailMsg = "";
    ClusterState clusterState = client.getZkStateReader().getClusterState();
    for (Slice slice : clusterState.getCollection(collectionName).getSlices()) {
      Boolean foundLeader = false;
      Boolean foundPreferred = false;
      for (Replica replica : slice.getReplicas()) {
        Boolean isLeader = replica.getBool("leader", false);
        Boolean isPreferred = replica.getBool("property.preferredleader", false);
        // NOTE(review): != on boxed Booleans compares references; this is only reliable when
        // both values are the cached Boolean.TRUE/FALSE instances (as produced by autoboxing).
        // Presumably getBool returns a primitive boolean -- confirm, or compare with equals().
        if (isLeader != isPreferred) {
          lastFailMsg = "Replica should NOT have preferredLeader != leader. Preferred: " + isPreferred.toString() +
              " leader is " + isLeader.toString();
        if (foundLeader && isLeader) {
          lastFailMsg = "There should only be a single leader in _any_ shard! Replica " + replica.getName() +
              " is the second leader in slice " + slice.getName();
        if (foundPreferred && isPreferred) {
          lastFailMsg = "There should only be a single preferredLeader in _any_ shard! Replica " + replica.getName() +
              " is the second preferredLeader in slice " + slice.getName();
        // Latch the flags: once a leader / preferred leader has been seen they stay true.
        foundLeader = foundLeader ? foundLeader : isLeader;
        foundPreferred = foundPreferred ? foundPreferred : isPreferred;
    if (lastFailMsg.length() == 0) return;
Example 8
5 votes vote down vote up
/**
 * Finds the name of the slice that contains the replica named {@code coreNodeName}.
 *
 * <p>NOTE(review): closing braces are missing from this excerpt.
 *
 * @param collection   collection to search; null yields null
 * @param coreNodeName replica name to look for; {@code equals} is invoked on it, so it must not be null
 * @return {@code slice.getName()} of the first slice holding a replica with that name, or
 *         null when the collection or its slices map is null or no replica matches
 */
private String getShardId(DocCollection collection, String coreNodeName) {
  if (collection == null) return null;
  Map<String,Slice> slices = collection.getSlicesMap();
  if (slices != null) {
    for (Slice slice : slices.values()) {
      for (Replica replica : slice.getReplicas()) {
        String cnn = replica.getName();
        if (coreNodeName.equals(cnn)) {
          return slice.getName();
  return null;
Example 9
4 votes vote down vote up
/**
 * <p>
 * Helper method that returns true if the Runnable managed by this factory
 * should be responsible for doing periodic deletes.
 * </p>
 * <p>
 * In simple standalone installations this method always returns true,
 * but in cloud mode it will be true if and only if we are currently the leader
 * of the (active) slice with the first name (lexicographically).
 * </p>
 * <p>
 * If this method returns false, it may have also logged a message letting the user
 * know why we aren't attempting periodic deletion (but it will attempt to not log
 * this excessively)
 * </p>
 */
private boolean iAmInChargeOfPeriodicDeletes() {
  // Purpose: decide whether this core should run the periodic deletes. Returns true when
  // there is no ZkController; otherwise true only when this core's core-node name equals the
  // name of the leader of the first active slice after sorting with COMPARE_SLICES_BY_NAME.
  // NOTE(review): closing braces, the last argument of the log.warn call and the log call
  // that should precede the dangling string literals near the end are missing from this
  // excerpt -- presumably lost during extraction; verify against the upstream source.
  ZkController zk = core.getCoreContainer().getZkController();

  // No ZkController: this core is always in charge.
  if (null == zk) return true;
  // This is a lot simpler then doing our own "leader" election across all replicas 
  // of all shards since:
  //   a) we already have a per shard leader
  //   b) shard names must be unique
  //   c) ClusterState is already being "watched" by ZkController, no additional zk hits
  //   d) there might be multiple instances of this factory (in multiple chains) per 
  //      collection, so picking an ephemeral node name for our election would be tricky

  CloudDescriptor desc = core.getCoreDescriptor().getCloudDescriptor();
  String col = desc.getCollectionName();

  DocCollection docCollection = zk.getClusterState().getCollection(col);
  if (docCollection.getActiveSlicesArr().length == 0) {
    log.error("Collection {} has no active Slices?", col);
    return false;
  // Copy into a mutable list so the active slices can be sorted by name.
  List<Slice> slices = new ArrayList<>(Arrays.asList(docCollection.getActiveSlicesArr()));
  Collections.sort(slices, COMPARE_SLICES_BY_NAME);
  Replica firstSliceLeader = slices.get(0).getLeader();
  if (null == firstSliceLeader) {
    log.warn("Slice in charge of periodic deletes for {} does not currently have a leader",
    return false;
  String leaderInCharge = firstSliceLeader.getName();
  String myCoreNodeName = desc.getCoreNodeName();
  boolean inChargeOfDeletesRightNow = leaderInCharge.equals(myCoreNodeName);

  if (previouslyInChargeOfDeletes && ! inChargeOfDeletesRightNow) {
    // don't spam the logs constantly, just log when we know that we're not the guy
    // (the first time -- or anytime we were, but no longer are)"Not currently in charge of periodic deletes for this collection, {}",
             "will not trigger delete or log again until this changes");

  // Remember the outcome so the transition check above only fires on a change.
  previouslyInChargeOfDeletes = inChargeOfDeletesRightNow;
  return inChargeOfDeletesRightNow;
Example 10
4 votes vote down vote up
/**
 * Handles a distributed delete-by-query according to the request's distrib-update phase
 * (read from {@code DISTRIB_UPDATE_PARAM}).
 *
 * <p>In phase NONE the command is forwarded, with phase set to TOLEADER, to the leader of
 * every slice selected by the collection's router, skipping the slice this core leads;
 * the phase is then switched to TOLEADER locally. In phase TOLEADER this core is treated as
 * a leader and the replica list comes from {@code setupRequestForDBQ()}; in phase FROMLEADER
 * it is treated as a non-leader. The method finishes by delegating to
 * {@code super.doDeleteByQuery(cmd, replicas, coll)}.
 *
 * <p>NOTE(review): closing braces and the body of the {@code if (!leaderForAnyShard)} branch
 * are missing from this excerpt -- presumably lost during extraction; verify against the
 * upstream source before relying on the nesting described here.
 *
 * @param cmd the delete-by-query command to distribute and execute
 * @throws IOException   declared by the signature
 * @throws SolrException with {@code SERVICE_UNAVAILABLE} when interrupted while looking up a slice leader
 */
protected void doDeleteByQuery(DeleteUpdateCommand cmd) throws IOException {

  // NONE: we are the first to receive this deleteByQuery
  //       - it must be forwarded to the leader of every shard
  // TO:   we are a leader receiving a forwarded deleteByQuery... we must:
  //       - block all updates (use VersionInfo)
  //       - flush *all* updates going to our replicas
  //       - forward the DBQ to our replicas and wait for the response
  //       - log + execute the local DBQ
  // FROM: we are a replica receiving a DBQ from our leader
  //       - log + execute the local DBQ
  DistribPhase phase = DistribPhase.parseParam(req.getParams().get(DISTRIB_UPDATE_PARAM));

  DocCollection coll = clusterState.getCollection(collection);

  if (DistribPhase.NONE == phase) {
    // Create the rollup tracker lazily, only if none exists yet.
    if (rollupReplicationTracker == null) {
      rollupReplicationTracker = new RollupRequestReplicationTracker();
    boolean leaderForAnyShard = false;  // start off by assuming we are not a leader for any shard

    // Parameters sent to the other leaders: filtered request params plus phase and origin URL.
    ModifiableSolrParams outParams = new ModifiableSolrParams(filterParams(req.getParams()));
    outParams.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString());
    outParams.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
        zkController.getBaseUrl(), req.getCore().getName()));

    SolrParams params = req.getParams();
    String route = params.get(ShardParams._ROUTE_);
    Collection<Slice> slices = coll.getRouter().getSearchSlices(route, params, coll);

    List<SolrCmdDistributor.Node> leaders =  new ArrayList<>(slices.size());
    for (Slice slice : slices) {
      String sliceName = slice.getName();
      Replica leader;
      try {
        leader = zkController.getZkStateReader().getLeaderRetry(collection, sliceName);
      } catch (InterruptedException e) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + sliceName, e);

      // TODO: What if leaders changed in the meantime?
      // should we send out slice-at-a-time and if a node returns "hey, I'm not a leader" (or we get an error because it went down) then look up the new leader?

      // Am I the leader for this slice?
      ZkCoreNodeProps coreLeaderProps = new ZkCoreNodeProps(leader);
      String leaderCoreNodeName = leader.getName();
      String coreNodeName = cloudDesc.getCoreNodeName();
      isLeader = coreNodeName.equals(leaderCoreNodeName);

      if (isLeader) {
        // don't forward to ourself
        leaderForAnyShard = true;
      } else {
        leaders.add(new SolrCmdDistributor.ForwardNode(coreLeaderProps, zkController.getZkStateReader(), collection, sliceName, maxRetriesOnForward));

    outParams.remove("commit"); // this will be distributed from the local commit

    if (params.get(UpdateRequest.MIN_REPFACT) != null) {
      // TODO: Kept this for rolling upgrades. Remove in Solr 9
      outParams.add(UpdateRequest.MIN_REPFACT, req.getParams().get(UpdateRequest.MIN_REPFACT));
    cmdDistrib.distribDelete(cmd, leaders, outParams, false, rollupReplicationTracker, null);

    if (!leaderForAnyShard) {

    // change the phase to TOLEADER so we look up and forward to our own replicas (if any)
    phase = DistribPhase.TOLEADER;
  // Stays null unless the TOLEADER branch below fills it in.
  List<SolrCmdDistributor.Node> replicas = null;

  if (DistribPhase.TOLEADER == phase) {
    // This core should be a leader
    isLeader = true;
    replicas = setupRequestForDBQ();
  } else if (DistribPhase.FROMLEADER == phase) {
    isLeader = false;

  // check if client has requested minimum replication factor information. will set replicationTracker to null if
  // we aren't the leader or subShardLeader
  super.doDeleteByQuery(cmd, replicas, coll);
Example 11
4 votes vote down vote up
/**
 * See if coreNodeName has been taken over by another baseUrl and unload core
 * + throw exception if it has been.
 */
public static void checkSharedFSFailoverReplaced(CoreContainer cc, CoreDescriptor desc) {
  // Purpose: for a core on a shared file system, scan the collection's replicas and throw a
  // SolrException (SERVER_ERROR) when a replica with this core's core-node name is registered
  // under a different base URL. Does nothing when cc.isSharedFs(desc) is false.
  // NOTE(review): closing braces, the body of the "loaded core names" branch and the body of
  // the try block are missing from this excerpt -- presumably lost during extraction; verify
  // against the upstream source.
  if (!cc.isSharedFs(desc)) return;

  ZkController zkController = cc.getZkController();
  String thisCnn = zkController.getCoreNodeName(desc);
  String thisBaseUrl = zkController.getBaseUrl();

  log.debug("checkSharedFSFailoverReplaced running for coreNodeName={} baseUrl={}", thisCnn, thisBaseUrl);

  // if we see our core node name on a different base url, unload
  final DocCollection docCollection = zkController.getClusterState().getCollectionOrNull(desc.getCloudDescriptor().getCollectionName());
  if (docCollection != null && docCollection.getSlicesMap() != null) {
    Map<String,Slice> slicesMap = docCollection.getSlicesMap();
    for (Slice slice : slicesMap.values()) {
      for (Replica replica : slice.getReplicas()) {

        String cnn = replica.getName();
        String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
        log.debug("compare against coreNodeName={} baseUrl={}", cnn, baseUrl);

        // thisCnn is null-checked here; thisBaseUrl is not, so it is assumed non-null.
        if (thisCnn != null && thisCnn.equals(cnn)
            && !thisBaseUrl.equals(baseUrl)) {
          if (cc.getLoadedCoreNames().contains(desc.getName())) {

          try {
          } catch (IOException e) {
            SolrException.log(log, "Failed to delete instance dir for core:"
                + desc.getName() + " dir:" + desc.getInstanceDir());
              new SolrException(ErrorCode.SERVER_ERROR, "Will not load SolrCore " + desc.getName()
                  + " because it has been replaced due to failover.")); // logOk
          throw new SolrException(ErrorCode.SERVER_ERROR,
              "Will not load SolrCore " + desc.getName()
                  + " because it has been replaced due to failover.");
Example 12
4 votes vote down vote up
/**
 * Produces the write commands needed to mark every replica hosted on a given node as DOWN.
 *
 * <p>The node name is read from {@code message} ({@code ZkStateReader.NODE_NAME_PROP}). For
 * each collection a copy of its slices map is built in which replicas on that node are
 * replaced by copies whose state property is {@code Replica.State.DOWN}. A
 * {@link ZkWriteCommand} is added only for collections where at least one replica changed.
 *
 * <p>NOTE(review): closing braces are missing from this excerpt; judging by indentation the
 * new Slice is created for every slice, not only for changed ones -- verify against the
 * upstream source.
 *
 * @param clusterState current cluster state; not modified by the visible code, which works on copies
 * @param message      message carrying the node name
 * @return list of write commands, one per collection that had a replica on the node
 * @throws RuntimeException when a replica reports a null node name
 */
public List<ZkWriteCommand> downNode(ClusterState clusterState, ZkNodeProps message) {
  List<ZkWriteCommand> zkWriteCommands = new ArrayList<>();
  String nodeName = message.getStr(ZkStateReader.NODE_NAME_PROP);

  log.debug("DownNode state invoked for node: {}", nodeName);

  Map<String, DocCollection> collections = clusterState.getCollectionsMap();
  for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
    String collection = entry.getKey();
    DocCollection docCollection = entry.getValue();

    // Work on a copy so entries can be replaced while the original map stays untouched.
    Map<String,Slice> slicesCopy = new LinkedHashMap<>(docCollection.getSlicesMap());

    boolean needToUpdateCollection = false;
    for (Entry<String, Slice> sliceEntry : slicesCopy.entrySet()) {
      Slice slice = sliceEntry.getValue();
      Map<String, Replica> newReplicas = slice.getReplicasCopy();

      Collection<Replica> replicas = slice.getReplicas();
      for (Replica replica : replicas) {
        String rNodeName = replica.getNodeName();
        if (rNodeName == null) {
          throw new RuntimeException("Replica without node name! " + replica);
        if (rNodeName.equals(nodeName)) {
          log.debug("Update replica state for {} to {}", replica, Replica.State.DOWN);
          // Rebuild the replica from a copy of its properties with the state overridden.
          Map<String, Object> props = replica.shallowCopy();
          props.put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
          Replica newReplica = new Replica(replica.getName(), props, collection, slice.getName());
          newReplicas.put(replica.getName(), newReplica);
          needToUpdateCollection = true;

      Slice newSlice = new Slice(slice.getName(), newReplicas, slice.shallowCopy(),collection);
      slicesCopy.put(slice.getName(), newSlice);

    if (needToUpdateCollection) {
      zkWriteCommands.add(new ZkWriteCommand(collection, docCollection.copyWithSlices(slicesCopy)));

  return zkWriteCommands;
Example 13
4 votes vote down vote up
/**
 * Test scenario: creates a 1-shard, 3-replica collection, indexes two documents, picks an
 * active non-leader replica, builds a DELETECORE overseer message for it, then waits for the
 * shard to drop to two replicas, for the core descriptor to disappear from the replica's
 * core container, and for the count of unload-on-delete state watchers to fall by one.
 *
 * <p>NOTE(review): several statements are truncated in this excerpt (the call chain after
 * {@code createCollection(...)}, the arguments of {@code countUnloadCoreOnDeletedWatchers},
 * the step that submits message {@code m}, the log call fused onto a later line, and all
 * closing braces/parentheses) -- presumably lost during extraction; verify against the
 * upstream source.
 *
 * @throws Exception declared by the signature
 */
public void deleteReplicaFromClusterState() throws Exception {
  final String collectionName = "deleteFromClusterStateCollection";
  CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
  cluster.waitForActiveCollection(collectionName, 1, 3);
  cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "1"));
  cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2"));

  cluster.waitForActiveCollection(collectionName, 1, 3);

  Slice shard = getCollectionState(collectionName).getSlice("shard1");

  // don't choose the leader to shutdown, it just complicates things unnecessarily
  Replica replica = getRandomReplica(shard, (r) ->
                                     ( r.getState() == Replica.State.ACTIVE &&
                                       ! r.equals(shard.getLeader())));
  JettySolrRunner replicaJetty = cluster.getReplicaJetty(replica);
  ZkStateReaderAccessor accessor = new ZkStateReaderAccessor(replicaJetty.getCoreContainer().getZkController().getZkStateReader());

  final long preDeleteWatcherCount = countUnloadCoreOnDeletedWatchers

  // Overseer message describing the core to delete.
  ZkNodeProps m = new ZkNodeProps(
      Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
      ZkStateReader.CORE_NAME_PROP, replica.getCoreName(),
      ZkStateReader.NODE_NAME_PROP, replica.getNodeName(),
      ZkStateReader.COLLECTION_PROP, collectionName,
      ZkStateReader.CORE_NODE_NAME_PROP, replica.getName(),
      ZkStateReader.BASE_URL_PROP, replica.getBaseUrl());


  waitForState("Timeout waiting for replica get deleted", collectionName,
      (liveNodes, collectionState) -> collectionState.getSlice("shard1").getReplicas().size() == 2);

  TimeOut timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
  timeOut.waitFor("Waiting for replica get unloaded", () ->
      replicaJetty.getCoreContainer().getCoreDescriptor(replica.getCoreName()) == null
  // the core should no longer have a watch collection state since it was removed
  timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
  timeOut.waitFor("Waiting for core's watcher to be removed", () -> {
      final long postDeleteWatcherCount = countUnloadCoreOnDeletedWatchers
        (accessor.getStateWatchers(collectionName));"preDeleteWatcherCount={} vs postDeleteWatcherCount={}",
               preDeleteWatcherCount, postDeleteWatcherCount);
      // Exactly one watcher is expected to have been removed.
      return (preDeleteWatcherCount - 1L == postDeleteWatcherCount);
Example 14
4 votes vote down vote up
/**
 * Creates a {@code CollectionAdminRequest.MoveReplica} request for the given replica.
 *
 * <p>NOTE(review): the closing brace is missing from this excerpt.
 *
 * @param coll       collection name passed as the first constructor argument
 * @param replica    replica to move; only {@code replica.getName()} is used
 * @param targetNode target node passed as the third constructor argument
 * @return the newly constructed request
 */
private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode) {
  return new CollectionAdminRequest.MoveReplica(coll, replica.getName(), targetNode);
Example 15
4 votes vote down vote up
public void test() throws IOException, SolrServerException, KeeperException, InterruptedException {
  Set<String> coreNames = new HashSet<>();
  Set<String> coreNodeNames = new HashSet<>();

  int numOperations = random().nextInt(15) + 15;
  int numLiveReplicas = 4;

  boolean clearedCounter = false;
  for (int i = 0; i < numOperations; i++) {
    if (log.isInfoEnabled()) {"Collection counter={} i={}", getCounter(), i);
    boolean deleteReplica = random().nextBoolean() && numLiveReplicas > 1;
    // No need to clear counter more than one time
    if (random().nextBoolean() && i > 5 && !clearedCounter) {"Clear collection counter");
      // clear counter
      cluster.getZkClient().delete("/collections/"+COLLECTION+"/counter", -1, true);
      clearedCounter = true;
    if (deleteReplica) {
      cluster.waitForActiveCollection(COLLECTION, 1, numLiveReplicas);
      DocCollection dc = getCollectionState(COLLECTION);
      Replica replica = getRandomReplica(dc.getSlice("shard1"), (r) -> r.getState() == Replica.State.ACTIVE);
      CollectionAdminRequest.deleteReplica(COLLECTION, "shard1", replica.getName()).process(cluster.getSolrClient());
    } else {
      CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1")
      String coreName = response.getCollectionCoresStatus()
      assertFalse("Core name is not unique coreName=" + coreName + " " + coreNames, coreNames.contains(coreName));
      cluster.waitForActiveCollection(COLLECTION, 1, numLiveReplicas);

      Replica newReplica = getCollectionState(COLLECTION).getReplicas().stream()
          .filter(r -> r.getCoreName().equals(coreName))
      String coreNodeName = newReplica.getName();
      assertFalse("Core node name is not unique", coreNodeNames.contains(coreName));