Java Code Examples for org.apache.solr.common.cloud.Replica#State

The following examples show how to use org.apache.solr.common.cloud.Replica#State. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OverseerCollectionMessageHandler.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a per-slice command against every slice of the collection named in {@code message}
 * and then processes the collected shard responses.
 *
 * @param message        collection message; its {@code NAME} property selects the collection
 * @param params         request parameters handed to each per-slice command
 * @param results        container handed to response processing
 * @param stateMatcher   replica state filter passed through to the per-slice command
 * @param asyncId        async request id; logged and used to create the shard request tracker
 * @param okayExceptions passed through to response processing
 * @return the replicas the per-slice commands reported back (not live), accumulated over all slices
 */
List<Replica> collectionCmd(ZkNodeProps message, ModifiableSolrParams params,
                   NamedList<Object> results, Replica.State stateMatcher, String asyncId, Set<String> okayExceptions) {
  log.info("Executing Collection Cmd={}, asyncId={}", params, asyncId);

  final String targetCollection = message.getStr(NAME);
  @SuppressWarnings("deprecation")
  final ShardHandler handler = shardHandlerFactory.getShardHandler(
      overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

  final ClusterState snapshot = zkStateReader.getClusterState();
  final DocCollection docCollection = snapshot.getCollection(targetCollection);
  final List<Replica> unreachable = new ArrayList<>();
  final ShardRequestTracker tracker = new ShardRequestTracker(asyncId);

  // Fan the command out slice by slice, remembering every replica that was skipped as not live.
  for (Slice slice : docCollection.getSlices()) {
    List<Replica> skippedInSlice = tracker.sliceCmd(snapshot, params, stateMatcher, slice, handler);
    unreachable.addAll(skippedInSlice);
  }

  tracker.processResponses(results, handler, false, null, okayExceptions);
  return unreachable;
}
 
Example 2
Source File: HttpPartitionTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the replicas of one shard whose state is ACTIVE or RECOVERING.
 *
 * <p>Only active slices of the collection are inspected, and replicas are de-duplicated by name.
 *
 * @param testCollectionName collection to look up in the current cluster state
 * @param shardId            name of the shard whose replicas are wanted
 * @return a new list holding the matching replicas (empty when nothing matches)
 */
protected List<Replica> getActiveOrRecoveringReplicas(String testCollectionName, String shardId) throws Exception {
  final ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
  assertNotNull(clusterState);

  final Map<String,Replica> matchesByName = new HashMap<>();
  for (Slice slice : clusterState.getCollection(testCollectionName).getActiveSlices()) {
    if (!slice.getName().equals(shardId)) {
      continue; // not the shard we were asked about
    }
    for (Replica candidate : slice.getReplicas()) {
      final Replica.State current = candidate.getState();
      final boolean wanted = current == Replica.State.ACTIVE || current == Replica.State.RECOVERING;
      if (wanted) {
        matchesByName.put(candidate.getName(), candidate);
      }
    }
  }
  return new ArrayList<>(matchesByName.values());
}
 
Example 3
Source File: OverseerCollectionMessageHandler.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sends the request to every replica of {@code slice} that matches {@code stateMatcher}
 * and sits on a live node.
 *
 * @param clusterState cluster state used to check whether a replica's node is live
 * @param params       base request parameters; copied per replica before the core name is set
 * @param stateMatcher required replica state, or {@code null} to accept replicas in any state
 * @param slice        slice whose replicas are addressed
 * @param shardHandler handler the shard requests are submitted through
 * @return the state-matching replicas that were skipped because their node is not live
 */
public List<Replica> sliceCmd(ClusterState clusterState, ModifiableSolrParams params, Replica.State stateMatcher,
              Slice slice, ShardHandler shardHandler) {
  final List<Replica> skipped = new ArrayList<>();
  for (Replica candidate : slice.getReplicas()) {
    if (stateMatcher != null
        && Replica.State.getState(candidate.getStr(ZkStateReader.STATE_PROP)) != stateMatcher) {
      continue; // wrong state for this command
    }

    final String nodeName = candidate.getStr(ZkStateReader.NODE_NAME_PROP);
    if (!clusterState.liveNodesContain(nodeName)) {
      skipped.add(candidate);
      continue;
    }

    // For thread safety, only simple clone the ModifiableSolrParams
    final ModifiableSolrParams perReplicaParams = new ModifiableSolrParams();
    perReplicaParams.add(params);
    perReplicaParams.set(CoreAdminParams.CORE, candidate.getStr(ZkStateReader.CORE_NAME_PROP));

    sendShardRequest(nodeName, perReplicaParams, shardHandler);
  }
  return skipped;
}
 
Example 4
Source File: ChaosMonkey.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds to {@code numActive} the number of jetties of the given slice that are running,
 * whose replica is ACTIVE, and whose node is listed among the live nodes.
 *
 * @param sliceName slice whose jetties are inspected
 * @param numActive starting count that the result builds on
 * @return {@code numActive} plus the number of qualifying jetties
 * @throws RuntimeException if a jetty's core node name is not present in the slice's replica map
 */
private int checkIfKillIsLegal(String sliceName, int numActive) throws KeeperException, InterruptedException {
  int activeCount = numActive;
  for (CloudJettyRunner runner : shardToJetty.get(sliceName)) {

    // get latest cloud state
    zkStateReader.forceUpdateCollection(collection);

    final Slice slice = zkStateReader.getClusterState().getCollection(collection).getSlice(sliceName);

    final ZkNodeProps replicaProps = slice.getReplicasMap().get(runner.coreNodeName);
    if (replicaProps == null) {
      throw new RuntimeException("shard name " + runner.coreNodeName + " not found in " + slice.getReplicasMap().keySet());
    }

    final Replica.State state = Replica.State.getState(replicaProps.getStr(ZkStateReader.STATE_PROP));
    final String nodeName = replicaProps.getStr(ZkStateReader.NODE_NAME_PROP);

    final boolean countsAsActive = runner.jetty.isRunning()
        && state == Replica.State.ACTIVE
        && zkStateReader.getClusterState().liveNodesContain(nodeName);
    if (countsAsActive) {
      activeCount++;
    }
  }
  return activeCount;
}
 
Example 5
Source File: ChaosMonkey.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the slice has more than one running, ACTIVE, live replica of type TLOG or NRT.
 *
 * @param sliceName slice whose jetties are inspected
 * @return {@code true} when more than one such replica was found
 * @throws RuntimeException if a jetty's core node name is not present in the slice's replica map
 */
private boolean canKillIndexer(String sliceName) throws KeeperException, InterruptedException {
  int indexers = 0;
  for (CloudJettyRunner runner : shardToJetty.get(sliceName)) {

    // get latest cloud state
    zkStateReader.forceUpdateCollection(collection);

    final Slice slice = zkStateReader.getClusterState().getCollection(collection).getSlice(sliceName);

    final ZkNodeProps replicaProps = slice.getReplicasMap().get(runner.coreNodeName);
    if (replicaProps == null) {
      throw new RuntimeException("shard name " + runner.coreNodeName + " not found in " + slice.getReplicasMap().keySet());
    }

    final Replica.State state = Replica.State.getState(replicaProps.getStr(ZkStateReader.STATE_PROP));
    final Replica.Type type = Replica.Type.valueOf(replicaProps.getStr(ZkStateReader.REPLICA_TYPE));
    final String nodeName = replicaProps.getStr(ZkStateReader.NODE_NAME_PROP);

    if (!runner.jetty.isRunning() || state != Replica.State.ACTIVE) {
      continue;
    }
    if (type != Replica.Type.TLOG && type != Replica.Type.NRT) {
      continue; // only TLOG and NRT replicas are counted
    }
    if (zkStateReader.getClusterState().liveNodesContain(nodeName)) {
      indexers++;
    }
  }
  return indexers > 1;
}
 
Example 6
Source File: AbstractDistribZkTestBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Waits up to 15000 milliseconds for the given replica to be in {@code expectedState}.
 *
 * <p>The wait predicate is satisfied only when the collection state, the shard, and the replica
 * entry for {@code coreNodeName} all exist and the replica's state equals the expected one.
 *
 * @param reader        state reader used to wait on the collection state
 * @param collection    collection containing the replica
 * @param shard         name of the shard containing the replica
 * @param coreNodeName  key of the replica in the shard's replica map
 * @param expectedState state the replica has to be in
 */
public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard, String coreNodeName,
    Replica.State expectedState) throws InterruptedException, TimeoutException {
  log.info("verifyReplicaStatus ({}) shard={} coreNodeName={}", collection, shard, coreNodeName);
  reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS, collectionState -> {
    if (collectionState == null || collectionState.getSlice(shard) == null) {
      return false;
    }
    final Replica replica = collectionState.getSlice(shard).getReplicasMap().get(coreNodeName);
    return replica != null && replica.getState() == expectedState;
  });
}
 
Example 7
Source File: ReplicaInfo.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the replica state recorded under {@code ZkStateReader.STATE_PROP} in the variables.
 *
 * <p>When no state is recorded, ACTIVE is stored in the variables as the default and returned.
 *
 * @return the recorded state, or ACTIVE when none was recorded
 */
public Replica.State getState() {
  final Object recorded = variables.get(ZkStateReader.STATE_PROP);
  if (recorded == null) {
    // default to ACTIVE
    variables.put(ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
    return Replica.State.ACTIVE;
  }
  return Replica.State.getState((String) recorded);
}
 
Example 8
Source File: SimClusterStateProvider.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the state of every replica registered for {@code nodeId} and records the collections touched.
 *
 * <p>For any state other than ACTIVE the replica's leader property is removed as well.
 *
 * @param nodeId             node whose replicas are updated (a replica list is created if absent)
 * @param state              state written into each replica's variables
 * @param changedCollections receives the collection name of every replica that was updated
 */
private void setReplicaStates(String nodeId, Replica.State state, Set<String> changedCollections) {
  @SuppressWarnings({"unchecked"})
  final List<ReplicaInfo> nodeReplicas = nodeReplicaMap.computeIfAbsent(nodeId, Utils.NEW_SYNCHRONIZED_ARRAYLIST_FUN);
  synchronized (nodeReplicas) {
    for (ReplicaInfo info : nodeReplicas) {
      info.getVariables().put(ZkStateReader.STATE_PROP, state.toString());
      if (state != Replica.State.ACTIVE) {
        info.getVariables().remove(ZkStateReader.LEADER_PROP);
      }
      changedCollections.add(info.getCollection());
    }
  }
}
 
Example 9
Source File: HealthCheckHandlerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code CloudDescriptor} for tests from the given shard, collection and node name,
 * then applies the registration flag and the last published state.
 *
 * @param collection value stored under {@code CoreDescriptor.CORE_COLLECTION}
 * @param name       used as the descriptor's name argument and as the core node name property
 * @param shardId    value stored under {@code CoreDescriptor.CORE_SHARD}
 * @param registered value passed to {@code setHasRegistered}
 * @param state      value passed to {@code setLastPublished}
 * @return the configured descriptor
 */
private CloudDescriptor mockCD(String collection, String name, String shardId, boolean registered, Replica.State state) {
  final Properties coreProps = new Properties();
  coreProps.put(CoreDescriptor.CORE_SHARD, shardId);
  coreProps.put(CoreDescriptor.CORE_COLLECTION, collection);
  coreProps.put(CoreDescriptor.CORE_NODE_NAME, name);

  final CloudDescriptor descriptor = new CloudDescriptor(null, name, coreProps);
  descriptor.setHasRegistered(registered);
  descriptor.setLastPublished(state);
  return descriptor;
}
 
Example 10
Source File: TestPullReplica.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a newly added PULL replica is observed in the states DOWN, RECOVERING and ACTIVE,
 * in that order, by a collection state watcher. Currently ignored (see the annotation reason).
 */
@Ignore("Ignore until I figure out a way to reliably record state transitions")
  public void testPullReplicaStates() throws Exception {
    // Validate that pull replicas go through the correct states when starting, stopping, reconnecting
    CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1, 0, 0)
      .setMaxShardsPerNode(100)
      .process(cluster.getSolrClient());
//    cluster.getSolrClient().getZkStateReader().registerCore(collectionName); //TODO: Is this needed?
    waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 0));
    addDocs(500);

    // Record every state the watcher sees for core_node2; the watcher returns true once it is ACTIVE.
    List<Replica.State> statesSeen = new ArrayList<>(3);
    cluster.getSolrClient().registerCollectionStateWatcher(collectionName, (liveNodes, collectionState) -> {
      Replica r = collectionState.getSlice("shard1").getReplica("core_node2");
      log.info("CollectionStateWatcher state change: {}", r);
      if (r == null) {
        return false;
      }
      statesSeen.add(r.getState());
      if (log.isInfoEnabled()) {
        log.info("CollectionStateWatcher saw state: {}", r.getState());
      }
      return r.getState() == Replica.State.ACTIVE;
    });
    CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.PULL).process(cluster.getSolrClient());
    waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 1));
    zkClient().printLayoutToStream(System.out);
    if (log.isInfoEnabled()) {
      log.info("Saw states: {}", Arrays.toString(statesSeen.toArray()));
    }

    final String failureMessage = "Expecting DOWN->RECOVERING->ACTIVE but saw: " + Arrays.toString(statesSeen.toArray());
    assertEquals(failureMessage, 3, statesSeen.size());
    // Each expected state is checked at its own position; checking index 0 three times could never pass.
    assertEquals(failureMessage, Replica.State.DOWN, statesSeen.get(0));
    assertEquals(failureMessage, Replica.State.RECOVERING, statesSeen.get(1));
    assertEquals(failureMessage, Replica.State.ACTIVE, statesSeen.get(2));
  }
 
Example 11
Source File: ZookeeperInfoHandler.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Walks the collection state JSON object and decides whether the collection matches
 * the status filter the user selected.
 *
 * <p>A replica whose node is not in {@code liveNodes} is treated as DOWN regardless of its
 * recorded state. Returns {@code true} when no status filter is set or the filter is not one
 * of the values handled here.
 *
 * @param collectionState collection state map; read via its "shards" entry and each shard's "replicas" entry
 * @param liveNodes       names of the nodes currently considered live
 * @return whether the collection passes the status filter
 */
@SuppressWarnings("unchecked")
final boolean matchesStatusFilter(Map<String, Object> collectionState, Set<String> liveNodes) {
  if (filterType != FilterType.status || filter == null || filter.length() == 0) {
    return true; // no status filter, so all match
  }

  boolean allReplicasActive = true;      // every replica of every shard is active
  boolean someShardHasNoActive = false;  // at least one shard has no active replica
  boolean someReplicaRecovering = false;

  final Map<String, Object> shardsByName = (Map<String, Object>) collectionState.get("shards");
  for (Object shardObj : shardsByName.values()) {
    final Map<String, Object> shardProps = (Map<String, Object>) shardObj;
    final Map<String, Object> replicasByName = (Map<String, Object>) shardProps.get("replicas");

    boolean shardHasActive = false;
    for (Object replicaObj : replicasByName.values()) {
      final Map<String, Object> replicaProps = (Map<String, Object>) replicaObj;
      Replica.State effectiveState = Replica.State.getState((String) replicaProps.get(ZkStateReader.STATE_PROP));
      final String nodeName = (String) replicaProps.get("node_name");

      // state can lie to you if the node is offline, so need to reconcile with live_nodes too
      if (!liveNodes.contains(nodeName)) {
        effectiveState = Replica.State.DOWN; // not on a live node, so must be down
      }

      if (effectiveState == Replica.State.ACTIVE) {
        shardHasActive = true;
        continue;
      }

      // Any non-active replica makes the collection unhealthy; recovery is tracked separately.
      allReplicasActive = false;
      if (effectiveState == Replica.State.RECOVERING) {
        someReplicaRecovering = true;
      }
    }

    if (!shardHasActive) {
      someShardHasNoActive = true; // this is bad
    }
  }

  if ("healthy".equals(filter)) {
    return allReplicasActive;
  }
  if ("degraded".equals(filter)) {
    // means no shards offline but not 100% healthy either
    return !someShardHasNoActive && !allReplicasActive;
  }
  if ("downed_shard".equals(filter)) {
    return someShardHasNoActive;
  }
  if (Replica.State.getState(filter) == Replica.State.RECOVERING) {
    return !allReplicasActive && someReplicaRecovering;
  }
  return true;
}
 
Example 12
Source File: ZkController.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Publishes the core state through the four-argument {@code publish}, passing {@code true}
 * and {@code false} for its two boolean arguments.
 *
 * @param cd    descriptor of the core whose state is published
 * @param state state to publish
 */
public void publish(final CoreDescriptor cd, final Replica.State state) throws Exception {
  final boolean updateLastState = true;
  final boolean forcePublish = false;
  publish(cd, state, updateLastState, forcePublish);
}
 
Example 13
Source File: ZkController.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Publish core state to overseer.
 *
 * <p>Builds a "state" queue operation from the core and cloud descriptors and offers it to the
 * overseer job queue. For non-PULL replicas, RECOVERING and ACTIVE states are also recorded in
 * the shard terms.
 *
 * @param cd              descriptor of the core whose state is published
 * @param state           state to publish
 * @param updateLastState when {@code true}, {@code state} is also stored as the descriptor's last published state
 * @param forcePublish    when {@code false}, nothing is published if the core cannot be found or is closed
 */
public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState, boolean forcePublish) throws Exception {
  if (!forcePublish) {
    // Unless forced, do not publish for a core that is missing or already closed.
    try (SolrCore core = cc.getCore(cd.getName())) {
      if (core == null || core.isClosed()) {
        return;
      }
    }
  }
  MDCLoggingContext.setCoreDescriptor(cc, cd);
  try {
    String collection = cd.getCloudDescriptor().getCollectionName();

    log.debug("publishing state={}", state);
    Integer numShards = cd.getCloudDescriptor().getNumShards();
    if (numShards == null) { // XXX sys prop hack
      log.debug("numShards not found on descriptor - reading it from system property");
      numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
    }

    assert collection != null && collection.length() > 0;

    String shardId = cd.getCloudDescriptor().getShardId();

    String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();

    Map<String,Object> props = new HashMap<>();
    props.put(Overseer.QUEUE_OPERATION, "state");
    props.put(ZkStateReader.STATE_PROP, state.toString());
    props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
    props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
    props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
    props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
    // Reuse the shard id read above so the message and the shard terms refer to the same value.
    props.put(ZkStateReader.SHARD_ID_PROP, shardId);
    props.put(ZkStateReader.COLLECTION_PROP, collection);
    props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString());
    props.put(ZkStateReader.FORCE_SET_STATE_PROP, "false");
    if (numShards != null) {
      props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
    }
    if (coreNodeName != null) {
      props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
    }
    try (SolrCore core = cc.getCore(cd.getName())) {
      if (core != null && state == Replica.State.ACTIVE) {
        ensureRegisteredSearcher(core);
      }
      // For shared storage, also publish where the data and update log live.
      if (core != null && core.getDirectoryFactory().isSharedStorage()) {
        props.put(ZkStateReader.SHARED_STORAGE_PROP, "true");
        props.put("dataDir", core.getDataDir());
        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
        if (ulog != null) {
          props.put("ulogDir", ulog.getLogDir());
        }
      }
    } catch (SolrCoreInitializationException ex) {
      // The core had failed to initialize (in a previous request, not this one), hence nothing to do here.
      if (log.isInfoEnabled()) {
        log.info("The core '{}' had failed to initialize before.", cd.getName());
      }
    }

    // pull replicas are excluded because their terms are not considered
    if (state == Replica.State.RECOVERING && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
      // state is used by client, state of replica can change from RECOVERING to DOWN without needed to finish recovery
      // by calling this we will know that a replica actually finished recovery or not
      getShardTerms(collection, shardId).startRecovering(coreNodeName);
    }
    if (state == Replica.State.ACTIVE && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
      getShardTerms(collection, shardId).doneRecovering(coreNodeName);
    }

    ZkNodeProps m = new ZkNodeProps(props);

    if (updateLastState) {
      cd.getCloudDescriptor().setLastPublished(state);
    }
    overseerJobQueue.offer(Utils.toJSON(m));
  } finally {
    MDCLoggingContext.clear();
  }
}
 
Example 14
Source File: OverseerCollectionMessageHandler.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that calls the six-argument {@code collectionCmd} with an empty set
 * as its last argument.
 *
 * @return whatever the six-argument overload returns
 */
private List<Replica> collectionCmd(ZkNodeProps message, ModifiableSolrParams params,
                           NamedList<Object> results, Replica.State stateMatcher, String asyncId) {
  final Set<String> noOkayExceptions = Collections.emptySet();
  return collectionCmd(message, params, results, stateMatcher, asyncId, noOkayExceptions);
}
 
Example 15
Source File: CoreAdminRequest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * @return the replica state currently held in the {@code state} field
 */
public Replica.State getState() {
  return this.state;
}
 
Example 16
Source File: CloudDescriptor.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * @return the value currently held in the {@code lastPublished} field
 */
public Replica.State getLastPublished() {
  return this.lastPublished;
}
 
Example 17
Source File: CloudDescriptor.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Stores {@code state} in the {@code lastPublished} field.
 *
 * @param state the state to remember
 */
public void setLastPublished(final Replica.State state) {
  this.lastPublished = state;
}
 
Example 18
Source File: SimCloudManager.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Renders a text report of the simulated cluster: live, empty and dead nodes, per-collection
 * stats, replica counts per node, replica states per collection, op counts and event counts.
 *
 * @param withCollections when {@code true}, the string form of every collection in the cluster
 *                        state is included in the report
 * @return the assembled report
 */
public String dumpClusterState(boolean withCollections) throws Exception {
  final StringBuilder out = new StringBuilder();
  out.append("#######################################\n");
  out.append("############ CLUSTER STATE ############\n");
  out.append("#######################################\n");
  out.append("## Live nodes:\t\t").append(getLiveNodesSet().size()).append("\n");

  // Single pass over the live nodes gathers per-node extremes and per-collection state counts.
  int nodesWithoutReplicas = 0;
  int mostReplicasOnNode = 0;
  int fewestReplicasOnNode = Integer.MAX_VALUE;
  final Map<String, Map<Replica.State, AtomicInteger>> statesPerCollection = new TreeMap<>();
  int totalReplicas = 0;
  for (String node : getLiveNodesSet().get()) {
    final List<ReplicaInfo> onNode = getSimClusterStateProvider().simGetReplicaInfos(node);
    totalReplicas += onNode.size();
    mostReplicasOnNode = Math.max(mostReplicasOnNode, onNode.size());
    fewestReplicasOnNode = Math.min(fewestReplicasOnNode, onNode.size());
    for (ReplicaInfo info : onNode) {
      statesPerCollection.computeIfAbsent(info.getCollection(), c -> new TreeMap<>())
          .computeIfAbsent(info.getState(), s -> new AtomicInteger())
          .incrementAndGet();
    }
    if (onNode.isEmpty()) {
      nodesWithoutReplicas++;
    }
  }
  if (fewestReplicasOnNode == Integer.MAX_VALUE) {
    fewestReplicasOnNode = 0; // no live node was visited
  }

  out.append("## Empty nodes:\t").append(nodesWithoutReplicas).append("\n");
  final Set<String> deadNodes = getSimNodeStateProvider().simGetDeadNodes();
  out.append("## Dead nodes:\t\t").append(deadNodes.size()).append("\n");
  for (String dead : deadNodes) {
    out.append("##\t\t").append(dead).append("\n");
  }

  out.append("## Collections:\n");
  clusterStateProvider.simGetCollectionStats().forEach((coll, stats) -> {
    out.append("##  * ").append(coll).append('\n');
    stats.forEach((k, v) -> out.append("##    ").append(k).append("\t").append(v).append("\n"));
  });
  if (withCollections) {
    final ClusterState state = clusterStateProvider.getClusterState();
    state.forEachCollection(coll -> out.append(coll.toString()).append("\n"));
  }

  out.append("## Max replicas per node:\t").append(mostReplicasOnNode).append("\n");
  out.append("## Min replicas per node:\t").append(fewestReplicasOnNode).append("\n");
  out.append("## Total replicas:\t\t").append(totalReplicas).append("\n");
  for (Map.Entry<String, Map<Replica.State, AtomicInteger>> perCollection : statesPerCollection.entrySet()) {
    final Map<Replica.State, AtomicInteger> perState = perCollection.getValue();
    int replicasInCollection = 0;
    for (AtomicInteger counter : perState.values()) {
      replicasInCollection += counter.get();
    }
    out.append("## * ").append(perCollection.getKey()).append("\t\t").append(replicasInCollection).append("\n");
    for (Map.Entry<Replica.State, AtomicInteger> stateCount : perState.entrySet()) {
      out.append("##\t\t- ")
          .append(String.format(Locale.ROOT, "%-12s  %4d", stateCount.getKey(), stateCount.getValue().get()))
          .append("\n");
    }
  }

  out.append("######### Solr op counts ##########\n");
  simGetOpCounts().forEach((k, cnt) -> out.append("##\t\t- ").append(String.format(Locale.ROOT, "%-14s  %4d", k, cnt.get())).append("\n"));

  out.append("######### Autoscaling event counts ###########\n");
  final Map<String, Map<String, AtomicInteger>> eventCounts = simGetEventCounts();
  for (Map.Entry<String, Map<String, AtomicInteger>> perTrigger : eventCounts.entrySet()) {
    out.append("## * Trigger: ").append(perTrigger.getKey()).append("\n");
    for (Map.Entry<String, AtomicInteger> eventCount : perTrigger.getValue().entrySet()) {
      out.append("##\t\t- ")
          .append(String.format(Locale.ROOT, "%-11s  %4d", eventCount.getKey(), eventCount.getValue().get()))
          .append("\n");
    }
  }
  return out.toString();
}
 
Example 19
Source File: HttpPartitionTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Polls the cluster state until every replica named in {@code replicasToCheck} is ACTIVE,
 * failing the test if that does not happen within the allowed time.
 *
 * <p>The wait budget is counted in 200 ms sleep steps. If the thread is interrupted while
 * sleeping, the interrupt status is restored and waiting stops, so the test fails right away.
 *
 * @param testCollectionName collection containing the shard
 * @param shardId            shard whose replicas are checked
 * @param replicasToCheck    names of the replicas that must become active; other replicas are ignored
 * @param maxWaitSecs        maximum time to wait, in seconds
 */
protected void waitToSeeReplicasActive(String testCollectionName, String shardId, Set<String> replicasToCheck, int maxWaitSecs) throws Exception {
  final RTimer timer = new RTimer();

  ZkStateReader zkr = cloudClient.getZkStateReader();
  zkr.forceUpdateCollection(testCollectionName);
  boolean allReplicasUp = false;
  long waitMs = 0L;
  long maxWaitMs = maxWaitSecs * 1000L;
  while (waitMs < maxWaitMs && !allReplicasUp) {
    final ClusterState cs = zkr.getClusterState();
    assertNotNull(cs);
    final DocCollection docCollection = cs.getCollectionOrNull(testCollectionName);
    assertNotNull(docCollection);
    Slice shard = docCollection.getSlice(shardId);
    assertNotNull("No Slice for "+shardId, shard);
    allReplicasUp = true; // assume true

    // wait to see all replicas are "active"
    for (Replica replica : shard.getReplicas()) {
      if (!replicasToCheck.contains(replica.getName()))
        continue;

      final Replica.State state = replica.getState();
      if (state != Replica.State.ACTIVE) {
        if (log.isInfoEnabled()) {
          log.info("Replica {} is currently {}", replica.getName(), state);
        }
        allReplicasUp = false;
      }
    }

    if (!allReplicasUp) {
      try {
        Thread.sleep(200L);
      } catch (InterruptedException e) {
        // Do not swallow the interrupt: restore the flag and stop waiting.
        Thread.currentThread().interrupt();
        break;
      }
      waitMs += 200L;
    }
  } // end while

  if (!allReplicasUp)
    fail("Didn't see replicas "+ replicasToCheck +
        " come up within " + maxWaitMs + " ms! ClusterState: " + printClusterStateInfo(testCollectionName));

  if (log.isInfoEnabled()) {
    log.info("Took {} ms to see replicas [{}] become active.", timer.getTime(), replicasToCheck);
  }
}
 
Example 20
Source File: CoreAdminRequest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Stores {@code state} in this object's {@code state} field.
 *
 * @param state the replica state to store
 */
public void setState(final Replica.State state) {
  this.state = state;
}