Java Code Examples for org.apache.solr.common.cloud.ZkStateReader#forceUpdateCollection()

The following examples show how to use org.apache.solr.common.cloud.ZkStateReader#forceUpdateCollection(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AbstractFullDistribZkTestBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds and logs a per-replica summary for {@code collectionName}.
 *
 * <p>Calls {@code updateLiveNodes()} and {@code forceUpdateCollection(collectionName)} on the reader
 * first. For every replica of every slice it then records either that the replica is not
 * live/active, or (for active replicas) whether it is the slice leader followed by whatever
 * {@code logReplicationDetails} appends.
 *
 * @param collectionName name of the collection to summarize
 * @param zkStateReader  reader used to refresh and read the cluster state
 */
protected void logReplicaTypesReplicationInfo(String collectionName, ZkStateReader zkStateReader) throws KeeperException, InterruptedException, IOException {
  log.info("## Collecting extra Replica.Type information of the cluster");
  zkStateReader.updateLiveNodes();
  StringBuilder summary = new StringBuilder();
  zkStateReader.forceUpdateCollection(collectionName);
  DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
  for (Slice slice : docCollection.getSlices()) {
    Replica sliceLeader = slice.getLeader();
    for (Replica replica : slice.getReplicas()) {
      boolean active = replica.isActive(zkStateReader.getClusterState().getLiveNodes());
      if (active) {
        if (replica.equals(sliceLeader)) {
          summary.append(String.format(Locale.ROOT, "Replica %s is leader%s", replica.getName(), System.lineSeparator()));
        }
        logReplicationDetails(replica, summary);
      } else {
        // Inactive replicas get a one-line note and no replication details.
        summary.append(String.format(Locale.ROOT, "Replica %s not in liveNodes or is not active%s", replica.getName(), System.lineSeparator()));
      }
    }
  }
  log.info("Summary of the cluster: {}", summary);
}
 
Example 2
Source File: AbstractDistribZkTestBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Fails the test unless every slice of {@code collection}, and every replica of each slice,
 * is in the ACTIVE state. The collection state is force-refreshed before checking.
 *
 * @throws IllegalArgumentException if the collection (or its slice list) cannot be found
 */
protected static void assertAllActive(String collection, ZkStateReader zkStateReader)
    throws KeeperException, InterruptedException {
  zkStateReader.forceUpdateCollection(collection);
  final DocCollection coll = zkStateReader.getClusterState().getCollectionOrNull(collection);
  if (coll == null || coll.getSlices() == null) {
    throw new IllegalArgumentException("Cannot find collection:" + collection);
  }

  for (Slice slice : coll.getSlicesMap().values()) {
    if (slice.getState() != Slice.State.ACTIVE) {
      fail("Not all shards are ACTIVE - found a shard " + slice.getName() + " that is: " + slice.getState());
    }
    for (Replica replica : slice.getReplicasMap().values()) {
      if (replica.getState() != Replica.State.ACTIVE) {
        fail("Not all replicas are ACTIVE - found a replica " + replica.getName() + " that is: " + replica.getState());
      }
    }
  }
}
 
Example 3
Source File: ChaosMonkeyShardSplitTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Polls up to 30 times, sleeping 3 seconds before each check, until every replica of
 * "shard1" in "collection1" is on a live node and ACTIVE. If that never happens the
 * layout is printed and the test fails.
 */
private void waitTillRecovered() throws Exception {
  for (int attempt = 0; attempt < 30; attempt++) {
    Thread.sleep(3000);
    ZkStateReader reader = cloudClient.getZkStateReader();
    reader.forceUpdateCollection("collection1");
    ClusterState state = reader.getClusterState();
    Slice shard1 = state.getCollection("collection1").getSlice("shard1");
    // allMatch short-circuits on the first replica that is not live or not ACTIVE.
    boolean recovered = shard1.getReplicas().stream()
        .allMatch(r -> state.liveNodesContain(r.getNodeName()) && r.getState() == Replica.State.ACTIVE);
    if (recovered) {
      return;
    }
  }
  printLayout();
  fail("timeout waiting to see recovered node");
}
 
Example 4
Source File: HttpPartitionTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the cluster state until the replica named {@code replicaName}, looked up in the first
 * active slice of {@code collection}, reports {@code state}, or until {@code ms} milliseconds
 * have elapsed. On timeout the last observed state is asserted equal to the expected one, so
 * the test fails with a descriptive message.
 *
 * <p>A replica (or active slice) that is not visible yet is treated as "state unknown"
 * ({@code null}) and polled again instead of aborting with an exception.
 *
 * <p>NOTE(review): if the timeout has already expired before the first poll, the loop body never
 * runs and the assertion compares against the initial {@code ACTIVE} value.
 *
 * @param collection  collection to inspect
 * @param replicaName name of the replica whose state is awaited
 * @param state       expected replica state
 * @param ms          maximum time to wait, in milliseconds
 */
protected void waitForState(String collection, String replicaName, Replica.State state, long ms) throws KeeperException, InterruptedException {
  final long pollIntervalMs = 100L;
  TimeOut timeOut = new TimeOut(ms, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
  Replica.State replicaState = Replica.State.ACTIVE;
  while (!timeOut.hasTimedOut()) {
    ZkStateReader zkr = cloudClient.getZkStateReader();
    zkr.forceUpdateCollection(collection); // force the state to be fresh
    ClusterState cs = zkr.getClusterState();
    Collection<Slice> slices = cs.getCollection(collection).getActiveSlices();
    Replica partitionedReplica = slices.isEmpty() ? null : slices.iterator().next().getReplica(replicaName);
    replicaState = (partitionedReplica == null) ? null : partitionedReplica.getState();
    if (replicaState == state) {
      return;
    }
    // Pause between polls so the loop does not issue back-to-back forced ZooKeeper reads.
    Thread.sleep(pollIntervalMs);
  }
  assertEquals("Timeout waiting for state "+ state +" of replica " + replicaName + ", current state " + replicaState,
      state, replicaState);
}
 
Example 5
Source File: Solr6Index.java    From atlas with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the cluster state contains the given collection.
 * The collection's state is force-refreshed through the client's {@code ZkStateReader}
 * before the lookup.
 */
private static boolean checkIfCollectionExists(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
    final ZkStateReader reader = server.getZkStateReader();
    reader.forceUpdateCollection(collection);
    final boolean missing = reader.getClusterState().getCollectionOrNull(collection) == null;
    return !missing;
}
 
Example 6
Source File: Solr6Index.java    From atlas with Apache License 2.0 5 votes vote down vote up
/**
 * Wait for all the collection shards to be ready.
 *
 * <p>Repeatedly force-refreshes the collection state and keeps waiting (1 second between
 * checks) while any replica located on a live node is in the RECOVERING or DOWN state.
 * There is no upper bound on the waiting time: the loop only ends once no such replica is seen.
 * "Exiting solr wait" is logged on every exit path, including exceptional ones.
 *
 * @param server     client whose {@code ZkStateReader} is used to read the cluster state
 * @param collection name of the collection to wait for
 */
private static void waitForRecoveriesToFinish(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
    final ZkStateReader zkStateReader = server.getZkStateReader();
    try {
        boolean cont = true;

        while (cont) {
            boolean sawLiveRecovering = false;
            zkStateReader.forceUpdateCollection(collection);
            final ClusterState clusterState = zkStateReader.getClusterState();
            final Map<String, Slice> slices = clusterState.getCollection(collection).getSlicesMap();
            Preconditions.checkNotNull(slices, "Could not find collection:" + collection);

            // change paths for Replica.State per Solr refactoring
            // remove SYNC state per: http://tinyurl.com/pag6rwt
            for (final Map.Entry<String, Slice> entry : slices.entrySet()) {
                final Map<String, Replica> shards = entry.getValue().getReplicasMap();
                for (final Map.Entry<String, Replica> shard : shards.entrySet()) {
                    // Upper-case with Locale.ROOT: with the default locale (e.g. Turkish) the 'i' in
                    // "recovering" maps to a dotted capital I and would never match the enum name.
                    final String state = shard.getValue().getStr(ZkStateReader.STATE_PROP)
                            .toUpperCase(java.util.Locale.ROOT);
                    if ((Replica.State.RECOVERING.name().equals(state) || Replica.State.DOWN.name().equals(state))
                            && clusterState.liveNodesContain(shard.getValue().getStr(
                            ZkStateReader.NODE_NAME_PROP))) {
                        sawLiveRecovering = true;
                    }
                }
            }


            if (!sawLiveRecovering) {
                cont = false;
            } else {
                Thread.sleep(1000);
            }
        }
    } finally {
        logger.info("Exiting solr wait");
    }
}
 
Example 7
Source File: V2HttpCall.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the single collection addressed by {@code collectionStr} (a collection name or alias,
 * possibly comma delimited). Also sets {@link #collectionsList} by side-effect.
 *
 * <p>If the first lookup finds no collection, the aliases and the state of the first resolved
 * collection are refreshed from ZK and the lookup is repeated exactly once. Errors during that
 * refresh are logged and not propagated.
 *
 * @return the resolved collection, or {@code null} if it is still unknown after the retry
 * @throws SolrException (BAD_REQUEST) if Solr is not ZooKeeper aware or the string resolves to
 *         more than one collection
 */
protected DocCollection resolveDocCollection(String collectionStr) {
  if (!cores.isZooKeeperAware()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Solr not running in cloud mode ");
  }
  final ZkStateReader reader = cores.getZkController().getZkStateReader();

  final Supplier<DocCollection> lookup = () -> {
    this.collectionsList = resolveCollectionListOrAlias(collectionStr); // side-effect
    if (collectionsList.size() > 1) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Request must be sent to a single collection " +
          "or an alias that points to a single collection," +
          " but '" + collectionStr + "' resolves to " + this.collectionsList);
    }
    //TODO an option to choose another collection in the list if can't find a local replica of the first?
    final String firstName = collectionsList.get(0);
    return reader.getClusterState().getCollectionOrNull(firstName);
  };

  final DocCollection firstAttempt = lookup.get();
  if (firstAttempt == null) {
    // ensure our view is up to date before trying again
    try {
      reader.aliasesManager.update();
      reader.forceUpdateCollection(collectionsList.get(0));
    } catch (Exception e) {
      log.error("Error trying to update state while resolving collection.", e);
      //don't propagate exception on purpose
    }
    return lookup.get();
  }
  return firstAttempt;
}
 
Example 8
Source File: ShardLeaderElectionContext.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Publishes the ACTIVE state for {@code core} when the core has already registered but either
 * its replica in the freshly refreshed cluster state, or its last published state, is not ACTIVE.
 * Does nothing if the core has not registered or its replica cannot be found.
 */
public void publishActiveIfRegisteredAndNotActive(SolrCore core) throws Exception {
  if (!core.getCoreDescriptor().getCloudDescriptor().hasRegistered()) {
    return;
  }
  ZkStateReader reader = zkController.getZkStateReader();
  reader.forceUpdateCollection(collection);
  ClusterState state = reader.getClusterState();
  Replica rep = getReplica(state, collection, leaderProps.getStr(ZkStateReader.CORE_NODE_NAME_PROP));
  if (rep == null) {
    return;
  }
  boolean alreadyActive = rep.getState() == Replica.State.ACTIVE
      && core.getCoreDescriptor().getCloudDescriptor().getLastPublished() == Replica.State.ACTIVE;
  if (alreadyActive) {
    return;
  }
  log.debug("We have become the leader after core registration but are not in an ACTIVE state - publishing ACTIVE");
  zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
}
 
Example 9
Source File: AbstractFullDistribZkTestBase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Waits until every PULL and TLOG replica of {@code collectionName} that sits on a live node
 * reports the same index version as its slice leader, and until the replica's registered
 * searcher is serving that same version.
 *
 * <p>Fails the test if the leader's index version cannot be obtained before {@code timeout}
 * expires, or if a replica's version still differs from the leader's once it has expired.
 *
 * @param collectionName collection whose replicas are checked
 * @param zkStateReader  reader used to refresh and read the cluster state
 * @param timeout        overall time budget shared by all slices and replicas
 */
protected void waitForReplicationFromReplicas(String collectionName, ZkStateReader zkStateReader, TimeOut timeout) throws KeeperException, InterruptedException, IOException {
  log.info("waitForReplicationFromReplicas: {}", collectionName);
  zkStateReader.forceUpdateCollection(collectionName);
  DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
  // Node name -> CoreContainer, for the jetty runners that are currently running.
  Map<String, CoreContainer> containers = new HashMap<>();
  for (JettySolrRunner runner:jettys) {
    if (!runner.isRunning()) {
      continue;
    }
    containers.put(runner.getNodeName(), runner.getCoreContainer());
  }
  for(Slice s:collection.getSlices()) {
    Replica leader = zkStateReader.getLeaderRetry(collectionName, s.getName(), (int)timeout.timeLeft(TimeUnit.MILLISECONDS));
    long leaderIndexVersion = -1;
    // Poll once per second until the leader reports a non-negative index version or time runs out.
    while (!timeout.hasTimedOut()) {
      leaderIndexVersion = getIndexVersion(leader);
      if (leaderIndexVersion >= 0) {
        break;
      }
      Thread.sleep(1000);
    }
    if (timeout.hasTimedOut()) {
      fail("Unable to get leader indexVersion");
    }
    // Despite the variable name, this iterates over both PULL and TLOG replicas.
    for (Replica pullReplica:s.getReplicas(EnumSet.of(Replica.Type.PULL,Replica.Type.TLOG))) {
      // Replicas on nodes that are not live are skipped.
      if (!zkStateReader.getClusterState().liveNodesContain(pullReplica.getNodeName())) {
        continue;
      }
      // NOTE(review): the timeout is only checked in the "versions differ" branch below; once the
      // versions match, this loop exits only when the searcher serves the replicated version.
      while (true) {
        long replicaIndexVersion = getIndexVersion(pullReplica); 
        if (leaderIndexVersion == replicaIndexVersion) {
          if (log.isInfoEnabled()) {
            log.info("Leader replica's version ({}) in sync with replica({}): {} == {}"
                , leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
          }
          
          // Make sure the host is serving the correct version
          // NOTE(review): containers only holds running jettys, so the lookup below yields null
          // (and then an NPE) for a replica on a node without a running runner -- confirm that cannot happen.
          try (SolrCore core = containers.get(pullReplica.getNodeName()).getCore(pullReplica.getCoreName())) {
            RefCounted<SolrIndexSearcher> ref = core.getRegisteredSearcher();
            try {
              SolrIndexSearcher searcher = ref.get();
              // Version read from the user data of the index commit the searcher has open.
              String servingVersion = searcher.getIndexReader().getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
              if (Long.parseLong(servingVersion) == replicaIndexVersion) {
                break;
              } else {
                if (log.isInfoEnabled()) {
                  log.info("Replica {} has the correct version replicated, but the searcher is not ready yet. Replicated version: {}, Serving version: {}"
                      , pullReplica.getName(), replicaIndexVersion, servingVersion);
                }
              }
            } finally {
              // Release the searcher reference taken by getRegisteredSearcher().
              if (ref != null) ref.decref();
            }
          }
        } else {
          if (timeout.hasTimedOut()) {
            // Log a cluster summary before failing.
            logReplicaTypesReplicationInfo(collectionName, zkStateReader);
            fail(String.format(Locale.ROOT, "Timed out waiting for replica %s (%d) to replicate from leader %s (%d)", pullReplica.getName(), replicaIndexVersion, leader.getName(), leaderIndexVersion));
          }
          if (leaderIndexVersion > replicaIndexVersion) {
            if (log.isInfoEnabled()) {
              log.info("{} version is {} and leader's is {}, will wait for replication"
                  , pullReplica.getName(), replicaIndexVersion, leaderIndexVersion);
            }
          } else {
            if (log.isInfoEnabled()) {
              log.info("Leader replica's version ({}) is lower than pull replica({}): {} < {}"
                  , leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
            }
          }
        }
        // Wait a second before re-checking this replica.
        Thread.sleep(1000);
      }
    }
  }
}
 
Example 10
Source File: HttpPartitionTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Waits until all replicas of {@code shardId} whose names are in {@code replicasToCheck} are
 * ACTIVE, checking every 200 ms, and fails the test if that does not happen within
 * {@code maxWaitSecs} seconds of accumulated sleep time.
 *
 * <p>The collection state is force-refreshed once before polling starts; each poll then reads
 * the reader's current cluster state. An interrupt during the wait is propagated to the caller
 * as {@link InterruptedException} rather than being swallowed.
 *
 * @param testCollectionName collection containing the shard
 * @param shardId            shard whose replicas are checked
 * @param replicasToCheck    names of the replicas that must become active; others are ignored
 * @param maxWaitSecs        maximum time to wait, in seconds
 */
protected void waitToSeeReplicasActive(String testCollectionName, String shardId, Set<String> replicasToCheck, int maxWaitSecs) throws Exception {
  final RTimer timer = new RTimer();

  ZkStateReader zkr = cloudClient.getZkStateReader();
  zkr.forceUpdateCollection(testCollectionName);
  boolean allReplicasUp = false;
  long waitMs = 0L;
  long maxWaitMs = maxWaitSecs * 1000L;
  while (waitMs < maxWaitMs && !allReplicasUp) {
    ClusterState cs = cloudClient.getZkStateReader().getClusterState();
    assertNotNull(cs);
    final DocCollection docCollection = cs.getCollectionOrNull(testCollectionName);
    assertNotNull(docCollection);
    Slice shard = docCollection.getSlice(shardId);
    assertNotNull("No Slice for "+shardId, shard);
    allReplicasUp = true; // assume true

    // wait to see all replicas are "active"
    for (Replica replica : shard.getReplicas()) {
      if (!replicasToCheck.contains(replica.getName())) {
        continue;
      }

      final Replica.State state = replica.getState();
      if (state != Replica.State.ACTIVE) {
        if (log.isInfoEnabled()) {
          log.info("Replica {} is currently {}", replica.getName(), state);
        }
        allReplicasUp = false;
      }
    }

    if (!allReplicasUp) {
      // Let InterruptedException propagate: swallowing it would hide the interrupt and keep polling.
      Thread.sleep(200L);
      waitMs += 200L;
    }
  } // end while

  if (!allReplicasUp) {
    fail("Didn't see replicas "+ replicasToCheck +
        " come up within " + maxWaitMs + " ms! ClusterState: " + printClusterStateInfo(testCollectionName));
  }

  if (log.isInfoEnabled()) {
    log.info("Took {} ms to see replicas [{}] become active.", timer.getTime(), replicasToCheck);
  }
}
 
Example 11
Source File: LeaderElectionContextKeyTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that unloading a shard leader's core in TEST_COLLECTION_1 empties that shard's
 * election nodes without changing the election nodes of TEST_COLLECTION_2.
 *
 * <p>After the unload, the election nodes of the affected shard are polled for up to 60 seconds
 * until they are observed to be empty; a {@code NoNodeException} during a poll is ignored and
 * the read is retried.
 */
@Test
public void test() throws KeeperException, InterruptedException, IOException, SolrServerException {
  ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
  stateReader.forceUpdateCollection(TEST_COLLECTION_1);
  ClusterState clusterState = stateReader.getClusterState();
  // The test assume that TEST_COLLECTION_1 and TEST_COLLECTION_2 will have identical layout
  // ( same replica's name on every shard )
  for (int i = 1; i <= 2; i++) {
    String coll1ShardiLeader = clusterState.getCollection(TEST_COLLECTION_1).getLeader("shard"+i).getName();
    String coll2ShardiLeader = clusterState.getCollection(TEST_COLLECTION_2).getLeader("shard"+i).getName();
    String assertMss = String.format(Locale.ROOT, "Expect %s and %s each have a replica with same name on shard %s",
        coll1ShardiLeader, coll2ShardiLeader, "shard"+i);
    assertEquals(
        assertMss,
        coll1ShardiLeader,
        coll2ShardiLeader
    );
  }

  String shard = "shard" + (random().nextInt(2) + 1);
  Replica replica = clusterState.getCollection(TEST_COLLECTION_1).getLeader(shard);
  assertNotNull(replica);

  try (SolrClient shardLeaderClient = new HttpSolrClient.Builder(replica.get("base_url").toString()).build()) {
    assertEquals(1L, getElectionNodes(TEST_COLLECTION_1, shard, stateReader.getZkClient()).size());
    List<String> collection2Shard1Nodes = getElectionNodes(TEST_COLLECTION_2, "shard1", stateReader.getZkClient());
    List<String> collection2Shard2Nodes = getElectionNodes(TEST_COLLECTION_2, "shard2", stateReader.getZkClient());
    CoreAdminRequest.unloadCore(replica.getCoreName(), shardLeaderClient);
    // Waiting for leader election being kicked off
    long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
    boolean found = false;
    while (System.nanoTime() < timeout) {
      try {
        found = getElectionNodes(TEST_COLLECTION_1, shard, stateReader.getZkClient()).size() == 0;
        // Only stop once the election nodes are actually gone; otherwise keep polling until the deadline.
        if (found) {
          break;
        }
      } catch (KeeperException.NoNodeException nne) {
        // ignore
      }
      // Short pause so the poll loop does not spin against ZooKeeper.
      Thread.sleep(100);
    }
    assertTrue(found);
    // There are no leader election was kicked off on testCollection2
    assertThat(collection2Shard1Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "shard1", stateReader.getZkClient())));
    assertThat(collection2Shard2Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "shard2", stateReader.getZkClient())));
  }
}