Java Code Examples for org.apache.solr.common.cloud.DocRouter#Range

The following examples show how to use org.apache.solr.common.cloud.DocRouter#Range. All of them are taken from the Apache lucene-solr project (Apache License 2.0); the source file is noted above each example.
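Before the examples, here is a minimal sketch of the Range API itself (a sketch assuming solr-solrj is on the classpath). A Range is a closed interval [min, max] over the signed 32-bit hash space, and toString() renders the bounds as hexadecimal.

import org.apache.solr.common.cloud.DocRouter;

public class RangeBasics {
  public static void main(String[] args) {
    // The full 32-bit hash space used by Solr's hash-based routers.
    DocRouter.Range full = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
    // The lower half of that space.
    DocRouter.Range lower = new DocRouter.Range(Integer.MIN_VALUE, -1);

    System.out.println(full);                   // 80000000-7fffffff
    System.out.println(lower.isSubsetOf(full)); // true
    System.out.println(lower.overlaps(full));   // true
    System.out.println(lower.includes(0));      // false
  }
}
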
Example 1
Source File: CrossCollectionJoinQuery.java    From lucene-solr with Apache License 2.0
private String createHashRangeFq() {
  if (routedByJoinKey) {
    ClusterState clusterState = searcher.getCore().getCoreContainer().getZkController().getClusterState();
    CloudDescriptor desc = searcher.getCore().getCoreDescriptor().getCloudDescriptor();
    Slice slice = clusterState.getCollection(desc.getCollectionName()).getSlicesMap().get(desc.getShardId());
    DocRouter.Range range = slice.getRange();

    // In CompositeIdRouter, the routing prefix only affects the top 16 bits
    int min = range.min & 0xffff0000;
    int max = range.max | 0x0000ffff;

    return String.format(Locale.ROOT, "{!hash_range f=%s l=%d u=%d}", fromField, min, max);
  } else {
    return null;
  }
}
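
The two bit operations widen the shard's range to full 16-bit prefix boundaries: clearing the low 16 bits of min and setting the low 16 bits of max ensures the hash-range filter matches every document whose CompositeIdRouter prefix falls inside the shard. A quick illustration with hypothetical hash values:

// Hypothetical values, for illustration only.
int min = 0x12345678 & 0xffff0000; // 0x12340000 -- lowest hash with prefix 0x1234
int max = 0x1234abcd | 0x0000ffff; // 0x1234ffff -- highest hash with prefix 0x1234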
 
Example 2
Source File: SplitOp.java    From lucene-solr with Apache License 2.0
static String toSplitString(Collection<DocRouter.Range> splits) throws Exception {
  if (splits == null) {
    return null;
  }

  StringBuilder sb = new StringBuilder();
  for (DocRouter.Range range : splits) {
    if (sb.length() > 0) {
      sb.append(",");
    }
    sb.append(range);
  }

  return sb.toString();
}
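
Because DocRouter.Range#toString prints each range as hexadecimal min-max bounds, the comma-separated result has the same shape as the ranges parameter of the SPLITSHARD collection action (see Example 4). A small usage sketch:

import java.util.Arrays;
import java.util.List;
import org.apache.solr.common.cloud.DocRouter;

List<DocRouter.Range> splits = Arrays.asList(
    new DocRouter.Range(Integer.MIN_VALUE, -1),
    new DocRouter.Range(0, Integer.MAX_VALUE));
// toSplitString(splits) returns "80000000-ffffffff,0-7fffffff"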
 
Example 3
Source File: DistributedZkUpdateProcessor.java    From lucene-solr with Apache License 2.0
/** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */
protected boolean amISubShardLeader(DocCollection coll, Slice parentSlice, String id, SolrInputDocument doc) throws InterruptedException {
  // Am I the leader of a shard in "construction/recovery" state?
  String myShardId = cloudDesc.getShardId();
  Slice mySlice = coll.getSlice(myShardId);
  final Slice.State state = mySlice.getState();
  if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
    Replica myLeader = zkController.getZkStateReader().getLeaderRetry(collection, myShardId);
    boolean amILeader = myLeader.getName().equals(cloudDesc.getCoreNodeName());
    if (amILeader) {
      // Does the document belong to my hash range as well?
      DocRouter.Range myRange = mySlice.getRange();
      if (myRange == null) myRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (parentSlice != null)  {
        boolean isSubset = parentSlice.getRange() != null && myRange.isSubsetOf(parentSlice.getRange());
        return isSubset && coll.getRouter().isTargetSlice(id, doc, req.getParams(), myShardId, coll);
      } else  {
        // delete by query case -- as long as I am a sub shard leader we're fine
        return true;
      }
    }
  }
  return false;
}
 
Example 4
Source File: ShardSplitTest.java    From lucene-solr with Apache License 2.0
protected void splitShard(String collection, String shardId, List<DocRouter.Range> subRanges, String splitKey, boolean offline) throws SolrServerException, IOException {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("action", CollectionParams.CollectionAction.SPLITSHARD.toString());
  params.set("timing", "true");
  params.set("offline", String.valueOf(offline));
  params.set("collection", collection);
  if (shardId != null)  {
    params.set("shard", shardId);
  }
  if (subRanges != null)  {
    StringBuilder ranges = new StringBuilder();
    for (int i = 0; i < subRanges.size(); i++) {
      DocRouter.Range subRange = subRanges.get(i);
      ranges.append(subRange.toString());
      if (i < subRanges.size() - 1)
        ranges.append(",");
    }
    params.set("ranges", ranges.toString());
  }
  if (splitKey != null) {
    params.set("split.key", splitKey);
  }
  @SuppressWarnings({"rawtypes"})
  SolrRequest request = new QueryRequest(params);
  request.setPath("/admin/collections");

  String baseUrl = ((HttpSolrClient) shardToJetty.get(SHARD1).get(0).client.getSolrClient()).getBaseURL();
  baseUrl = baseUrl.substring(0, baseUrl.length() - "collection1".length());

  try (HttpSolrClient baseServer = getHttpSolrClient(baseUrl, 30000, 60000 * 5)) {
    NamedList<Object> rsp = baseServer.request(request);
    if (log.isInfoEnabled()) {
      log.info("Shard split response: {}", Utils.toJSONString(rsp));
    }
  }
}
 
Example 5
Source File: ShardSplitTest.java    From lucene-solr with Apache License 2.0
protected void indexAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id, int n, Set<String> documentIds) throws Exception {
  index("id", id, "n_ti", n);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1)  {
    docCounts[idx]++;
    documentIds.add(String.valueOf(id));
  }
}
 
Example 6
Source File: ShardSplitTest.java    From lucene-solr with Apache License 2.0
public static int getHashRangeIdx(DocRouter router, List<DocRouter.Range> ranges, String id) {
  int hash = 0;
  if (router instanceof HashBasedRouter) {
    HashBasedRouter hashBasedRouter = (HashBasedRouter) router;
    hash = hashBasedRouter.sliceHash(id, null, null, null);
  }
  for (int i = 0; i < ranges.size(); i++) {
    DocRouter.Range range = ranges.get(i);
    if (range.includes(hash))
      return i;
  }
  return -1;
}
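
Both PlainIdRouter and CompositeIdRouter extend HashBasedRouter, so either works here; any other router type leaves the hash at 0. A usage sketch with a hypothetical id:

DocRouter router = new PlainIdRouter();
List<DocRouter.Range> ranges = router.partitionRange(4, router.fullRange());
int idx = getHashRangeIdx(router, ranges, "doc-42"); // index of the range owning the id's hash, or -1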
 
Example 7
Source File: ShardSplitTest.java    From lucene-solr with Apache License 2.0
protected void deleteAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id) throws Exception {
  controlClient.deleteById(id);
  cloudClient.deleteById(id);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1)  {
    docCounts[idx]--;
  }
}
 
Example 8
Source File: SolrIndexSplitterTest.java    From lucene-solr with Apache License 2.0
private void doTestSplitByPaths(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should not be present in split index2", 0, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should be present in split index2", 1, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index2 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
 
Example 9
Source File: OverseerCollectionMessageHandler.java    From lucene-solr with Apache License 2.0
DocRouter.Range intersect(DocRouter.Range a, DocRouter.Range b) {
  if (a == null || b == null || !a.overlaps(b)) {
    return null;
  } else if (a.isSubsetOf(b))
    return a;
  else if (b.isSubsetOf(a))
    return b;
  else if (b.includes(a.max)) {
    return new DocRouter.Range(b.min, a.max);
  } else  {
    return new DocRouter.Range(a.min, b.max);
  }
}
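
A trace with hypothetical bounds shows the partial-overlap branch:

DocRouter.Range a = new DocRouter.Range(0, 100);
DocRouter.Range b = new DocRouter.Range(50, 200);
// a and b overlap, neither is a subset of the other, and b.includes(a.max),
// so intersect(a, b) returns new DocRouter.Range(50, 100).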
 
Example 10
Source File: SplitOp.java    From lucene-solr with Apache License 2.0
/**
 *   Returns a list of range counts sorted by the range lower bound, using the indexed "id" field (i.e. the terms are full IDs, not just prefixes)
 */
static Collection<RangeCount> getHashHistogramFromId(SolrIndexSearcher searcher, String idField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();

  TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), idField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  byte sep = (byte) CompositeIdRouter.SEPARATOR.charAt(0);
  TermsEnum termsEnum = terms.iterator();
  BytesRef currPrefix = new BytesRef();  // prefix of the previous "id" term
  int bucketCount = 0; // count of the number of docs in the current bucket

  // We're going to iterate over all terms, so do the minimum amount of work per term.
  // Terms are sorted, so all terms sharing a prefix will be grouped together.  The extra work
  // is really just limited to stepping over all the terms in the id field.
  for (;;) {
    BytesRef term = termsEnum.next();

    // compare to current prefix bucket and see if this new term shares the same prefix
    if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) {
      if (StringHelper.startsWith(term, currPrefix)) {
        bucketCount++;  // use 1 since we are dealing with unique ids
        continue;
      }
    }

    // At this point the prefix did not match, so if we had a bucket we were working on, record it.
    if (currPrefix.length > 0) {
      numPrefixes++;
      sumBuckets += bucketCount;
      String currPrefixStr = currPrefix.utf8ToString();
      DocRouter.Range range = router.getSearchRangeSingle(currPrefixStr, null, collection);

      RangeCount rangeCount = new RangeCount(range, bucketCount);
      bucketCount = 0;

      RangeCount prev = counts.put(rangeCount.range, rangeCount);
      if (prev != null) {
        // we hit a hash collision, so add the buckets together.
        rangeCount.count += prev.count;
        numCollisions++;
      }
    }

    // if the current term is null, we ran out of values
    if (term == null) break;

    // find the new prefix (if any)

    // resize if needed
    if (currPrefix.length < term.length) {
      currPrefix.bytes = new byte[term.length+10];
    }

    // Copy the bytes up to and including the separator, and set the length if the separator is found.
    // If there was no separator, then length remains 0 and it's the indicator that we have no prefix bucket
    currPrefix.length = 0;
    for (int i=0; i<term.length; i++) {
      byte b = term.bytes[i + term.offset];
      currPrefix.bytes[i] = b;
      if (b == sep) {
        currPrefix.length = i + 1;
        bucketCount++;
        break;
      }
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}"
        , idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
  }

  return counts.values();
}
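
The prefix-to-range mapping used here comes from the router: for a CompositeIdRouter, a single prefix bucket covers every hash sharing the prefix's top 16 bits. A short sketch with a hypothetical route key:

CompositeIdRouter router = new CompositeIdRouter();
DocRouter.Range bucket = router.keyHashRange("tenantA!");
// bucket spans all hashes whose top 16 bits match the hash of "tenantA",
// i.e. every document indexed with the "tenantA!" route prefix.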
 
Example 11
Source File: SolrIndexSplitterTest.java    From lucene-solr with Apache License 2.0
private void doTestSplitByRouteKey(SolrIndexSplitter.SplitMethod splitMethod) throws Exception  {
  File indexDir = createTempDir().toFile();

  CompositeIdRouter r1 = new CompositeIdRouter();
  String splitKey = "sea-line!";
  String key2 = "soul-raising!";

  // murmur2 has a collision on the above two keys
  assertEquals(r1.keyHashRange(splitKey), r1.keyHashRange(key2));

  /*
  More strings with collisions on murmur2 for future reference:
  "Drava" "dessert spoon"
  "Bighorn" "pleasure lover"
  "attributable to" "second edition"
  "sea-line" "soul-raising"
  "lift direction" "testimony meeting"
   */

  for (int i=0; i<10; i++)  {
    assertU(adoc("id", splitKey + i));
    assertU(adoc("id", key2 + i));
  }
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==20");

  DocRouter.Range splitKeyRange = r1.keyHashRange(splitKey);

  LocalSolrQueryRequest request = null;
  Directory directory = null;
  try {
    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir.getAbsolutePath()), null, Lists.newArrayList(splitKeyRange),
        new CompositeIdRouter(), null, splitKey, splitMethod);
    doSplit(command);
    directory = h.getCore().getDirectoryFactory().get(indexDir.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("split index has wrong number of documents", 10, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = null;
  } finally {
    if (request != null)  {
      request.close();
    }
    if (directory != null)  {
      h.getCore().getDirectoryFactory().release(directory);
    }
  }
}
 
Example 12
Source File: SolrIndexSplitterTest.java    From lucene-solr with Apache License 2.0
private void doTestSplitByCores(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  // add three docs and 1 delete
  String id1 = "dorothy";
  assertU(adoc("id", id1));
  String id2 = "kansas";
  assertU(adoc("id", id2));
  String id3 = "wizard";
  assertU(adoc("id", id3));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==3");
  assertU(delI("wizard"));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==2");
  List<DocRouter.Range> ranges = getRanges(id1, id2);

  SolrCore core1 = null, core2 = null;
  try {

    core1 = h.getCoreContainer().create("split1",
        ImmutableMap.of("dataDir", indexDir1.getAbsolutePath(), "configSet", "cloud-minimal"));
    core2 = h.getCoreContainer().create("split2",
        ImmutableMap.of("dataDir", indexDir2.getAbsolutePath(), "configSet", "cloud-minimal"));

    LocalSolrQueryRequest request = null;
    try {
      request = lrf.makeRequest("q", "dummy");
      SolrQueryResponse rsp = new SolrQueryResponse();
      SplitIndexCommand command = new SplitIndexCommand(request, rsp, null, Lists.newArrayList(core1, core2), ranges,
          new PlainIdRouter(), null, null, splitMethod);
      doSplit(command);
    } finally {
      if (request != null) request.close();
    }
    @SuppressWarnings("resource")
    final EmbeddedSolrServer server1 = new EmbeddedSolrServer(h.getCoreContainer(), "split1");
    @SuppressWarnings("resource")
    final EmbeddedSolrServer server2 = new EmbeddedSolrServer(h.getCoreContainer(), "split2");
    server1.commit(true, true);
    server2.commit(true, true);
    assertEquals("id:dorothy should be present in split index1", 1, server1.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should not be present in split index1", 0, server1.query(new SolrQuery("id:kansas")).getResults().getNumFound());
    assertEquals("id:dorothy should not be present in split index2", 0, server2.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should be present in split index2", 1, server2.query(new SolrQuery("id:kansas")).getResults().getNumFound());
  } finally {
    h.getCoreContainer().unload("split2");
    h.getCoreContainer().unload("split1");
  }
}
 
Example 13
Source File: SolrIndexSplitterTest.java    From lucene-solr with Apache License 2.0
private void doTestSplitDeletes(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");
    assertU(delI(id2)); // delete id2
    assertU(commit());

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();

    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals(0, reader.numDocs()); // should be empty
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
 
Example 14
Source File: ClusterStateMutator.java    From lucene-solr with Apache License 2.0
@SuppressWarnings({"unchecked"})
public ZkWriteCommand createCollection(ClusterState clusterState, ZkNodeProps message) {
  String cName = message.getStr(NAME);
  log.debug("building a new cName: {}", cName);
  if (clusterState.hasCollection(cName)) {
    log.warn("Collection {} already exists. exit", cName);
    return ZkStateWriter.NO_OP;
  }

  Map<String, Object> routerSpec = DocRouter.getRouterSpec(message);
  String routerName = routerSpec.get(NAME) == null ? DocRouter.DEFAULT_NAME : (String) routerSpec.get(NAME);
  DocRouter router = DocRouter.getDocRouter(routerName);

  Object messageShardsObj = message.get("shards");

  Map<String, Slice> slices;
  if (messageShardsObj instanceof Map) { // we are being explicitly told the slice data (e.g. coll restore)
    slices = Slice.loadAllFromMap(cName, (Map<String, Object>)messageShardsObj);
  } else {
    List<String> shardNames = new ArrayList<>();

    if (router instanceof ImplicitDocRouter) {
      getShardNames(shardNames, message.getStr("shards", DocRouter.DEFAULT_NAME));
    } else {
      int numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, -1);
      if (numShards < 1)
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "numShards is a required parameter for 'compositeId' router");
      getShardNames(numShards, shardNames);
    }
    List<DocRouter.Range> ranges = router.partitionRange(shardNames.size(), router.fullRange()); // may be null

    slices = new LinkedHashMap<>();
    for (int i = 0; i < shardNames.size(); i++) {
      String sliceName = shardNames.get(i);

      Map<String, Object> sliceProps = new LinkedHashMap<>(1);
      sliceProps.put(Slice.RANGE, ranges == null ? null : ranges.get(i));

      slices.put(sliceName, new Slice(sliceName, null, sliceProps, cName));
    }
  }

  Map<String, Object> collectionProps = new HashMap<>();

  for (Map.Entry<String, Object> e : OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.entrySet()) {
    Object val = message.get(e.getKey());
    if (val == null) {
      val = OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.get(e.getKey());
    }
    if (val != null) collectionProps.put(e.getKey(), val);
  }
  collectionProps.put(DocCollection.DOC_ROUTER, routerSpec);

  if (message.getStr("fromApi") == null) {
    collectionProps.put("autoCreated", "true");
  }

  DocCollection newCollection = new DocCollection(cName, slices, collectionProps, router, -1);

  return new ZkWriteCommand(cName, newCollection);
}
 
Example 15
Source File: SplitOp.java    From lucene-solr with Apache License 2.0
static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
  int totalCount = 0;
  RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
  RangeCount last = null;  // keep track of what the last range is

  // Remove counts that don't overlap with currentRange (can happen if someone overrode document routing)
  List<RangeCount> counts = new ArrayList<>(rawCounts.size());
  for (RangeCount rangeCount : rawCounts) {
    if (!rangeCount.range.overlaps(currentRange)) {
      continue;
    }
    totalCount += rangeCount.count;
    if (biggest == null || rangeCount.count > biggest.count) {
      biggest = rangeCount;
    }
    counts.add(rangeCount);
    last = rangeCount;
  }

  if (counts.size() == 0) {
    // we don't have any data to go off of, so do the split the normal way
    return null;
  }

  List<DocRouter.Range> targetRanges = new ArrayList<>();

  if (counts.size() == 1) {
    // We have a single range, so we should split it.
    // Currently, we only split a prefix/bucket when we have just one, but this could be changed/controlled
    // in the future via an allowedSizeDifference parameter (i.e. if just separating prefix buckets results in
    // too large an imbalance, allow splitting within a prefix)

    // It may already be a partial range, so figure that out
    int lower = Math.max(last.range.min, currentRange.min);
    int upper = Math.min(last.range.max, currentRange.max);
    int mid = lower + (upper-lower)/2;
    if (mid == lower || mid == upper) {
      // shard too small... this should pretty much never happen, but use default split logic if it does.
      return null;
    }

    // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
    DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, mid);
    DocRouter.Range upperRange = new DocRouter.Range(mid+1, currentRange.max);
    targetRanges.add(lowerRange);
    targetRanges.add(upperRange);

    return targetRanges;
  }

  // We have at least two ranges, so we want to partition the ranges
  // and avoid splitting any individual range.
  // The "middle" bucket we are going to find will be included with the lower range and excluded from the upper range.

  int targetCount = totalCount / 2;
  RangeCount middle = null;
  RangeCount prev = null;
  int currCount = 0;
  for (RangeCount rangeCount : counts) {
    currCount += rangeCount.count;
    if (currCount >= targetCount) {  // this should at least be true on the last range
      middle = rangeCount;
      break;
    }
    prev = rangeCount;
  }

  // check if using the range before the middle one would make a better split point
  int overError = currCount - targetCount;  // error if we include middle in first split
  int underError = targetCount - (currCount - middle.count); // error if we include middle in second split
  if (underError < overError) {
    middle = prev;
  }

  // The middle should never be the last, since that means that we won't actually do a split.
  // Minimising the error (above) should already ensure this never happens.
  assert middle != last;

  // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
  DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, middle.range.max);
  DocRouter.Range upperRange = new DocRouter.Range(middle.range.max+1, currentRange.max);
  targetRanges.add(lowerRange);
  targetRanges.add(upperRange);

  return targetRanges;
}
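
To see why the overError/underError comparison picks the better boundary, consider a hypothetical histogram:

// Hypothetical bucket counts: 30, 15, 25, 30 (totalCount = 100, targetCount = 50).
// The cumulative count first reaches 50 at the third bucket (30 + 15 + 25 = 70), so middle = bucket 3.
// overError  = 70 - 50 = 20  (error if bucket 3 joins the lower half: a 70/30 split)
// underError = 50 - 45 = 5   (error if bucket 3 joins the upper half: a 45/55 split)
// underError < overError, so middle moves back to bucket 2 and the split lands at 45/55.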
 
Example 16
Source File: SplitOp.java    From lucene-solr with Apache License 2.0
static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();
  TreeMap<DocRouter.Range,RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numTriLevel = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    numPrefixes++;

    String termStr = term.utf8ToString();
    int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
    // truncate to first separator since we don't support multiple levels currently
    // NOTE: this does not currently work for tri-level composite ids since the number of bits allocated to the first ID is 16 for a 2 part id
    // and 8 for a 3 part id!
    if (firstSep != termStr.length()-1 && firstSep > 0) {
      numTriLevel++;
      termStr = termStr.substring(0, firstSep+1);
    }

    DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
    int numDocs = termsEnum.docFreq();
    sumBuckets += numDocs;

    RangeCount rangeCount = new RangeCount(range, numDocs);

    RangeCount prev = counts.put(rangeCount.range, rangeCount);
    if (prev != null) {
      // we hit a hash collision or truncated a prefix to first level, so add the buckets together.
      rangeCount.count += prev.count;
      numCollisions++;
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numTriLevel={} numCollisions={}"
        , timer.getTime(), counts.size(), sumBuckets, numPrefixes, numTriLevel, numCollisions);
  }

  return counts.values();
}
 
Example 17
Source File: SplitOp.java    From lucene-solr with Apache License 2.0
public RangeCount(DocRouter.Range range, int count) {
  this.range = range;
  this.count = count;
}
 
Example 18
Source File: SplitHandlerTest.java    From lucene-solr with Apache License 2.0
public void doRandomSplitRecommendation(Random rand) throws Exception {
  int low = 0;
  int high = 0;

  while (high-low < 10) {
    low = randomBound(rand);
    high = randomBound(rand);
    if (low > high) {
      int tmp = low;
      low = high;
      high = tmp;
    }
  }

  DocRouter.Range curr = new DocRouter.Range(low, high);

  int maxRanges = rand.nextInt(20);

  int start = low;

  // bucket can start before or after
  if (rand.nextBoolean()) {
    start += rand.nextInt(200) - 100;
    if (start > low) {
      // underflow
      start = Integer.MIN_VALUE;
    }
  }

  List<SplitOp.RangeCount> counts = new ArrayList<>(maxRanges);
  for (;;) {
    int end = start + rand.nextInt(100) + 1;
    if (end < start) {
      // overflow
      end = Integer.MAX_VALUE;
    }
    counts.add( new SplitOp.RangeCount(new DocRouter.Range(start, end), rand.nextInt(1000)+1));
    if (counts.size() >= maxRanges) break;
    if (counts.size() == maxRanges / 2 && rand.nextBoolean()) {
      // transition toward the end of the range (more boundary cases for large ranges)
      start = high - rand.nextInt(100);
      start = Math.max(start, end+1);
    } else {
      start = end + 1;
    }
    if (rand.nextBoolean()) {
      start += rand.nextInt(100);
    }
    if (start < end) {
      // overflow
      break;
    }
  }

  try {
    Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
    verifyContiguous(results, curr);
  } catch (Throwable e) {
    // System.err.println(e);
  }
}
 
Example 19
Source File: DistributedZkUpdateProcessor.java    From lucene-solr with Apache License 2.0
private void doDefensiveChecks(DistribPhase phase) {
  boolean isReplayOrPeersync = (updateCommand.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0;
  if (isReplayOrPeersync) return;

  String from = req.getParams().get(DISTRIB_FROM);

  DocCollection docCollection = clusterState.getCollection(collection);
  Slice mySlice = docCollection.getSlice(cloudDesc.getShardId());
  boolean localIsLeader = cloudDesc.isLeader();
  if (DistribPhase.FROMLEADER == phase && localIsLeader && from != null) { // from will be null on log replay
    String fromShard = req.getParams().get(DISTRIB_FROM_PARENT);
    if (fromShard != null) {
      if (mySlice.getState() == Slice.State.ACTIVE)  {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but we are in active state");
      }
      // shard splitting case -- check ranges to see if we are a sub-shard
      Slice fromSlice = docCollection.getSlice(fromShard);
      DocRouter.Range parentRange = fromSlice.getRange();
      if (parentRange == null) parentRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (mySlice.getRange() != null && !mySlice.getRange().isSubsetOf(parentRange)) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but parent hash range is not superset of my range");
      }
    } else {
      String fromCollection = req.getParams().get(DISTRIB_FROM_COLLECTION); // is it because of a routing rule?
      if (fromCollection == null)  {
        log.error("Request says it is coming from leader, but we are the leader: {}", req.getParamString());
        SolrException solrExc = new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Request says it is coming from leader, but we are the leader");
        solrExc.setMetadata("cause", "LeaderChanged");
        throw solrExc;
      }
    }
  }

  int count = 0;
  while (((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) && count < 5) {
    count++;
    // re-getting localIsLeader since we published to ZK first before setting localIsLeader value
    localIsLeader = cloudDesc.isLeader();
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }

  if ((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) {
    log.error("ClusterState says we are the leader, but locally we don't think so");
    throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
        "ClusterState says we are the leader (" + zkController.getBaseUrl()
            + "/" + req.getCore().getName() + "), but locally we don't think so. Request came from " + from);
  }
}
 
Example 20
Source File: ShardSplitTest.java    From lucene-solr with Apache License 2.0
public void splitByRouteFieldTest() throws Exception  {
  log.info("Starting testSplitWithRouteField");
  String collectionName = "routeFieldColl";
  int numShards = 4;
  int replicationFactor = 2;
  int maxShardsPerNode = (((numShards * replicationFactor) / getCommonCloudSolrClient()
      .getZkStateReader().getClusterState().getLiveNodes().size())) + 1;

  HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
  String shard_fld = "shard_s";
  try (CloudSolrClient client = createCloudClient(null)) {
    Map<String, Object> props = Utils.makeMap(
        REPLICATION_FACTOR, replicationFactor,
        MAX_SHARDS_PER_NODE, maxShardsPerNode,
        OverseerCollectionMessageHandler.NUM_SLICES, numShards,
        "router.field", shard_fld);

    createCollection(collectionInfos, collectionName, props, client);
  }

  List<Integer> list = collectionInfos.get(collectionName);
  checkForCollection(collectionName, list, null);

  waitForRecoveriesToFinish(false);

  String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);

  try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {

    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = clusterState.getCollection(collectionName).getRouter();
    Slice shard1 = clusterState.getCollection(collectionName).getSlice(SHARD1);
    DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
    final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
    final int[] docCounts = new int[ranges.size()];

    for (int i = 100; i <= 200; i++) {
      String shardKey = "" + (char) ('a' + (i % 26)); // See comment in ShardRoutingTest for hash distribution

      collectionClient.add(getDoc(id, i, "n_ti", i, shard_fld, shardKey));
      int idx = getHashRangeIdx(router, ranges, shardKey);
      if (idx != -1) {
        docCounts[idx]++;
      }
    }

    for (int i = 0; i < docCounts.length; i++) {
      int docCount = docCounts[i];
      log.info("Shard shard1_{} docCount = {}", i, docCount);
    }

    collectionClient.commit();

    trySplit(collectionName, null, SHARD1, 3);

    waitForRecoveriesToFinish(collectionName, false);

    assertEquals(docCounts[0], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals(docCounts[1], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_1")).getResults().getNumFound());
  }
}