Java Code Examples for org.apache.lucene.index.LeafReader#getSortedDocValues()

The following examples show how to use org.apache.lucene.index.LeafReader#getSortedDocValues(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns the sorted doc values for {@code field} in {@code reader}.
 *
 * <p>If the reader itself exposes sorted doc values for the field they are returned
 * directly. Otherwise an empty instance is returned when the field is unknown or is
 * not indexed, and a cached instance is looked up for indexed fields without doc values.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio forwarded into the cache key
 * @return the sorted doc values, never the reader's {@code null}
 * @throws IllegalStateException if the field has doc values of a non-sorted type
 * @throws IOException if reading from the reader fails
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues fromReader = reader.getSortedDocValues(field);
  if (fromReader != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return fromReader;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    // unknown field
    return DocValues.emptySorted();
  }
  if (info.getDocValuesType() != DocValuesType.NONE) {
    // we don't try to build a sorted instance from numeric/binary doc
    // values because dedup can be very costly
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  }
  if (info.getIndexOptions() == IndexOptions.NONE) {
    // field is not indexed, so there is nothing to build from
    return DocValues.emptySorted();
  }

  final Object entry = caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return ((SortedDocValuesImpl) entry).iterator();
}

Example 2

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns the binary doc values for {@code field} in {@code reader}.
 *
 * <p>The reader's binary doc values are preferred, then its sorted doc values. If the
 * reader has neither, an empty instance is returned when the field is unknown or is
 * not indexed, and a cached instance is looked up otherwise.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio forwarded into the cache key
 * @return the binary doc values for the field
 * @throws IllegalStateException if the field has doc values of some other type
 * @throws IOException if reading from the reader fails
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  // Not cached here by FieldCacheImpl (cached instead
  // per-thread by SegmentReader):
  final BinaryDocValues binary = reader.getBinaryDocValues(field);
  if (binary != null) {
    return binary;
  }
  final BinaryDocValues sorted = reader.getSortedDocValues(field);
  if (sorted != null) {
    return sorted;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    // unknown field
    return DocValues.emptyBinary();
  }
  if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  }
  if (info.getIndexOptions() == IndexOptions.NONE) {
    // field is not indexed, so there is nothing to build from
    return DocValues.emptyBinary();
  }

  final Object entry = caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return ((BinaryDocValuesImpl) entry).iterator();
}

Example 3

Source File: StatisHelper.java From HongsCORE with MIT License

6 votes

/**
 * Binds one doc-values source per configured field for the given segment.
 *
 * <p>For each index {@code i}, {@code groups[i][0] >= 1} selects the numeric
 * doc-values family and {@code groups[i][1] == 1} selects the multi-valued
 * variant, which is read from the {@code "%"}-prefixed field; the single-valued
 * variant is read from the {@code "#"}-prefixed field.
 *
 * @param lrc the segment context
 * @return this collector
 * @throws IOException if the reader fails to open doc values
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
    final LeafReader reader = lrc.reader();

    for (int idx = 0; idx < fields.length; idx++) {
        final boolean numeric = groups[idx][0] >= 1;
        final boolean multi   = groups[idx][1] == 1;

        if (numeric && multi) {
            values[idx] = reader.getSortedNumericDocValues("%" + fields[idx]);
        } else if (numeric) {
            values[idx] = reader.getNumericDocValues("#" + fields[idx]);
        } else if (multi) {
            values[idx] = reader.getSortedSetDocValues("%" + fields[idx]);
        } else {
            values[idx] = reader.getSortedDocValues("#" + fields[idx]);
        }
    }

    return this;
}

Example 4

Source File: DocValuesFieldExistsQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Looks up the doc values of {@code field} as a plain {@link DocIdSetIterator}.
 *
 * @param field the field name
 * @param reader the leaf reader to read from
 * @return the doc-values iterator matching the field's doc-values type, or {@code null}
 *         if the field doesn't exist in the reader or has no doc values
 * @throws IOException if reading from the reader fails
 */
public static DocIdSetIterator getDocValuesDocIdSetIterator(String field, LeafReader reader) throws IOException {
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    // field is unknown to this reader
    return null;
  }

  switch (info.getDocValuesType()) {
    case NONE:
      return null;
    case NUMERIC:
      return reader.getNumericDocValues(field);
    case BINARY:
      return reader.getBinaryDocValues(field);
    case SORTED:
      return reader.getSortedDocValues(field);
    case SORTED_NUMERIC:
      return reader.getSortedNumericDocValues(field);
    case SORTED_SET:
      return reader.getSortedSetDocValues(field);
    default:
      // a doc-values type this method does not know about
      throw new AssertionError();
  }
}

Example 5

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a {@link BitsEntry} whose bit set has one bit set for every document
 * returned by the doc-values iterator of {@code field}.
 *
 * <p>The field's {@link FieldInfo} is dereferenced without a null check, and a
 * doc-values type outside the five handled below raises {@link AssertionError}.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @return the entry wrapping a bit set sized to {@code reader.maxDoc()}
 * @throws IOException if reading from the reader fails
 */
private BitsEntry createValueDocValues(LeafReader reader, String field) throws IOException {
  final DocValuesType type = reader.getFieldInfos().fieldInfo(field).getDocValuesType();

  final DocIdSetIterator docs;
  switch (type) {
    case NUMERIC:
      docs = reader.getNumericDocValues(field);
      break;
    case BINARY:
      docs = reader.getBinaryDocValues(field);
      break;
    case SORTED:
      docs = reader.getSortedDocValues(field);
      break;
    case SORTED_NUMERIC:
      docs = reader.getSortedNumericDocValues(field);
      break;
    case SORTED_SET:
      docs = reader.getSortedSetDocValues(field);
      break;
    default:
      throw new AssertionError();
  }

  // mark every document the iterator visits
  final FixedBitSet docsWithValue = new FixedBitSet(reader.maxDoc());
  for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc()) {
    docsWithValue.set(doc);
  }

  return new BitsEntry(docsWithValue);
}

Example 6

Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes a single document carrying one field of each doc-values type into a
 * {@link MemoryIndex} and verifies that every doc-values iterator exposes the
 * expected values for document 0 and is exhausted afterwards.
 */
public void testDocValues() throws Exception {
  Document doc = new Document();
  doc.add(new NumericDocValuesField("numeric", 29L));
  // sorted-numeric values are added out of order and with a duplicate on purpose
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
  doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
  doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
  // sorted-set values are added out of order and with a duplicate on purpose
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  // numeric
  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  assertEquals(0, numericDocValues.nextDoc());
  assertEquals(29L, numericDocValues.longValue());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, numericDocValues.nextDoc());

  // sorted numeric: all five values come back in ascending order, duplicate kept
  SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
  assertEquals(0, sortedNumericDocValues.nextDoc());
  assertEquals(5, sortedNumericDocValues.docValueCount());
  assertEquals(30L, sortedNumericDocValues.nextValue());
  assertEquals(31L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(33L, sortedNumericDocValues.nextValue());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedNumericDocValues.nextDoc());

  // binary
  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  assertEquals(0, binaryDocValues.nextDoc());
  assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, binaryDocValues.nextDoc());

  // sorted
  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  assertEquals(0, sortedDocValues.nextDoc());
  assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
  assertEquals(0, sortedDocValues.ordValue());
  assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedDocValues.nextDoc());

  // sorted set: three distinct values, ordinals in term order
  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  assertEquals(3, sortedSetDocValues.getValueCount());
  assertEquals(0, sortedSetDocValues.nextDoc());
  assertEquals(0L, sortedSetDocValues.nextOrd());
  assertEquals(1L, sortedSetDocValues.nextOrd());
  assertEquals(2L, sortedSetDocValues.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
  assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
  assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
  assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
  // Check exhaustion of the sorted-set iterator itself; the sorted iterator was
  // already exhausted above and must not be advanced again.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedSetDocValues.nextDoc());
}

Example 7

Source File: JoinUtil.java From lucene-solr with Apache License 2.0

4 votes

/**
 * A query time join using global ordinals over a dedicated join field.
 *
 * This join has certain restrictions and requirements:
 * 1) A document can only refer to one other document. (but can be referred by one or more documents)
 * 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
 *    that identifies the "from" and "to" side and then the fromQuery and toQuery must take this into account.
 * 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
 *    should store the join values as UTF-8 strings.
 * 4) An ordinal map must be provided that is created on top of the join field.
 *
 * Note: min and max filtering and the avg score mode will require this join to keep track of the number of times
 * a document matches per join value. This will increase the per join cost in terms of execution time and memory.
 *
 * @param joinField   The {@link SortedDocValues} field containing the join values
 * @param fromQuery   The query containing the actual user query. Also the fromQuery can only match "from" documents.
 * @param toQuery     The query identifying all documents on the "to" side.
 * @param searcher    The index searcher used to execute the from query
 * @param scoreMode   Instructs how scores from the fromQuery are mapped to the returned query
 * @param ordinalMap  The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
 *                    needs to be provided.
 * @param min         Optionally the minimum number of "from" documents that are required to match for a "to" document
 *                    to be a match. The min is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
 *                    disables the min and max "from" documents filtering
 * @param max         Optionally the maximum number of "from" documents that are allowed to match for a "to" document
 *                    to be a match. The max is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
 *                    disables the min and max "from" documents filtering
 * @return a {@link Query} instance that can be used to join documents based on the join field
 * @throws IOException If I/O related errors occur
 * @throws IllegalArgumentException If the index has more than one segment and no ordinal map was given, or if the
 *                    score mode is not supported
 */
public static Query createJoinQuery(String joinField,
                                    Query fromQuery,
                                    Query toQuery,
                                    IndexSearcher searcher,
                                    ScoreMode scoreMode,
                                    OrdinalMap ordinalMap,
                                    int min,
                                    int max) throws IOException {
  final int segmentCount = searcher.getIndexReader().leaves().size();
  if (segmentCount == 0) {
    return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
  }

  final long valueCount;
  if (segmentCount == 1) {
    // No need to use the ordinal map, because there is just one segment.
    ordinalMap = null;
    SortedDocValues joinValues = searcher.getIndexReader().leaves().get(0).reader().getSortedDocValues(joinField);
    if (joinValues == null) {
      return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
    }
    valueCount = joinValues.getValueCount();
  } else {
    if (ordinalMap == null) {
      throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
    }
    valueCount = ordinalMap.getValueCount();
  }

  final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
  final Query rewrittenToQuery = searcher.rewrite(toQuery);

  final GlobalOrdinalsWithScoreCollector scoringCollector;
  switch (scoreMode) {
    case Total:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
      break;
    case Min:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
      break;
    case Max:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
      break;
    case Avg:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
      break;
    case None: {
      if (min > 0 || max != Integer.MAX_VALUE) {
        // min/max filtering is active, so the with-score collector is still needed
        scoringCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
        break;
      }
      // neither scores nor min/max filtering: collect the matching ordinals only
      GlobalOrdinalsCollector ordinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
      searcher.search(rewrittenFromQuery, ordinalsCollector);
      return new GlobalOrdinalsQuery(ordinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
          rewrittenFromQuery, searcher.getTopReaderContext().id());
    }
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
  }

  searcher.search(rewrittenFromQuery, scoringCollector);
  return new GlobalOrdinalsWithScoreQuery(scoringCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery,
      rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}

Example 8

Source File: TestUtil.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Verifies that the reader only exposes per-field data structures that agree with
 * each field's {@link FieldInfo}: no norms for fields that omit them, no doc values
 * for fields without a doc-values type, and no doc values of any type other than
 * the declared one.
 *
 * @param reader the leaf reader to check
 * @throws IOException if reading from the reader fails
 * @throws RuntimeException if the reader exposes data that contradicts a field's info
 */
private static void checkReaderSanity(LeafReader reader) throws IOException {
  for (FieldInfo info : reader.getFieldInfos()) {
    
    // reader shouldn't return normValues if the field does not have them
    if (!info.hasNorms()) {
      if (reader.getNormValues(info.name) != null) {
        throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
      }
    }
    
    // reader shouldn't return docValues if the field does not have them
    // reader shouldn't return multiple docvalues types for the same field.
    switch(info.getDocValuesType()) {
      case NONE:
        // all five doc-values accessors must return null, including sorted-numeric
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null || 
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException("field: " + info.name + " has docvalues but should omit them!");
        }
        break;
      case SORTED:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case SORTED_NUMERIC:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case SORTED_SET:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case BINARY:
        if (reader.getNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case NUMERIC:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      default:
        throw new AssertionError();
    }
  }
}

Example 9

Source File: TestFieldCacheVsDocValues.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes random strings both as an indexed field and as a sorted doc-values field,
 * deletes some documents, and then checks per segment that the field-cache view of
 * the indexed field equals the sorted doc values.
 *
 * @param minLength minimum length of the random values
 * @param maxLength maximum length of the random values; equal to {@code minLength}
 *                  for fixed-length values
 */
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, writerConfig);

  // one document instance, reused with fresh field values for every add
  Document template = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field indexedField = new StringField("indexed", "", Field.Store.NO);
  Field dvField = new SortedDocValuesField("dv", new BytesRef());
  template.add(idField);
  template.add(indexedField);
  template.add(dvField);

  // index some docs
  final int docCount = atLeast(300);
  for (int docNum = 0; docNum < docCount; docNum++) {
    idField.setStringValue(Integer.toString(docNum));
    // fixed length when both bounds agree, random length otherwise
    final int length = (minLength == maxLength)
        ? minLength
        : TestUtil.nextInt(random(), minLength, maxLength);
    final String value = TestUtil.randomSimpleString(random(), length);
    indexedField.setStringValue(value);
    dvField.setBytesValue(new BytesRef(value));
    indexWriter.addDocument(template);
    // occasional commit
    if (random().nextInt(31) == 0) {
      indexWriter.commit();
    }
  }

  // delete some docs
  final int deleteCount = random().nextInt(docCount / 10);
  for (int remaining = deleteCount; remaining > 0; remaining--) {
    final int victim = random().nextInt(docCount);
    indexWriter.deleteDocuments(new Term("id", Integer.toString(victim)));
  }
  indexWriter.close();

  // compare
  DirectoryReader indexReader = DirectoryReader.open(directory);
  for (LeafReaderContext leaf : indexReader.leaves()) {
    LeafReader segment = leaf.reader();
    SortedDocValues fromFieldCache = FieldCache.DEFAULT.getTermsIndex(segment, "indexed");
    SortedDocValues fromDocValues = segment.getSortedDocValues("dv");
    assertEquals(segment.maxDoc(), fromFieldCache, fromDocValues);
  }
  indexReader.close();
  directory.close();
}

Example 10

Source File: DocValuesTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Adds one document and verifies, on a freshly opened searcher, the doc-values type
 * of each {@code *dv} field, the raw doc values stored for document 0, the values
 * seen through each field type's value source, and the round trip of created fields.
 */
@Test
public void testDocValues() throws IOException {
  assertU(adoc("id", "1"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherHolder = core.openNewSearcher(true, true);
    final SolrIndexSearcher searcher = searcherHolder.get();
    try {
      final LeafReader reader = searcher.getSlowAtomicReader();
      assertEquals(1, reader.numDocs());

      // declared doc-values types
      final FieldInfos fieldInfos = reader.getFieldInfos();
      assertEquals(DocValuesType.NUMERIC, fieldInfos.fieldInfo("floatdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, fieldInfos.fieldInfo("intdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, fieldInfos.fieldInfo("doubledv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, fieldInfos.fieldInfo("longdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED, fieldInfos.fieldInfo("stringdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED, fieldInfos.fieldInfo("booldv").getDocValuesType());

      // raw numeric doc values of document 0
      final NumericDocValues floatRaw = reader.getNumericDocValues("floatdv");
      assertEquals(0, floatRaw.nextDoc());
      assertEquals((long) Float.floatToIntBits(1), floatRaw.longValue());
      final NumericDocValues intRaw = reader.getNumericDocValues("intdv");
      assertEquals(0, intRaw.nextDoc());
      assertEquals(2L, intRaw.longValue());
      final NumericDocValues doubleRaw = reader.getNumericDocValues("doubledv");
      assertEquals(0, doubleRaw.nextDoc());
      assertEquals(Double.doubleToLongBits(3), doubleRaw.longValue());
      final NumericDocValues longRaw = reader.getNumericDocValues("longdv");
      assertEquals(0, longRaw.nextDoc());
      assertEquals(4L, longRaw.longValue());

      // raw sorted doc values of document 0
      final SortedDocValues stringRaw = reader.getSortedDocValues("stringdv");
      assertEquals(0, stringRaw.nextDoc());
      assertEquals("solr", stringRaw.binaryValue().utf8ToString());
      final SortedDocValues boolRaw = reader.getSortedDocValues("booldv");
      assertEquals(0, boolRaw.nextDoc());
      assertEquals("T", boolRaw.binaryValue().utf8ToString());

      final IndexSchema schema = core.getLatestSchema();
      final SchemaField floatField = schema.getField("floatdv");
      final SchemaField intField = schema.getField("intdv");
      final SchemaField doubleField = schema.getField("doubledv");
      final SchemaField longField = schema.getField("longdv");
      final SchemaField boolField = schema.getField("booldv");

      // the same values seen through each field type's value source
      final FunctionValues floatVals =
          floatField.getType().getValueSource(floatField, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(1f, floatVals.floatVal(0), 0f);
      assertEquals(1f, floatVals.objectVal(0));

      final FunctionValues intVals =
          intField.getType().getValueSource(intField, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(2, intVals.intVal(0));
      assertEquals(2, intVals.objectVal(0));

      final FunctionValues doubleVals =
          doubleField.getType().getValueSource(doubleField, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(3d, doubleVals.doubleVal(0), 0d);
      assertEquals(3d, doubleVals.objectVal(0));

      final FunctionValues longVals =
          longField.getType().getValueSource(longField, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(4L, longVals.longVal(0));
      assertEquals(4L, longVals.objectVal(0));

      final FunctionValues boolVals =
          boolField.getType().getValueSource(boolField, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals("true", boolVals.strVal(0));
      assertEquals(true, boolVals.objectVal(0));

      // check reversibility of created fields
      tstToObj(schema.getField("floatdv"), -1.5f);
      tstToObj(schema.getField("floatdvs"), -1.5f);
      tstToObj(schema.getField("doubledv"), -1.5d);
      tstToObj(schema.getField("doubledvs"), -1.5d);
      tstToObj(schema.getField("intdv"), -7);
      tstToObj(schema.getField("intdvs"), -7);
      tstToObj(schema.getField("longdv"), -11L);
      tstToObj(schema.getField("longdvs"), -11L);
      tstToObj(schema.getField("datedv"), new Date(1000));
      tstToObj(schema.getField("datedvs"), new Date(1000));
      tstToObj(schema.getField("stringdv"), "foo");
      tstToObj(schema.getField("stringdvs"), "foo");
      tstToObj(schema.getField("booldv"), true);
      tstToObj(schema.getField("booldvs"), true);

    } finally {
      // release the reference taken by openNewSearcher
      searcherHolder.decref();
    }
  }
}