org.apache.lucene.util.BytesRef Java Examples

The following examples show how to use org.apache.lucene.util.BytesRef. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GlobalOrdinalsQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Explains whether {@code doc} matches by looking up the ordinal of its join value in
 * {@code foundOrds}.
 *
 * @param context the segment the document lives in
 * @param doc the segment-local document id
 * @return a match explanation carrying {@code score()}, or a no-match explanation
 * @throws IOException if reading the doc values fails
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
  // Either there are no doc values at all, or the iterator could not land exactly on doc.
  if (joinValues == null || joinValues.advance(doc) != doc) {
    return Explanation.noMatch("Not a match");
  }

  final int segmentOrd = joinValues.ordValue();
  final BytesRef joinValue = joinValues.lookupOrd(segmentOrd);

  // Use the global ordinal when a segment-to-global mapping is available.
  final int ord = globalOrds == null
      ? segmentOrd
      : (int) globalOrds.getGlobalOrds(context.ord).get(segmentOrd);

  if (!foundOrds.get(ord)) {
    return Explanation.noMatch("Not a match, join value " + Term.toString(joinValue));
  }
  return Explanation.match(score(), "A match, join value " + Term.toString(joinValue));
}
 
Example #2
Source File: ParentChildFilteredTermsEnum.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether {@code term} (a uid made of a type and an id) belongs to one of the
 * {@code parentTypes}.
 *
 * <p>On acceptance the {@code type} and {@code id} fields are updated. On rejection the enum is
 * asked to seek to the next parent type at or after the term's type, or to stop when there is
 * none.
 *
 * @param term the uid term under consideration
 * @return {@code YES}, {@code NO_AND_SEEK} (with {@code seekTerm} set) or {@code END}
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
    if (parentTypes.isEmpty()) {
        return AcceptStatus.END;
    }

    final BytesRef[] parts = Uid.splitUidIntoTypeAndId(term);
    final BytesRef termType = parts[0];
    if (parentTypes.contains(termType)) {
        type = termType.utf8ToString();
        id = parts[1];
        return AcceptStatus.YES;
    }

    // Not a parent type: skip ahead to the next candidate type, if any.
    final BytesRef following = parentTypes.ceiling(termType);
    if (following == null) {
        return AcceptStatus.END;
    }
    seekTerm = following;
    return AcceptStatus.NO_AND_SEEK;
}
 
Example #3
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that, for the first five documents, the term vector of {@code testFields[0]} lists
 * exactly {@code testTerms} in order and that each term's postings contain exactly one document.
 *
 * <p>The reader is opened in a try-with-resources block so that it is closed even when one of
 * the assertions fails.
 *
 * @throws IOException if the term vectors cannot be read
 */
public void testDocsEnum() throws IOException {
  try (TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()))) {
    for (int j = 0; j < 5; j++) {
      Terms vector = reader.get(j).terms(testFields[0]);
      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());
      TermsEnum termsEnum = vector.iterator();
      PostingsEnum postingsEnum = null;
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        String term = text.utf8ToString();
        assertEquals(testTerms[i], term);

        postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
        assertNotNull(postingsEnum);
        // A fresh postings enum is unpositioned.
        int doc = postingsEnum.docID();
        assertEquals(-1, doc);
        // Exactly one document is expected per term.
        assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
      }
      // No terms beyond the expected ones.
      assertNull(termsEnum.next());
    }
  }
}
 
Example #4
Source File: STUniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Refills {@code groupedFieldTerms} with the per-field merge state for {@code term}.
 *
 * <p>Every field in {@code fieldPostingsMap} that also has an entry in {@code fieldTermsMap} gets
 * its iterator reset for {@code term} and is appended; the result is then ordered by field number.
 *
 * @param term the term being merged
 * @param fieldTermsMap merge state keyed by field name
 * @param fieldPostingsMap segment postings keyed by field name
 * @param groupedFieldTerms output list; cleared before being refilled
 */
private void combinePostingsPerField(BytesRef term,
                                     Map<String, MergingFieldTerms> fieldTermsMap,
                                     Map<String, List<SegmentPostings>> fieldPostingsMap,
                                     List<MergingFieldTerms> groupedFieldTerms) {
  groupedFieldTerms.clear();
  fieldPostingsMap.forEach((fieldName, segmentPostings) -> {
    // The fields in fieldPostingsMap come from the FieldInfos of the SegmentReadState, while
    // fieldTermsMap is built from the SegmentMergeState. A field may therefore be missing from
    // fieldTermsMap when it has been removed.
    MergingFieldTerms merging = fieldTermsMap.get(fieldName);
    if (merging == null) {
      return;
    }
    merging.resetIterator(term, segmentPostings);
    groupedFieldTerms.add(merging);
  });
  // Keep the fields ordered by their number in the target merge segment.
  groupedFieldTerms.sort(Comparator.comparingInt(merging -> merging.fieldMetadata.getFieldInfo().number));
}
 
Example #5
Source File: TestBKDRadixSelector.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the points of slice {@code p} and returns the smallest doc id among those whose bytes for
 * {@code dimension} equal {@code partitionPoint} and whose data-dimension bytes equal
 * {@code dataDim}.
 *
 * @return the smallest matching doc id, or {@code Integer.MAX_VALUE} when nothing matches
 * @throws IOException if the point reader fails
 */
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws  IOException {
  // These offsets are identical for every point, so they are computed once up front.
  final int dimStart = dimension * bytesPerDimension;
  final int dataStart = indexDims * bytesPerDimension;
  final int dataLength = (dataDims - indexDims) * bytesPerDimension;

  int minDocId = Integer.MAX_VALUE;
  try (PointReader points = p.writer.getReader(p.start, p.count)) {
    while (points.next()) {
      final PointValue current = points.pointValue();
      final BytesRef packed = current.packedValue();
      final int dimFrom = packed.offset + dimStart;
      final int dataFrom = packed.offset + dataStart;
      final boolean sameDimension =
          Arrays.compareUnsigned(packed.bytes, dimFrom, dimFrom + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0;
      if (sameDimension
          && Arrays.compareUnsigned(packed.bytes, dataFrom, dataFrom + dataLength, dataDim, 0, dataLength) == 0) {
        minDocId = Math.min(minDocId, current.docID());
      }
    }
  }
  return minDocId;
}
 
Example #6
Source File: LindenFieldCacheImpl.java    From linden with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a term-to-docID map for {@code key.field} by uninverting the field on {@code reader}.
 *
 * <p>Each visited document is stored under the most recently visited term, so a later document
 * for the same term overwrites the earlier one.
 *
 * @return a {@code PerReaderUIDMaps} holding the reader's ordinal and the collected map
 * @throws IOException if uninverting fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final Map<String, Integer> docIdByUid = new HashMap<>();

  final Uninvert collector = new Uninvert() {
    // Term most recently reported by visitTerm; visitDoc attributes documents to it.
    private String latestTerm;

    @Override
    public void visitTerm(BytesRef term) {
      latestTerm = term.utf8ToString();
    }

    @Override
    public void visitDoc(int docID) {
      docIdByUid.put(latestTerm, docID);
    }

    @Override
    protected TermsEnum termsEnum(Terms terms) throws IOException {
      return terms.iterator(null);
    }
  };
  collector.uninvert(reader, key.field, setDocsWithField);

  return new PerReaderUIDMaps(reader.getContext().ord, docIdByUid);
}
 
Example #7
Source File: TestLuceneIndexer.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Adds 10000 small documents ({@code key<i>} / {@code value<i>}) to {@code index}, recording each
 * pair in {@code data} and pausing briefly between additions.
 *
 * <p>If the thread is interrupted, indexing stops early and the interrupt flag is restored so
 * that whoever owns the thread can still observe the interruption.
 */
@Override
public void run() {
  try {
    for (int i = 0; i < 10000; ++i) {
      final Document document = new Document();
      final String key = "key" + i;
      final String val = "value" + i;
      document.add(new StringField(key, val, Field.Store.YES));
      // BytesRef(CharSequence) encodes as UTF-8, unlike String.getBytes(), which depends on the
      // platform default charset.
      document.add(new SortedDocValuesField(key, new BytesRef(val)));
      index.add(document);
      data.put(key, val);
      sleep(1);
    }
  } catch (InterruptedException e) {
    // Stop quietly, but do not lose the interrupt status.
    Thread.currentThread().interrupt();
  }
}
 
Example #8
Source File: TermBuilder.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes {@code value} as a prefix-coded term with shift 0.
 *
 * @param value the number to encode; unboxed when passed to the encoder
 * @return the bytes produced by {@code NumericUtils.longToPrefixCoded}
 */
@Override
public BytesRef term(Long value) {
    final BytesRefBuilder prefixCoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, prefixCoded);
    return prefixCoded.get();
}
 
Example #9
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Inverse of {@link BytesRef#toString}.
 *
 * <p>Expects the bracketed, space-separated hex form (for example {@code [61 62 63]}). The empty
 * form {@code []} is decoded to an empty {@link BytesRef}.
 *
 * @param s the string to decode
 * @return a new {@link BytesRef} holding the decoded bytes
 * @throws IllegalArgumentException if {@code s} is not enclosed in square brackets
 * @throws NumberFormatException if one of the tokens is not a hexadecimal number
 */
public static BytesRef fromBytesRefString(String s) {
  if (s.length() < 2 || s.charAt(0) != '[' || s.charAt(s.length() - 1) != ']') {
    throw new IllegalArgumentException("string " + s + " was not created from BytesRef.toString?");
  }
  final String hex = s.substring(1, s.length() - 1);
  if (hex.isEmpty()) {
    // "".split(" ") yields one empty token, which parseInt would reject; handle "[]" explicitly.
    return new BytesRef(new byte[0]);
  }
  final String[] parts = hex.split(" ");
  final byte[] bytes = new byte[parts.length];
  for (int i = 0; i < parts.length; i++) {
    bytes[i] = (byte) Integer.parseInt(parts[i], 16);
  }

  return new BytesRef(bytes);
}
 
Example #10
Source File: GroupingSearchTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the group's value corresponds to {@code expected}.
 *
 * <p>A {@code null} expectation is satisfied by a {@code null} group value, by a value whose
 * class passes the {@code MutableValueStr} check, or by an empty {@link BytesRef}. Otherwise the
 * value is compared as a {@link BytesRef} or as a {@code MutableValueStr}, and any other type
 * fails the test.
 */
private void compareGroupValue(String expected, GroupDocs<?> group) {
  final Object actual = group.groupValue;

  if (expected == null) {
    final boolean treatedAsMissing = actual == null
        || actual.getClass().isAssignableFrom(MutableValueStr.class)
        || ((BytesRef) actual).length == 0;
    if (treatedAsMissing) {
      return;
    }
    fail();
  }

  final Class<?> valueClass = actual.getClass();
  if (valueClass.isAssignableFrom(BytesRef.class)) {
    assertEquals(new BytesRef(expected), actual);
    return;
  }
  if (valueClass.isAssignableFrom(MutableValueStr.class)) {
    MutableValueStr wanted = new MutableValueStr();
    wanted.value.copyChars(expected);
    assertEquals(wanted, actual);
    return;
  }
  fail();
}
 
Example #11
Source File: IpColumnReference.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the formatted IP value of the current document.
 *
 * @return the value formatted by {@code DocValueFormat.IP}, or {@code null} when the document has
 *         no value
 * @throws GroupByOnArrayUnsupportedException if the document holds more than one value
 * @throws UncheckedIOException if reading the doc values fails
 */
@Override
public String value() {
    try {
        if (!values.advanceExact(docId)) {
            return null;
        }
        final long firstOrd = values.nextOrd();
        // A second ordinal means the column is an array for this document.
        if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
            throw new GroupByOnArrayUnsupportedException(columnName);
        }
        final BytesRef encoded = values.lookupOrd(firstOrd);
        return (String) DocValueFormat.IP.format(encoded);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
 
Example #12
Source File: MutatableAction.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
/**
 * Collects, across all segments of the searcher's reader, the terms enums positioned exactly on
 * {@code rowId} and wraps the matching records in an {@code IterableRow}.
 *
 * @param rowId the row id to look up in the {@code BlurConstants.ROW_ID} field
 * @param searcher source of the index reader
 * @return the row, or {@code null} when no segment contains the row id
 * @throws IOException if the index cannot be read
 */
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  final BytesRef rowIdRef = new BytesRef(rowId);
  final List<AtomicReaderTermsEnum> matches = new ArrayList<AtomicReaderTermsEnum>();

  for (AtomicReaderContext leaf : searcher.getIndexReader().leaves()) {
    final AtomicReader segmentReader = leaf.reader();
    final Fields fields = segmentReader.fields();
    final Terms rowIdTerms = fields == null ? null : fields.terms(BlurConstants.ROW_ID);
    if (rowIdTerms == null) {
      // This segment has no fields or no row-id field.
      continue;
    }
    final TermsEnum termsEnum = rowIdTerms.iterator(null);
    if (termsEnum.seekExact(rowIdRef, true)) {
      // need atomic read as well...
      matches.add(new AtomicReaderTermsEnum(segmentReader, termsEnum));
    }
  }

  return matches.isEmpty() ? null : new IterableRow(rowId, getRecords(matches));
}
 
Example #13
Source File: IndexNumericFieldData.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Maps the indexed boolean forms to numbers: {@code FALSE} becomes 0 and {@code TRUE} becomes 1.
 *
 * @param indexForm the indexed term bytes
 * @return 0 or 1
 * @throws IllegalArgumentException if {@code indexForm} is neither of the two boolean values
 */
@Override
public long toLong(BytesRef indexForm) {
    if (indexForm.equals(BooleanFieldMapper.Values.FALSE)) {
        return 0;
    }
    if (indexForm.equals(BooleanFieldMapper.Values.TRUE)) {
        return 1;
    }
    throw new IllegalArgumentException("Cannot convert " + indexForm + " to a boolean");
}
 
Example #14
Source File: TestBinaryResponseWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code ByteUtils.UTF16toUTF8} produces the same bytes for {@code input} as
 * {@link BytesRef} does.
 *
 * <p>The scratch buffer is sized from the input, so long strings no longer overflow a fixed
 * 1024-byte array, and the comparison honours {@code bytesref.offset}.
 *
 * @param input the string to encode with both implementations
 */
private void compareStringFormat(String input) {
  // NOTE(review): assumes at most 3 UTF-8 bytes per UTF-16 code unit (a surrogate pair is two
  // units for four bytes); confirm against ByteUtils.
  byte[] bytes1 = new byte[Math.max(1024, input.length() * 3)];
  int len1 = ByteUtils.UTF16toUTF8(input, 0, input.length(), bytes1, 0);
  BytesRef bytesref = new BytesRef(input);
  assertEquals(len1, bytesref.length);
  for (int i = 0; i < len1; i++) {
    assertEquals(input + " not matching char at :" + i, bytesref.bytes[bytesref.offset + i], bytes1[i]);
  }
}
 
Example #15
Source File: CustomSpellCheckListner.java    From customized-symspell with MIT License 5 votes vote down vote up
/**
 * Reloads the spell-check dictionary from the index behind {@code newSearcher}.
 *
 * <p>For every indexed field listed in {@code fieldArr}, each term is converted to its readable
 * form and added to the checker's data holder together with its total term frequency.
 *
 * @param newSearcher searcher giving access to the index reader and the latest schema
 * @param checker spell checker whose data holder receives the dictionary items
 * @throws IOException if the index cannot be read
 * @throws SpellCheckException declared for failures raised by the spell checker
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {

  final DirectoryReader indexReader = newSearcher.getIndexReader();
  final Fields fields = MultiFields.getFields(indexReader);
  final IndexSchema schema = newSearcher.getCore().getLatestSchema();
  final long startMillis = System.currentTimeMillis();

  for (String field : fields) {
    if (fieldArr.contains(field)) {
      final FieldType type = schema.getField(field).getType();
      int insertionsCount = 0;
      final TermsEnum termsEnum = fields.terms(field).iterator();
      while (termsEnum.next() != null) {
        final BytesRef term = termsEnum.term();
        final CharsRefBuilder readable = new CharsRefBuilder();
        type.indexedToReadable(term, readable);
        insertionsCount++;
        final DictionaryItem item =
            new DictionaryItem(readable.toString().trim(), (double) termsEnum.totalTermFreq(), 0.0);
        checker.getDataHolder().addItem(item);
      }
      log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
          insertionsCount);
    }
  }
  log.info("Data for SpellChecker  was populated. Time={} ms",
      (System.currentTimeMillis() - startMillis));
}
 
Example #16
Source File: FSTTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Steps the underlying {@code fstEnum} forward and returns the current {@code term}.
 *
 * <p>If a seek is still pending, the enum is first re-positioned on {@code term} through
 * {@code seekCeil}, which is expected to report {@code FOUND}.
 *
 * @return the value of {@code term} after {@code updateEnum} has processed the next FST entry
 *         (NOTE(review): presumably {@code null} once exhausted; confirm in {@code updateEnum})
 * @throws IOException if seeking or advancing the FST enum fails
 */
@Override
public BytesRef next() throws IOException {
  if (seekPending) {  // previously positioned, but termOutputs not fetched
    // Clear the flag before seeking so the pending state is not observed during the re-seek.
    seekPending = false;
    SeekStatus status = seekCeil(term);
    assert status == SeekStatus.FOUND;  // must be positioned on a valid term
  }
  updateEnum(fstEnum.next());
  return term;
}
 
Example #17
Source File: CompressingTermVectorsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the payload at the current position.
 *
 * @return {@code payload}, or {@code null} when there is no payload index or the payload is empty
 * @throws IOException declared by the overridden method
 */
@Override
public BytesRef getPayload() throws IOException {
  checkPosition();
  final boolean hasPayload = payloadIndex != null && payload.length != 0;
  return hasPayload ? payload : null;
}
 
Example #18
Source File: TokenStreamOffsetStrategy.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code ts} for matching against {@code matchers}.
 *
 * <p>Registers the term and offset attributes on the stream, allocates one description slot per
 * matcher, and resets the stream.
 *
 * @param ts the token stream to consume
 * @param matchers the matchers applied to each token
 * @throws IOException if resetting the stream fails
 */
TokenStreamOffsetsEnum(TokenStream ts, CharArrayMatcher[] matchers) throws IOException {
  this.stream = ts;
  this.matchers = matchers;
  this.matchDescriptions = new BytesRef[matchers.length];
  this.charTermAtt = ts.addAttribute(CharTermAttribute.class);
  this.offsetAtt = ts.addAttribute(OffsetAttribute.class);
  // Attributes are registered before the stream is reset.
  ts.reset();
}
 
Example #19
Source File: BKDWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the packed values of one leaf block, choosing between three encodings.
 *
 * <p>A marker byte is written first: {@code -1} when all values are equal (nothing else is
 * written), {@code -2} for the low-cardinality encoding, or {@code sortedDim} for the
 * high-cardinality encoding. The choice between the last two is made by comparing estimated
 * costs.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues, int leafCardinality) throws IOException {
  final int prefixLenSum = Arrays.stream(commonPrefixLengths).sum();
  if (prefixLenSum == packedBytesLength) {
    // all values in this block are equal
    out.writeByte((byte) -1);
    return;
  }

  assert commonPrefixLengths[sortedDim] < bytesPerDim;
  // estimate if storing the values with cardinality is cheaper than storing all values.
  final int compressedByteOffset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
  final int highCardinalityCost;
  final int lowCardinalityCost;
  if (count == leafCardinality) {
    // all values in this block are different, so these costs always select the
    // high-cardinality encoding below
    highCardinalityCost = 0;
    lowCardinalityCost = 1;
  } else {
    // count the runs produced by run-length compression on the byte at compressedByteOffset
    int numRunLens = 0;
    int start = 0;
    while (start < count) {
      final int run = runLen(packedValues, start, Math.min(start + 0xff, count), compressedByteOffset);
      assert run <= 0xff;
      numRunLens++;
      start += run;
    }
    final int suffixLength = packedBytesLength - prefixLenSum;
    // Add cost of runLen compression
    highCardinalityCost = count * (suffixLength - 1) + 2 * numRunLens;
    // +1 is the byte needed for storing the cardinality
    lowCardinalityCost = leafCardinality * (suffixLength + 1);
  }

  if (lowCardinalityCost <= highCardinalityCost) {
    out.writeByte((byte) -2);
    writeLowCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, packedValues);
  } else {
    out.writeByte((byte) sortedDim);
    writeHighCardinalityLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, compressedByteOffset);
  }
}
 
Example #20
Source File: NodeOsInfoExpression.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the child expressions of the OS-info node in {@code childImplementations}.
 *
 * <p>Only the processor count is read from {@code info}; the OS name, architecture and version
 * expressions return the constants {@code OS_NAME}, {@code OS_ARCH} and {@code OS_VERSION}.
 *
 * @param info source of the available-processors value
 */
private void addChildImplementations(final OsInfo info) {
    // Processor count comes from the supplied OsInfo.
    childImplementations.put(AVAILABLE_PROCESSORS, new OsInfoExpression() {
        @Override
        public Integer value() {
            return info.getAvailableProcessors();
        }
    });
    // The next three expressions return constants defined outside this method.
    childImplementations.put(OS, new OsInfoExpression() {
        @Override
        public BytesRef value() {
            return OS_NAME;
        }
    });
    childImplementations.put(ARCH, new OsInfoExpression() {
        @Override
        public BytesRef value() {
            return OS_ARCH;
        }
    });
    childImplementations.put(VERSION, new OsInfoExpression() {
        @Override
        public BytesRef value() {
            return OS_VERSION;
        }
    });
    // JVM details are delegated to a dedicated expression.
    childImplementations.put(SysNodesTableInfo.SYS_COL_OS_INFO_JVM, new NodeOsJvmExpression());
}
 
Example #21
Source File: GraphTermsQParserPlugin.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the set of documents matching any of the {@code sortedPackedPoints}, skipping points
 * that match more than {@code maxDocFreq} documents.
 *
 * <p>Each point is intersected with every segment that has point values for {@code field}. As
 * soon as the running count exceeds {@code maxDocFreq} the point is abandoned and none of its
 * documents are collected.
 *
 * @param searcher searcher whose top-level reader is inspected
 * @return the collected documents, built against the searcher's live docs
 * @throws IOException if reading point values fails
 */
public DocSet getDocSet(IndexSearcher searcher) throws IOException {
  final IndexReaderContext top = ReaderUtil.getTopLevelContext(searcher.getTopReaderContext());
  final List<LeafReaderContext> leaves = top.leaves();
  final int maxDoc = top.reader().maxDoc();
  final DocSetBuilder builder = new DocSetBuilder(maxDoc, Math.min(64, (maxDoc >>> 10) + 4));

  final PointValues[] pointsPerSegment = new PointValues[leaves.size()];
  for (int seg = 0; seg < pointsPerSegment.length; seg++) {
    pointsPerSegment[seg] = leaves.get(seg).reader().getPointValues(field);
  }

  final int maxCollect = Math.min(maxDocFreq, maxDoc);

  final PointSetQuery.CutoffPointVisitor visitor = new PointSetQuery.CutoffPointVisitor(maxCollect);
  final PrefixCodedTerms.TermIterator iterator = sortedPackedPoints.iterator();
  BytesRef point;
  while ((point = iterator.next()) != null) {
    visitor.setPoint(point);

    boolean tooFrequent = false;
    for (int seg = 0; seg < leaves.size() && !tooFrequent; seg++) {
      if (pointsPerSegment[seg] == null) {
        continue;
      }
      visitor.setBase(leaves.get(seg).docBase);
      pointsPerSegment[seg].intersect(visitor);
      tooFrequent = visitor.getCount() > maxDocFreq;
    }
    if (tooFrequent) {
      // Over the frequency cutoff: drop this point entirely.
      continue;
    }

    final int collected = visitor.getCount();
    final int[] ids = visitor.getGlobalIds();
    for (int k = 0; k < collected; k++) {
      builder.add(ids[k]);
    }
  }

  final FixedBitSet liveDocs = getLiveDocs(searcher);
  return builder.build(liveDocs);
}
 
Example #22
Source File: MatchQueryBuilder.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the match query for the single field in {@code fields} and applies the configured
 * boost, if one is set.
 *
 * @param fields exactly one entry: field name mapped to a value passed through
 *        {@code floatOrNull}
 * @param queryString the text to match
 * @return the query, with {@code options.boost()} applied when it is non-null
 * @throws IOException if building the query fails
 */
public Query query(Map<String, Object> fields, BytesRef queryString) throws IOException {
    assert fields.size() == 1;
    final Map.Entry<String, Object> only = fields.entrySet().iterator().next();
    final Query built = singleQueryAndApply(
        matchType.matchQueryType(), only.getKey(), queryString, floatOrNull(only.getValue()));

    final Float boost = this.options.boost();
    if (boost != null) {
        built.setBoost(boost);
    }
    return built;
}
 
Example #23
Source File: DocumentsImpl.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public Optional<Term> seekTerm(String termText) {
  Objects.requireNonNull(termText);

  if (curField == null) {
    // field is not selected
    log.warn("Field not selected.");
    return Optional.empty();
  }

  try {
    Terms terms = IndexUtils.getTerms(reader, curField);
    setTermsIterator(terms.iterator());

    if (tenum.seekCeil(new BytesRef(termText)) == TermsEnum.SeekStatus.END) {
      // reached to the end of the iterator
      resetTermsIterator();
      log.info("Reached the end of the term iterator for field: {}.", curField);
      return Optional.empty();
    } else {
      return Optional.of(new Term(curField, tenum.term()));
    }
  } catch (IOException e) {
    resetTermsIterator();
    throw new LukeException(String.format(Locale.ENGLISH, "Terms not available for field: %s.", curField), e);
  } finally {
    // discard current postings enum
    resetPostingsIterator();
  }
}
 
Example #24
Source File: DocumentValueSourceDictionaryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Generates {@code ndocs} test documents keyed by the text of their main field.
 *
 * <p>Each document carries the text field, three numeric weights, usually a payload, and at
 * least three context values. Every context is added as its own {@code StoredField}; re-using a
 * single field instance and mutating it would leave all entries showing the last value. The
 * number of contexts is drawn once per document rather than on every loop iteration.
 *
 * @param ndocs number of documents to generate
 * @return the documents, keyed by {@code "field_" + i}
 */
private Map<String, Document> generateIndexDocuments(int ndocs) {
  Map<String, Document> docs = new HashMap<>();
  for (int i = 0; i < ndocs; i++) {
    Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
    Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
    Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
    Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
    Document doc = new Document();
    doc.add(field);
    // even if payload is not required usually have it
    if (usually()) {
      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
      doc.add(payload);
    }
    doc.add(weight1);
    doc.add(weight2);
    doc.add(weight3);
    doc.add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0")));
    // Fix the number of contexts for this document before looping.
    final int numContexts = atLeast(3);
    for (int j = 1; j < numContexts; j++) {
      // A distinct field per context keeps each value intact.
      doc.add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j)));
    }
    docs.put(field.stringValue(), doc);
  }
  return docs;
}
 
Example #25
Source File: LegacyNumericUtils.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the shift from the first byte of a prefix encoded {@code long}.
 *
 * @param val the prefix encoded bytes; the byte at {@code val.offset} carries the shift
 * @return the shift, in the range 0 to 63 inclusive
 * @throws NumberFormatException if the decoded shift is outside that range, meaning the supplied
 *         {@link BytesRef} is not correctly prefix encoded
 */
public static int getPrefixCodedLongShift(final BytesRef val) {
  final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
  if (shift >= 0 && shift <= 63) {
    return shift;
  }
  throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
}
 
Example #26
Source File: TestSegmentReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the terms and postings exposed by {@code reader}.
 *
 * <p>Every term of every indexed field must occur in that field's value from
 * {@code DocHelper.nameValues}; postings must exist for the probed terms, and positions must be
 * readable for the text field.
 *
 * @throws IOException if the reader fails
 */
public void testTerms() throws IOException {
  final Collection<String> indexedFields = FieldInfos.getIndexedFields(reader);
  for (String field : indexedFields) {
    final Terms terms = MultiTerms.getTerms(reader, field);
    assertNotNull(terms);
    final TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      final BytesRef term = termsEnum.term();
      assertTrue(term != null);
      final String fieldValue = (String) DocHelper.nameValues.get(field);
      assertTrue(fieldValue.indexOf(term.utf8ToString()) != -1);
    }
  }

  // Postings for a term of the text field.
  PostingsEnum termDocs =
      TestUtil.docs(random(), reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), null, 0);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  // Postings for the field indexed without norms.
  termDocs =
      TestUtil.docs(random(), reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), null, 0);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  final PostingsEnum positions =
      MultiTerms.getTermPostingsEnum(reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
  // NOTE: prior rev of this test was failing to first
  // call next here:
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertTrue(positions.docID() == 0);
  assertTrue(positions.nextPosition() >= 0);
}
 
Example #27
Source File: BaseExplanationTestCase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the test document for position {@code index} of {@code docFields}.
 *
 * <p>The index is stored, as a decimal string, both as a string field and as sorted doc values
 * under {@code KEY}; the text {@code docFields[index]} goes into {@code FIELD} and
 * {@code ALTFIELD}.
 *
 * @param index position in {@code docFields}
 * @return the new document
 */
public static Document createDoc(int index) {
  final Document doc = new Document();
  doc.add(newStringField(KEY, Integer.toString(index), Field.Store.NO));
  doc.add(new SortedDocValuesField(KEY, new BytesRef(Integer.toString(index))));
  final String text = docFields[index];
  doc.add(newTextField(FIELD, text, Field.Store.NO));
  doc.add(newTextField(ALTFIELD, text, Field.Store.NO));
  return doc;
}
 
Example #28
Source File: SignificantTermsAggregatorFactory.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the document frequency of {@code termBytes} through {@code termsEnum}.
 *
 * @param termBytes the term to look up
 * @return the term's document frequency, or 0 when the term is not found
 * @throws ElasticsearchException if the lookup fails with an {@link IOException}
 */
public long getBackgroundFrequency(BytesRef termBytes) {
    assert termsEnum != null; // having failed to find a field in the index we don't expect any calls for frequencies
    try {
        return termsEnum.seekExact(termBytes) ? termsEnum.docFreq() : 0;
    } catch (IOException e) {
        throw new ElasticsearchException("IOException loading background document frequency info", e);
    }
}
 
Example #29
Source File: TermInSetQueryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code TermInSetQuery.ramBytesUsed()} is within 5% of the size measured by
 * {@code RamUsageTester} for a query over 10000 to 10999 random terms.
 */
public void testRamBytesUsed() {
  final List<BytesRef> terms = new ArrayList<>();
  final int numTerms = 10000 + random().nextInt(1000);
  for (int remaining = numTerms; remaining > 0; remaining--) {
    terms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10)));
  }
  final TermInSetQuery query = new TermInSetQuery("f", terms);

  final long measured = RamUsageTester.sizeOf(query);
  final long reported = query.ramBytesUsed();
  // error margin within 5%
  assertEquals(reported, measured, measured / 20);
}
 
Example #30
Source File: FreeTextSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to {@code lookup(key, contexts, num)}; {@code onlyMorePopular} is ignored.
 *
 * @return the suggestions produced by the delegate
 * @throws RuntimeException wrapping any {@link IOException} raised by the delegate
 */
@Override
public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, /* ignored */ boolean onlyMorePopular, int num) {
  final List<LookupResult> results;
  try {
    results = lookup(key, contexts, num);
  } catch (IOException ioe) {
    // bogus:
    throw new RuntimeException(ioe);
  }
  return results;
}