org.apache.lucene.util.BytesRefHash Java Examples

The following examples show how to use org.apache.lucene.util.BytesRefHash. They are taken from open source projects; the source file and license for each are noted above the example.
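Before working through the examples, the snippet below sketches the core BytesRefHash API they all rely on: add returns the entry's id for a new term and -(id + 1) for a duplicate, find returns the id or -1, and get fills a reusable BytesRef with an entry's bytes. This sketch is illustrative only (it is not taken from any of the projects below) and assumes the usual org.apache.lucene.util imports.

static void basicUsageSketch() {
  BytesRefHash hash = new BytesRefHash();
  int id = hash.add(new BytesRef("foo"));      // new entry: returns its id (>= 0)
  int dup = hash.add(new BytesRef("foo"));     // duplicate: returns -(id + 1)
  int found = hash.find(new BytesRef("foo"));  // id of the entry, or -1 if absent
  BytesRef scratch = new BytesRef();
  hash.get(found, scratch);                    // fills scratch with the stored bytes
  assert scratch.utf8ToString().equals("foo");
}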
Example #1
Source File: BytesRefTermsSet.java    From siren-join with GNU Affero General Public License v3.0
private void readFromBytes(BytesRef bytes) {
  // Read pruned flag
  this.setIsPruned(bytes.bytes[bytes.offset++] == 1);

  // Read size of the set
  int size = Bytes.readInt(bytes);

  // Read terms
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  BytesRef reusable = new BytesRef();
  for (int i = 0; i < size; i++) {
    Bytes.readBytesRef(bytes, reusable);
    set.add(reusable);
  }
}
 
Example #2
Source File: MultipassTermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
@Override
public Query build() {
  Map<String, BytesRef[]> collectedTerms = new HashMap<>();
  for (Map.Entry<String, BytesRefHash> entry : terms.entrySet()) {
    collectedTerms.put(entry.getKey(), convertHash(entry.getValue()));
  }
  BooleanQuery.Builder parent = new BooleanQuery.Builder();
  for (int i = 0; i < passes; i++) {
    BooleanQuery.Builder child = new BooleanQuery.Builder();
    for (String field : terms.keySet()) {
      child.add(new TermInSetQuery(field(field, i), collectedTerms.get(field)), BooleanClause.Occur.SHOULD);
    }
    parent.add(child.build(), BooleanClause.Occur.MUST);
  }
  return parent.build();
}
 
Example #3
Source File: MultipassTermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
@Override
public Document buildQueryDocument(QueryTree querytree) {

  Document doc = new Document();

  for (int i = 0; i < passes; i++) {
    Map<String, BytesRefHash> fieldTerms = collectTerms(querytree);
    for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) {
      // we add the index terms once under a suffixed field for the multipass query, and
    // once under the plain field name for the TermsEnumTokenFilter
      doc.add(new Field(field(entry.getKey(), i),
          new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
      doc.add(new Field(entry.getKey(),
          new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
    }
    querytree.advancePhase(minWeight);
  }

  return doc;
}
 
Example #4
Source File: TermsIncludingScoreQuery.java    From lucene-solr with Apache License 2.0
TermsIncludingScoreQuery(ScoreMode scoreMode, String toField, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores,
                         String fromField, Query fromQuery, Object indexReaderContextId) {
  this.scoreMode = scoreMode;
  this.toField = toField;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  this.scores = scores;
  this.ords = terms.sort();

  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.topReaderContextId = indexReaderContextId;

  this.ramBytesUsed = BASE_RAM_BYTES +
      RamUsageEstimator.sizeOfObject(fromField) +
      RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
      RamUsageEstimator.sizeOfObject(ords) +
      RamUsageEstimator.sizeOfObject(scores) +
      RamUsageEstimator.sizeOfObject(terms) +
      RamUsageEstimator.sizeOfObject(toField);
}
 
Example #5
Source File: TermsQuery.java    From lucene-solr with Apache License 2.0
/**
 * @param toField               The field that should contain terms that are specified in the next parameter.
 * @param terms                 The terms that matching documents should have. The terms must be sorted by natural order.
 * @param indexReaderContextId  Refers to the top level index reader used to create the set of terms in the previous parameter.
 */
TermsQuery(String toField, BytesRefHash terms, String fromField, Query fromQuery, Object indexReaderContextId) {
  super(toField);
  this.terms = terms;
  ords = terms.sort();
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.indexReaderContextId = indexReaderContextId;

  this.ramBytesUsed = BASE_RAM_BYTES +
      RamUsageEstimator.sizeOfObject(field) +
      RamUsageEstimator.sizeOfObject(fromField) +
      RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
      RamUsageEstimator.sizeOfObject(ords) +
      RamUsageEstimator.sizeOfObject(terms);
}
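
Both join queries above keep the array returned by terms.sort(). As an illustrative sketch (not from lucene-solr): sort() returns the entry ids ordered by the terms' natural, unsigned-byte order, only the first size() slots of the returned array are meaningful, and in the Lucene versions these examples target sorting is destructive, so clear() must be called before the hash is reused.

static void sortedOrdsSketch() {
  BytesRefHash terms = new BytesRefHash();
  terms.add(new BytesRef("b"));
  terms.add(new BytesRef("a"));
  int[] ords = terms.sort();                // ids in unsigned-byte order: "a" first
  BytesRef scratch = new BytesRef();
  for (int i = 0; i < terms.size(); i++) {  // only the first size() slots are valid
    terms.get(ords[i], scratch);            // scratch holds the i-th smallest term
  }
}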
 
Example #6
Source File: GenericTermsCollector.java    From lucene-solr with Apache License 2.0
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    
    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      throw new UnsupportedOperationException("scores are not available for " + collector);
    }
  };
}
 
Example #7
Source File: MemoryIndex.java    From lucene-solr with Apache License 2.0
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
    int high, BytesRefHash hash, int[] ords) {
  int mid = 0;
  while (low <= high) {
    mid = (low + high) >>> 1;
    hash.get(ords[mid], bytesRef);
    final int cmp = bytesRef.compareTo(b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  assert bytesRef.compareTo(b) != 0;
  return -(low + 1);
}
 
Example #8
Source File: GraphQuery.java    From lucene-solr with Apache License 2.0
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0 ; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;
}
 
Example #9
Source File: BytesRefTermsSet.java    From siren-join with GNU Affero General Public License v3.0
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }

  BytesRefHash input = ((BytesRefTermsSet) terms).set;
  BytesRef reusable = new BytesRef();
  for (int i = 0; i < input.size(); i++) {
    input.get(i, reusable);
    set.add(reusable);
  }
}
 
Example #10
Source File: GraphEdgeCollector.java    From lucene-solr with Apache License 2.0
/**
 * Build an automaton to represent the frontier query
 */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;
}
 
Example #11
Source File: BytesRefTermsSet.java    From siren-join with GNU Affero General Public License v3.0
@Override
public void readFrom(StreamInput in) throws IOException {
  this.setIsPruned(in.readBoolean());
  int size = in.readInt();

  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
 
Example #12
Source File: SortedDocValuesWriter.java    From lucene-solr with Apache License 2.0
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
  this.hash = hash;
  this.valueCount = valueCount;
  this.sortedValues = sortedValues;
  this.iter = docToOrd.iterator();
  this.ordMap = ordMap;
  this.docsWithField = docsWithField;
}
 
Example #13
Source File: SortedDocValuesWriter.java    From lucene-solr with Apache License 2.0
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
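
The constructor above wires every allocation through iwBytesUsed: the DirectTrackingAllocator counts the pool's byte blocks and the DirectBytesStartArray counts the per-entry start offsets against the same Counter. A minimal sketch of that pattern in isolation (illustrative, not from SortedDocValuesWriter; org.apache.lucene.util imports assumed):

static void trackedHashSketch() {
  Counter bytesUsed = Counter.newCounter();
  BytesRefHash hash = new BytesRefHash(
      new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
  hash.add(new BytesRef("term"));
  long used = bytesUsed.get();  // bytes allocated so far by the pool and start array
}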
 
Example #14
Source File: SortedSetDocValuesWriter.java    From lucene-solr with Apache License 2.0
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
  this.currentDoc = new int[maxCount];
  this.sortedValues = sortedValues;
  this.ordMap = ordMap;
  this.hash = hash;
  this.ordsIter = ords.iterator();
  this.ordCountsIter = ordCounts.iterator();
  this.docsWithField = docsWithField;
}
 
Example #15
Source File: SortedSetDocValuesWriter.java    From lucene-solr with Apache License 2.0
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #16
Source File: MultipassTermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
private static BytesRef[] convertHash(BytesRefHash hash) {
  BytesRef[] terms = new BytesRef[hash.size()];
  for (int i = 0; i < terms.length; i++) {
    BytesRef t = new BytesRef();
    terms[i] = hash.get(i, t);
  }
  return terms;
}
 
Example #17
Source File: MemoryIndex.java    From lucene-solr with Apache License 2.0
private Info(FieldInfo fieldInfo, ByteBlockPool byteBlockPool) {
  this.fieldInfo = fieldInfo;
  this.sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
  this.terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
  this.binaryProducer = new BinaryDocValuesProducer();
  this.numericProducer = new NumericDocValuesProducer();
}
 
Example #18
Source File: TermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
/**
 * Collects terms from a {@link QueryTree} and maps them per-field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree querytree) {
  Map<String, BytesRefHash> fieldTerms = new HashMap<>();
  querytree.collectTerms((field, term) -> {
    BytesRefHash tt = fieldTerms.computeIfAbsent(field, f -> new BytesRefHash());
    tt.add(term);
  });
  return fieldTerms;
}
 
Example #19
Source File: TermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
/**
 * Builds a {@link Document} from the terms extracted from a query
 */
protected Document buildQueryDocument(QueryTree querytree) {
  Map<String, BytesRefHash> fieldTerms = collectTerms(querytree);
  Document doc = new Document();
  for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) {
    doc.add(new Field(entry.getKey(),
        new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
  }
  return doc;
}
 
Example #20
Source File: QueryIndex.java    From lucene-solr with Apache License 2.0
@Override
public boolean test(String field, BytesRef term) {
  BytesRefHash bytes = termsHash.get(field);
  if (bytes == null) {
    return false;
  }
  return bytes.find(term) != -1;
}
 
Example #21
Source File: QueryIndex.java    From lucene-solr with Apache License 2.0
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash terms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t != null) {
        TermsEnum te = t.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
          terms.add(term);
        }
      }
    }
  }
}
 
Example #22
Source File: SeekingTermSetTermsEnum.java    From lucene-solr with Apache License 2.0
/**
 * Constructor
 */
public SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) {
  super(tenum);
  this.terms = terms;
  this.ords = ords;
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  seekTerm = terms.get(ords[upto], spare);
}
 
Example #23
Source File: MultipassTermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
@Override
public void addTerm(String field, BytesRef term) {
  BytesRefHash t = terms.computeIfAbsent(field, f -> new BytesRefHash());
  t.add(term);
}
 
Example #24
Source File: TermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
BytesRefHashIterator(BytesRefHash terms) {
  this.terms = terms;
  this.sortedTerms = terms.sort();
}
 
Example #25
Source File: SynonymMap.java    From lucene-solr with Apache License 2.0
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext) {
  this.fst = fst;
  this.words = words;
  this.maxHorizontalContext = maxHorizontalContext;
}
 
Example #26
Source File: GraphEdgeCollector.java    From lucene-solr with Apache License 2.0
GraphTermsCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
  super(collectField, skipSet, leafNodes);
  this.collectorTerms = new BytesRefHash();
}
 
Example #27
Source File: BytesRefTermsSet.java    From siren-join with GNU Affero General Public License v3.0
public BytesRefTermsSet(final CircuitBreaker breaker) {
  super(breaker);
  this.bytesUsed = Counter.newCounter();
  this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  this.set = new BytesRefHash(pool);
}
 
Example #28
Source File: TermsWithScoreCollector.java    From lucene-solr with Apache License 2.0
@Override
public BytesRefHash getCollectedTerms() {
  return collectedTerms;
}
 
Example #29
Source File: BytesRefTermsSet.java    From siren-join with GNU Affero General Public License v3.0
public BytesRefHash getBytesRefHash() {
  return set;
}
 
Example #30
Source File: TermsCollector.java    From lucene-solr with Apache License 2.0
public BytesRefHash getCollectorTerms() {
  return collectorTerms;
}