Java Code Examples for org.apache.lucene.util.BytesRefHash

The following examples show how to use org.apache.lucene.util.BytesRefHash. These examples are extracted from open source projects; the source project, source file, and license are noted above each example where available.
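All of the snippets below lean on the same small API surface: add(BytesRef) interns a term and returns its id (a negative value means the term was already present), find(BytesRef) returns a term's id or -1, get(int, BytesRef) fills a reusable BytesRef with the bytes stored for an id, and sort() returns the ids ordered by the terms' natural byte order. As a quick orientation, here is a minimal, self-contained sketch of that pattern; it is not taken from any of the projects below, and the class name BytesRefHashDemo is invented for illustration.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class BytesRefHashDemo {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();

    // add() returns the term's id, or -(id + 1) if the term was already present
    hash.add(new BytesRef("banana"));
    hash.add(new BytesRef("apple"));
    int dup = hash.add(new BytesRef("apple"));
    System.out.println("duplicate add returned " + dup + ", size = " + hash.size()); // size = 2

    // find() looks a term up without inserting it; -1 means absent
    System.out.println("id of 'apple' = " + hash.find(new BytesRef("apple")));

    // sort() returns ids ordered by the terms' natural (unsigned byte) order;
    // get() fills a reusable BytesRef, avoiding per-term allocations
    int[] ords = hash.sort();
    BytesRef spare = new BytesRef();
    for (int i = 0; i < hash.size(); i++) {
      System.out.println(hash.get(ords[i], spare).utf8ToString());
    }
  }
}

One caveat worth knowing: sort() is a destructive operation (the hash must be cleared with clear() before further add() calls), which is why the examples below invoke it once, typically in a constructor.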
Example 1
Source Project: lucene-solr   Source File: MemoryIndex.java    License: Apache License 2.0
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
    int high, BytesRefHash hash, int[] ords) {
  int mid = 0;
  while (low <= high) {
    mid = (low + high) >>> 1;
    hash.get(ords[mid], bytesRef);
    final int cmp = bytesRef.compareTo(b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  assert bytesRef.compareTo(b) != 0;
  return -(low + 1);
}
 
Example 2
Source Project: lucene-solr   Source File: GenericTermsCollector.java    License: Apache License 2.0
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      throw new UnsupportedOperationException("scores are not available for " + collector);
    }
  };
}
 
Example 3
Source Project: lucene-solr   Source File: TermsQuery.java    License: Apache License 2.0
/**
 * @param toField               The field that should contain terms that are specified in the next parameter.
 * @param terms                 The terms that matching documents should have. The terms must be sorted by natural order.
 * @param indexReaderContextId  Refers to the top level index reader used to create the set of terms in the previous parameter.
 */
TermsQuery(String toField, BytesRefHash terms, String fromField, Query fromQuery, Object indexReaderContextId) {
  super(toField);
  this.terms = terms;
  ords = terms.sort();
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.indexReaderContextId = indexReaderContextId;

  this.ramBytesUsed = BASE_RAM_BYTES +
      RamUsageEstimator.sizeOfObject(field) +
      RamUsageEstimator.sizeOfObject(fromField) +
      RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
      RamUsageEstimator.sizeOfObject(ords) +
      RamUsageEstimator.sizeOfObject(terms);
}
 
Example 4
Source Project: lucene-solr   Source File: TermsIncludingScoreQuery.java    License: Apache License 2.0
TermsIncludingScoreQuery(ScoreMode scoreMode, String toField, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores,
                         String fromField, Query fromQuery, Object indexReaderContextId) {
  this.scoreMode = scoreMode;
  this.toField = toField;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  this.scores = scores;
  this.ords = terms.sort();

  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.topReaderContextId = indexReaderContextId;

  this.ramBytesUsed = BASE_RAM_BYTES +
      RamUsageEstimator.sizeOfObject(fromField) +
      RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED) +
      RamUsageEstimator.sizeOfObject(ords) +
      RamUsageEstimator.sizeOfObject(scores) +
      RamUsageEstimator.sizeOfObject(terms) +
      RamUsageEstimator.sizeOfObject(toField);
}
 
Example 5
@Override
public Query build() {
  Map<String, BytesRef[]> collectedTerms = new HashMap<>();
  for (Map.Entry<String, BytesRefHash> entry : terms.entrySet()) {
    collectedTerms.put(entry.getKey(), convertHash(entry.getValue()));
  }
  BooleanQuery.Builder parent = new BooleanQuery.Builder();
  for (int i = 0; i < passes; i++) {
    BooleanQuery.Builder child = new BooleanQuery.Builder();
    for (String field : terms.keySet()) {
      child.add(new TermInSetQuery(field(field, i), collectedTerms.get(field)), BooleanClause.Occur.SHOULD);
    }
    parent.add(child.build(), BooleanClause.Occur.MUST);
  }
  return parent.build();
}
 
Example 6
@Override
public Document buildQueryDocument(QueryTree querytree) {

  Document doc = new Document();

  for (int i = 0; i < passes; i++) {
    Map<String, BytesRefHash> fieldTerms = collectTerms(querytree);
    for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) {
      // we add the index terms once under a suffixed field for the multipass query, and
      // once under the plain field name for the TermsEnumTokenFilter
      doc.add(new Field(field(entry.getKey(), i),
          new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
      doc.add(new Field(entry.getKey(),
          new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
    }
    querytree.advancePhase(minWeight);
  }

  return doc;
}
 
Example 7
private void readFromBytes(BytesRef bytes) {
  // Read pruned flag
  this.setIsPruned(bytes.bytes[bytes.offset++] == 1);

  // Read size of the set
  int size = Bytes.readInt(bytes);

  // Read terms
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  BytesRef reusable = new BytesRef();
  for (int i = 0; i < size; i++) {
    Bytes.readBytesRef(bytes, reusable);
    set.add(reusable);
  }
}
 
Example 8
Source Project: lucene-solr   Source File: MemoryIndex.java    License: Apache License 2.0
private Info(FieldInfo fieldInfo, ByteBlockPool byteBlockPool) {
  this.fieldInfo = fieldInfo;
  this.sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
  this.terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
  this.binaryProducer = new BinaryDocValuesProducer();
  this.numericProducer = new NumericDocValuesProducer();
}
 
Example 9
Source Project: lucene-solr   Source File: SeekingTermSetTermsEnum.java    License: Apache License 2.0
/**
 * Constructor
 */
public SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) {
  super(tenum);
  this.terms = terms;
  this.ords = ords;
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  seekTerm = terms.get(ords[upto], spare);
}
 
Example 10
Source Project: lucene-solr   Source File: QueryIndex.java    License: Apache License 2.0
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash terms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t != null) {
        TermsEnum te = t.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
          terms.add(term);
        }
      }
    }
  }
}
 
Example 11
Source Project: lucene-solr   Source File: QueryIndex.java    License: Apache License 2.0
@Override
public boolean test(String field, BytesRef term) {
  BytesRefHash bytes = termsHash.get(field);
  if (bytes == null) {
    return false;
  }
  return bytes.find(term) != -1;
}
 
Example 12
Source Project: lucene-solr   Source File: TermFilteredPresearcher.java    License: Apache License 2.0
/**
 * Builds a {@link Document} from the terms extracted from a query
 */
protected Document buildQueryDocument(QueryTree querytree) {
  Map<String, BytesRefHash> fieldTerms = collectTerms(querytree);
  Document doc = new Document();
  for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) {
    doc.add(new Field(entry.getKey(),
        new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE));
  }
  return doc;
}
 
Example 13
Source Project: lucene-solr   Source File: TermFilteredPresearcher.java    License: Apache License 2.0
/**
 * Collects terms from a {@link QueryTree} and maps them per-field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree querytree) {
  Map<String, BytesRefHash> fieldTerms = new HashMap<>();
  querytree.collectTerms((field, term) -> {
    BytesRefHash tt = fieldTerms.computeIfAbsent(field, f -> new BytesRefHash());
    tt.add(term);
  });
  return fieldTerms;
}
 
Example 14
private static BytesRef[] convertHash(BytesRefHash hash) {
  BytesRef[] terms = new BytesRef[hash.size()];
  for (int i = 0; i < terms.length; i++) {
    BytesRef t = new BytesRef();
    terms[i] = hash.get(i, t);
  }
  return terms;
}
 
Example 15
Source Project: lucene-solr   Source File: SortedSetDocValuesWriter.java    License: Apache License 2.0
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 16
Source Project: lucene-solr   Source File: SortedSetDocValuesWriter.java    License: Apache License 2.0
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
  this.currentDoc = new int[maxCount];
  this.sortedValues = sortedValues;
  this.ordMap = ordMap;
  this.hash = hash;
  this.ordsIter = ords.iterator();
  this.ordCountsIter = ordCounts.iterator();
  this.docsWithField = docsWithField;
}
 
Example 17
Source Project: lucene-solr   Source File: SortedDocValuesWriter.java    License: Apache License 2.0
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 18
Source Project: lucene-solr   Source File: SortedDocValuesWriter.java    License: Apache License 2.0
public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
  this.hash = hash;
  this.valueCount = valueCount;
  this.sortedValues = sortedValues;
  this.iter = docToOrd.iterator();
  this.ordMap = ordMap;
  this.docsWithField = docsWithField;
}
 
Example 19
Source Project: lucene-solr   Source File: GraphEdgeCollector.java    License: Apache License 2.0
/**
 * Build an automaton to represent the frontier query
 */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe there is a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;
}
 
Example 20
Source Project: lucene-solr   Source File: GraphQuery.java    License: Apache License 2.0
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe there is a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;
}
 
Example 21
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }

  BytesRefHash input = ((BytesRefTermsSet) terms).set;
  BytesRef reusable = new BytesRef();
  for (int i = 0; i < input.size(); i++) {
    input.get(i, reusable);
    set.add(reusable);
  }
}
 
Example 22
@Override
public void readFrom(StreamInput in) throws IOException {
  this.setIsPruned(in.readBoolean());
  int size = in.readInt();

  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
 
Example 23
Source Project: lucene-solr   Source File: MemoryIndex.java    License: Apache License 2.0
private void storeDocValues(Info info, DocValuesType docValuesType, Object docValuesValue) {
  String fieldName = info.fieldInfo.name;
  DocValuesType existingDocValuesType = info.fieldInfo.getDocValuesType();
  if (existingDocValuesType == DocValuesType.NONE) {
    // first time we add doc values for this field:
    info.fieldInfo = new FieldInfo(
        info.fieldInfo.name, info.fieldInfo.number, info.fieldInfo.hasVectors(), info.fieldInfo.hasPayloads(),
        info.fieldInfo.hasPayloads(), info.fieldInfo.getIndexOptions(), docValuesType, -1, info.fieldInfo.attributes(),
        info.fieldInfo.getPointDimensionCount(), info.fieldInfo.getPointIndexDimensionCount(), info.fieldInfo.getPointNumBytes(),
        info.fieldInfo.isSoftDeletesField()
    );
  } else if (existingDocValuesType != docValuesType) {
    throw new IllegalArgumentException("Can't add [" + docValuesType + "] doc values field [" + fieldName + "], because [" + existingDocValuesType + "] doc values field already exists");
  }
  switch (docValuesType) {
    case NUMERIC:
      if (info.numericProducer.dvLongValues != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.numericProducer.dvLongValues = new long[]{(long) docValuesValue};
      info.numericProducer.count++;
      break;
    case SORTED_NUMERIC:
      if (info.numericProducer.dvLongValues == null) {
        info.numericProducer.dvLongValues = new long[4];
      }
      info.numericProducer.dvLongValues = ArrayUtil.grow(info.numericProducer.dvLongValues, info.numericProducer.count + 1);
      info.numericProducer.dvLongValues[info.numericProducer.count++] = (long) docValuesValue;
      break;
    case BINARY:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED_SET:
      if (info.binaryProducer.dvBytesValuesSet == null) {
        info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      }
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    default:
      throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
  }
}
 
Example 24
Source Project: lucene-solr   Source File: TermsCollector.java    License: Apache License 2.0
public BytesRefHash getCollectorTerms() {
  return collectorTerms;
}
 
Example 25
Source Project: lucene-solr   Source File: TermsWithScoreCollector.java    License: Apache License 2.0
@Override
public BytesRefHash getCollectedTerms() {
  return collectedTerms;
}
 
Example 26
Source Project: lucene-solr   Source File: SynonymMap.java    License: Apache License 2.0
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext) {
  this.fst = fst;
  this.words = words;
  this.maxHorizontalContext = maxHorizontalContext;
}
 
Example 27
Source Project: lucene-solr   Source File: TermFilteredPresearcher.java    License: Apache License 2.0
BytesRefHashIterator(BytesRefHash terms) {
  this.terms = terms;
  this.sortedTerms = terms.sort();
}
 
Example 28
@Override
public void addTerm(String field, BytesRef term) {
  BytesRefHash t = terms.computeIfAbsent(field, f -> new BytesRefHash());
  t.add(term);
}
 
Example 29
Source Project: lucene-solr   Source File: GraphEdgeCollector.java    License: Apache License 2.0
GraphTermsCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
  super(collectField, skipSet, leafNodes);
  this.collectorTerms = new BytesRefHash();
}
 
Example 30
public BytesRefTermsSet(final CircuitBreaker breaker) {
  super(breaker);
  this.bytesUsed = Counter.newCounter();
  this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  this.set = new BytesRefHash(pool);
}