Java Code Examples for org.apache.lucene.util.BytesRefIterator

The following examples show how to use org.apache.lucene.util.BytesRefIterator. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: OrdinalsBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Iterates every term in the given {@link TermsEnum}, assigning each term the
 * next ordinal and recording that ordinal for every document containing the
 * term. The caller must fully exhaust the returned {@link BytesRefIterator};
 * the first value returned is associated with the ordinal <tt>1</tt>, the
 * second with <tt>2</tt>, and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values, wrap it
 * with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision; otherwise
 * the returned {@link BytesRefIterator} will also yield partial precision
 * terms rather than only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Reused across calls so postings enums are recycled rather than reallocated.
        private PostingsEnum postings = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef term = termsEnum.next();
            if (term == null) {
                return null; // enum exhausted
            }
            postings = termsEnum.postings(postings, PostingsEnum.NONE);
            nextOrdinal();
            for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
                addDoc(doc);
            }
            return term;
        }
    };
}
 
Example 2
Source Project: lucene-solr   Source File: FSTCompletionBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the final automaton from the sorted entries, skipping consecutive
 * duplicates (the sorter's output is ordered, so equal entries are adjacent).
 *
 * @return the compiled FST, or {@code null} if the sorter held no entries
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object noOutput = outputs.getNoOutput();
  final FSTCompiler<Object> compiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
      .shareMaxTailLength(shareMaxTailLength).build();

  // Tracks the previously-added entry so duplicates can be skipped.
  final BytesRefBuilder previous = new BytesRefBuilder();
  final IntsRefBuilder intsScratch = new IntsRefBuilder();
  int seen = 0;
  final BytesRefIterator entries = sorter.iterator();
  for (BytesRef current = entries.next(); current != null; current = entries.next()) {
    seen++;
    if (previous.get().compareTo(current) != 0) {
      compiler.add(Util.toIntsRef(current, intsScratch), noOutput);
      previous.copyBytes(current);
    }
  }

  return seen == 0 ? null : compiler.compile();
}
 
Example 3
Source Project: lucene-solr   Source File: ExternalRefSorter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns an iterator over the sorted entries. On first call this closes the
 * writer, runs the offline sort, and deletes the unsorted input file; later
 * calls reuse the already-sorted file.
 *
 * @throws IOException if sorting or opening the sorted file fails
 */
@Override
public BytesRefIterator iterator() throws IOException {
  if (sortedFileName == null) {
    closeWriter();
    
    boolean success = false;
    try {
      sortedFileName = sorter.sort(input.getName());
      success = true;
    } finally {
      // The unsorted input is deleted either way; on failure we must not let a
      // delete exception mask the original sort exception.
      if (success) {
        sorter.getDirectory().deleteFile(input.getName());
      } else {
        IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(), input.getName());
      }
    }
    
    // Mark the unsorted input as gone so it is never sorted twice.
    input = null;
  }
  
  return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorter.getDirectory().openChecksumInput(sortedFileName, IOContext.READONCE), sortedFileName));
}
 
Example 4
Source Project: lucene-solr   Source File: BytesRefSortersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the {@link BytesRefSorter} contract: two iterators over the same
 * sorter yield identical sequences, and adding after iteration has begun
 * throws {@link IllegalStateException}.
 */
private void check(BytesRefSorter sorter) throws Exception {
  // Feed 100 random byte sequences (length 0..255) into the sorter.
  for (int round = 0; round < 100; round++) {
    byte[] data = new byte[random().nextInt(256)];
    random().nextBytes(data);
    sorter.add(new BytesRef(data));
  }

  // Two independent iterators must stay aligned with each other.
  BytesRefIterator first = sorter.iterator();
  BytesRefIterator second = sorter.iterator();

  // Once iteration has begun, further adds must be rejected.
  expectThrows(IllegalStateException.class, () -> {
    sorter.add(new BytesRef(new byte[1]));
  });

  BytesRef a;
  BytesRef b;
  do {
    a = first.next();
    b = second.next();
    assertEquals(a, b);
  } while (a != null);
}
 
Example 5
Source Project: lucene-solr   Source File: TestTermsEnumTokenFilter.java    License: Apache License 2.0 6 votes vote down vote up
// Streams ~1000 terms through the filter chain; the tight consume loop below
// would throw if TermsEnumTokenStream failed to call clearAttributes() and the
// position-increment attribute overflowed.
public void testPosIncAttributeOverflow() throws IOException {

    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");

    BytesRefIterator terms = new BytesRefIterator() {

      long count = 1000;

      @Override
      public BytesRef next() throws IOException {
        // NOTE: count is decremented by BOTH checks once it drops to <= 100,
        // so the iterator emits "foo" then "bar" runs before ending with null.
        if (count-- > 100)
          return foo;
        if (count-- > 0)
          return bar;
        return null;
      }
    };

    try (TokenStream ts = new LeapfrogTokenFilter(new TermsEnumTokenStream(terms))) {
      while (ts.incrementToken()) {
        // This tight loop will throw an exception if clearAttributes() is not called
        // by TermsEnumTokenStream.  See issue #46
      }
    }
  }
 
Example 6
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted
 * from a {@link BytesRefIterator}.
 *
 * Only terms that have at least one match in the given document will be
 * included; returns {@code null} if the field is absent or no term matches.
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  final Terms fieldTerms = context.reader().terms(field);
  if (fieldTerms == null) {
    return null;
  }
  final TermsEnum termsEnum = fieldTerms.iterator();
  PostingsEnum reusable = null;
  BytesRef candidate;
  while ((candidate = terms.next()) != null) {
    if (termsEnum.seekExact(candidate) == false) {
      continue; // term not present in this segment's field
    }
    final PostingsEnum postings = termsEnum.postings(reusable, PostingsEnum.OFFSETS);
    if (postings.advance(doc) == doc) {
      // Found a matching term; the returned iterator lazily consumes the rest.
      return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, postings), terms, termsEnum, doc, query);
    }
    reusable = postings; // recycle on the next seek
  }
  return null;
}
 
Example 7
Source Project: crate   Source File: Netty4Utils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns the given BytesReference into a ByteBuf. Note: the returned ByteBuf will reference the internal
 * pages of the BytesReference. Don't free the bytes of reference before the ByteBuf goes out of scope.
 */
public static ByteBuf toByteBuf(final BytesReference reference) {
    if (reference.length() == 0) {
        return Unpooled.EMPTY_BUFFER;
    }
    if (reference instanceof ByteBufBytesReference) {
        return ((ByteBufBytesReference) reference).toByteBuf();
    }
    // usually we have one, two, or three components from the header, the message, and a buffer
    final List<ByteBuf> parts = new ArrayList<>(3);
    final BytesRefIterator pages = reference.iterator();
    try {
        for (BytesRef page = pages.next(); page != null; page = pages.next()) {
            parts.add(Unpooled.wrappedBuffer(page.bytes, page.offset, page.length));
        }
    } catch (IOException ex) {
        throw new AssertionError("no IO happens here", ex);
    }
    final CompositeByteBuf composite = Unpooled.compositeBuffer(parts.size());
    composite.addComponents(true, parts);
    return composite;
}
 
Example 8
Source Project: crate   Source File: BytesReference.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes (and caches) a hash over every byte of this reference, iterating
 * the internal pages without copying them.
 */
@Override
public int hashCode() {
    if (hash != null) {
        return hash.intValue(); // already computed
    }
    int acc = 1;
    final BytesRefIterator pages = iterator();
    try {
        BytesRef page;
        while ((page = pages.next()) != null) {
            final int end = page.offset + page.length;
            for (int pos = page.offset; pos < end; pos++) {
                acc = 31 * acc + page.bytes[pos];
            }
        }
    } catch (IOException ex) {
        throw new AssertionError("wont happen", ex);
    }
    return hash = acc;
}
 
Example 9
Source Project: lucene-solr   Source File: TestHighFrequencyDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/** A dictionary built over an index with no terms must yield an empty iterator. */
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.commit();
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(reader, "bogus", 0.1f);
  BytesRefIterator entries = dictionary.getEntryIterator();
  assertNull(entries.next());
  dir.close();
}
 
Example 10
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 5 votes vote down vote up
/** Adapts a list of terms to a {@link BytesRefIterator}; yields null once exhausted. */
private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
  return new BytesRefIterator() {
    private int cursor = 0;

    @Override
    public BytesRef next() {
      return cursor < terms.size() ? terms.get(cursor++).bytes() : null;
    }
  };
}
 
Example 11
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * @param first the iterator over the first term already known to match {@code doc}
 * @param terms the remaining terms still to be checked
 * @param te    the terms enum used to seek the remaining terms
 * @param doc   the target document id
 * @param query the originating query, reported by the match iterators
 */
TermsEnumDisjunctionMatchesIterator(MatchesIterator first, BytesRefIterator terms, TermsEnum te, int doc, Query query) {
  this.first = first;
  this.terms = terms;
  this.te = te;
  this.doc = doc;
  this.query = query;
}
 
Example 12
Source Project: crate   Source File: BytesReferenceStreamInput.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stream over the pages produced by {@code iterator}, eagerly
 * positioning on the first slice.
 *
 * @param iterator source of the underlying byte pages
 * @param length   total number of readable bytes across all pages
 * @throws IOException if fetching the first slice fails
 */
BytesReferenceStreamInput(BytesRefIterator iterator, final int length) throws IOException {
    this.iterator = iterator;
    this.slice = iterator.next();
    this.length = length;
    this.offset = 0;
    this.sliceIndex = 0;
}
 
Example 13
Source Project: crate   Source File: BytesReference.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the bytes directly to the output stream, page by page, without
 * materializing the whole reference.
 */
public void writeTo(OutputStream os) throws IOException {
    final BytesRefIterator pages = iterator();
    for (BytesRef page = pages.next(); page != null; page = pages.next()) {
        os.write(page.bytes, page.offset, page.length);
    }
}
 
Example 14
Source Project: crate   Source File: BytesReference.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a BytesRefIterator for this BytesReference. This method allows
 * access to the internal pages of this reference without copying them. Use with care!
 * @see BytesRefIterator
 */
public BytesRefIterator iterator() {
    return new BytesRefIterator() {
        // Single-element iteration: the whole reference as one BytesRef,
        // or nothing at all when the reference is empty.
        BytesRef pending = length() == 0 ? null : toBytesRef();

        @Override
        public BytesRef next() throws IOException {
            final BytesRef result = pending;
            pending = null; // only return it once...
            return result;
        }
    };
}
 
Example 15
Source Project: crate   Source File: PagedBytesReference.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a page-aligned iterator over this reference's bytes. Alignment
 * guarantees {@code byteArray.get} never has to materialize (copy) a page.
 */
@Override
public final BytesRefIterator iterator() {
    final int offset = this.offset;
    final int length = this.length;
    // this iteration is page aligned to ensure we do NOT materialize the pages from the ByteArray
    // we calculate the initial fragment size here to ensure that if this reference is a slice we are still page aligned
    // across the entire iteration. The first page is smaller if our offset != 0 then we start in the middle of the page
    // otherwise we iterate full pages until we reach the last chunk which also might end within a page.
    final int initialFragmentSize = offset != 0 ? PAGE_SIZE - (offset % PAGE_SIZE) : PAGE_SIZE;
    return new BytesRefIterator() {
        int position = 0;
        // size of the NEXT fragment to hand out; 0 signals end of iteration
        int nextFragmentSize = Math.min(length, initialFragmentSize);
        // this BytesRef is reused across the iteration on purpose - BytesRefIterator interface was designed for this
        final BytesRef slice = new BytesRef();

        @Override
        public BytesRef next() throws IOException {
            if (nextFragmentSize != 0) {
                final boolean materialized = byteArray.get(offset + position, nextFragmentSize, slice);
                assert materialized == false : "iteration should be page aligned but array got materialized";
                position += nextFragmentSize;
                final int remaining = length - position;
                // after the (possibly short) first fragment, subsequent reads are full pages
                nextFragmentSize = Math.min(remaining, PAGE_SIZE);
                return slice;
            } else {
                assert nextFragmentSize == 0 : "fragmentSize expected [0] but was: [" + nextFragmentSize + "]";
                return null; // we are done with this iteration
            }
        }
    };
}
 
Example 16
Source Project: crate   Source File: CompositeBytesReference.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Materializes this composite reference into a single contiguous
 * {@link BytesRef} by appending every page into one pre-grown builder.
 */
@Override
public BytesRef toBytesRef() {
    final BytesRefBuilder merged = new BytesRefBuilder();
    merged.grow(length()); // size once up front, avoiding incremental growth
    final BytesRefIterator pieces = iterator();
    try {
        BytesRef piece;
        while ((piece = pieces.next()) != null) {
            merged.append(piece);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences
    }
    return merged.toBytesRef();
}
 
Example 17
/** @param termsEnum the underlying iterator of encoded terms to wrap */
protected BaseGeoPointTermsEnum(BytesRefIterator termsEnum) {
    this.termsEnum = termsEnum;
}
 
Example 18
/**
 * @param termsEnum    the underlying iterator of encoded terms
 * @param termEncoding the encoding used to decode geo points from the terms
 */
protected GeoPointTermsEnum(BytesRefIterator termsEnum, GeoPointField.TermEncoding termEncoding) {
    super(termsEnum);
    this.termEncoding = termEncoding;
}
 
Example 19
/**
 * Wraps the iterator and pre-allocates the reusable scratch objects used
 * while decoding legacy-encoded terms.
 */
protected GeoPointTermsEnumLegacy(BytesRefIterator termsEnum) {
    super(termsEnum);
    next = new GeoPoint();
    spare = new CharsRefBuilder();
}
 
Example 20
Source Project: lucene-solr   Source File: BytesRefIteratorTokenStream.java    License: Apache License 2.0 4 votes vote down vote up
/** Returns the iterator currently backing this token stream (may be null if unset). */
public BytesRefIterator getBytesRefIterator() {
  return bytesIter;
}
 
Example 21
Source Project: lucene-solr   Source File: BytesRefIteratorTokenStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Sets the iterator backing this token stream.
 *
 * @return this stream, for call chaining
 */
public BytesRefIteratorTokenStream setBytesRefIterator(BytesRefIterator iter) {
  this.bytesIter = iter;
  return this;
}
 
Example 22
Source Project: lucene-solr   Source File: InMemorySorter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a sorted iterator over the buffered entries. Marks the sorter
 * closed, so further {@code add} calls are rejected.
 */
@Override
public BytesRefIterator iterator() {
  closed = true;
  return buffer.iterator(comparator);
}
 
Example 23
Source Project: lucene-solr   Source File: SpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Indexes the data from the given {@link Dictionary}.
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
  // Serialize all modifications of the spell index.
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.spellIndex;
    final IndexWriter writer = new IndexWriter(dir, config);
    IndexSearcher indexSearcher = obtainSearcher();
    // One TermsEnum per segment of the existing spell index, used below to
    // skip words that are already indexed.
    final List<TermsEnum> termsEnums = new ArrayList<>();

    // NOTE(review): reads the `searcher` field here while the local above is
    // `indexSearcher` — presumably the same searcher; confirm against the class.
    final IndexReader reader = searcher.getIndexReader();
    if (reader.maxDoc() > 0) {
      for (final LeafReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(F_WORD);
        if (terms != null)
          termsEnums.add(terms.iterator());
      }
    }
    
    boolean isEmpty = termsEnums.isEmpty();

    try { 
      BytesRefIterator iter = dict.getEntryIterator();
      BytesRef currentTerm;
      
      terms: while ((currentTerm = iter.next()) != null) {

        String word = currentTerm.utf8ToString();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }

        // Skip words already present in any segment of the spell index.
        if (!isEmpty) {
          for (TermsEnum te : termsEnums) {
            if (te.seekExact(currentTerm)) {
              continue terms;
            }
          }
        }

        // ok index the word
        Document doc = createDocument(word, getMin(len), getMax(len));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }
    if (fullMerge) {
      writer.forceMerge(1);
    }
    // close writer
    writer.close();
    // TODO: this isn't that great, maybe in the future SpellChecker should take
    // IWC in its ctor / keep its writer open?
    
    // also re-open the spell index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}
 
Example 24
Source Project: lucene-solr   Source File: BytesRefSorter.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Sorts the entries added in {@link #add(BytesRef)} and returns
 * an iterator over all sorted entries.
 *
 * @return an iterator yielding the entries in sorted order
 * @throws IOException If an I/O exception occurs.
 */
BytesRefIterator iterator() throws IOException;
 
Example 25
Source Project: lucene-solr   Source File: InputIterator.java    License: Apache License 2.0 2 votes vote down vote up
/** 
 * Creates a new wrapper, wrapping the specified iterator and 
 * specifying a weight value of <code>1</code> for all terms 
 * and nullifies associated payloads.
 *
 * @param wrapped the plain iterator to adapt to the InputIterator contract
 */
public InputIteratorWrapper(BytesRefIterator wrapped) {
  this.wrapped = wrapped;
}
 
Example 26
Source Project: lucene-solr   Source File: TermsEnumTokenStream.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Create a new TermsEnumTokenStream using a TermsEnum
 *
 * @param termsEnum the TermsEnum to convert; each term becomes one token
 */
public TermsEnumTokenStream(BytesRefIterator termsEnum) {
  this.termsEnum = termsEnum;
}
 
Example 27
Source Project: lucene-solr   Source File: MatchesUtils.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Create a MatchesIterator that is a disjunction over a list of terms extracted from a {@link BytesRefIterator}.
 *
 * Only terms that have at least one match in the given document will be included;
 * returns {@code null} when no term matches. Delegates to
 * {@code DisjunctionMatchesIterator.fromTermsEnum}.
 */
public static MatchesIterator disjunction(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  return DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, field, terms);
}