Java Code Examples for org.apache.lucene.util.BytesRefIterator#next()

The following examples show how to use org.apache.lucene.util.BytesRefIterator#next() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final FSTCompiler<Object> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
      .shareMaxTailLength(shareMaxTailLength).build();

  BytesRefBuilder scratch = new BytesRefBuilder();
  BytesRef entry;
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while((entry = iter.next()) != null) {
    count++;
    if (scratch.get().compareTo(entry) != 0) {
      fstCompiler.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }
  
  return count == 0 ? null : fstCompiler.compile();
}
 
Example 2
Source Project: lucene-solr   File: BytesRefSortersTest.java    License: Apache License 2.0 6 votes vote down vote up
private void check(BytesRefSorter sorter) throws Exception {
  for (int i = 0; i < 100; i++) {
    byte [] current = new byte [random().nextInt(256)];
    random().nextBytes(current);
    sorter.add(new BytesRef(current));
  }

  // Create two iterators and check that they're aligned with each other.
  BytesRefIterator i1 = sorter.iterator();
  BytesRefIterator i2 = sorter.iterator();
  
  // Verify sorter contract.
  expectThrows(IllegalStateException.class, () -> {
    sorter.add(new BytesRef(new byte [1]));
  });

  while (true) {
    BytesRef spare1 = i1.next();
    BytesRef spare2 = i2.next();
    assertEquals(spare1, spare2);
    if (spare1 == null) {
      break;
    }
  }
}
 
Example 3
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      }
      else {
        reuse = pe;
      }
    }
  }
  return null;
}
 
Example 4
Source Project: crate   File: Netty4Utils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Turns the given BytesReference into a ByteBuf. Note: the returned ByteBuf will reference the internal
 * pages of the BytesReference. Don't free the bytes of reference before the ByteBuf goes out of scope.
 */
public static ByteBuf toByteBuf(final BytesReference reference) {
    if (reference.length() == 0) {
        return Unpooled.EMPTY_BUFFER;
    }
    if (reference instanceof ByteBufBytesReference) {
        return ((ByteBufBytesReference) reference).toByteBuf();
    } else {
        final BytesRefIterator iterator = reference.iterator();
        // usually we have one, two, or three components from the header, the message, and a buffer
        final List<ByteBuf> buffers = new ArrayList<>(3);
        try {
            BytesRef slice;
            while ((slice = iterator.next()) != null) {
                buffers.add(Unpooled.wrappedBuffer(slice.bytes, slice.offset, slice.length));
            }
            final CompositeByteBuf composite = Unpooled.compositeBuffer(buffers.size());
            composite.addComponents(true, buffers);
            return composite;
        } catch (IOException ex) {
            throw new AssertionError("no IO happens here", ex);
        }
    }
}
 
Example 5
Source Project: crate   File: BytesReference.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public int hashCode() {
    if (hash == null) {
        final BytesRefIterator iterator = iterator();
        BytesRef ref;
        int result = 1;
        try {
            while ((ref = iterator.next()) != null) {
                for (int i = 0; i < ref.length; i++) {
                    result = 31 * result + ref.bytes[ref.offset + i];
                }
            }
        } catch (IOException ex) {
            throw new AssertionError("wont happen", ex);
        }
        return hash = result;
    } else {
        return hash.intValue();
    }
}
 
Example 6
Source Project: crate   File: BytesReferenceStreamInput.java    License: Apache License 2.0 5 votes vote down vote up
BytesReferenceStreamInput(BytesRefIterator iterator, final int length) throws IOException {
    this.iterator = iterator;
    this.slice = iterator.next();
    this.length = length;
    this.offset = 0;
    this.sliceIndex = 0;
}
 
Example 7
Source Project: crate   File: BytesReference.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the bytes directly to the output stream.
 */
public void writeTo(OutputStream os) throws IOException {
    final BytesRefIterator iterator = iterator();
    BytesRef ref;
    while ((ref = iterator.next()) != null) {
        os.write(ref.bytes, ref.offset, ref.length);
    }
}
 
Example 8
Source Project: crate   File: CompositeBytesReference.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public BytesRef toBytesRef() {
    BytesRefBuilder builder = new BytesRefBuilder();
    builder.grow(length());
    BytesRef spare;
    BytesRefIterator iterator = iterator();
    try {
        while ((spare = iterator.next()) != null) {
            builder.append(spare);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences
    }
    return builder.toBytesRef();
}
 
Example 9
Source Project: lucene-solr   File: SpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Indexes the data from the given {@link Dictionary}.
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.spellIndex;
    final IndexWriter writer = new IndexWriter(dir, config);
    IndexSearcher indexSearcher = obtainSearcher();
    final List<TermsEnum> termsEnums = new ArrayList<>();

    final IndexReader reader = searcher.getIndexReader();
    if (reader.maxDoc() > 0) {
      for (final LeafReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(F_WORD);
        if (terms != null)
          termsEnums.add(terms.iterator());
      }
    }
    
    boolean isEmpty = termsEnums.isEmpty();

    try { 
      BytesRefIterator iter = dict.getEntryIterator();
      BytesRef currentTerm;
      
      terms: while ((currentTerm = iter.next()) != null) {

        String word = currentTerm.utf8ToString();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }

        if (!isEmpty) {
          for (TermsEnum te : termsEnums) {
            if (te.seekExact(currentTerm)) {
              continue terms;
            }
          }
        }

        // ok index the word
        Document doc = createDocument(word, getMin(len), getMax(len));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }
    if (fullMerge) {
      writer.forceMerge(1);
    }
    // close writer
    writer.close();
    // TODO: this isn't that great, maybe in the future SpellChecker should take
    // IWC in its ctor / keep its writer open?
    
    // also re-open the spell index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}