Java Code Examples for org.apache.lucene.store.ByteArrayDataInput#reset()

The following examples show how to use org.apache.lucene.store.ByteArrayDataInput#reset() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0

6 votes

/** decodes the contexts at the current position */
protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); //skip to context set size
  short ctxSetSize = tmpInput.readShort();
  scratch.length -= 2;
  final Set<BytesRef> contextSet = new HashSet<>();
  for (short i = 0; i < ctxSetSize; i++) {
    tmpInput.setPosition(scratch.offset + scratch.length - 2);
    short curContextLength = tmpInput.readShort();
    scratch.length -= 2;
    tmpInput.setPosition(scratch.offset + scratch.length - curContextLength);
    BytesRef contextSpare = new BytesRef(curContextLength);
    tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
    contextSpare.length = curContextLength;
    contextSet.add(contextSpare);
    scratch.length -= curContextLength;
  }
  return contextSet;
}

Example 2

Source File: WFSTCompletionLookup.java From lucene-solr with Apache License 2.0

5 votes

@Override
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  scratch.length -= 4; // int
  // skip suggestion:
  tmpInput.reset(scratch.bytes, scratch.offset+scratch.length, 4);
  return tmpInput.readInt();
}

Example 3

Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0

5 votes

/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 8); // suggestion
  scratch.length -= Long.BYTES; // long
  return tmpInput.readLong();
}

Example 4

Source File: BlockTermsReader.java From lucene-solr with Apache License 2.0

4 votes

private boolean nextBlock() throws IOException {

        // TODO: we still lazy-decode the byte[] for each
        // term (the suffix), but, if we decoded
        // all N terms up front then seeking could do a fast
        // bsearch w/in the block...

        //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
        state.blockFilePointer = in.getFilePointer();
        blockTermCount = in.readVInt();
        //System.out.println("  blockTermCount=" + blockTermCount);
        if (blockTermCount == 0) {
          return false;
        }
        termBlockPrefix = in.readVInt();

        // term suffixes:
        int len = in.readVInt();
        if (termSuffixes.length < len) {
          termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
        }
        //System.out.println("  termSuffixes len=" + len);
        in.readBytes(termSuffixes, 0, len);
        termSuffixesReader.reset(termSuffixes, 0, len);

        // docFreq, totalTermFreq
        len = in.readVInt();
        if (docFreqBytes.length < len) {
          docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        //System.out.println("  freq bytes len=" + len);
        in.readBytes(docFreqBytes, 0, len);
        freqReader.reset(docFreqBytes, 0, len);

        // metadata
        len = in.readVInt();
        if (bytes == null) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
          bytesReader = new ByteArrayDataInput();
        } else if (bytes.length < len) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        in.readBytes(bytes, 0, len);
        bytesReader.reset(bytes, 0, len);

        metaDataUpto = 0;
        state.termBlockOrd = 0;

        indexIsCurrent = false;
        //System.out.println("  indexIsCurrent=" + indexIsCurrent);

        return true;
      }

Example 5

Source File: FSTCompletionLookup.java From lucene-solr with Apache License 2.0

4 votes

@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
  ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
  IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
  String tempSortedFileName = null;

  OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  OfflineSorter.ByteSequencesReader reader = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  count = 0;
  try {
    byte [] buffer = new byte [0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    int inputLineCount = 0;
    while ((spare = iterator.next()) != null) {
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }

      output.reset(buffer);
      output.writeInt(encodeWeight(iterator.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
      inputLineCount++;
    }
    CodecUtil.writeFooter(tempInput);
    writer.close();

    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    tempSortedFileName = sorter.sort(tempInput.getName());
    tempDir.deleteFile(tempInput.getName());

    FSTCompletionBuilder builder = new FSTCompletionBuilder(
        buckets, externalSorter, sharedTailLength);

    reader = new OfflineSorter.ByteSequencesReader(tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName);
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef tmp2 = new BytesRef();
    while (true) {
      BytesRef scratch = reader.next();
      if (scratch == null) {
        break;
      }
      input.reset(scratch.bytes, scratch.offset, scratch.length);
      int currentScore = input.readInt();

      int bucket;
      if (line > 0 && currentScore == previousScore) {
        bucket = previousBucket;
      } else {
        bucket = (int) (line * buckets / inputLineCount);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      tmp2.bytes = scratch.bytes;
      tmp2.offset = scratch.offset + input.getPosition();
      tmp2.length = scratch.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
      count++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion = new FSTCompletion(
        higherWeightsCompletion.getFST(), false, exactMatchFirst);
    
  } finally {
    IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
    IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
  }
}

Example 6

Source File: Test2BBinaryDocValues.java From lucene-solr with Apache License 2.0

4 votes

public void testVariableBinary() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setRAMBufferSizeMB(256.0)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setMergePolicy(newLogMergePolicy(false, 10))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
      .setCodec(TestUtil.getDefaultCodec()));

  Document doc = new Document();
  byte bytes[] = new byte[4];
  ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
  BytesRef data = new BytesRef(bytes);
  BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
  doc.add(dvField);
  
  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    encoder.reset(bytes);
    encoder.writeVInt(i % 65535); // 1, 2, or 3 bytes
    data.length = encoder.getPosition();
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }
  
  w.forceMerge(1);
  w.close();
  
  System.out.println("verifying...");
  System.out.flush();
  
  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  ByteArrayDataInput input = new ByteArrayDataInput();
  for (LeafReaderContext context : r.leaves()) {
    LeafReader reader = context.reader();
    BinaryDocValues dv = reader.getBinaryDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertEquals(i, dv.nextDoc());
      final BytesRef term = dv.binaryValue();
      input.reset(term.bytes, term.offset, term.length);
      assertEquals(expectedValue % 65535, input.readVInt());
      assertTrue(input.eof());
      expectedValue++;
    }
  }
  
  r.close();
  dir.close();
}