org.apache.lucene.store.ByteArrayDataInput Java Examples

The following examples show how to use org.apache.lucene.store.ByteArrayDataInput, a DataInput implementation that reads sequentially from an in-memory byte[]. The examples are taken from open-source projects; the source file and license are noted above each one.
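
Before the individual examples, here is a minimal, self-contained round-trip sketch. It uses only API calls that appear in the examples below (ByteArrayDataOutput's array constructor, writeVInt/writeLong, getPosition, and ByteArrayDataInput's reset); the buffer size and values are arbitrary:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ByteArrayDataInputRoundTrip {
  public static void main(String[] args) throws Exception {
    // Write a variable-length int and a fixed-width long into a byte[].
    byte[] buffer = new byte[16];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeVInt(1234);
    out.writeLong(5678L);

    // Read the values back, starting at offset 0 of the array.
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    System.out.println(in.readVInt()); // 1234
    System.out.println(in.readLong()); // 5678

    // The same instance can be re-pointed at another slice via reset(),
    // which is how several examples below reuse one reader per BytesRef.
    in.reset(buffer, 0, out.getPosition());
    System.out.println(in.readVInt()); // 1234 again
  }
}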
Example #1
Source File: OrdsSegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  // Copy the bytes remaining past the current read position (the floor
  // data) into a private buffer so they can be read later.
  final int numBytes = source.length - (in.getPosition() - source.offset);
  assert numBytes > 0;
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  // Pre-read the header: the number of floor blocks that follow, the
  // first floor label, and the delta-coded term ord.
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
  // System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + " shift=" + (nextFloorTermOrd-termOrdOrig));

  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
 
Example #2
Source File: TestBlockPostingsFormat.java    From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene50SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
 
Example #3
Source File: TestFSTDirectAddressing.java    From lucene-solr with Apache License 2.0
private static void countFSTArcs(String fstFilePath) throws IOException {
  byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
  DataInput in = new ByteArrayDataInput(buf);
  FST<BytesRef> fst = new FST<>(in, in, ByteSequenceOutputs.getSingleton());
  BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
  int binarySearchArcCount = 0, directAddressingArcCount = 0, listArcCount = 0;
  while (fstEnum.next() != null) {
    if (fstEnum.arcs[fstEnum.upto].bytesPerArc() == 0) {
      listArcCount++;
    } else if (fstEnum.arcs[fstEnum.upto].nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) {
      directAddressingArcCount++;
    } else {
      binarySearchArcCount++;
    }
  }
  System.out.println("direct addressing arcs = " + directAddressingArcCount
      + ", binary search arcs = " + binarySearchArcCount
      + ", list arcs = " + listArcCount);
}
 
Example #4
Source File: TestLucene84PostingsFormat.java    From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene84SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene84ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
 
Example #5
Source File: CompressingTermVectorsReader.java    From lucene-solr with Apache License 2.0
void reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs, int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
    int[] payloadIndex, BytesRef payloads, ByteArrayDataInput in) {
  this.numTerms = numTerms;
  this.prefixLengths = prefixLengths;
  this.suffixLengths = suffixLengths;
  this.termFreqs = termFreqs;
  this.positionIndex = positionIndex;
  this.positions = positions;
  this.startOffsets = startOffsets;
  this.lengths = lengths;
  this.payloadIndex = payloadIndex;
  this.payloads = payloads;
  this.in = in;
  startPos = in.getPosition();
  reset();
}
 
Example #6
Source File: SortedInputIterator.java    From lucene-solr with Apache License 2.0
/** decodes the contexts at the current position */
protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); //skip to context set size
  short ctxSetSize = tmpInput.readShort();
  scratch.length -= 2;
  final Set<BytesRef> contextSet = new HashSet<>();
  for (short i = 0; i < ctxSetSize; i++) {
    tmpInput.setPosition(scratch.offset + scratch.length - 2);
    short curContextLength = tmpInput.readShort();
    scratch.length -= 2;
    tmpInput.setPosition(scratch.offset + scratch.length - curContextLength);
    BytesRef contextSpare = new BytesRef(curContextLength);
    tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
    contextSpare.length = curContextLength;
    contextSet.add(contextSpare);
    scratch.length -= curContextLength;
  }
  return contextSet;
}
 
Example #7
Source File: Stemmer.java    From lucene-solr with Apache License 2.0
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
 * @param dictionary Dictionary that will be used to create the stems
 */
public Stemmer(Dictionary dictionary) {
  this.dictionary = dictionary;
  this.affixReader = new ByteArrayDataInput(dictionary.affixData);
  for (int level = 0; level < 3; level++) {
    if (dictionary.prefixes != null) {
      prefixArcs[level] = new FST.Arc<>();
      prefixReaders[level] = dictionary.prefixes.getBytesReader();
    }
    if (dictionary.suffixes != null) {
      suffixArcs[level] = new FST.Arc<>();
      suffixReaders[level] = dictionary.suffixes.getBytesReader();
    }
  }
  formStep = dictionary.hasStemExceptions ? 2 : 1;
}
 
Example #8
Source File: SegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0
public SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
  this.ste = ste;
  this.ord = ord;
  this.state = ste.fr.parent.postingsReader.newTermState();
  this.state.totalTermFreq = -1;
  this.version = ste.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
 
Example #9
Source File: BlockTreeTermsReader.java    From incubator-retired-blur with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
 
Example #10
Source File: CompressingTermVectorsReader.java    From lucene-solr with Apache License 2.0
@Override
public TermsEnum iterator() throws IOException {
  TVTermsEnum termsEnum = new TVTermsEnum();
  termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
      payloadIndex, payloadBytes,
      new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
  return termsEnum;
}
 
Example #11
Source File: IntersectTermsEnumFrame.java    From lucene-solr with Apache License 2.0
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
  this.ite = ite;
  this.ord = ord;
  this.termState = ite.fr.parent.postingsReader.newTermState();
  this.termState.totalTermFreq = -1;
  this.version = ite.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
 
Example #12
Source File: SegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
 
Example #13
Source File: BaseSynonymParserTestCase.java    From lucene-solr with Apache License 2.0
/**
 * Helper method to validate synonym parsing.
 *
 * @param synonymMap  the generated synonym map after parsing
 * @param word        word (phrase) we are validating the synonyms for. Should be the value that comes out of the analyzer.
 *                    All spaces will be replaced by word separators.
 * @param includeOrig whether the synonyms should include the original term
 * @param synonyms    actual synonyms. All word separators are replaced with a single space.
 */
public static void assertEntryEquals(SynonymMap synonymMap, String word, boolean includeOrig, String[] synonyms)
    throws Exception {
  word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  BytesRef value = Util.get(synonymMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
  assertNotNull("No synonyms found for: " + word, value);

  ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
  final int code = bytesReader.readVInt();

  final boolean keepOrig = (code & 0x1) == 0;
  assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
      includeOrig, keepOrig);

  final int count = code >>> 1;
  assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count,
      synonyms.length, count);

  Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));

  BytesRef scratchBytes = new BytesRef();
  for (int i = 0; i < count; i++) {
    synonymMap.words.get(bytesReader.readVInt(), scratchBytes);
    String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
    assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
  }
}
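
The code header decoded above packs the keep-original flag into bit 0 (inverted) and the synonym count into the remaining bits of a single vInt. A minimal sketch of that packing, inferred purely from the reader side shown here (the canonical writer lives in SynonymMap.Builder and is not reproduced):

// Inferred from assertEntryEquals: bit 0 set means the original term
// was NOT kept; the remaining bits hold the synonym count.
static int encodeHeader(int synonymCount, boolean keepOrig) {
  return (synonymCount << 1) | (keepOrig ? 0 : 1);
}

static boolean decodeKeepOrig(int code) {
  return (code & 0x1) == 0;  // matches the test's check
}

static int decodeCount(int code) {
  return code >>> 1;
}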
 
Example #14
Source File: IDVersionSegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
 
Example #15
Source File: SortedInputIterator.java    From lucene-solr with Apache License 2.0
/** decodes the payload at the current position */
protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); // skip to payload size
  short payloadLength = tmpInput.readShort(); // read payload size
  assert payloadLength >= 0: payloadLength;
  tmpInput.setPosition(scratch.offset + scratch.length - 2 - payloadLength); // setPosition to start of payload
  BytesRef payloadScratch = new BytesRef(payloadLength); 
  tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
  payloadScratch.length = payloadLength;
  scratch.length -= 2; // payload length info (short)
  scratch.length -= payloadLength; // payload
  return payloadScratch;
}
 
Example #16
Source File: SortedInputIterator.java    From lucene-solr with Apache License 2.0
/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - Long.BYTES); // skip the suggestion; the weight is the trailing 8 bytes
  scratch.length -= Long.BYTES; // trim the weight off scratch
  return tmpInput.readLong();
}
 
Example #17
Source File: SortedInputIterator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public BytesRef next() throws IOException {
  boolean success = false;
  if (done) {
    return null;
  }
  try {
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef bytes = reader.next();
    if (bytes != null) {
      weight = decode(bytes, input);
      if (hasPayloads) {
        payload = decodePayload(bytes, input);
      }
      if (hasContexts) {
        contexts = decodeContexts(bytes, input);
      }
      success = true;
      return bytes;
    }
    close();
    success = done = true;
    return null;
  } finally {
    if (!success) {
      done = true;
      close();
    }
  }
}
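
Read together, the three decoders (decodeContexts in Example #6, decodePayload in Example #15, and decode in Example #16) imply that each entry stores the suggestion bytes first with all metadata appended after them, consumed back-to-front: weight last, then the payload plus its length, then the contexts plus their lengths and count. A hedged sketch of a matching encoder follows; the method name and shape are hypothetical (the real writer is SortedInputIterator's own encode method, which is not part of this listing):

import java.io.IOException;
import java.util.Set;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;

// Hypothetical encoder mirroring the decoders above.
// Layout: [suggestion][ctx bytes, ctx len (short)]* [ctx count (short)]
//         [payload][payload len (short)][weight (long)]
static void encode(ByteArrayDataOutput out, BytesRef suggestion,
                   Set<BytesRef> contexts, BytesRef payload, long weight)
    throws IOException {
  out.writeBytes(suggestion.bytes, suggestion.offset, suggestion.length);
  if (contexts != null) {
    for (BytesRef ctx : contexts) {
      out.writeBytes(ctx.bytes, ctx.offset, ctx.length);
      out.writeShort((short) ctx.length);    // read back by decodeContexts
    }
    out.writeShort((short) contexts.size()); // trailing context-set size
  }
  if (payload != null) {
    out.writeBytes(payload.bytes, payload.offset, payload.length);
    out.writeShort((short) payload.length);  // read back by decodePayload
  }
  out.writeLong(weight);                     // trailing 8 bytes, read first by decode
}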
 
Example #18
Source File: WFSTCompletionLookup.java    From lucene-solr with Apache License 2.0
@Override
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  scratch.length -= 4; // int
  // skip suggestion:
  tmpInput.reset(scratch.bytes, scratch.offset+scratch.length, 4);
  return tmpInput.readInt();
}
 
Example #19
Source File: BlockReader.java    From lucene-solr with Apache License 2.0
protected void initializeBlockReadLazily() throws IOException {
  if (blockStartFP == -1) {
    blockInput = blockInput.clone();
    blockHeaderReader = createBlockHeaderSerializer();
    blockLineReader = createBlockLineSerializer();
    blockReadBuffer = new ByteArrayDataInput();
    termStatesReadBuffer = new ByteArrayDataInput();
    termStateSerializer = createDeltaBaseTermStateSerializer();
    scratchBlockBytes = new BytesRef();
    scratchBlockLine = new BlockLine(new TermBytes(0, scratchBlockBytes), 0);
  }
}
 
Example #20
Source File: UniformSplitTermsReader.java    From lucene-solr with Apache License 2.0
protected Collection<FieldMetadata> readEncodedFieldsMetadata(int numFields, DataInput metadataInput, BlockDecoder blockDecoder,
                                                              FieldInfos fieldInfos, FieldMetadata.Serializer fieldMetadataReader,
                                                              int maxNumDocs) throws IOException {
  long encodedLength = metadataInput.readVLong();
  if (encodedLength < 0) {
    throw new CorruptIndexException("Illegal encoded length: " + encodedLength, metadataInput);
  }
  BytesRef decodedBytes = blockDecoder.decode(metadataInput, encodedLength);
  DataInput decodedMetadataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
  return readUnencodedFieldsMetadata(numFields, decodedMetadataInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}
 
Example #21
Source File: FSTTermsReader.java    From lucene-solr with Apache License 2.0
BaseTermsEnum() throws IOException {
  this.state = postingsReader.newTermState();
  this.bytesReader = new ByteArrayDataInput();
  // NOTE: metadata will only be initialized in child class
}
 
Example #22
Source File: AbstractTestCompressionMode.java    From lucene-solr with Apache License 2.0
static byte[] decompress(Decompressor decompressor, byte[] compressed, int originalLength) throws IOException {
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, 0, originalLength, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
 
Example #23
Source File: AbstractTestCompressionMode.java    From lucene-solr with Apache License 2.0
byte[] decompress(byte[] compressed, int originalLength, int offset, int length) throws IOException {
  Decompressor decompressor = mode.newDecompressor();
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
 
Example #24
Source File: FSTCompletionLookup.java    From lucene-solr with Apache License 2.0
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
  ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
  IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
  String tempSortedFileName = null;

  OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  OfflineSorter.ByteSequencesReader reader = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  count = 0;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    int inputLineCount = 0;
    while ((spare = iterator.next()) != null) {
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }

      output.reset(buffer);
      output.writeInt(encodeWeight(iterator.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
      inputLineCount++;
    }
    CodecUtil.writeFooter(tempInput);
    writer.close();

    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    tempSortedFileName = sorter.sort(tempInput.getName());
    tempDir.deleteFile(tempInput.getName());

    FSTCompletionBuilder builder = new FSTCompletionBuilder(
        buckets, externalSorter, sharedTailLength);

    reader = new OfflineSorter.ByteSequencesReader(tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName);
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef tmp2 = new BytesRef();
    while (true) {
      BytesRef scratch = reader.next();
      if (scratch == null) {
        break;
      }
      input.reset(scratch.bytes, scratch.offset, scratch.length);
      int currentScore = input.readInt();

      int bucket;
      if (line > 0 && currentScore == previousScore) {
        bucket = previousBucket;
      } else {
        bucket = (int) (line * buckets / inputLineCount);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      tmp2.bytes = scratch.bytes;
      tmp2.offset = scratch.offset + input.getPosition();
      tmp2.length = scratch.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
      count++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion = new FSTCompletion(
        higherWeightsCompletion.getFST(), false, exactMatchFirst);
    
  } finally {
    IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
    IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
  }
}
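
Example #24 writes each weight with encodeWeight before the suggestion bytes, and the decode override in Example #18 reads the 4-byte int back. A plausible sketch of that helper, assuming weights are simply range-checked and narrowed to an int (treat this as an illustration of the contract rather than the verbatim source):

// Assumption: weights are stored as plain 4-byte ints, so values
// outside int range cannot be encoded.
private static int encodeWeight(long value) {
  if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("cannot encode value: " + value);
  }
  return (int) value;
}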
 
Example #25
Source File: Test2BBinaryDocValues.java    From lucene-solr with Apache License 2.0
public void testVariableBinary() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setRAMBufferSizeMB(256.0)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setMergePolicy(newLogMergePolicy(false, 10))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
      .setCodec(TestUtil.getDefaultCodec()));

  Document doc = new Document();
  byte[] bytes = new byte[4];
  ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
  BytesRef data = new BytesRef(bytes);
  BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
  doc.add(dvField);
  
  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    encoder.reset(bytes);
    encoder.writeVInt(i % 65535); // 1, 2, or 3 bytes
    data.length = encoder.getPosition();
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }
  
  w.forceMerge(1);
  w.close();
  
  System.out.println("verifying...");
  System.out.flush();
  
  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  ByteArrayDataInput input = new ByteArrayDataInput();
  for (LeafReaderContext context : r.leaves()) {
    LeafReader reader = context.reader();
    BinaryDocValues dv = reader.getBinaryDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertEquals(i, dv.nextDoc());
      final BytesRef term = dv.binaryValue();
      input.reset(term.bytes, term.offset, term.length);
      assertEquals(expectedValue % 65535, input.readVInt());
      assertTrue(input.eof());
      expectedValue++;
    }
  }
  
  r.close();
  dir.close();
}
 
Example #26
Source File: LZ4TestCase.java    From lucene-solr with Apache License 2.0
private void doTest(byte[] data, int offset, int length, LZ4.HashTable hashTable) throws IOException {
  ByteBuffersDataOutput out = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out, hashTable);
  byte[] compressed = out.toArrayCopy();

  int off = 0;
  int decompressedOff = 0;
  for (;;) {
    final int token = compressed[off++] & 0xFF;
    int literalLen = token >>> 4;
    if (literalLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        literalLen += 0xFF;
        ++off;
      }
      literalLen += compressed[off++] & 0xFF;
    }
    // skip literals
    off += literalLen;
    decompressedOff += literalLen;

    // check that the stream ends with literals and that there are at least
    // 5 of them
    if (off == compressed.length) {
      assertEquals(length, decompressedOff);
      assertTrue("lastLiterals=" + literalLen + ", bytes=" + length,
          literalLen >= LZ4.LAST_LITERALS || literalLen == length);
      break;
    }

    final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
    // check that match dec is not 0
    assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);

    int matchLen = token & 0x0F;
    if (matchLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        matchLen += 0xFF;
        ++off;
      }
      matchLen += compressed[off++] & 0xFF;
    }
    matchLen += LZ4.MIN_MATCH;

    // if the match ends prematurely, the next sequence should not have
    // literals or this means we are wasting space
    if (decompressedOff + matchLen < length - LZ4.LAST_LITERALS) {
      final boolean moreCommonBytes = data[offset + decompressedOff + matchLen] == data[offset + decompressedOff - matchDec + matchLen];
      final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
      assertTrue(moreCommonBytes == false || nextSequenceHasLiterals == false);
    }      

    decompressedOff += matchLen;
  }
  assertEquals(length, decompressedOff);

  // Compress once again with the same hash table to test reuse
  ByteBuffersDataOutput out2 = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out2, hashTable);
  assertArrayEquals(compressed, out2.toArrayCopy());

  // Now restore and compare bytes
  byte[] restored = new byte[length + random().nextInt(10)];
  LZ4.decompress(new ByteArrayDataInput(compressed), length, restored);
  assertArrayEquals(ArrayUtil.copyOfSubArray(data, offset, offset+length), ArrayUtil.copyOfSubArray(restored, 0, length));
}
 
Example #27
Source File: BlockTermsReader.java    From lucene-solr with Apache License 2.0
private boolean nextBlock() throws IOException {

  // TODO: we still lazy-decode the byte[] for each
  // term (the suffix), but, if we decoded
  // all N terms up front then seeking could do a fast
  // bsearch w/in the block...

  //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
  state.blockFilePointer = in.getFilePointer();
  blockTermCount = in.readVInt();
  //System.out.println("  blockTermCount=" + blockTermCount);
  if (blockTermCount == 0) {
    return false;
  }
  termBlockPrefix = in.readVInt();

  // term suffixes:
  int len = in.readVInt();
  if (termSuffixes.length < len) {
    termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  termSuffixes len=" + len);
  in.readBytes(termSuffixes, 0, len);
  termSuffixesReader.reset(termSuffixes, 0, len);

  // docFreq, totalTermFreq
  len = in.readVInt();
  if (docFreqBytes.length < len) {
    docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  freq bytes len=" + len);
  in.readBytes(docFreqBytes, 0, len);
  freqReader.reset(docFreqBytes, 0, len);

  // metadata
  len = in.readVInt();
  if (bytes == null) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
    bytesReader = new ByteArrayDataInput();
  } else if (bytes.length < len) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  in.readBytes(bytes, 0, len);
  bytesReader.reset(bytes, 0, len);

  metaDataUpto = 0;
  state.termBlockOrd = 0;

  indexIsCurrent = false;
  //System.out.println("  indexIsCurrent=" + indexIsCurrent);

  return true;
}
 
Example #28
Source File: Store.java    From crate with Apache License 2.0
public long getStoredChecksum() {
    return new ByteArrayDataInput(checksum).readLong();
}
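
getStoredChecksum assumes the checksum field holds exactly the 8 bytes of a long written with the matching DataOutput call. A minimal sketch of the producing side, with hypothetical names (checksumValue stands in for however the store obtains the checksum):

import org.apache.lucene.store.ByteArrayDataOutput;

// Hypothetical writer side: persist a long checksum into the 8-byte
// array that getStoredChecksum() later reads back.
long checksumValue = 0x1234_5678_9ABC_DEF0L;
byte[] checksum = new byte[Long.BYTES];
ByteArrayDataOutput out = new ByteArrayDataOutput(checksum);
out.writeLong(checksumValue);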