org.apache.lucene.store.IndexInput#seek

Source File: LZFCompressor.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Checks whether the bytes at the current position of {@code in} match
 * {@code LUCENE_HEADER}. Once the size check has passed, the file pointer is
 * moved back to where it was when the method was called.
 *
 * @param in the input to probe
 * @return {@code true} if the next bytes equal {@code LUCENE_HEADER};
 *         {@code false} on a mismatch or when fewer bytes than the header remain
 * @throws IOException if reading or seeking fails
 */
@Override
public boolean isCompressed(IndexInput in) throws IOException {
    final long startPosition = in.getFilePointer();
    // Some metadata precedes the first compressed header, so we match on our own specific header.
    final long remaining = in.length() - startPosition;
    if (remaining < LUCENE_HEADER.length) {
        // Not enough bytes left to hold the header; nothing was read, so no seek is needed.
        return false;
    }
    boolean headerMatches = true;
    for (int idx = 0; headerMatches && idx < LUCENE_HEADER.length; idx++) {
        // Stops at the first differing byte.
        headerMatches = in.readByte() == LUCENE_HEADER[idx];
    }
    // Rewind so the caller sees the input exactly where it left it.
    in.seek(startPosition);
    return headerMatches;
}

Source File: NRTSuggester.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Loads a {@link NRTSuggester} from {@link org.apache.lucene.store.IndexInput}, reading the
 * FST either through an {@link OffHeapFSTStore} or directly from {@code input}, as decided by
 * {@code shouldLoadFSTOffHeap(input, fstLoadMode)}.
 *
 * @param input the input positioned at the start of the serialized FST
 * @param fstLoadMode the requested load mode
 * @return the suggester built from the FST and the meta info that follows it
 * @throws IOException if reading from {@code input} fails
 */
public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
  final boolean offHeap = shouldLoadFSTOffHeap(input, fstLoadMode);
  final PairOutputs<Long, BytesRef> outputs =
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());

  final FST<Pair<Long, BytesRef>> fst;
  if (offHeap) {
    final OffHeapFSTStore store = new OffHeapFSTStore();
    // Read the FST through a clone positioned where the original input currently stands.
    final IndexInput fstInput = input.clone();
    fstInput.seek(input.getFilePointer());
    fst = new FST<>(fstInput, fstInput, outputs, store);
    // Move the original input to the clone's position plus store.size() before reading meta info.
    input.seek(fstInput.getFilePointer() + store.size());
  } else {
    fst = new FST<>(input, input, outputs);
  }

  /* read some meta info */
  final int maxAnalyzedPathsPerOutput = input.readVInt();
  /*
   * Label used to denote the end of an input in the FST and
   * the beginning of dedup bytes. The value is not used below, but it
   * must still be consumed to reach the payload separator.
   */
  input.readVInt();
  final int payloadSep = input.readVInt();
  return new NRTSuggester(fst, maxAnalyzedPathsPerOutput, payloadSep);
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Retrieves the full index header from the provided {@link IndexInput}.
 *
 * <p>The header is first walked field by field to learn its total length, then the input is
 * rewound to offset 0 and the whole header is read in one call.
 *
 * @param in the input to read from; it is left positioned just after the header
 * @return the raw header bytes
 * @throws CorruptIndexException if the first int does not equal {@code CODEC_MAGIC},
 *         i.e. this file does not appear to be an index file
 * @throws IOException if reading or seeking fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }

  final String codec = in.readString();
  // Skip the int that follows the codec name (presumably the version), then the ID bytes.
  in.readInt();
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  // One unsigned byte holds the suffix length.
  final int suffixLength = in.readByte() & 0xFF;

  final int totalLength = headerLength(codec) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}

Source File: CodecInfo.java From mtas with Apache License 2.0

6 votes

/**
 * Instantiates a new index doc.
 *
 * @param ref
 *          the ref
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public IndexDoc(Long ref) throws IOException {
  try {
    IndexInput inIndexDoc = indexInputList.get("doc");
    if (ref != null) {
      inIndexDoc.seek(ref);
    }
    docId = inIndexDoc.readVInt(); // docId
    fpIndexObjectId = inIndexDoc.readVLong(); // ref indexObjectId
    fpIndexObjectPosition = inIndexDoc.readVLong(); // ref
                                                    // indexObjectPosition
    fpIndexObjectParent = inIndexDoc.readVLong(); // ref indexObjectParent
    smallestObjectFilepointer = inIndexDoc.readVLong(); // offset
    objectRefApproxQuotient = inIndexDoc.readVInt(); // slope
    objectRefApproxOffset = inIndexDoc.readZLong(); // offset
    storageFlags = inIndexDoc.readByte(); // flag
    size = inIndexDoc.readVInt(); // number of objects
    minPosition = inIndexDoc.readVInt(); // minimum position
    maxPosition = inIndexDoc.readVInt(); // maximum position
  } catch (Exception e) {
    throw new IOException(e);
  }
}

Source File: BaseCompoundFormatTestCase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that reading past the end of a file inside the compound directory throws
 * {@link IOException}, both for a single-byte read and for a block read.
 *
 * <p>All resources are opened in a try-with-resources statement so they are released even
 * when an assertion fails; they are closed in the order input, compound directory, directory.
 */
public void testReadPastEOF() throws IOException {
  try (Directory dir = newDirectory();
       Directory cr = createLargeCFS(dir);
       IndexInput is = cr.openInput("_123.f2", newIOContext(random()))) {
    // Consume exactly the last 10 bytes so the pointer sits at EOF.
    is.seek(is.length() - 10);
    final byte[] b = new byte[100];
    is.readBytes(b, 0, 10);

    // Single byte read past end of file
    expectThrows(IOException.class, () -> {
      is.readByte();
    });

    is.seek(is.length() - 10);

    // Block read past end of file: only 10 bytes remain, 50 are requested
    expectThrows(IOException.class, () -> {
      is.readBytes(b, 0, 50);
    });
  }
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Retrieves the full footer from the provided {@link IndexInput}.
 *
 * <p>The footer is validated first, then the input is repositioned to the start of the footer
 * and its bytes are read out.
 *
 * @param in the input to read from
 * @return the raw footer bytes, {@code footerLength()} long
 * @throws CorruptIndexException if the file is shorter than a footer; {@code validateFooter}
 *         is also invoked and may reject the footer
 * @throws IOException if reading or seeking fails
 */
public static byte[] readFooter(IndexInput in) throws IOException {
  final long fileLength = in.length();
  final int footerSize = footerLength();
  if (fileLength < footerSize) {
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLength + " but footerLength==" + footerSize, in);
  }

  final long footerStart = fileLength - footerSize;
  in.seek(footerStart);
  validateFooter(in);

  // validateFooter advanced the input; go back to the footer start to copy its bytes.
  in.seek(footerStart);
  final byte[] footer = new byte[footerSize];
  in.readBytes(footer, 0, footer.length);
  return footer;
}

Source File: BlockPackedReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Sole constructor. Reads the per-block headers (and, depending on {@code direct}, the block
 * data) for {@code numBlocks(valueCount, blockSize)} blocks from {@code in} and builds one
 * {@link PackedInts.Reader} per block.
 *
 * @param in the input positioned at the first block header
 * @param packedIntsVersion version passed through to the {@link PackedInts} factory methods
 * @param blockSize number of values per block; validated by {@code checkBlockSize}
 * @param valueCount total number of values across all blocks
 * @param direct if {@code true}, each block gets a reader from
 *        {@code PackedInts.getDirectReaderNoHeader} and the input is explicitly seeked past the
 *        block's bytes; otherwise {@code PackedInts.getReaderNoHeader} is used
 * @throws CorruptIndexException if a block header declares more than 64 bits per value
 * @throws IOException if reading or seeking fails
 */
public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
  this.valueCount = valueCount;
  blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
  // NOTE(review): using blockSize - 1 as a mask assumes blockSize is a power of two —
  // presumably enforced by checkBlockSize; confirm.
  blockMask = blockSize - 1;
  final int numBlocks = numBlocks(valueCount, blockSize);
  // Allocated lazily: stays null unless at least one block stores a min value.
  long[] minValues = null;
  subReaders = new PackedInts.Reader[numBlocks];
  long sumBPV = 0;
  for (int i = 0; i < numBlocks; ++i) {
    // Block header: one unsigned byte; its upper bits hold the bits-per-value.
    final int token = in.readByte() & 0xFF;
    final int bitsPerValue = token >>> BPV_SHIFT;
    sumBPV += bitsPerValue;
    if (bitsPerValue > 64) {
      throw new CorruptIndexException("Corrupted Block#" + i, in);
    }
    // When the MIN_VALUE_EQUALS_0 flag is clear, a zig-zag encoded min value follows the token.
    if ((token & MIN_VALUE_EQUALS_0) == 0) {
      if (minValues == null) {
        minValues = new long[numBlocks];
      }
      minValues[i] = zigZagDecode(1L + readVLong(in));
    }
    if (bitsPerValue == 0) {
      // Zero bits per value: no packed data is read for this block.
      subReaders[i] = new PackedInts.NullReader(blockSize);
    } else {
      // The last block may hold fewer than blockSize values.
      final int size = (int) Math.min(blockSize, valueCount - (long) i * blockSize);
      if (direct) {
        // Remember where the block data starts, then skip over it explicitly so the next
        // iteration reads the following block header.
        final long pointer = in.getFilePointer();
        subReaders[i] = PackedInts.getDirectReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
        in.seek(pointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
      } else {
        subReaders[i] = PackedInts.getReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
      }
    }
  }
  this.minValues = minValues;
  this.sumBPV = sumBPV;
}

Source File: DiskDocValuesProducer.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Builds a {@link LongNumericDocValues} view over the block-packed values described by
 * {@code entry}.
 *
 * @param entry supplies the offset, packed-ints version, block size and value count
 * @return doc values whose {@code get(id)} delegates to the underlying {@link BlockPackedReader}
 * @throws IOException if seeking or constructing the reader fails
 */
LongNumericDocValues newNumeric(NumericEntry entry) throws IOException {
  // Work on a private clone so the shared data input's position is left untouched.
  final IndexInput input = this.data.clone();
  input.seek(entry.offset);

  final boolean direct = true;
  final BlockPackedReader values =
      new BlockPackedReader(input, entry.packedIntsVersion, entry.blockSize, entry.count, direct);

  return new LongNumericDocValues() {
    @Override
    public long get(long id) {
      return values.get(id);
    }
  };
}

Source File: SimpleTextBKDReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the doc IDs of the block starting at {@code blockFP} into {@code docIDs}.
 *
 * <p>The first line at {@code blockFP} is parsed as the block count; the following
 * {@code count} lines are each parsed as one doc ID.
 *
 * @param in the input to read lines from
 * @param blockFP file pointer of the block
 * @param docIDs destination array; entries {@code [0, count)} are overwritten
 * @return the number of doc IDs read
 * @throws IOException if seeking or reading fails
 */
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);

  readLine(in, line);
  final int docCount = parseInt(line, BLOCK_COUNT);

  int slot = 0;
  while (slot < docCount) {
    readLine(in, line);
    docIDs[slot] = parseInt(line, BLOCK_DOC_ID);
    slot++;
  }
  return docCount;
}

Source File: BlockDirectoryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that reading a byte after seeking to {@code length} in file {@code name} fails with
 * an {@link IOException}.
 *
 * @param name the file to open in {@code directory}
 * @param directory the directory under test
 * @param length the position to seek to before the read
 * @throws IOException if opening, seeking or closing the input fails
 */
private void testEof(String name, Directory directory, long length) throws IOException {
  // try-with-resources closes the input on every path, including a failed assertion.
  try (IndexInput input = directory.openInput(name, new IOContext())) {
    input.seek(length);
    try {
      input.readByte();
      fail("should throw eof");
    } catch (IOException expected) {
      // Expected: the read at the seeked position must fail, so the exception is ignored.
    }
  }
}

Source File: DiskDocValuesProducer.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Returns binary doc values for fixed-length entries: the value for {@code id} is the
 * {@code bytes.maxLength} bytes found at {@code bytes.offset + id * bytes.maxLength}.
 *
 * @param field the field (not used by this method)
 * @param bytes describes the offset and fixed length of the stored values
 * @return doc values that read each value through an input obtained from a
 *         {@code ThreadValue} holder
 */
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
  final IndexInput baseInput = this.data.clone();

  return new LongBinaryDocValues() {

    // Each initial value handed out by the holder is a fresh clone of baseInput.
    private final ThreadValue<IndexInput> inputHolder = new ThreadValue<IndexInput>() {
      @Override
      protected IndexInput initialValue() {
        return baseInput.clone();
      }
    };

    @Override
    public void get(long id, BytesRef result) {
      final long start = bytes.offset + id * bytes.maxLength;
      try {
        final IndexInput input = inputHolder.get();
        input.seek(start);
        // NOTE: a single shared buffer would be possible, but various consumers
        // (e.g. FieldComparatorSource) assume "they" own the bytes after calling this,
        // so a fresh array is allocated on every call.
        final byte[] value = new byte[bytes.maxLength];
        input.readBytes(value, 0, value.length);
        result.bytes = value;
        result.offset = 0;
        result.length = value.length;
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }
    }
  };
}

Source File: CacheIndexInputTest.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Reads ints at {@code sampleSize} random positions from both inputs and asserts that the
 * values are equal. The two inputs must first report the same length.
 *
 * @param baseInput the reference input
 * @param testInput the input under test
 * @param random source of the sample positions
 * @param sampleSize number of positions to compare
 * @throws IOException if seeking or reading fails
 */
public static void readRandomDataInt(IndexInput baseInput, IndexInput testInput, Random random, int sampleSize)
    throws IOException {
  assertEquals(baseInput.length(), testInput.length());
  final int fileLength = (int) baseInput.length();
  // Random.nextInt requires a positive bound, so the file must be longer than 4 bytes.
  final int positionBound = fileLength - 4;
  for (int sample = 0; sample < sampleSize; sample++) {
    final int position = random.nextInt(positionBound);

    baseInput.seek(position);
    final int expected = baseInput.readInt();

    testInput.seek(position);
    final int actual = testInput.readInt();

    assertEquals("Read [" + sample + "] The position is [" + position + "]", expected, actual);
  }
}

Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes an index header followed by a string, then checks that seeking to
 * {@code CodecUtil.indexHeaderLength(codec, suffix)} lands exactly on that string.
 */
public void testSegmentHeaderLength() throws Exception {
  final String codec = "FooBar";
  final String suffix = "xyz";
  final String payload = "this is the data";

  // Write: header, then payload.
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput output = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(output, codec, 5, StringHelper.randomId(), suffix);
  output.writeString(payload);
  output.close();

  // Read: skip the computed header length and expect the payload right after it.
  final IndexInput input = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  final long headerLength = CodecUtil.indexHeaderLength(codec, suffix);
  input.seek(headerLength);
  assertEquals(payload, input.readString());
  input.close();
}

Source File: HdfsDirectoryTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Asserts that reading a byte after seeking to {@code length} in file {@code name} throws.
 *
 * @param name the file to open in {@code directory}
 * @param directory the directory under test
 * @param length the position to seek to before the read
 * @throws IOException if opening, seeking or closing the input fails
 */
private void testEof(String name, Directory directory, long length) throws IOException {
  // The input was previously never closed; try-with-resources releases it on every path.
  try (IndexInput input = directory.openInput(name, new IOContext())) {
    input.seek(length);
    expectThrows(Exception.class, input::readByte);
  }
}

Source File: TestPackedInts.java From lucene-solr with Apache License 2.0

4 votes

/**
 * For every packed-ints version, every bits-per-value from 1 to 64 and every supported format,
 * checks that the iterator, the direct reader and the plain reader each leave the input's file
 * pointer at {@code format.byteCount(version, valueCount, bpv)} after consuming the data.
 */
public void testEndPointer() throws IOException {
  final Directory dir = newDirectory();
  final int valueCount = RandomNumbers.randomIntBetween(random(), 1, 1000);

  // Backing file: valueCount longs, all zero.
  final IndexOutput out = dir.createOutput("tests.bin", newIOContext(random()));
  int written = 0;
  while (written < valueCount) {
    out.writeLong(0);
    ++written;
  }
  out.close();

  final IndexInput in = dir.openInput("tests.bin", newIOContext(random()));
  for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; ++version) {
    for (int bpv = 1; bpv <= 64; ++bpv) {
      for (PackedInts.Format format : PackedInts.Format.values()) {
        if (format.isSupported(bpv)) {
          final long expectedEnd = format.byteCount(version, valueCount, bpv);
          final String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;

          // test iterator: walk over every value, then check where the input ended up
          in.seek(0L);
          final int mem = RandomNumbers.randomIntBetween(random(), 1, 1<<16);
          final PackedInts.ReaderIterator iterator =
              PackedInts.getReaderIteratorNoHeader(in, format, version, valueCount, bpv, mem);
          for (int remaining = valueCount; remaining > 0; --remaining) {
            iterator.next();
          }
          assertEquals(msg, expectedEnd, in.getFilePointer());

          // test direct reader: fetching the last value must leave the pointer at the end
          in.seek(0L);
          final PackedInts.Reader direct =
              PackedInts.getDirectReaderNoHeader(in, format, version, valueCount, bpv);
          direct.get(valueCount - 1);
          assertEquals(msg, expectedEnd, in.getFilePointer());

          // test reader: construction alone must consume all the bytes
          in.seek(0L);
          PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
          assertEquals(msg, expectedEnd, in.getFilePointer());
        }
      }
    }
  }
  in.close();
  dir.close();
}

Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Positions {@code in} at the offset stored in the long that sits immediately before the
 * codec footer.
 *
 * @param in the input to reposition
 * @throws IOException if seeking or reading fails
 */
private void seekDir(IndexInput in) throws IOException {
  // The 8 bytes just before the footer hold the target offset.
  final long pointerPosition = in.length() - CodecUtil.footerLength() - 8;
  in.seek(pointerPosition);
  final long target = in.readLong();
  in.seek(target);
}

Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Scans the text lines starting at {@code termsStart} and builds {@code fst}, which maps each
 * term to a pair of (file pointer just after the term line, (docFreq, totalTermFreq)).
 *
 * <p>While scanning it also updates {@code sumDocFreq}, {@code sumTotalTermFreq},
 * {@code termCount} and, at the end, {@code docCount}. Scanning stops at a line equal to
 * {@code END} or starting with {@code FIELD}.
 *
 * @throws IOException if reading from the cloned input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstCompiler;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
      outputsInner);
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  // Read through a clone so the position of the shared input is not disturbed.
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  // -1 means no TERM line has been seen yet.
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      // End of this field's terms: flush the pending term (if any) and stop.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
            outputs.newPair(lastDocsStart,
                outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      // A DOC line counts as one document and, initially, one occurrence of the term.
      docFreq++;
      sumDocFreq++;
      totalTermFreq++;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      // The DOC line already added 1, so only the remainder of the frequency is added here.
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      // New term: flush the previous one (if any), then reset the per-term counters.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
            outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      // File pointer right after the TERM line has been read.
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      // totalTermFreq is still 0 for the very first term, so this adds nothing then.
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = fstCompiler.compile();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}

Source File: UniformSplitTermsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Positions the given {@link IndexInput} at the beginning of the fields metadata.
 *
 * <p>The target offset is read from the long stored immediately before the codec footer.
 *
 * @param indexInput the input to reposition
 * @throws IOException if seeking or reading fails
 */
protected void seekFieldsMetadata(IndexInput indexInput) throws IOException {
  final long footerStart = indexInput.length() - CodecUtil.footerLength();
  // Step back one long (8 bytes) from the footer to reach the stored offset.
  indexInput.seek(footerStart - 8);
  final long metadataStart = indexInput.readLong();
  indexInput.seek(metadataStart);
}

Source File: VersionBlockTreeTermsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Seek {@code input} to the directory offset, which is stored as a long in the 8 bytes
 * immediately preceding the codec footer.
 *
 * @param input the input to reposition
 * @throws IOException if seeking or reading fails
 */
private void seekDir(IndexInput input) throws IOException {
  final long offsetPosition = input.length() - CodecUtil.footerLength() - 8;
  input.seek(offsetPosition);
  input.seek(input.readLong());
}

Source File: BKDReader.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Reads the doc IDs of the leaf block at {@code blockFP} into {@code iterator.docIDs}.
 *
 * @param in the input to read from
 * @param blockFP file pointer of the leaf block
 * @param iterator holder of the destination {@code docIDs} array
 * @return how many points are stored in this leaf cell
 * @throws IOException if seeking or reading fails
 */
int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator iterator) throws IOException {
  in.seek(blockFP);
  // The block starts with a vInt holding the number of points in this leaf cell.
  final int pointCount = in.readVInt();
  DocIdsWriter.readInts(in, pointCount, iterator.docIDs);
  return pointCount;
}

Java Code Examples for org.apache.lucene.store.IndexInput#seek()