Java Code Examples for org.apache.lucene.store.IndexInput#seek()

The following examples show how to use org.apache.lucene.store.IndexInput#seek(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: LZFCompressor.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Tests whether the bytes at the current position of {@code in} match {@code LUCENE_HEADER}.
 * Whenever header bytes were read, the file pointer is moved back to where it was on entry.
 *
 * @param in the input to probe
 * @return {@code true} if the next bytes equal {@code LUCENE_HEADER}; {@code false} on the
 *         first mismatch or when fewer bytes than the header length remain
 * @throws IOException if reading from or seeking within {@code in} fails
 */
@Override
public boolean isCompressed(IndexInput in) throws IOException {
    final long start = in.getFilePointer();
    final int headerLength = LUCENE_HEADER.length;
    // Some metadata precedes the first compressed header, so we probe for our own header.
    if (in.length() - start < headerLength) {
        return false;
    }
    boolean headerMatches = true;
    int idx = 0;
    while (headerMatches && idx < headerLength) {
        headerMatches = in.readByte() == LUCENE_HEADER[idx];
        idx++;
    }
    // Rewind so the caller finds the input exactly where it left it.
    in.seek(start);
    return headerMatches;
}
 
Example 2
Source File: NRTSuggester.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a {@link NRTSuggester} from an {@link org.apache.lucene.store.IndexInput}, reading
 * the FST either off-heap or on-heap depending on the provided <code>fstLoadMode</code>,
 * followed by the trailing meta values.
 *
 * @param input the input positioned at the start of the serialized suggester
 * @param fstLoadMode the requested FST load mode
 * @return the loaded suggester
 * @throws IOException if reading from {@code input} fails
 */
public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
  final PairOutputs<Long, BytesRef> outputs =
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
  final FST<Pair<Long, BytesRef>> fst;
  if (!shouldLoadFSTOffHeap(input, fstLoadMode)) {
    fst = new FST<>(input, input, outputs);
  } else {
    final OffHeapFSTStore offHeapStore = new OffHeapFSTStore();
    final IndexInput fstInput = input.clone();
    fstInput.seek(input.getFilePointer());
    fst = new FST<>(fstInput, fstInput, outputs, offHeapStore);
    // Move the original input to the clone's position plus the store size
    // (presumably just past the FST bytes left on disk -- confirm against OffHeapFSTStore).
    input.seek(fstInput.getFilePointer() + offHeapStore.size());
  }

  // Trailing meta info, read in the order it is stored.
  final int maxAnalyzedPathsPerOutput = input.readVInt();
  // Label used to denote the end of an input in the FST and the beginning of dedup bytes.
  // The value is not used here, but it must still be read to advance the input.
  final int endByte = input.readVInt();
  final int payloadSep = input.readVInt();
  return new NRTSuggester(fst, maxAnalyzedPathsPerOutput, payloadSep);
}
 
Example 3
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Retrieves the full index header from the provided {@link IndexInput}.
 * The header is first parsed from offset 0 to work out its total length, then the input is
 * rewound and that many bytes are read back verbatim.
 *
 * @param in the input to read the header from; it is repositioned by this call
 * @return the raw header bytes
 * @throws CorruptIndexException if the file does not start with {@code CODEC_MAGIC}
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }
  final String codecName = in.readString();
  // The next int is not needed here; it is read only to advance the pointer.
  in.readInt();
  // Skip over the ID bytes.
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  final int suffixLength = in.readByte() & 0xFF;
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, totalLength);
  return header;
}
 
Example 4
Source File: CodecInfo.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Instantiates a new index doc.
 *
 * @param ref
 *          the ref
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public IndexDoc(Long ref) throws IOException {
  try {
    IndexInput inIndexDoc = indexInputList.get("doc");
    if (ref != null) {
      inIndexDoc.seek(ref);
    }
    docId = inIndexDoc.readVInt(); // docId
    fpIndexObjectId = inIndexDoc.readVLong(); // ref indexObjectId
    fpIndexObjectPosition = inIndexDoc.readVLong(); // ref
                                                    // indexObjectPosition
    fpIndexObjectParent = inIndexDoc.readVLong(); // ref indexObjectParent
    smallestObjectFilepointer = inIndexDoc.readVLong(); // offset
    objectRefApproxQuotient = inIndexDoc.readVInt(); // slope
    objectRefApproxOffset = inIndexDoc.readZLong(); // offset
    storageFlags = inIndexDoc.readByte(); // flag
    size = inIndexDoc.readVInt(); // number of objects
    minPosition = inIndexDoc.readVInt(); // minimum position
    maxPosition = inIndexDoc.readVInt(); // maximum position
  } catch (Exception e) {
    throw new IOException(e);
  }
}
 
Example 5
Source File: BaseCompoundFormatTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that reading beyond the end of a file inside the compound directory fails with an
 * {@link IOException}, both for a single-byte read and for a block read.
 *
 * <p>All resources are opened in a try-with-resources statement so that they are released
 * even when an assertion or read fails part-way through. They are closed in the order
 * input, compound directory, base directory.
 *
 * @throws IOException if setting up or reading the test file fails unexpectedly
 */
public void testReadPastEOF() throws IOException {
  try (Directory dir = newDirectory();
       Directory cr = createLargeCFS(dir);
       IndexInput is = cr.openInput("_123.f2", newIOContext(random()))) {
    // Consume exactly the last 10 bytes so the pointer sits at end of file.
    is.seek(is.length() - 10);
    byte[] b = new byte[100];
    is.readBytes(b, 0, 10);

    // Single byte read past end of file
    expectThrows(IOException.class, () -> {
      is.readByte();
    });

    is.seek(is.length() - 10);

    // Block read past end of file: 50 bytes requested with only 10 remaining
    expectThrows(IOException.class, () -> {
      is.readBytes(b, 0, 50);
    });
  }
}
 
Example 6
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Retrieves the full footer from the provided {@link IndexInput}.
 * The footer is validated first; the input is then repositioned to the footer start and the
 * raw bytes are read.
 *
 * @param in the input to read the footer from; it is repositioned by this call
 * @return the raw footer bytes
 * @throws CorruptIndexException if the file is shorter than a footer (other footer problems
 *         are reported by {@code validateFooter})
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readFooter(IndexInput in) throws IOException {
  final int footerLen = footerLength();
  final long fileLen = in.length();
  if (fileLen < footerLen) {
    throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + fileLen + " but footerLength==" + footerLen, in);
  }
  final long footerStart = fileLen - footerLen;
  in.seek(footerStart);
  validateFooter(in);
  // Validation moved the pointer; go back to the start of the footer before copying it.
  in.seek(footerStart);
  final byte[] footer = new byte[footerLen];
  in.readBytes(footer, 0, footerLen);
  return footer;
}
 
Example 7
Source File: BlockPackedReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sole constructor. Reads one token per block from {@code in} and builds a sub-reader for
 * every block.
 *
 * @param in the input positioned at the first block token
 * @param packedIntsVersion the packed-ints version passed through to the sub-readers
 * @param blockSize the number of values per block, validated by {@code checkBlockSize}
 * @param valueCount the total number of values
 * @param direct when {@code true}, blocks are backed by direct readers and the input is
 *        advanced past each block's data; otherwise each block is read via
 *        {@code getReaderNoHeader}
 * @throws CorruptIndexException if a block token declares more than 64 bits per value
 * @throws IOException if reading from {@code in} fails
 */
public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
  this.valueCount = valueCount;
  blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
  blockMask = blockSize - 1;
  final int blockCount = numBlocks(valueCount, blockSize);
  subReaders = new PackedInts.Reader[blockCount];
  // Allocated lazily: stays null when every block has the "min value equals 0" bit set.
  long[] mins = null;
  long bpvTotal = 0;
  for (int block = 0; block < blockCount; ++block) {
    final int token = in.readByte() & 0xFF;
    final int bitsPerValue = token >>> BPV_SHIFT;
    bpvTotal += bitsPerValue;
    if (bitsPerValue > 64) {
      throw new CorruptIndexException("Corrupted Block#" + block, in);
    }
    final boolean hasMinValue = (token & MIN_VALUE_EQUALS_0) == 0;
    if (hasMinValue) {
      if (mins == null) {
        mins = new long[blockCount];
      }
      mins[block] = zigZagDecode(1L + readVLong(in));
    }
    if (bitsPerValue == 0) {
      subReaders[block] = new PackedInts.NullReader(blockSize);
      continue;
    }
    // The final block may hold fewer than blockSize values.
    final int size = (int) Math.min(blockSize, valueCount - (long) block * blockSize);
    if (!direct) {
      subReaders[block] = PackedInts.getReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
      continue;
    }
    // Remember where the block data starts, then seek explicitly to the end of the block.
    final long dataStart = in.getFilePointer();
    subReaders[block] = PackedInts.getDirectReaderNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
    in.seek(dataStart + PackedInts.Format.PACKED.byteCount(packedIntsVersion, size, bitsPerValue));
  }
  this.minValues = mins;
  this.sumBPV = bpvTotal;
}
 
Example 8
Source File: DiskDocValuesProducer.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Creates numeric doc values for {@code entry}, backed by a {@link BlockPackedReader} that
 * is constructed in direct mode over a clone of the data input.
 *
 * @param entry supplies the data offset, packed-ints version, block size and value count
 * @return doc values that delegate each lookup to the block-packed reader
 * @throws IOException if seeking or constructing the reader fails
 */
LongNumericDocValues newNumeric(NumericEntry entry) throws IOException {
  // Work on a private clone so the shared input's position is left untouched.
  final IndexInput input = this.data.clone();
  input.seek(entry.offset);

  final BlockPackedReader packed =
      new BlockPackedReader(input, entry.packedIntsVersion, entry.blockSize, entry.count, true);
  return new LongNumericDocValues() {
    @Override
    public long get(long id) {
      return packed.get(id);
    }
  };
}
 
Example 9
Source File: SimpleTextBKDReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the doc IDs of the block starting at {@code blockFP}: first a line holding the
 * count, then one line per doc ID.
 *
 * @param in the input to read from; it is repositioned to {@code blockFP}
 * @param blockFP file pointer of the block
 * @param docIDs destination array, filled from index 0
 * @return the number of doc IDs read
 * @throws IOException if reading from {@code in} fails
 */
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);
  readLine(in, line);
  final int count = parseInt(line, BLOCK_COUNT);
  int idx = 0;
  while (idx < count) {
    readLine(in, line);
    docIDs[idx] = parseInt(line, BLOCK_DOC_ID);
    idx++;
  }
  return count;
}
 
Example 10
Source File: BlockDirectoryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that reading a byte after seeking to {@code length} fails with an
 * {@link IOException}.
 *
 * <p>The input is managed by try-with-resources, so a failure while closing cannot replace
 * the primary test failure.
 *
 * @param name name of the file to open
 * @param directory directory containing the file
 * @param length position to seek to before the read; callers are expected to pass the file length
 * @throws IOException if opening, seeking or closing the input fails
 */
private void testEof(String name, Directory directory, long length) throws IOException {
  try (IndexInput input = directory.openInput(name, new IOContext())) {
    input.seek(length);
    try {
      input.readByte();
      fail("should throw eof");
    } catch (IOException expected) {
      // Expected: there is nothing left to read at this position.
    }
  }
}
 
Example 11
Source File: DiskDocValuesProducer.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Creates binary doc values for fixed-length entries. The value for {@code id} is located at
 * {@code bytes.offset + id * bytes.maxLength} and is always {@code bytes.maxLength} bytes long.
 *
 * @param field the field the values belong to (not used by this method)
 * @param bytes supplies the base offset and the fixed value length
 * @return doc values that read each value through a per-{@code ThreadValue} clone of the data input
 */
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
  final IndexInput source = this.data.clone();

  return new LongBinaryDocValues() {

    // Each ThreadValue slot gets its own clone of the source input.
    private final ThreadValue<IndexInput> threadInput = new ThreadValue<IndexInput>() {
      @Override
      protected IndexInput initialValue() {
        return source.clone();
      }
    };

    @Override
    public void get(long id, BytesRef result) {
      final long start = bytes.offset + id * bytes.maxLength;
      // NOTE: a single shared buffer would be possible, but various consumers
      // (e.g. FieldComparatorSource) assume "they" own the bytes after this call,
      // so a fresh array is handed out every time.
      final byte[] value = new byte[bytes.maxLength];
      try {
        final IndexInput input = threadInput.get();
        input.seek(start);
        input.readBytes(value, 0, value.length);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      result.bytes = value;
      result.offset = 0;
      result.length = value.length;
    }
  };
}
 
Example 12
Source File: CacheIndexInputTest.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code sampleSize} ints at random positions from both inputs and asserts that each
 * pair is equal. The two inputs must have the same length.
 *
 * <p>A 4-byte int can start at any offset in {@code [0, fileLength - 4]}. Because
 * {@link Random#nextInt(int)} takes an exclusive bound, the bound is {@code fileLength - 3},
 * so the last int of the file is sampled too and a file of exactly 4 bytes is accepted.
 * When {@code sampleSize > 0}, a file shorter than 4 bytes still fails with the
 * {@link IllegalArgumentException} thrown by {@code nextInt}.
 *
 * @param baseInput the reference input
 * @param testInput the input under test
 * @param random source of the sample positions
 * @param sampleSize number of positions to compare
 * @throws IOException if seeking or reading either input fails
 */
public static void readRandomDataInt(IndexInput baseInput, IndexInput testInput, Random random, int sampleSize)
    throws IOException {
  assertEquals(baseInput.length(), testInput.length());
  // NOTE(review): the cast truncates lengths above Integer.MAX_VALUE -- assumes test files are small.
  int fileLength = (int) baseInput.length();
  // Exclusive upper bound for the start offset of a 4-byte read.
  final int positionBound = fileLength - 3;
  for (int i = 0; i < sampleSize; i++) {
    int position = random.nextInt(positionBound);
    baseInput.seek(position);
    int i1 = baseInput.readInt();
    testInput.seek(position);
    int i2 = testInput.readInt();
    assertEquals("Read [" + i + "] The position is [" + position + "]", i1, i2);
  }
}
 
Example 13
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an index header followed by a string, then checks that seeking to
 * {@code CodecUtil.indexHeaderLength} for the same codec and suffix lands exactly on the string.
 */
public void testSegmentHeaderLength() throws Exception {
  final String codec = "FooBar";
  final String suffix = "xyz";

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(writer, codec, 5, StringHelper.randomId(), suffix);
  writer.writeString("this is the data");
  writer.close();

  final IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  // Jump straight over the header; the payload must start right after it.
  final long headerEnd = CodecUtil.indexHeaderLength(codec, suffix);
  reader.seek(headerEnd);
  assertEquals("this is the data", reader.readString());
  reader.close();
}
 
Example 14
Source File: HdfsDirectoryTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that reading a byte after seeking to {@code length} throws.
 *
 * <p>The input is opened in a try-with-resources statement so it is closed whether or not
 * the assertion passes.
 *
 * @param name name of the file to open
 * @param directory directory containing the file
 * @param length position to seek to before the read; callers are expected to pass the file length
 * @throws IOException if opening, seeking or closing the input fails
 */
private void testEof(String name, Directory directory, long length) throws IOException {
  try (IndexInput input = directory.openInput(name, new IOContext())) {
    input.seek(length);
    expectThrows(Exception.class, input::readByte);
  }
}
 
Example 15
Source File: TestPackedInts.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that, for every supported combination of version, bits-per-value and format, the
 * reader iterator, the direct reader and the plain reader each leave the input's file
 * pointer at exactly {@code format.byteCount(...)} after starting from offset 0.
 *
 * @throws IOException if writing or reading the test file fails
 */
public void testEndPointer() throws IOException {
  final Directory dir = newDirectory();
  final int valueCount = RandomNumbers.randomIntBetween(random(), 1, 1000);

  // Back the readers with a file of valueCount zero longs.
  final IndexOutput out = dir.createOutput("tests.bin", newIOContext(random()));
  int written = 0;
  while (written < valueCount) {
    out.writeLong(0);
    written++;
  }
  out.close();

  final IndexInput in = dir.openInput("tests.bin", newIOContext(random()));
  for (int version = PackedInts.VERSION_START; version <= PackedInts.VERSION_CURRENT; ++version) {
    for (int bpv = 1; bpv <= 64; ++bpv) {
      for (PackedInts.Format format : PackedInts.Format.values()) {
        if (format.isSupported(bpv)) {
          final long expectedEnd = format.byteCount(version, valueCount, bpv);
          final String description = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;

          // Iterator: consume every value, then check where the input ended up.
          in.seek(0L);
          final int mem = RandomNumbers.randomIntBetween(random(), 1, 1<<16);
          final PackedInts.ReaderIterator iterator = PackedInts.getReaderIteratorNoHeader(in, format, version, valueCount, bpv, mem);
          for (int consumed = 0; consumed < valueCount; ++consumed) {
            iterator.next();
          }
          assertEquals(description, expectedEnd, in.getFilePointer());

          // Direct reader: fetch the last value, then check the pointer.
          in.seek(0L);
          final PackedInts.Reader direct = PackedInts.getDirectReaderNoHeader(in, format, version, valueCount, bpv);
          direct.get(valueCount - 1);
          assertEquals(description, expectedEnd, in.getFilePointer());

          // Plain reader: constructing it is enough.
          in.seek(0L);
          PackedInts.getReaderNoHeader(in, format, version, valueCount, bpv);
          assertEquals(description, expectedEnd, in.getFilePointer());
        }
      }
    }
  }
  in.close();
  dir.close();
}
 
Example 16
Source File: FSTTermsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Positions {@code in} at the offset stored in the 8 bytes that immediately precede the
 * codec footer.
 *
 * @param in the input to reposition
 * @throws IOException if seeking or reading fails
 */
private void seekDir(IndexInput in) throws IOException {
  final long pointerPosition = in.length() - CodecUtil.footerLength() - 8;
  in.seek(pointerPosition);
  final long target = in.readLong();
  in.seek(target);
}
 
Example 17
Source File: SimpleTextFieldsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the lines starting at {@code termsStart} and builds the terms FST.
 *
 * <p>Each term is mapped to a pair of (file pointer just after its TERM line,
 * (docFreq, totalTermFreq)). Scanning stops at an END line or at the next FIELD line.
 * Along the way this updates {@code sumDocFreq}, {@code sumTotalTermFreq},
 * {@code termCount}, and finally {@code docCount} and {@code fst}.
 *
 * @throws IOException if reading from the input fails
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstCompiler;
  // Inner pair carries (docFreq, totalTermFreq); outer pair prepends the docs start pointer.
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
      outputsInner);
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  // Read through a clone so the shared input's position is not disturbed.
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  // -1 means no TERM line has been seen yet, so there is nothing to flush.
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      // End of this field's terms: flush the pending term (if any) and stop.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
            outputs.newPair(lastDocsStart,
                outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      // A DOC line counts one document and, initially, one occurrence for the current term.
      docFreq++;
      sumDocFreq++;
      totalTermFreq++;
      // The doc ID is the text following the DOC prefix.
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      // Subtract 1 because the DOC branch above already added one occurrence.
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      // A new term starts: flush the previous one first, if there was one.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
            outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      // The pointer now sits just after the TERM line, i.e. at the start of this term's data.
      lastDocsStart = in.getFilePointer();
      // Copy the term bytes (everything after the TERM prefix) into lastTerm.
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      // Fold the previous term's total into the field-wide sum before resetting it
      // (adds 0 when this is the first term).
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = fstCompiler.compile();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
 
Example 18
Source File: UniformSplitTermsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Positions the given {@link IndexInput} at the beginning of the fields metadata.
 * The target offset is read from the 8 bytes that immediately precede the codec footer.
 *
 * @param indexInput the input to reposition
 * @throws IOException if seeking or reading fails
 */
protected void seekFieldsMetadata(IndexInput indexInput) throws IOException {
  final long pointerPosition = indexInput.length() - CodecUtil.footerLength() - 8;
  indexInput.seek(pointerPosition);
  final long metadataStart = indexInput.readLong();
  indexInput.seek(metadataStart);
}
 
Example 19
Source File: VersionBlockTreeTermsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Seek {@code input} to the directory offset, which is stored as a long in the 8 bytes
 * immediately before the codec footer.
 *
 * @param input the input to reposition
 * @throws IOException if seeking or reading fails
 */
private void seekDir(IndexInput input) throws IOException {
  final long offsetPosition = input.length() - CodecUtil.footerLength() - 8;
  input.seek(offsetPosition);
  input.seek(input.readLong());
}
 
Example 20
Source File: BKDReader.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Reads the doc IDs of the leaf block at {@code blockFP} into {@code iterator.docIDs}.
 *
 * @param in the input to read from; it is repositioned to {@code blockFP}
 * @param blockFP file pointer of the leaf block
 * @param iterator holder whose {@code docIDs} array receives the decoded IDs
 * @return the number of points stored in the leaf cell
 * @throws IOException if seeking or reading fails
 */
int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator iterator) throws IOException {
  in.seek(blockFP);
  // The block starts with the number of points stored in this leaf cell.
  final int pointCount = in.readVInt();
  DocIdsWriter.readInts(in, pointCount, iterator.docIDs);
  return pointCount;
}