org.apache.lucene.store.ChecksumIndexInput Java Examples

The following examples show how to use org.apache.lucene.store.ChecksumIndexInput. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Fills the terms-dictionary fields of {@code entry} by reading them one after another from
 * {@code meta}. Every call below consumes the next bytes of the stream, so the statement order
 * must not be changed.
 *
 * @param meta  metadata input to read from
 * @param entry entry whose terms-dictionary fields are populated
 * @throws IOException if reading from {@code meta} fails
 */
private static void readTermDict(ChecksumIndexInput meta, TermsDictEntry entry) throws IOException {
  entry.termsDictSize = meta.readVLong();
  entry.termsDictBlockShift = meta.readInt();
  // This block shift is passed to both DirectMonotonicReader.loadMeta calls in this method.
  final int blockShift = meta.readInt();
  // Ceiling of termsDictSize / 2^termsDictBlockShift.
  final long addressesSize = (entry.termsDictSize + (1L << entry.termsDictBlockShift) - 1) >>> entry.termsDictBlockShift;
  entry.termsAddressesMeta = DirectMonotonicReader.loadMeta(meta, addressesSize, blockShift);
  entry.maxTermLength = meta.readInt();
  entry.termsDataOffset = meta.readLong();
  entry.termsDataLength = meta.readLong();
  entry.termsAddressesOffset = meta.readLong();
  entry.termsAddressesLength = meta.readLong();
  entry.termsDictIndexShift = meta.readInt();
  // Ceiling of termsDictSize / 2^termsDictIndexShift; the index metadata is loaded with one extra value.
  final long indexSize = (entry.termsDictSize + (1L << entry.termsDictIndexShift) - 1) >>> entry.termsDictIndexShift;
  entry.termsIndexAddressesMeta = DirectMonotonicReader.loadMeta(meta, 1 + indexSize, blockShift);
  entry.termsIndexOffset = meta.readLong();
  entry.termsIndexLength = meta.readLong();
  entry.termsIndexAddressesOffset = meta.readLong();
  entry.termsIndexAddressesLength = meta.readLong();
}
 
Example #2
Source File: SimpleTextPointsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Re-reads the data file line by line through a checksumming wrapper and validates the
 * SimpleText footer once the expected footer position is reached.
 *
 * @throws CorruptIndexException if a line ends past the expected footer start
 * @throws IOException if reading fails or the footer check fails
 */
@Override
public void checkIntegrity() throws IOException {
  // Read from a clone that has been rewound to position 0.
  final IndexInput rewound = dataIn.clone();
  rewound.seek(0);
  final BytesRefBuilder line = new BytesRefBuilder();

  // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM):
  final long footerStartPos = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21);
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(rewound);

  // Consume whole lines until the position reaches (or overshoots) the footer start.
  long position;
  do {
    SimpleTextUtil.readLine(input, line);
    position = input.getFilePointer();
  } while (position < footerStartPos);

  // Make sure we landed at precisely the right location:
  if (position != footerStartPos) {
    throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + position + " vs expected=" + footerStartPos, input);
  }
  SimpleTextUtil.checkFooter(input);
}
 
Example #3
Source File: SimpleTextDocValuesReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the data file by streaming every line through a checksumming input and then
 * checking the SimpleText footer at the position where it is expected to begin.
 *
 * @throws CorruptIndexException if the line boundaries do not align with the footer start
 * @throws IOException if reading fails or the footer check fails
 */
@Override
public void checkIntegrity() throws IOException {
  final BytesRefBuilder lineBuffer = new BytesRefBuilder();
  final IndexInput fromStart = data.clone();
  fromStart.seek(0);
  // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM):
  final int footerSize = SimpleTextUtil.CHECKSUM.length + 21;
  final long footerStartPos = data.length() - footerSize;
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(fromStart);

  boolean reachedFooter = false;
  while (reachedFooter == false) {
    SimpleTextUtil.readLine(input, lineBuffer);
    reachedFooter = input.getFilePointer() >= footerStartPos;
  }

  // Make sure we landed at precisely the right location:
  if (input.getFilePointer() != footerStartPos) {
    throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input);
  }
  SimpleTextUtil.checkFooter(input);
}
 
Example #4
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * A footer carrying a wrong checksum must surface as a {@code CorruptIndexException}
 * ("checksum failed") with the prior exception attached as suppressed.
 */
public void testCheckFooterInvalid() throws Exception {
  // Hand-write the footer so the stored checksum is wrong.
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  writer.writeInt(CodecUtil.FOOTER_MAGIC);
  writer.writeInt(0);
  writer.writeLong(1234567); // write a bogus checksum
  writer.close();

  ChecksumIndexInput input = new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  CodecUtil.checkHeader(input, "FooBar", 5, 5);
  assertEquals("this is the data", input.readString());

  Exception prior = new RuntimeException("fake exception");
  CorruptIndexException thrown =
      expectThrows(CorruptIndexException.class, () -> CodecUtil.checkFooter(input, prior));
  assertTrue(thrown.getMessage().contains("checksum failed"));

  Throwable[] suppressed = thrown.getSuppressed();
  assertEquals(1, suppressed.length);
  assertEquals("fake exception", suppressed[0].getMessage());
  input.close();
}
 
Example #5
Source File: SimpleTextFieldsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Scans {@code in} line by line up to the END marker, recording for each FIELD line the file
 * position immediately after that line, then validates the checksum footer.
 *
 * @param in input to scan; it is wrapped in a checksumming input
 * @return sorted map from field name to the file pointer right after its FIELD line
 * @throws IOException if reading fails or the footer check fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final TreeMap<String,Long> fieldStarts = new TreeMap<>();
  final BytesRefBuilder line = new BytesRefBuilder();
  final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);

  SimpleTextUtil.readLine(checksumIn, line);
  while (line.get().equals(END) == false) {
    if (StringHelper.startsWith(line.get(), FIELD)) {
      // The field name is whatever follows the FIELD prefix on this line.
      final int nameLength = line.length() - FIELD.length;
      final String fieldName = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      fieldStarts.put(fieldName, checksumIn.getFilePointer());
    }
    SimpleTextUtil.readLine(checksumIn, line);
  }

  SimpleTextUtil.checkFooter(checksumIn);
  return fieldStarts;
}
 
Example #6
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * When the reader has already consumed a byte of the footer, the checksum cannot be evaluated:
 * expect a {@code CorruptIndexException} ("checksum status indeterminate") that carries the
 * prior exception as suppressed.
 */
public void testCheckFooterValidPastFooter() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput input = new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  CodecUtil.checkHeader(input, "FooBar", 5, 5);
  assertEquals("this is the data", input.readString());
  // bogusly read a byte too far (can happen)
  input.readByte();

  Exception prior = new RuntimeException("fake exception");
  CorruptIndexException thrown =
      expectThrows(CorruptIndexException.class, () -> CodecUtil.checkFooter(input, prior));
  assertTrue(thrown.getMessage().contains("checksum status indeterminate"));

  Throwable[] suppressed = thrown.getSuppressed();
  assertEquals(1, suppressed.length);
  assertEquals("fake exception", suppressed[0].getMessage());
  input.close();
}
 
Example #7
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * With the input positioned exactly at a valid footer, the prior exception is rethrown and a
 * "checksum passed" note is attached to it as suppressed.
 */
public void testCheckFooterValidAtFooter() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput input = new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  CodecUtil.checkHeader(input, "FooBar", 5, 5);
  assertEquals("this is the data", input.readString());

  Exception prior = new RuntimeException("fake exception");
  RuntimeException thrown =
      expectThrows(RuntimeException.class, () -> CodecUtil.checkFooter(input, prior));
  assertEquals("fake exception", thrown.getMessage());

  Throwable[] suppressed = thrown.getSuppressed();
  assertEquals(1, suppressed.length);
  assertTrue(suppressed[0].getMessage().contains("checksum passed"));
  input.close();
}
 
Example #8
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * With nothing read from a valid file yet, {@code checkFooter(in, prior)} still rethrows the
 * prior exception with a "checksum passed" note attached as suppressed.
 */
public void testCheckFooterValid() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput input = new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));

  Exception prior = new RuntimeException("fake exception");
  RuntimeException thrown =
      expectThrows(RuntimeException.class, () -> CodecUtil.checkFooter(input, prior));
  assertEquals("fake exception", thrown.getMessage());

  Throwable[] suppressed = thrown.getSuppressed();
  assertEquals(1, suppressed.length);
  assertTrue(suppressed[0].getMessage().contains("checksum passed"));
  input.close();
}
 
Example #9
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads per-field doc-values entries from {@code meta} until a field number of -1 is read,
 * storing each entry in the map matching its type byte, keyed by field name.
 *
 * @param meta  metadata input to read from
 * @param infos used to resolve each field number to a {@code FieldInfo}
 * @throws CorruptIndexException if a field number is unknown or a type byte is unrecognized
 * @throws IOException if reading fails
 */
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
  while (true) {
    final int fieldNumber = meta.readInt();
    if (fieldNumber == -1) {
      // -1 terminates the list of fields.
      return;
    }

    final FieldInfo info = infos.fieldInfo(fieldNumber);
    if (info == null) {
      throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
    }

    final byte type = meta.readByte();
    if (type == Lucene80DocValuesFormat.NUMERIC) {
      numerics.put(info.name, readNumeric(meta));
    } else if (type == Lucene80DocValuesFormat.BINARY) {
      binaries.put(info.name, readBinary(meta));
    } else if (type == Lucene80DocValuesFormat.SORTED) {
      sorted.put(info.name, readSorted(meta));
    } else if (type == Lucene80DocValuesFormat.SORTED_SET) {
      sortedSets.put(info.name, readSortedSet(meta));
    } else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
      sortedNumerics.put(info.name, readSortedNumeric(meta));
    } else {
      throw new CorruptIndexException("invalid type: " + type, meta);
    }
  }
}
 
Example #10
Source File: OfflinePointReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Closes the underlying input. Before closing, the checksum footer is verified once, but only
 * if nothing is left to read and the input is a {@code ChecksumIndexInput}.
 *
 * @throws IOException if the footer check or the close fails
 */
@Override
public void close() throws IOException {
  try {
    final boolean fullyConsumed = countLeft == 0;
    final boolean needsCheck = fullyConsumed && !checked && in instanceof ChecksumIndexInput;
    if (needsCheck) {
      // Mark first so the footer check is never attempted a second time.
      checked = true;
      CodecUtil.checkFooter((ChecksumIndexInput) in);
    }
  } finally {
    in.close();
  }
}
 
Example #11
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one sorted-numeric entry from {@code meta}: first the fields filled in by
 * {@code readNumeric(meta, entry)}, then the number of docs with the field and, if required,
 * the addresses metadata. Reads are sequential, so their order must be kept.
 *
 * @param meta metadata input to read from
 * @return the populated entry
 * @throws IOException if reading fails
 */
private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta) throws IOException {
  SortedNumericEntry entry = new SortedNumericEntry();
  readNumeric(meta, entry);
  entry.numDocsWithField = meta.readInt();
  // The addresses section is only read when the doc count differs from the value count.
  if (entry.numDocsWithField != entry.numValues) {
    entry.addressesOffset = meta.readLong();
    final int blockShift = meta.readVInt();
    entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithField + 1, blockShift);
    // Account for the memory held by the loaded addresses metadata.
    ramBytesUsed += entry.addressesMeta.ramBytesUsed();
    entry.addressesLength = meta.readLong();
  }
  return entry;
}
 
Example #12
Source File: Lucene80NormsProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads all norms entries from the metadata file (verifying its checksum footer), then opens
 * the data file and checks that its header version matches the metadata version and that its
 * footer is structurally valid. The data input is closed again if any of those checks fail.
 */
Lucene80NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  maxDoc = state.segmentInfo.maxDoc();
  final String segmentName = state.segmentInfo.name;
  final String metaFileName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, metaExtension);
  int metaVersion = -1;

  // read in the entries from the metadata file.
  try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context)) {
    Throwable failure = null;
    try {
      metaVersion = CodecUtil.checkIndexHeader(metaIn, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      readFields(metaIn, state.fieldInfos);
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(metaIn, failure);
    }
  }

  final String dataFileName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, dataExtension);
  data = state.directory.openInput(dataFileName, state.context);
  boolean success = false;
  try {
    final int dataVersion = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    if (metaVersion != dataVersion) {
      throw new CorruptIndexException("Format versions mismatch: meta=" + metaVersion + ",data=" + dataVersion, data);
    }

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(data);

    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}
 
Example #13
Source File: FieldsIndexReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the fields-index metadata from the meta file (verifying its checksum footer), then
 * opens the index file, validates its header and footer structure, and creates the two
 * monotonic readers over slices of it.
 *
 * <p>A failure while reading the meta file is passed to
 * {@code CodecUtil.checkFooter(ChecksumIndexInput, Throwable)}, so the exception that reaches
 * the caller carries both the original failure and the checksum status.
 *
 * @throws IOException if reading or validation of either file fails
 */
FieldsIndexReader(Directory dir, String name, String suffix, String extensionPrefix, String codecName, byte[] id) throws IOException {
  try (ChecksumIndexInput metaIn = dir.openChecksumInput(IndexFileNames.segmentFileName(name, suffix, extensionPrefix + FIELDS_META_EXTENSION_SUFFIX), IOContext.READONCE)) {
    try {
      CodecUtil.checkIndexHeader(metaIn, codecName + "Meta", VERSION_START, VERSION_CURRENT, id, suffix);
      maxDoc = metaIn.readInt();
      blockShift = metaIn.readInt();
      numChunks = metaIn.readInt();
      docsStartPointer = metaIn.readLong();
      docsMeta = DirectMonotonicReader.loadMeta(metaIn, numChunks, blockShift);
      docsEndPointer = startPointersStartPointer = metaIn.readLong();
      startPointersMeta = DirectMonotonicReader.loadMeta(metaIn, numChunks, blockShift);
      startPointersEndPointer = metaIn.readLong();
      maxPointer = metaIn.readLong();
    } catch (Throwable priorE) {
      // Hand the failure to checkFooter so it is reported together with the checksum status,
      // instead of being replaced or left without a diagnosis.
      CodecUtil.checkFooter(metaIn, priorE);
      // checkFooter(in, priorE) always throws for a non-null priorE. This throw only makes the
      // catch block unable to complete normally, which keeps definite assignment of the fields
      // set in the try block intact.
      throw new AssertionError("checkFooter must rethrow the prior exception", priorE);
    }
    // Reading succeeded: plain footer validation.
    CodecUtil.checkFooter(metaIn);
  }

  indexInput = dir.openInput(IndexFileNames.segmentFileName(name, suffix, extensionPrefix + FIELDS_INDEX_EXTENSION_SUFFIX), IOContext.READ);
  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(indexInput, codecName + "Idx", VERSION_START, VERSION_CURRENT, id, suffix);
    CodecUtil.retrieveChecksum(indexInput);
    success = true;
  } finally {
    if (success == false) {
      indexInput.close();
    }
  }
  final RandomAccessInput docsSlice = indexInput.randomAccessSlice(docsStartPointer, docsEndPointer - docsStartPointer);
  final RandomAccessInput startPointersSlice = indexInput.randomAccessSlice(startPointersStartPointer, startPointersEndPointer - startPointersStartPointer);
  docs = DirectMonotonicReader.getInstance(docsMeta, docsSlice);
  startPointers = DirectMonotonicReader.getInstance(startPointersMeta, startPointersSlice);
}
 
Example #14
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the codec footer that {@link #writeFooter} wrote and compares the checksum stored in
 * it with the checksum computed over the bytes read so far.
 *
 * @return actual checksum value
 * @throws IOException if the footer is invalid, if the checksum does not match,
 *                     or if {@code in} is not properly positioned before the footer
 *                     at the end of the stream.
 */
public static long checkFooter(ChecksumIndexInput in) throws IOException {
  validateFooter(in);
  // Capture the computed checksum before the stored one is read from the stream.
  final long computed = in.getChecksum();
  final long stored = readCRC(in);
  if (stored == computed) {
    return computed;
  }
  throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(stored)
                                  + " actual=" + Long.toHexString(computed), in);
}
 
Example #15
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the codec footer previously written by {@link #writeFooter}, taking into account
 * an unexpected exception that may already have occurred while reading.
 * <p>
 * With a {@code null} {@code priorException} this is the same as
 * {@link #checkFooter(ChecksumIndexInput)}. Otherwise this method never returns normally: it
 * records, as a suppressed exception, whether the checksum passes, fails, or cannot be
 * computed, and then throws.
 * <p>
 * Example usage:
 * <pre class="prettyprint">
 * try (ChecksumIndexInput input = ...) {
 *   Throwable priorE = null;
 *   try {
 *     // ... read a bunch of stuff ...
 *   } catch (Throwable exception) {
 *     priorE = exception;
 *   } finally {
 *     CodecUtil.checkFooter(input, priorE);
 *   }
 * }
 * </pre>
 */
public static void checkFooter(ChecksumIndexInput in, Throwable priorException) throws IOException {
  if (priorException == null) {
    checkFooter(in);
    return;
  }

  try {
    // Evidence of corruption becomes the main exception with the prior one suppressed.
    // Without such evidence the prior exception stays the main one and gets a suppressed
    // note saying the checksum matched.
    final long remaining = in.length() - in.getFilePointer();
    final long footerSize = footerLength();
    if (remaining < footerSize) {
      // corruption caused us to read into the checksum footer already: we can't proceed
      throw new CorruptIndexException("checksum status indeterminate: remaining=" + remaining
                                      + "; please run checkindex for more details", in);
    }

    // skip any unread bytes, then check the footer
    in.skipBytes(remaining - footerSize);
    final long checksum = checkFooter(in);
    priorException.addSuppressed(new CorruptIndexException("checksum passed (" + Long.toHexString(checksum)
                                                           + "). possibly transient resource issue, or a Lucene or JVM bug", in));
  } catch (CorruptIndexException corruption) {
    corruption.addSuppressed(priorException);
    throw corruption;
  } catch (Throwable unexpected) {
    // catch-all for things that shouldn't go wrong (e.g. OOM during readInt) but could...
    priorException.addSuppressed(new CorruptIndexException("checksum status indeterminate: unexpected exception", in, unexpected));
  }
  throw IOUtils.rethrowAlways(priorException);
}
 
Example #16
Source File: SegmentInfos.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the commit described by the given segments file. Note that this may
 * throw an IOException if a commit is in process.
 *
 * @param directory -- directory containing the segments file
 * @param segmentFileName -- segment file to load
 * @throws CorruptIndexException if the index is corrupt, including when reading the opened
 *         file hits an end-of-file or missing-file error
 * @throws IOException if there is a low-level IO error
 */
public static final SegmentInfos readCommit(Directory directory, String segmentFileName) throws IOException {
  final long gen = generationFromSegmentsFileName(segmentFileName);
  try (ChecksumIndexInput segmentsIn = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
    final SegmentInfos infos;
    try {
      infos = readCommit(directory, segmentsIn, gen);
    } catch (EOFException | NoSuchFileException | FileNotFoundException cause) {
      throw new CorruptIndexException("Unexpected file read error while reading index.", segmentsIn, cause);
    }
    return infos;
  }
}
 
Example #17
Source File: SimpleTextPointsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the points index file to learn each field's offset in the data file (verifying the
 * index file's footer), then opens the data file and creates one reader per field. If creating
 * the readers fails, this instance is closed before the exception propagates.
 */
public SimpleTextPointsReader(SegmentReadState readState) throws IOException {
  final String segmentName = readState.segmentInfo.name;
  final String segmentSuffix = readState.segmentSuffix;

  // Read index: field name -> file pointer into the data file.
  final Map<String,Long> fieldToFileOffset = new HashMap<>();
  final String indexFileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, SimpleTextPointsFormat.POINT_INDEX_EXTENSION);
  try (ChecksumIndexInput in = readState.directory.openChecksumInput(indexFileName, IOContext.DEFAULT)) {
    readLine(in);
    final int count = parseInt(FIELD_COUNT);
    int fieldIndex = 0;
    while (fieldIndex < count) {
      // Each field contributes two lines: its name, then its file pointer.
      readLine(in);
      final String fieldName = stripPrefix(FIELD_FP_NAME);
      readLine(in);
      final long fp = parseLong(FIELD_FP);
      fieldToFileOffset.put(fieldName, fp);
      fieldIndex++;
    }
    SimpleTextUtil.checkFooter(in);
  }

  // Initialize readers now:
  final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, SimpleTextPointsFormat.POINT_EXTENSION);
  boolean success = false;
  dataIn = readState.directory.openInput(fileName, IOContext.DEFAULT);
  try {
    for (Map.Entry<String,Long> fieldAndOffset : fieldToFileOffset.entrySet()) {
      readers.put(fieldAndOffset.getKey(), initReader(fieldAndOffset.getValue()));
    }
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(this);
    }
  }

  this.readState = readState;
}
 
Example #18
Source File: TestOfflineSorter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sorts a single-entry temp file with a reader wrapper that asserts {@code next()} is never
 * called again after it has returned {@code null}.
 */
public void testOverNexting() throws Exception {
  Directory dir = newDirectory();
  IndexOutput out = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
  try (ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(out)) {
    byte[] payload = new byte[Integer.BYTES];
    random().nextBytes(payload);
    writer.write(payload);
    CodecUtil.writeFooter(out);
  }

  OfflineSorter sorter = new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(4), OfflineSorter.MAX_TEMPFILES, Integer.BYTES, null, 0) {
    @Override
    protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
      final ByteSequencesReader delegate = super.getReader(in, name);

      return new ByteSequencesReader(in, name) {

        private boolean exhausted;

        @Override
        public BytesRef next() throws IOException {
          // if we returned null already, OfflineSorter should not call next() again
          assertFalse(exhausted);
          final BytesRef result = delegate.next();
          exhausted = (result == null);
          return result;
        }

        @Override
        public void close() throws IOException {
          delegate.close();
        }
      };
    }
  };
  sorter.sort(out.getName());
  dir.close();
}
 
Example #19
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one sorted entry from {@code meta}: its fixed fields in stream order, followed by the
 * terms dictionary via {@code readTermDict}. Reads are sequential, so their order must be kept.
 *
 * @param meta metadata input to read from
 * @return the populated entry
 * @throws IOException if reading fails
 */
private SortedEntry readSorted(ChecksumIndexInput meta) throws IOException {
  SortedEntry entry = new SortedEntry();
  entry.docsWithFieldOffset = meta.readLong();
  entry.docsWithFieldLength = meta.readLong();
  entry.jumpTableEntryCount = meta.readShort();
  entry.denseRankPower = meta.readByte();
  entry.numDocsWithField = meta.readInt();
  entry.bitsPerValue = meta.readByte();
  entry.ordsOffset = meta.readLong();
  entry.ordsLength = meta.readLong();
  // The terms-dictionary metadata follows the ordinals metadata in the stream.
  readTermDict(meta, entry);
  return entry;
}
 
Example #20
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one binary entry from {@code meta}. The addresses section is read either when the
 * format version supports compressed binaries and at least one doc has the field, or when the
 * values have varying lengths. Reads are sequential, so their order must be kept.
 *
 * @param meta metadata input to read from
 * @return the populated entry
 * @throws IOException if reading fails
 */
private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException {
  BinaryEntry entry = new BinaryEntry();
  entry.dataOffset = meta.readLong();
  entry.dataLength = meta.readLong();
  entry.docsWithFieldOffset = meta.readLong();
  entry.docsWithFieldLength = meta.readLong();
  entry.jumpTableEntryCount = meta.readShort();
  entry.denseRankPower = meta.readByte();
  entry.numDocsWithField = meta.readInt();
  entry.minLength = meta.readInt();
  entry.maxLength = meta.readInt();
  if ((version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED && entry.numDocsWithField > 0) ||  entry.minLength < entry.maxLength) {
    entry.addressesOffset = meta.readLong();

    // Old count of uncompressed addresses
    long numAddresses = entry.numDocsWithField + 1L;
    // New count of compressed addresses - the number of compressed blocks
    if (version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED) {
      entry.numCompressedChunks = meta.readVInt();
      entry.docsPerChunkShift = meta.readVInt();
      entry.maxUncompressedChunkSize = meta.readVInt();
      numAddresses = entry.numCompressedChunks;
    }

    final int blockShift = meta.readVInt();
    entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift);
    // Account for the memory held by the loaded addresses metadata.
    ramBytesUsed += entry.addressesMeta.ramBytesUsed();
    entry.addressesLength = meta.readLong();
  }
  return entry;
}
 
Example #21
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Fills the numeric fields of {@code entry} by reading them sequentially from {@code meta};
 * the order of the reads must be kept.
 *
 * <p>The {@code tableSize} value read from the stream is handled as follows: above 256 it is
 * rejected as corrupt; when zero or positive, that many longs are read into
 * {@code entry.table}; when below -1, {@code entry.blockShift} is set to
 * {@code -2 - tableSize}; in every other case {@code entry.blockShift} is -1.
 *
 * @param meta  metadata input to read from
 * @param entry entry to populate
 * @throws CorruptIndexException if the table size exceeds 256
 * @throws IOException if reading fails
 */
private void readNumeric(ChecksumIndexInput meta, NumericEntry entry) throws IOException {
  entry.docsWithFieldOffset = meta.readLong();
  entry.docsWithFieldLength = meta.readLong();
  entry.jumpTableEntryCount = meta.readShort();
  entry.denseRankPower = meta.readByte();
  entry.numValues = meta.readLong();
  int tableSize = meta.readInt();
  if (tableSize > 256) {
    throw new CorruptIndexException("invalid table size: " + tableSize, meta);
  }
  if (tableSize >= 0) {
    entry.table = new long[tableSize];
    // Account for the memory held by the table.
    ramBytesUsed += RamUsageEstimator.sizeOf(entry.table);
    for (int i = 0; i < tableSize; ++i) {
      entry.table[i] = meta.readLong();
    }
  }
  // Values below -1 encode a block shift; everything else means blockShift is -1.
  if (tableSize < -1) {
    entry.blockShift = -2 - tableSize;
  } else {
    entry.blockShift = -1;
  }
  entry.bitsPerValue = meta.readByte();
  entry.minValue = meta.readLong();
  entry.gcd = meta.readLong();
  entry.valuesOffset = meta.readLong();
  entry.valuesLength = meta.readLong();
  entry.valueJumpTableOffset = meta.readLong();
}
 
Example #22
Source File: SimpleTextUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the SimpleText checksum line: the next line must start with the CHECKSUM prefix, the
 * digits after the prefix must equal the checksum computed over everything read before that
 * line (zero-padded to 20 digits), and nothing may follow the line.
 *
 * @param input checksumming input to read the footer line from
 * @throws CorruptIndexException if the line is not a checksum line, the checksums differ,
 *         or there is content after the footer
 * @throws IOException if reading fails
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  // Capture the running checksum before the footer line itself is consumed.
  final String computed = String.format(Locale.ROOT, "%020d", input.getChecksum());

  final BytesRefBuilder line = new BytesRefBuilder();
  readLine(input, line);
  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString(), input);
  }

  final int digitsLength = line.length() - CHECKSUM.length;
  final String stored = new BytesRef(line.bytes(), CHECKSUM.length, digitsLength).utf8ToString();
  if (computed.equals(stored) == false) {
    throw new CorruptIndexException("SimpleText checksum failure: " + stored + " != " + computed, input);
  }

  if (input.getFilePointer() != input.length()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}
 
Example #23
Source File: BloomFilteringPostingsFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the bloom file for the segment, resolves and opens the delegate postings format named
 * in it, loads one {@code FuzzySet} per stored field, and verifies the file's footer. On any
 * failure both the bloom input and the delegate producer are closed.
 */
public BloomFilteredFieldsProducer(SegmentReadState state)
    throws IOException {

  final String bloomFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  boolean success = false;
  ChecksumIndexInput bloomIn = null;
  try {
    bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
    CodecUtil.checkIndexHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    final String delegateName = bloomIn.readString();
    final PostingsFormat delegatePostingsFormat = PostingsFormat.forName(delegateName);
    this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);

    final int numBlooms = bloomIn.readInt();
    for (int bloomIndex = 0; bloomIndex < numBlooms; bloomIndex++) {
      final int fieldNum = bloomIn.readInt();
      final FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
      bloomsByFieldName.put(fieldInfo.name, bloom);
    }

    CodecUtil.checkFooter(bloomIn);
    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
 
Example #24
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one sorted-set entry from {@code meta}. A leading flag byte selects the layout: 0 means
 * the entry is stored as a single sorted entry (read via {@code readSorted}), 1 means the full
 * multi-valued layout follows, and any other value is rejected as corrupt. Reads are
 * sequential, so their order must be kept.
 *
 * @param meta metadata input to read from
 * @return the populated entry
 * @throws CorruptIndexException if the flag byte is neither 0 nor 1
 * @throws IOException if reading fails
 */
private SortedSetEntry readSortedSet(ChecksumIndexInput meta) throws IOException {
  SortedSetEntry entry = new SortedSetEntry();
  byte multiValued = meta.readByte();
  switch (multiValued) {
    case 0: // singlevalued
      entry.singleValueEntry = readSorted(meta);
      return entry;
    case 1: // multivalued
      break;
    default:
      throw new CorruptIndexException("Invalid multiValued flag: " + multiValued, meta);
  }
  // Multi-valued layout from here on.
  entry.docsWithFieldOffset = meta.readLong();
  entry.docsWithFieldLength = meta.readLong();
  entry.jumpTableEntryCount = meta.readShort();
  entry.denseRankPower = meta.readByte();
  entry.bitsPerValue = meta.readByte();
  entry.ordsOffset = meta.readLong();
  entry.ordsLength = meta.readLong();
  entry.numDocsWithField = meta.readInt();
  entry.addressesOffset = meta.readLong();
  final int blockShift = meta.readVInt();
  entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithField + 1, blockShift);
  // Account for the memory held by the loaded addresses metadata.
  ramBytesUsed += entry.addressesMeta.ramBytesUsed();
  entry.addressesLength = meta.readLong();
  readTermDict(meta, entry);
  return entry;
}
 
Example #25
Source File: CompletionFieldsProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the dictionary file (validating its header and footer structure), then reads the index
 * file: the delegate postings format name followed by one record per suggest field, from which
 * a {@code CompletionsTermsReader} is registered under the field's name. The index file's
 * footer is verified at the end. On failure the delegate producer and the dictionary input are
 * closed.
 */
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode) throws IOException {
  final String segmentName = state.segmentInfo.name;
  final String indexFile = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, INDEX_EXTENSION);
  delegateFieldsProducer = null;
  boolean success = false;

  try (ChecksumIndexInput index = state.directory.openChecksumInput(indexFile, state.context)) {
    // open up dict file containing all fsts
    final String dictFile = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, DICT_EXTENSION);
    dictIn = state.directory.openInput(dictFile, state.context);
    CodecUtil.checkIndexHeader(dictIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // just validate the footer for the dictIn
    CodecUtil.retrieveChecksum(dictIn);

    // open up index file (fieldNumber, offset)
    CodecUtil.checkIndexHeader(index, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // load delegate PF
    final String delegateName = index.readString();
    delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // read suggest field numbers and their offsets in the terms file from index
    final int numFields = index.readVInt();
    readers = new HashMap<>(numFields);
    for (int fieldIndex = 0; fieldIndex < numFields; fieldIndex++) {
      final int fieldNumber = index.readVInt();
      final long offset = index.readVLong();
      final long minWeight = index.readVLong();
      final long maxWeight = index.readVLong();
      final byte type = index.readByte();
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
      // we don't load the FST yet
      readers.put(fieldInfo.name, new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
    }
    CodecUtil.checkFooter(index);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateFieldsProducer, dictIn);
    }
  }
}
 
Example #26
Source File: Lucene50CompoundReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the CFS entries file: validates the header (storing its version in {@code version}),
 * reads each entry's id, offset and length, and verifies the checksum footer.
 *
 * @return an unmodifiable map from entry id to its {@code FileEntry}
 * @throws CorruptIndexException if the same entry id occurs twice
 * @throws IOException if reading or footer validation fails
 */
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
  Map<String,FileEntry> entriesById = null;
  try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
    Throwable failure = null;
    try {
      version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
                                           Lucene50CompoundFormat.VERSION_START,
                                           Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");
      final int numEntries = entriesStream.readVInt();
      entriesById = new HashMap<>(numEntries);
      for (int remaining = numEntries; remaining > 0; remaining--) {
        final FileEntry entry = new FileEntry();
        final String id = entriesStream.readString();
        // put() returns the previous mapping, so a non-null result means a duplicate id.
        if (entriesById.put(id, entry) != null) {
          throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
        }
        entry.offset = entriesStream.readLong();
        entry.length = entriesStream.readLong();
      }
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(entriesStream, failure);
    }
  }
  return Collections.unmodifiableMap(entriesById);
}
 
Example #27
Source File: Lucene50LiveDocsFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the live-docs bit set for a segment commit.
 *
 * <p>The file name is derived from the segment name, {@code EXTENSION} and the deletion
 * generation. After the index header is validated, one long is read per bit-set word
 * (enough words to hold {@code maxDoc} bits). The number of cleared bits must equal
 * {@code info.getDelCount()}, otherwise the file is reported as corrupt.
 *
 * @param dir     directory containing the live-docs file
 * @param info    segment commit whose live docs are read
 * @param context IO context used to open the file
 * @return a read-only view of the live-docs bits
 * @throws IOException if reading fails or the deleted count does not match
 */
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  final long delGen = info.getDelGen();
  final String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, delGen);
  final int maxDoc = info.info.maxDoc();
  try (ChecksumIndexInput in = dir.openChecksumInput(fileName, context)) {
    // Any failure is remembered and handed to checkFooter in the finally block below.
    Throwable failure = null;
    try {
      CodecUtil.checkIndexHeader(
          in,
          CODEC_NAME,
          VERSION_START,
          VERSION_CURRENT,
          info.info.getId(),
          Long.toString(delGen, Character.MAX_RADIX));
      final long[] words = new long[FixedBitSet.bits2words(maxDoc)];
      int w = 0;
      while (w < words.length) {
        words[w] = in.readLong();
        w++;
      }
      final FixedBitSet liveBits = new FixedBitSet(words, maxDoc);
      final int deleted = liveBits.length() - liveBits.cardinality();
      if (deleted != info.getDelCount()) {
        throw new CorruptIndexException("bits.deleted=" + deleted
                                        + " info.delcount=" + info.getDelCount(), in);
      }
      return liveBits.asReadOnlyBits();
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(in, failure);
    }
  }
  // Not expected to be reached: the try block returns, and a caught failure is passed to checkFooter.
  throw new AssertionError();
}
 
Example #28
Source File: OfflineSorter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Factory hook for the reader that pulls byte sequences back from disk.
 * Subclasses can override this to change how the sequences are read.
 *
 * @param in   checksummed input to read from
 * @param name name passed through to the created reader
 * @return a new {@link ByteSequencesReader} over {@code in}
 */
protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
  final ByteSequencesReader reader = new ByteSequencesReader(in, name);
  return reader;
}
 
Example #29
Source File: CorruptionUtils.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Corrupts a random file at a random position.
 *
 * <p>One of {@code files} is picked at random, its stored checksum is recorded, and a
 * randomly chosen position is corrupted via {@code corruptAt}. The file is then re-read
 * through a {@link ChecksumIndexInput} to obtain both the checksum computed over the
 * content and the checksum value stored in the last 8 bytes. If neither differs from the
 * checksum recorded before the corruption, the test is skipped via {@code assumeTrue}
 * (checksum collision).
 *
 * @param random source of randomness for picking the file and the position
 * @param files  candidate files; must be non-empty and the picked one must be a regular file
 * @throws IOException if reading or writing the file fails
 */
public static void corruptFile(Random random, Path... files) throws IOException {
    assertTrue("files must be non-empty", files.length > 0);
    final Path fileToCorrupt = RandomPicks.randomFrom(random, files);
    assertTrue(fileToCorrupt + " is not a file", Files.isRegularFile(fileToCorrupt));
    // The bare file name is needed for every directory-level call below; compute it once.
    final String fileName = fileToCorrupt.getFileName().toString();
    try (Directory dir = FSDirectory.open(fileToCorrupt.toAbsolutePath().getParent())) {
        long checksumBeforeCorruption;
        try (IndexInput input = dir.openInput(fileName, IOContext.DEFAULT)) {
            checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
        }
        try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
            long maxPosition = raf.size();

            if (fileName.endsWith(".cfs") && maxPosition > 4) {
                // TODO: it is known that Lucene does not check the checksum of CFS file (CompoundFileS, like an archive)
                // see note at https://github.com/elastic/elasticsearch/pull/33911
                // so far, don't corrupt crc32 part of checksum (last 4 bytes) of cfs file
                // checksum is 8 bytes: first 4 bytes have to be zeros, while crc32 value is not verified
                maxPosition -= 4;
            }
            // nextInt takes an int bound, so clamp very large files to Integer.MAX_VALUE
            final int position = random.nextInt((int) Math.min(Integer.MAX_VALUE, maxPosition));
            corruptAt(fileToCorrupt, raf, position);
        }

        long checksumAfterCorruption;
        long actualChecksumAfterCorruption;
        try (ChecksumIndexInput input = dir.openChecksumInput(fileName, IOContext.DEFAULT)) {
            assertThat(input.getFilePointer(), is(0L));
            input.seek(input.length() - 8); // one long is the checksum... 8 bytes
            checksumAfterCorruption = input.getChecksum();
            actualChecksumAfterCorruption = input.readLong();
        }
        // we need to add assumptions here that the checksums actually really don't match there is a small chance to get collisions
        // in the checksum which is ok though....
        StringBuilder msg = new StringBuilder();
        msg.append("before: [").append(checksumBeforeCorruption).append("] ");
        msg.append("after: [").append(checksumAfterCorruption).append("] ");
        // opening bracket added so this value is delimited like the two above
        msg.append("checksum value after corruption: [").append(actualChecksumAfterCorruption).append("] ");
        msg.append("file: ").append(fileToCorrupt.getFileName()).append(" length: ");
        msg.append(dir.fileLength(fileName));
        logger.info("Checksum {}", msg);
        assumeTrue("Checksum collision - " + msg,
                checksumAfterCorruption != checksumBeforeCorruption // collision
                        || actualChecksumAfterCorruption != checksumBeforeCorruption); // checksum corrupted
        assertThat("no file corrupted", fileToCorrupt, notNullValue());
    }
}
 
Example #30
Source File: GenericRecordReader.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up a single segment's commit information in a segments file.
 *
 * <p>The file must start with {@code CodecUtil.CODEC_MAGIC}; any other leading int is
 * rejected as a legacy format. After the header, the version and counter fields are
 * skipped and the per-segment records are scanned in order until one whose name equals
 * {@code segmentInfoName} is found.
 *
 * @param directory       directory containing the segments file and the segment info files
 * @param segmentFileName name of the segments file to open
 * @param segmentInfoName name of the segment to look for
 * @return the commit information (segment info, deletion count, deletion generation) of
 *         the matching segment
 * @throws IOException if the file is in a legacy format, is corrupt (negative segment
 *                     count or out-of-range deletion count), or contains no segment
 *                     named {@code segmentInfoName}
 */
private SegmentInfoPerCommit segmentInfosRead(Directory directory, String segmentFileName, String segmentInfoName)
    throws IOException {
  boolean success = false;

  ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
  try {
    final int format = input.readInt();
    if (format != CodecUtil.CODEC_MAGIC) {
      throw new IOException("Legacy Infos not supported for dir [" + directory + "].");
    }
    // 4.0+
    CodecUtil.checkHeaderNoMagic(input, "segments", SegmentInfos.VERSION_40, SegmentInfos.VERSION_40);
    input.readLong();// read version
    input.readInt(); // read counter
    int numSegments = input.readInt();
    if (numSegments < 0) {
      throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
    }
    for (int seg = 0; seg < numSegments; seg++) {
      String segName = input.readString();
      Codec codec = Codec.forName(input.readString());
      // The segment info is loaded for every record because the deletion count is validated against its doc count.
      SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ);
      info.setCodec(codec);
      long delGen = input.readLong();
      int delCount = input.readInt();
      if (delCount < 0 || delCount > info.getDocCount()) {
        throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")");
      }
      if (segName.equals(segmentInfoName)) {
        success = true;
        return new SegmentInfoPerCommit(info, delCount, delGen);
      }
    }
    throw new IOException("Segment [" + segmentInfoName + "] not found in dir [" + directory + "]");
  } finally {
    if (!success) {
      // An exception is already propagating; don't let a failure in close() replace it.
      IOUtils.closeWhileHandlingException(input);
    } else {
      input.close();
    }
  }
}