Java Code Examples for org.apache.lucene.codecs.CodecUtil#checksumEntireFile()

The following examples show how to use org.apache.lucene.codecs.CodecUtil#checksumEntireFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MetaDataStateFormat.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the persisted state from {@code file}.
 *
 * <p>The whole-file checksum is verified first, then the codec header is checked. After the
 * header come an int selecting the {@code XContentType} and a long that is read but ignored.
 * Everything between that point and the codec footer is handed to {@code fromXContent}.
 *
 * @param file path of the state file; its parent is opened as the directory
 * @return the state produced by {@code fromXContent}
 * @throws CorruptStateException wrapping any corrupt / too-old / too-new index exception
 * @throws IOException on other I/O failures
 */
public final T read(Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (final IndexInput input = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
            // Checksum the complete file before parsing anything, so a damaged file is rejected here.
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, STATE_FILE_CODEC, STATE_FILE_VERSION, STATE_FILE_VERSION);
            final int typeOrdinal = input.readInt();
            final XContentType contentType = XContentType.values()[typeOrdinal];
            input.readLong(); // version currently unused; read only to advance past it
            final long start = input.getFilePointer();
            final long length = input.length() - CodecUtil.footerLength() - start;
            try (IndexInput slice = input.slice("state_xcontent", start, length);
                 XContentParser parser = XContentFactory.xContent(contentType)
                     .createParser(new InputStreamIndexInput(slice, length))) {
                return fromXContent(parser);
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
            // rethrow as the dedicated exception type, keeping the original as the cause
            throw new CorruptStateException(e);
        }
    }
}
 
Example 2
Source File: ChecksumBlobStoreFormat.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the blob with exactly the given name; the name is not resolved through the
 * {@link #blobName} method.
 *
 * <p>The blob is read fully into memory, its checksum and codec header are verified, and the
 * bytes between the header and the codec footer are parsed as SMILE x-content.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 * @return the value produced by applying {@code reader} to the parser
 * @throws CorruptStateException wrapping any corrupt / too-old / too-new index exception
 * @throws IOException on other I/O failures
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    final BytesReference blob = Streams.readFully(blobContainer.readBlob(blobName));
    final String description = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
    try (ByteArrayIndexInput input = new ByteArrayIndexInput(description, BytesReference.toBytes(blob))) {
        CodecUtil.checksumEntireFile(input);
        CodecUtil.checkHeader(input, codec, VERSION, VERSION);
        // the payload sits between the end of the header and the start of the footer
        final long offset = input.getFilePointer();
        final long length = input.length() - CodecUtil.footerLength() - offset;
        final BytesReference payload = blob.slice((int) offset, (int) length);
        try (XContentParser parser = XContentHelper.createParser(
                 namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, payload, XContentType.SMILE)) {
            return reader.apply(parser);
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        // rethrow as the dedicated exception type, keeping the original as the cause
        throw new CorruptStateException(e);
    }
}
 
Example 3
Source File: ChecksumBlobStoreFormat.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the blob with exactly the given name; the name is not resolved through the
 * {@link #blobName} method.
 *
 * <p>The blob is loaded into a byte array, its checksum and codec header are verified, and the
 * bytes between the header and the codec footer are passed to {@code read(BytesReference)}.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 * @return the value returned by {@code read} for the blob's payload
 * @throws CorruptStateException wrapping any corrupt / too-old / too-new index exception
 * @throws IOException on other I/O failures
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    try (InputStream stream = blobContainer.readBlob(blobName)) {
        final byte[] content = ByteStreams.toByteArray(stream);
        final String description = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
        try (ByteArrayIndexInput input = new ByteArrayIndexInput(description, content)) {
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, codec, VERSION, VERSION);
            // the payload sits between the end of the header and the start of the footer
            final long offset = input.getFilePointer();
            final long length = input.length() - CodecUtil.footerLength() - offset;
            return read(new BytesArray(content, (int) offset, (int) length));
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
            // rethrow as the dedicated exception type, keeping the original as the cause
            throw new CorruptStateException(e);
        }
    }
}
 
Example 4
Source File: FSTTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the segment's FST terms file, validates its index header and whole-file checksum,
 * initialises the postings reader from it and then loads one {@code TermsReader} per entry of
 * the field directory. The input is closed before the constructor returns, whether or not
 * loading succeeded.
 *
 * @param state segment read state supplying the directory, segment info and field infos
 * @param postingsReader postings reader to initialise from the terms file
 * @throws IOException if the file cannot be opened or read, or fails validation
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String fileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(fileName, state.context);

  boolean loaded = false;
  try {
    CodecUtil.checkIndexHeader(
        in,
        FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START,
        FSTTermsWriter.TERMS_VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);
    this.postingsReader.init(in, state);
    seekDir(in);

    final FieldInfos infos = state.fieldInfos;
    final int fieldCount = in.readVInt();
    for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++) {
      final int fieldNumber = in.readVInt();
      final FieldInfo fieldInfo = infos.fieldInfo(fieldNumber);
      final long numTerms = in.readVLong();
      final long sumTotalTermFreq = in.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and only one value was written
      final long sumDocFreq;
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        sumDocFreq = sumTotalTermFreq;
      } else {
        sumDocFreq = in.readVLong();
      }
      final int docCount = in.readVInt();
      final TermsReader loadedReader =
          new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      final TermsReader replaced = fields.put(fieldInfo.name, loadedReader);
      checkFieldSummary(state.segmentInfo, in, loadedReader, replaced);
    }
    loaded = true;
  } finally {
    if (!loaded) {
      // an exception is already propagating; close without masking it
      IOUtils.closeWhileHandlingException(in);
    } else {
      IOUtils.close(in);
    }
  }
}
 
Example 5
Source File: OrdsBlockTreeTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the integrity of this reader's data: runs {@code CodecUtil.checksumEntireFile} on the
 * term dictionary input {@code in}, then delegates to the postings reader's own check.
 *
 * @throws IOException propagated from either check
 */
@Override
public void checkIntegrity() throws IOException {
  // term dictionary
  CodecUtil.checksumEntireFile(in);
    
  // postings
  postingsReader.checkIntegrity();
}
 
Example 6
Source File: MetaDataStateFormat.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the persisted state from {@code file}.
 *
 * <p>The whole-file checksum is verified first, then the codec header is checked against the
 * supported version range. The int that follows selects the stored {@code XContentType}, which
 * must equal {@code FORMAT}. Everything between that point and the codec footer is parsed and
 * handed to {@code fromXContent}.
 *
 * @param namedXContentRegistry registry passed to the x-content parser
 * @param file path of the state file; its parent is opened as the directory
 * @return the state produced by {@code fromXContent}
 * @throws IllegalStateException if the stored content type is not {@code FORMAT}
 * @throws CorruptStateException wrapping any corrupt / too-old / too-new index exception
 * @throws IOException on other I/O failures
 */
public final T read(NamedXContentRegistry namedXContentRegistry, Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (IndexInput input = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
            // Checksum the complete file before parsing anything, so a damaged file is rejected here.
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, STATE_FILE_CODEC, MIN_COMPATIBLE_STATE_FILE_VERSION, STATE_FILE_VERSION);
            final XContentType storedType = XContentType.values()[input.readInt()];
            if (FORMAT != storedType) {
                throw new IllegalStateException("expected state in " + file + " to be " + FORMAT + " format but was " + storedType);
            }
            final long start = input.getFilePointer();
            final long length = input.length() - CodecUtil.footerLength() - start;
            try (IndexInput slice = input.slice("state_xcontent", start, length);
                 InputStreamIndexInput stream = new InputStreamIndexInput(slice, length);
                 XContentParser parser = XContentFactory.xContent(FORMAT)
                     .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, stream)) {
                return fromXContent(parser);
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
            // rethrow as the dedicated exception type, keeping the original as the cause
            throw new CorruptStateException(e);
        }
    }
}
 
Example 7
Source File: Lucene50PostingsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on each of {@code docIn}, {@code posIn} and
 * {@code payIn} that is non-null, in that order. Inputs that are null are skipped.
 *
 * @throws IOException propagated from the checksum calls
 */
@Override
public void checkIntegrity() throws IOException {
  if (docIn != null) {
    CodecUtil.checksumEntireFile(docIn);
  }
  if (posIn != null) {
    CodecUtil.checksumEntireFile(posIn);
  }
  if (payIn != null) {
    CodecUtil.checksumEntireFile(payIn);
  }
}
 
Example 8
Source File: UniformSplitTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the integrity of this reader's data: runs {@code CodecUtil.checksumEntireFile} on the
 * term dictionary input {@code blockInput}, then delegates to the postings reader's own check.
 *
 * @throws IOException propagated from either check
 */
@Override
public void checkIntegrity() throws IOException {
  // term dictionary
  CodecUtil.checksumEntireFile(blockInput);

  // postings
  postingsReader.checkIntegrity();
}
 
Example 9
Source File: VersionBlockTreeTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the integrity of this reader's data: runs {@code CodecUtil.checksumEntireFile} on the
 * term dictionary input {@code in}, then delegates to the postings reader's own check.
 *
 * @throws IOException propagated from either check
 */
@Override
public void checkIntegrity() throws IOException {
  // term dictionary
  CodecUtil.checksumEntireFile(in);
    
  // postings
  postingsReader.checkIntegrity();
}
 
Example 10
Source File: Lucene84PostingsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on each of {@code docIn}, {@code posIn} and
 * {@code payIn} that is non-null, in that order. Inputs that are null are skipped.
 *
 * @throws IOException propagated from the checksum calls
 */
@Override
public void checkIntegrity() throws IOException {
  if (docIn != null) {
    CodecUtil.checksumEntireFile(docIn);
  }
  if (posIn != null) {
    CodecUtil.checksumEntireFile(posIn);
  }
  if (payIn != null) {
    CodecUtil.checksumEntireFile(payIn);
  }
}
 
Example 11
Source File: BlockTreeTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the integrity of this reader's data in three steps: checksums the terms index input
 * {@code indexIn}, checksums the term dictionary input {@code termsIn}, then delegates to the
 * postings reader's own check.
 *
 * @throws IOException propagated from any of the checks
 */
@Override
public void checkIntegrity() throws IOException { 
  // terms index
  CodecUtil.checksumEntireFile(indexIn);

  // term dictionary
  CodecUtil.checksumEntireFile(termsIn);
    
  // postings
  postingsReader.checkIntegrity();
}
 
Example 12
Source File: TestSegmentInfos.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Commits two empty segments ("_0" and "_1") to a directory, copies the directory while
 * altering one randomly chosen byte of every segments file, and expects
 * {@code SegmentInfos.readLatestCommit} on the altered copy to fail with a corrupt / too-old /
 * too-new index exception.
 *
 * <p>If the checksum of an altered segments file still verifies, the test returns early.
 * NOTE(review): that early return happens before the {@code close()} calls at the end of the
 * method, so both directories would be left open on that path - confirm whether the test
 * framework tolerates this.
 */
public void testBitFlippedTriggersCorruptIndexException() throws IOException {
  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false);
  byte id[] = StringHelper.randomId();
  Codec codec = Codec.getDefault();

  // Build a commit that references two segments, each with an empty file set.
  SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
  SegmentInfo info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_0", 1, false, Codec.getDefault(),
                                     Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
  SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
  sis.add(commitInfo);

  info = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "_1", 1, false, Codec.getDefault(),
                         Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
  commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1, StringHelper.randomId());
  sis.add(commitInfo);

  sis.commit(dir);

  // Copy every file into corruptDir; segments files get exactly one byte changed on the way.
  BaseDirectoryWrapper corruptDir = newDirectory();
  corruptDir.setCheckIndexOnClose(false);
  boolean corrupt = false;
  for (String file : dir.listAll()) {
    if (file.startsWith(IndexFileNames.SEGMENTS)) {
      try (IndexInput in = dir.openInput(file, IOContext.DEFAULT);
          IndexOutput out = corruptDir.createOutput(file, IOContext.DEFAULT)) {
        // Copy the bytes before the chosen position unchanged, write an altered byte, then copy the rest.
        final long corruptIndex = TestUtil.nextLong(random(), 0, in.length() - 1);
        out.copyBytes(in, corruptIndex);
        // Adding 1..255 and truncating to a byte always yields a value different from the original.
        final int b = Byte.toUnsignedInt(in.readByte()) + TestUtil.nextInt(random(), 0x01, 0xff);
        out.writeByte((byte) b);
        out.copyBytes(in, in.length() - in.getFilePointer());
      }
      try (IndexInput in = corruptDir.openInput(file, IOContext.DEFAULT)) {
        // Reaching the lines after this call means the checksum did not notice the change.
        CodecUtil.checksumEntireFile(in);
        if (VERBOSE) {
          System.out.println("TEST: Altering the file did not update the checksum, aborting...");
        }
        return;
      } catch (CorruptIndexException e) {
        // ok: the altered file fails its checksum, which is what the rest of the test relies on
      }
      corrupt = true;
    } else if (ExtrasFS.isExtra(file) == false) {
      // non-segments files (other than "extra" files) are copied unchanged
      corruptDir.copyFrom(dir, file, file, IOContext.DEFAULT);
    }
  }
  assertTrue("No segments file found", corrupt);

  expectThrowsAnyOf(
      Arrays.asList(CorruptIndexException.class, IndexFormatTooOldException.class, IndexFormatTooNewException.class),
      () -> SegmentInfos.readLatestCommit(corruptDir));
  dir.close();
  corruptDir.close();
}
 
Example 13
Source File: CompressingTermVectorsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Delegates to {@code indexReader.checkIntegrity()} and then runs
 * {@code CodecUtil.checksumEntireFile} on {@code vectorsStream}.
 *
 * @throws IOException propagated from either check
 */
@Override
public void checkIntegrity() throws IOException {
  indexReader.checkIntegrity();
  CodecUtil.checksumEntireFile(vectorsStream);
}
 
Example 14
Source File: Lucene60PointsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on {@code dataIn}.
 *
 * @throws IOException propagated from the checksum call
 */
@Override
public void checkIntegrity() throws IOException {
  CodecUtil.checksumEntireFile(dataIn);
}
 
Example 15
Source File: FieldsIndexReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on {@code indexInput}.
 *
 * @throws IOException propagated from the checksum call
 */
@Override
void checkIntegrity() throws IOException {
  CodecUtil.checksumEntireFile(indexInput);
}
 
Example 16
Source File: Lucene80NormsProducer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on {@code data}.
 *
 * @throws IOException propagated from the checksum call
 */
@Override
public void checkIntegrity() throws IOException {
  CodecUtil.checksumEntireFile(data);
}
 
Example 17
Source File: TestAllFilesHaveChecksumFooter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Opens {@code file} from {@code dir} with a randomized IO context and runs
 * {@code CodecUtil.checksumEntireFile} on it; the input is closed afterwards.
 *
 * @param dir directory containing the file
 * @param file name of the file to check
 * @throws IOException propagated from opening the file or from the checksum call
 */
private void checkFooter(Directory dir, String file) throws IOException {
  try (IndexInput in = dir.openInput(file, newIOContext(random()))) {
    CodecUtil.checksumEntireFile(in);
  }
}
 
Example 18
Source File: Lucene50CompoundReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs {@code CodecUtil.checksumEntireFile} on {@code handle}.
 *
 * @throws IOException propagated from the checksum call
 */
@Override
public void checkIntegrity() throws IOException {
  CodecUtil.checksumEntireFile(handle);
}
 
Example 19
Source File: FixedGapTermsIndexReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the segment's fixed-gap terms index file, validates its index header and whole-file
 * checksum, reads the global settings (index interval, packed-ints version, block size) and
 * then loads one {@code FieldIndexData} per entry of the field directory.
 *
 * <p>The input is closed before the constructor returns, whether or not loading succeeded, and
 * the shared term bytes are frozen in either case.
 *
 * @param state segment read state supplying the directory, segment info and field infos
 * @throws CorruptIndexException if a value read from the file is invalid, a field number is
 *     unknown to {@code state.fieldInfos}, or a field appears twice
 * @throws IOException if the file cannot be opened or read, or fails validation
 */
public FixedGapTermsIndexReader(SegmentReadState state) throws IOException {
  final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);

  String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                   state.segmentSuffix,
                                                   FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
  final IndexInput in = state.directory.openInput(fileName, state.context);

  boolean success = false;

  try {

    CodecUtil.checkIndexHeader(in, FixedGapTermsIndexWriter.CODEC_NAME,
                                     FixedGapTermsIndexWriter.VERSION_CURRENT,
                                     FixedGapTermsIndexWriter.VERSION_CURRENT,
                                     state.segmentInfo.getId(), state.segmentSuffix);

    CodecUtil.checksumEntireFile(in);

    indexInterval = in.readVInt();
    if (indexInterval < 1) {
      throw new CorruptIndexException("invalid indexInterval: " + indexInterval, in);
    }
    packedIntsVersion = in.readVInt();
    blocksize = in.readVInt();

    seekDir(in);

    // Read directory
    final int numFields = in.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, in);
    }
    for (int i = 0; i < numFields; i++) {
      final int field = in.readVInt();
      final long numIndexTerms = in.readVInt(); // TODO: change this to a vLong if we fix writer to support > 2B index terms
      if (numIndexTerms < 0) {
        throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms, in);
      }
      final long termsStart = in.readVLong();
      final long indexStart = in.readVLong();
      final long packedIndexStart = in.readVLong();
      final long packedOffsetsStart = in.readVLong();
      if (packedIndexStart < indexStart) {
        throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + " numIndexTerms: " + numIndexTerms, in);
      }
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        // an unknown field number would otherwise surface as a NullPointerException below
        throw new CorruptIndexException("invalid field number: " + field, in);
      }
      FieldIndexData previous = fields.put(fieldInfo.name, new FieldIndexData(in, termBytes, indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
      }
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      // an exception is already propagating; close without masking it
      IOUtils.closeWhileHandlingException(in);
    }
    termBytesReader = termBytes.freeze(true);
  }
}
 
Example 20
Source File: VariableGapTermsIndexReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the segment's variable-gap terms index file, validates its index header and whole-file
 * checksum, and loads one {@code FieldIndexData} per entry of the field directory. The input is
 * closed before the constructor returns, whether or not loading succeeded.
 *
 * @param state segment read state supplying the directory, segment info and field infos
 * @throws CorruptIndexException if the field count is negative, a field number is unknown to
 *     {@code state.fieldInfos}, or a field appears twice
 * @throws IOException if the file cannot be opened or read, or fails validation
 */
public VariableGapTermsIndexReader(SegmentReadState state) throws IOException {
  String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                   state.segmentSuffix,
                                                   VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
  final IndexInput in = state.directory.openInput(fileName, new IOContext(state.context, true));
  boolean success = false;

  try {

    CodecUtil.checkIndexHeader(in, VariableGapTermsIndexWriter.CODEC_NAME,
                                     VariableGapTermsIndexWriter.VERSION_START,
                                     VariableGapTermsIndexWriter.VERSION_CURRENT,
                                     state.segmentInfo.getId(), state.segmentSuffix);

    CodecUtil.checksumEntireFile(in);

    seekDir(in);

    // Read directory
    final int numFields = in.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, in);
    }

    for (int i = 0; i < numFields; i++) {
      final int field = in.readVInt();
      final long indexStart = in.readVLong();
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        // an unknown field number would otherwise surface as a NullPointerException below
        throw new CorruptIndexException("invalid field number: " + field, in);
      }
      FieldIndexData previous = fields.put(fieldInfo.name, new FieldIndexData(in, fieldInfo, indexStart));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
      }
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      // an exception is already propagating; close without masking it
      IOUtils.closeWhileHandlingException(in);
    }
  }
}