Java Code Examples for org.apache.lucene.codecs.CodecUtil#footerLength()

The following examples show how to use org.apache.lucene.codecs.CodecUtil#footerLength(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ChecksumBlobStoreFormat.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the blob stored under exactly the given name; the name is used as-is and is not
 * resolved through the {@link #blobName} method.
 * <p>
 * The blob is read into a byte array, its checksum and codec header are verified, and the
 * bytes between the end of the header and the start of the footer are passed to {@code read}.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 * @return the object produced by {@code read} from the blob's payload bytes
 * @throws IOException declared by the underlying stream and codec calls; Lucene corruption and
 *         format-version exceptions are rethrown wrapped in a {@code CorruptStateException}
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    try (InputStream blobStream = blobContainer.readBlob(blobName)) {
        final byte[] content = ByteStreams.toByteArray(blobStream);
        final String description = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
        try (ByteArrayIndexInput input = new ByteArrayIndexInput(description, content)) {
            // Verify the checksum over everything first, then the header.
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, codec, VERSION, VERSION);
            // The payload runs from the current position up to where the footer starts.
            final long payloadStart = input.getFilePointer();
            final long payloadLength = input.length() - CodecUtil.footerLength() - payloadStart;
            final BytesReference payload = new BytesArray(content, (int) payloadStart, (int) payloadLength);
            return read(payload);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // surface these as one dedicated exception type that wraps the original
            throw new CorruptStateException(ex);
        }
    }
}
 
Example 2
Source File: Store.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Determines the checksum of {@code file} and registers a {@code StoreFileMetaData} entry for it
 * in {@code builder}.
 * <p>
 * A file shorter than the codec footer is rejected with a {@link CorruptIndexException}. When
 * {@code readFileAsHash} is set, the file content is additionally hashed into the entry while
 * being read through a {@code VerifyingIndexInput}; otherwise only the checksum stored in the
 * footer is retrieved.
 *
 * @param directory      directory the file is opened from
 * @param file           name of the file to checksum
 * @param builder        receives the metadata entry keyed by file name
 * @param logger         used to report failures at debug level before they are rethrown
 * @param version        version recorded in the metadata entry
 * @param readFileAsHash whether to also hash the file content
 * @throws IOException if the file cannot be opened or read, or is detected as corrupted
 */
private static void checksumFromLuceneFile(Directory directory, String file, ImmutableMap.Builder<String, StoreFileMetaData> builder, ESLogger logger, Version version, boolean readFileAsHash) throws IOException {
    final String checksum;
    final BytesRefBuilder fileHash = new BytesRefBuilder();
    try (final IndexInput in = directory.openInput(file, IOContext.READONCE)) {
        final long length;
        try {
            length = in.length();
            if (length < CodecUtil.footerLength()) {
                // truncated files trigger IAE if we seek negative... these files are really corrupted though
                throw new CorruptIndexException("Can't retrieve checksum from file: " + file + " file length must be >= " + CodecUtil.footerLength() + " but was: " + length, in);
            }
            if (readFileAsHash) {
                final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in); // additional safety we checksum the entire file we read the hash for...
                hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
                checksum = digestToString(verifyingIndexInput.verify());
            } else {
                checksum = digestToString(CodecUtil.retrieveChecksum(in));
            }

        } catch (Throwable ex) {
            // This is the failure path: the message must say the checksum could NOT be retrieved.
            logger.debug("Can't retrieve checksum from file [{}]", ex, file);
            throw ex;
        }
        builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
    }
}
 
Example 3
Source File: MetaDataStateFormat.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the state stored in the given file.
 * <p>
 * The file's checksum is verified before anything is parsed, then the codec header is checked
 * against {@code STATE_FILE_CODEC} and {@code STATE_FILE_VERSION}. The content type is read
 * from the file, and the bytes between the current position and the footer are parsed with
 * {@code fromXContent}.
 *
 * @param file path of the state file
 * @return the state parsed from the file
 * @throws IOException declared by the underlying directory and codec calls; Lucene corruption
 *         and format-version exceptions are rethrown wrapped in a {@code CorruptStateException}
 */
public final T read(Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        final String fileName = file.getFileName().toString();
        try (final IndexInput input = dir.openInput(fileName, IOContext.DEFAULT)) {
            // Checksum the whole file up front so that a corrupted file fails here, before parsing.
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, STATE_FILE_CODEC, STATE_FILE_VERSION, STATE_FILE_VERSION);
            final int typeOrdinal = input.readInt();
            final XContentType xContentType = XContentType.values()[typeOrdinal];
            input.readLong(); // version currently unused
            final long contentStart = input.getFilePointer();
            final long contentLength = input.length() - CodecUtil.footerLength() - contentStart;
            try (IndexInput slice = input.slice("state_xcontent", contentStart, contentLength);
                 XContentParser parser = XContentFactory.xContent(xContentType)
                     .createParser(new InputStreamIndexInput(slice, contentLength))) {
                return fromXContent(parser);
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // surface these as one dedicated exception type that wraps the original
            throw new CorruptStateException(ex);
        }
    }
}
 
Example 4
Source File: ChecksumBlobStoreFormat.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the blob stored under exactly the given name; the name is used as-is and is not
 * resolved through the {@link #blobName} method.
 * <p>
 * The blob's checksum and codec header are verified, and the bytes between the end of the
 * header and the start of the footer are parsed as SMILE and passed to {@code reader}.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 * @return the object produced by {@code reader} from the blob's payload
 * @throws IOException declared by the underlying stream and codec calls; Lucene corruption and
 *         format-version exceptions are rethrown wrapped in a {@code CorruptStateException}
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    final BytesReference blob = Streams.readFully(blobContainer.readBlob(blobName));
    final String description = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
    try (ByteArrayIndexInput input = new ByteArrayIndexInput(description, BytesReference.toBytes(blob))) {
        // Verify the checksum over everything first, then the header.
        CodecUtil.checksumEntireFile(input);
        CodecUtil.checkHeader(input, codec, VERSION, VERSION);
        // The payload runs from the current position up to where the footer starts.
        final long payloadStart = input.getFilePointer();
        final long payloadLength = input.length() - CodecUtil.footerLength() - payloadStart;
        final BytesReference payload = blob.slice((int) payloadStart, (int) payloadLength);
        try (XContentParser parser = XContentHelper.createParser(
                 namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, payload, XContentType.SMILE)) {
            return reader.apply(parser);
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
        // surface these as one dedicated exception type that wraps the original
        throw new CorruptStateException(ex);
    }
}
 
Example 5
Source File: Lucene50CompoundReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Create a new CompoundFileDirectory.
 * <p>
 * Reads the entries table, opens the data file and performs cheap structural validation on it:
 * index header, checksum footer structure, and total length. The data file handle is closed
 * again if any of these steps fails.
 */
// TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
// this need not be a "general purpose" directory anymore (it only writes index files)
public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
  this.directory = directory;
  this.segmentName = si.name;
  final String dataFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.DATA_EXTENSION);
  final String entriesFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
  this.entries = readEntries(si.getId(), directory, entriesFileName);

  // Expected size of the data file: index header + every entry + footer.
  long expectedLength = CodecUtil.indexHeaderLength(Lucene50CompoundFormat.DATA_CODEC, "");
  for (FileEntry entry : entries.values()) {
    expectedLength += entry.length;
  }
  expectedLength += CodecUtil.footerLength();

  handle = directory.openInput(dataFileName, context);
  boolean validated = false;
  try {
    CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");

    // NOTE: checksumming every byte of the data file on open is too costly, so for now only
    // the structure of the checksum footer is verified (FOOTER_MAGIC + algorithmID). That is
    // cheap and still detects some forms of corruption, such as file truncation.
    CodecUtil.retrieveChecksum(handle);

    // The length is validated as well: e.g. stripping 16 bytes off the .cfs would otherwise
    // go undetected.
    final long actualLength = handle.length();
    if (actualLength != expectedLength) {
      throw new CorruptIndexException("length should be " + expectedLength + " bytes, but is " + actualLength + " instead", handle);
    }

    validated = true;
  } finally {
    if (validated == false) {
      IOUtils.closeWhileHandlingException(handle);
    }
  }
}
 
Example 6
Source File: OfflinePointReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a reader over {@code length} points of the temp file, starting at point {@code start}.
 * <p>
 * Each point occupies {@code packedBytesLength + Integer.BYTES} bytes in the file. The file is
 * opened with checksumming only when the requested slice covers the whole file.
 *
 * @param tempDir           directory holding the temp file
 * @param tempFileName      name of the temp file
 * @param packedBytesLength number of packed value bytes per point
 * @param start             index of the first point to read
 * @param length            number of points to read
 * @param reusableBuffer    on-heap buffer; must be non-null and hold at least one point
 * @throws IllegalArgumentException if the slice extends beyond the file, or the buffer is null
 *                                  or too small
 * @throws IOException              declared by the directory calls used here
 */
public OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length, byte[] reusableBuffer) throws IOException {
  this.bytesPerDoc = packedBytesLength + Integer.BYTES;
  this.packedValueLength = packedBytesLength;

  // The requested points plus the codec footer must fit inside the file.
  if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
    throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
  }
  if (reusableBuffer == null) {
    throw new IllegalArgumentException("[reusableBuffer] cannot be null");
  }
  if (reusableBuffer.length < bytesPerDoc) {
    throw new IllegalArgumentException("Length of [reusableBuffer] must be bigger than " + bytesPerDoc);
  }

  this.maxPointOnHeap = reusableBuffer.length / bytesPerDoc;

  // Best-effort checksumming:
  final boolean readsWholeFile =
      start == 0 && length*bytesPerDoc == tempDir.fileLength(tempFileName) - CodecUtil.footerLength();
  if (readsWholeFile == false) {
    // We are going to seek somewhere into the middle of a possibly huge file and will not
    // read all bytes from there, so ChecksumIndexInput is not used here. This is typically
    // fine, because this same file will later be read fully, at another level of the
    // BKDWriter recursion
    in = tempDir.openInput(tempFileName, IOContext.READONCE);
  } else {
    // The entire file is going to be read, e.g. because BKDWriter is now partitioning it,
    // so open with checksums:
    in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE);
  }

  name = tempFileName;

  // Position the input on the first requested point.
  in.seek(start * bytesPerDoc);
  countLeft = length;
  this.onHeapBuffer = reusableBuffer;
  this.pointValue = new OfflinePointValue(onHeapBuffer, packedValueLength);
}
 
Example 7
Source File: Store.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the checksum of {@code file} and registers a {@code StoreFileMetaData} entry for it
 * in {@code builder}.
 * <p>
 * A file shorter than the codec footer is rejected with a {@link CorruptIndexException}. When
 * {@code readFileAsHash} is set, the file content is additionally hashed into the entry while
 * being read through a {@code VerifyingIndexInput}; otherwise only the checksum stored in the
 * footer is retrieved.
 *
 * @param directory      directory the file is opened from
 * @param file           name of the file to checksum
 * @param builder        receives the metadata entry keyed by file name
 * @param logger         used to report failures at debug level before they are rethrown
 * @param version        version recorded in the metadata entry
 * @param readFileAsHash whether to also hash the file content
 * @throws IOException if the file cannot be opened or read, or is detected as corrupted
 */
private static void checksumFromLuceneFile(Directory directory, String file, Map<String, StoreFileMetaData> builder,
        Logger logger, Version version, boolean readFileAsHash) throws IOException {
    final String checksum;
    final BytesRefBuilder fileHash = new BytesRefBuilder();
    try (IndexInput in = directory.openInput(file, IOContext.READONCE)) {
        final long length;
        try {
            length = in.length();
            if (length < CodecUtil.footerLength()) {
                // truncated files trigger IAE if we seek negative... these files are really corrupted though
                throw new CorruptIndexException("Can't retrieve checksum from file: " + file + " file length must be >= " + CodecUtil.footerLength() + " but was: " + length, in);
            }
            if (readFileAsHash) {
                final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in); // additional safety we checksum the entire file we read the hash for...
                hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
                checksum = digestToString(verifyingIndexInput.verify());
            } else {
                checksum = digestToString(CodecUtil.retrieveChecksum(in));
            }

        } catch (Exception ex) {
            // This is the failure path: the message must say the checksum could NOT be retrieved.
            logger.debug(() -> new ParameterizedMessage("Can't retrieve checksum from file [{}]", file), ex);
            throw ex;
        }
        builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
    }
}
 
Example 8
Source File: MetaDataStateFormat.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the state stored in the given file.
 * <p>
 * The file's checksum is verified before anything is parsed, then the codec header is checked
 * against {@code STATE_FILE_CODEC} and the supported version range. The content type recorded
 * in the file must equal {@code FORMAT}. The bytes between the current position and the footer
 * are then parsed with {@code fromXContent}.
 *
 * @param namedXContentRegistry registry handed to the content parser
 * @param file                  path of the state file
 * @return the state parsed from the file
 * @throws IOException           declared by the underlying directory and codec calls; Lucene
 *                               corruption and format-version exceptions are rethrown wrapped
 *                               in a {@code CorruptStateException}
 * @throws IllegalStateException if the stored content type is not {@code FORMAT}
 */
public final T read(NamedXContentRegistry namedXContentRegistry, Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (IndexInput input = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
            // Checksum the whole file up front so that a corrupted file fails here, before parsing.
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, STATE_FILE_CODEC, MIN_COMPATIBLE_STATE_FILE_VERSION, STATE_FILE_VERSION);
            final XContentType storedType = XContentType.values()[input.readInt()];
            if (storedType != FORMAT) {
                throw new IllegalStateException("expected state in " + file + " to be " + FORMAT + " format but was " + storedType);
            }
            final long contentStart = input.getFilePointer();
            final long contentLength = input.length() - CodecUtil.footerLength() - contentStart;
            try (IndexInput slice = input.slice("state_xcontent", contentStart, contentLength);
                 InputStreamIndexInput in = new InputStreamIndexInput(slice, contentLength);
                 XContentParser parser = XContentFactory.xContent(FORMAT)
                     .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, in)) {
                return fromXContent(parser);
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // surface these as one dedicated exception type that wraps the original
            throw new CorruptStateException(ex);
        }
    }
}
 
Example 9
Source File: OfflineSorter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Constructs a ByteSequencesReader from the provided IndexInput */
public ByteSequencesReader(ChecksumIndexInput in, String name) {
  this.in = in;
  this.name = name;
  end = in.length() - CodecUtil.footerLength();
}