Java Code Examples for org.apache.lucene.codecs.CodecUtil#checkHeader()

The following examples show how to use org.apache.lucene.codecs.CodecUtil#checkHeader() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ChecksumBlobStoreFormat.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Reads blob with specified name without resolving the blobName using using {@link #blobName} method.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    try (InputStream inputStream = blobContainer.readBlob(blobName)) {
        byte[] bytes = ByteStreams.toByteArray(inputStream);
        final String resourceDesc = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
        try (ByteArrayIndexInput indexInput = new ByteArrayIndexInput(resourceDesc, bytes)) {
            CodecUtil.checksumEntireFile(indexInput);
            CodecUtil.checkHeader(indexInput, codec, VERSION, VERSION);
            long filePointer = indexInput.getFilePointer();
            long contentSize = indexInput.length() - CodecUtil.footerLength() - filePointer;
            BytesReference bytesReference = new BytesArray(bytes, (int) filePointer, (int) contentSize);
            return read(bytesReference);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // we trick this into a dedicated exception with the original stacktrace
            throw new CorruptStateException(ex);
        }
    }
}

Example 2

Source File: MetaDataStateFormat.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Reads the state from a given file and compares the expected version against the actual version of
 * the state.
 */
public final T read(Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (final IndexInput indexInput = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
             // We checksum the entire file before we even go and parse it. If it's corrupted we barf right here.
            CodecUtil.checksumEntireFile(indexInput);
            CodecUtil.checkHeader(indexInput, STATE_FILE_CODEC, STATE_FILE_VERSION, STATE_FILE_VERSION);
            final XContentType xContentType = XContentType.values()[indexInput.readInt()];
            indexInput.readLong(); // version currently unused
            long filePointer = indexInput.getFilePointer();
            long contentSize = indexInput.length() - CodecUtil.footerLength() - filePointer;
            try (IndexInput slice = indexInput.slice("state_xcontent", filePointer, contentSize)) {
                try (XContentParser parser = XContentFactory.xContent(xContentType).createParser(new InputStreamIndexInput(slice, contentSize))) {
                    return fromXContent(parser);
                }
            }
        } catch(CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // we trick this into a dedicated exception with the original stacktrace
            throw new CorruptStateException(ex);
        }
    }
}

Example 3

Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0

6 votes

@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();
  byte separatorOrig = input.readByte();
  if (separatorOrig != separator) {
    throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + separatorOrig);
  }
  int gramsOrig = input.readVInt();
  if (gramsOrig != grams) {
    throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + gramsOrig);
  }
  totTokens = input.readVLong();

  fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());

  return true;
}

Example 4

Source File: RAMOnlyPostingsFormat.java From lucene-solr with Apache License 2.0

6 votes

@Override
public FieldsProducer fieldsProducer(SegmentReadState readState)
  throws IOException {

  // Load our ID:
  final String idFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name, readState.segmentSuffix, ID_EXTENSION);
  IndexInput in = readState.directory.openInput(idFileName, readState.context);
  boolean success = false;
  final int id;
  try {
    CodecUtil.checkHeader(in, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
    id = in.readVInt();
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(in);
    } else {
      IOUtils.close(in);
    }
  }
  
  synchronized(state) {
    return state.get(id);
  }
}

Example 5

Source File: ChecksumBlobStoreFormat.java From crate with Apache License 2.0

6 votes

/**
 * Reads blob with specified name without resolving the blobName using using {@link #blobName} method.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    final BytesReference bytes = Streams.readFully(blobContainer.readBlob(blobName));
    final String resourceDesc = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
    try (ByteArrayIndexInput indexInput =
             new ByteArrayIndexInput(resourceDesc, BytesReference.toBytes(bytes))) {
        CodecUtil.checksumEntireFile(indexInput);
        CodecUtil.checkHeader(indexInput, codec, VERSION, VERSION);
        long filePointer = indexInput.getFilePointer();
        long contentSize = indexInput.length() - CodecUtil.footerLength() - filePointer;
        try (XContentParser parser = XContentHelper.createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE,
                                                                 bytes.slice((int) filePointer, (int) contentSize), XContentType.SMILE)) {
            return reader.apply(parser);
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
        // we trick this into a dedicated exception with the original stacktrace
        throw new CorruptStateException(ex);
    }
}

Example 6

Source File: Completion090PostingsFormat.java From Elasticsearch with Apache License 2.0

5 votes

public CompletionFieldsProducer(SegmentReadState state) throws IOException {
    String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
    version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
    FieldsProducer delegateProducer = null;
    boolean success = false;
    try {
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
        String providerName = input.readString();
        CompletionLookupProvider completionLookupProvider = providers.get(providerName);
        if (completionLookupProvider == null) {
            throw new IllegalStateException("no provider with name [" + providerName + "] registered");
        }
        // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unecessary heap usage?
        delegateProducer = delegatePostingsFormat.fieldsProducer(state);
        /*
         * If we are merging we don't load the FSTs at all such that we
         * don't consume so much memory during merge
         */
        if (state.context.context != Context.MERGE) {
            // TODO: maybe we can do this in a fully lazy fashion based on some configuration
            // eventually we should have some kind of curciut breaker that prevents us from going OOM here
            // with some configuration
            this.lookupFactory = completionLookupProvider.load(input);
        } else {
            this.lookupFactory = null;
        }
        this.delegateProducer = delegateProducer;
        success = true;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(delegateProducer, input);
        } else {
            IOUtils.close(input);
        }
    }
}

Example 7

Source File: Blur022SegmentInfoReader.java From incubator-retired-blur with Apache License 2.0

5 votes

@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segment, "", Blur022SegmentInfoFormat.SI_EXTENSION);
  final IndexInput input = dir.openInput(fileName, context);
  boolean success = false;
  try {
    CodecUtil.checkHeader(input, Blur022SegmentInfoFormat.CODEC_NAME, Blur022SegmentInfoFormat.VERSION_START,
        Blur022SegmentInfoFormat.VERSION_CURRENT);
    final String version = input.readString();
    final int docCount = input.readInt();
    if (docCount < 0) {
      throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")");
    }
    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
    final Map<String, String> diagnostics = input.readStringStringMap();
    final Map<String, String> attributes = input.readStringStringMap();
    final Set<String> files = input.readStringSet();

    if (input.getFilePointer() != input.length()) {
      throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read "
          + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
    }

    final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics,
        Collections.unmodifiableMap(attributes));
    si.setFiles(files);

    success = true;

    return si;

  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(input);
    } else {
      input.close();
    }
  }
}

Example 8

Source File: DiskDocValuesProducer.java From incubator-retired-blur with Apache License 2.0

5 votes

DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec,
    String metaExtension) throws IOException {
  String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  // read in the entries from the metadata file.
  IndexInput in = state.directory.openInput(metaName, state.context);
  boolean success = false;
  try {
    CodecUtil.checkHeader(in, metaCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START);
    numerics = new ConcurrentHashMap<Integer, NumericEntry>();
    ords = new ConcurrentHashMap<Integer, NumericEntry>();
    ordIndexes = new ConcurrentHashMap<Integer, NumericEntry>();
    binaries = new ConcurrentHashMap<Integer, BinaryEntry>();
    _binaryDocValuesCache = new ConcurrentHashMap<Integer, BinaryDocValues>();
    _numericDocValuesCache = new ConcurrentHashMap<Integer, NumericDocValues>();
    _sortedDocValuesCache = new ConcurrentHashMap<Integer, SortedDocValues>();
    _sortedSetDocValuesCache = new ConcurrentHashMap<Integer, SortedSetDocValues>();
    readFields(in, state.fieldInfos);
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }

  String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  data = state.directory.openInput(dataName, state.context);
  CodecUtil.checkHeader(data, dataCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START);
}

Example 9

Source File: MetaDataStateFormat.java From crate with Apache License 2.0

5 votes

/**
 * Reads the state from a given file and compares the expected version against the actual version of
 * the state.
 */
public final T read(NamedXContentRegistry namedXContentRegistry, Path file) throws IOException {
    try (Directory dir = newDirectory(file.getParent())) {
        try (IndexInput indexInput = dir.openInput(file.getFileName().toString(), IOContext.DEFAULT)) {
            // We checksum the entire file before we even go and parse it. If it's corrupted we barf right here.
            CodecUtil.checksumEntireFile(indexInput);
            CodecUtil.checkHeader(indexInput, STATE_FILE_CODEC, MIN_COMPATIBLE_STATE_FILE_VERSION, STATE_FILE_VERSION);
            final XContentType xContentType = XContentType.values()[indexInput.readInt()];
            if (xContentType != FORMAT) {
                throw new IllegalStateException("expected state in " + file + " to be " + FORMAT + " format but was " + xContentType);
            }
            long filePointer = indexInput.getFilePointer();
            long contentSize = indexInput.length() - CodecUtil.footerLength() - filePointer;
            try (IndexInput slice = indexInput.slice("state_xcontent", filePointer, contentSize)) {
                try (InputStreamIndexInput in = new InputStreamIndexInput(slice, contentSize)) {
                    try (XContentParser parser = XContentFactory.xContent(FORMAT)
                            .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE,
                                in)) {
                        return fromXContent(parser);
                    }
                }
            }
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // we trick this into a dedicated exception with the original stacktrace
            throw new CorruptStateException(ex);
        }
    }
}

Example 10

Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0

4 votes

void writeMetaData() throws IOException {
  if (totalChunks == 0) {
    return;
  }
  
  long startDMW = data.getFilePointer();
  meta.writeLong(startDMW);
  
  meta.writeVInt(totalChunks);
  meta.writeVInt(Lucene80DocValuesFormat.BINARY_BLOCK_SHIFT);
  meta.writeVInt(maxUncompressedBlockLength);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
  

  CodecUtil.writeFooter(tempBinaryOffsets);
  IOUtils.close(tempBinaryOffsets);             
  //write the compressed block offsets info to the meta file by reading from temp file
  try (ChecksumIndexInput filePointersIn = state.directory.openChecksumInput(tempBinaryOffsets.getName(), IOContext.READONCE)) {
    CodecUtil.checkHeader(filePointersIn, Lucene80DocValuesFormat.META_CODEC + "FilePointers", Lucene80DocValuesFormat.VERSION_CURRENT,
      Lucene80DocValuesFormat.VERSION_CURRENT);
    Throwable priorE = null;
    try {
      final DirectMonotonicWriter filePointers = DirectMonotonicWriter.getInstance(meta, data, totalChunks, DIRECT_MONOTONIC_BLOCK_SHIFT);
      long fp = blockAddressesStart;
      for (int i = 0; i < totalChunks; ++i) {
        filePointers.add(fp);
        fp += filePointersIn.readVLong();
      }
      if (maxPointer < fp) {
        throw new CorruptIndexException("File pointers don't add up ("+fp+" vs expected "+maxPointer+")", filePointersIn);
      }
      filePointers.finish();
    } catch (Throwable e) {
      priorE = e;
    } finally {
      CodecUtil.checkFooter(filePointersIn, priorE);
    }
  }
  // Write the length of the DMW block in the data 
  meta.writeLong(data.getFilePointer() - startDMW);
}