Java Code Examples for org.apache.lucene.codecs.CodecUtil#checkIndexHeader()

The following examples show how to use org.apache.lucene.codecs.CodecUtil#checkIndexHeader(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: Lucene50CompoundReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the compound data file of the given segment for reading.
 *
 * <p>The entries table is loaded first; the data file is then opened and its header,
 * footer structure and total length are validated. If any of these checks fail the
 * data file handle is closed before the exception propagates.
 */
// TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
// this need not be a "general purpose" directory anymore (it only writes index files)
public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
  this.directory = directory;
  this.segmentName = si.name;
  final String dataFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.DATA_EXTENSION);
  final String entriesFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
  this.entries = readEntries(si.getId(), directory, entriesFileName);

  // The data file must be exactly: index header + every sub-file + footer.
  final long headerBytes = CodecUtil.indexHeaderLength(Lucene50CompoundFormat.DATA_CODEC, "");
  long subFileBytes = 0;
  for (FileEntry entry : entries.values()) {
    subFileBytes += entry.length;
  }
  final long expectedLength = headerBytes + subFileBytes + CodecUtil.footerLength();

  handle = directory.openInput(dataFileName, context);
  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(handle);

    // We also validate length, because e.g. if you strip 16 bytes off the .cfs we otherwise
    // would not detect it:
    final boolean lengthMatches = handle.length() == expectedLength;
    if (lengthMatches == false) {
      throw new CorruptIndexException("length should be " + expectedLength + " bytes, but is " + handle.length() + " instead", handle);
    }

    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(handle);
    }
  }
}

Example 2

Source File: Lucene50PostingsReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Validates the header of the terms input and checks that the block size recorded
 * in it equals the {@code BLOCK_SIZE} this reader uses.
 *
 * @throws IllegalStateException if the recorded block size differs from {@code BLOCK_SIZE}
 */
@Override
public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
  // Make sure we are talking to the matching postings writer
  final byte[] segmentId = state.segmentInfo.getId();
  CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT, segmentId, state.segmentSuffix);

  final int writtenBlockSize = termsIn.readVInt();
  if (writtenBlockSize == BLOCK_SIZE) {
    return;
  }
  throw new IllegalStateException("index-time BLOCK_SIZE (" + writtenBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
}

Example 3

Source File: Lucene84PostingsReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks the terms input header and verifies the block size stored right after it.
 *
 * @throws IllegalStateException if the stored block size is not {@code BLOCK_SIZE}
 */
@Override
public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
  // Make sure we are talking to the matching postings writer
  CodecUtil.checkIndexHeader(
      termsIn,
      TERMS_CODEC,
      VERSION_START,
      VERSION_CURRENT,
      state.segmentInfo.getId(),
      state.segmentSuffix);

  // The writer stores its block size immediately after the header.
  final int storedBlockSize = termsIn.readVInt();
  final boolean blockSizeMatches = storedBlockSize == BLOCK_SIZE;
  if (!blockSizeMatches) {
    throw new IllegalStateException("index-time BLOCK_SIZE (" + storedBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
  }
}

Example 4

Source File: FieldsIndexReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the fields index of a segment.
 *
 * <p>The meta file is read completely (and its footer checked) to obtain the monotonic
 * reader metadata and the pointers delimiting the two slices of the index file. The index
 * file is then opened, its header and footer structure are validated, and the doc and
 * start-pointer readers are created on top of slices of it.
 *
 * @param dir directory holding the segment files
 * @param name segment name
 * @param suffix segment suffix
 * @param extensionPrefix prefix prepended to the meta/index file extensions
 * @param codecName base codec name; {@code "Meta"} / {@code "Idx"} are appended for the two files
 * @param id segment id expected in both headers
 * @throws IOException if a file cannot be read or fails validation
 */
FieldsIndexReader(Directory dir, String name, String suffix, String extensionPrefix, String codecName, byte[] id) throws IOException {
  try (ChecksumIndexInput metaIn = dir.openChecksumInput(IndexFileNames.segmentFileName(name, suffix, extensionPrefix + FIELDS_META_EXTENSION_SUFFIX), IOContext.READONCE)) {
    Throwable priorE = null;
    try {
      CodecUtil.checkIndexHeader(metaIn, codecName + "Meta", VERSION_START, VERSION_CURRENT, id, suffix);
      maxDoc = metaIn.readInt();
      blockShift = metaIn.readInt();
      numChunks = metaIn.readInt();
      docsStartPointer = metaIn.readLong();
      docsMeta = DirectMonotonicReader.loadMeta(metaIn, numChunks, blockShift);
      docsEndPointer = startPointersStartPointer = metaIn.readLong();
      startPointersMeta = DirectMonotonicReader.loadMeta(metaIn, numChunks, blockShift);
      startPointersEndPointer = metaIn.readLong();
      maxPointer = metaIn.readLong();
    } catch (Throwable t) {
      // Record the failure so that checkFooter can report it instead of attempting a
      // plain footer check on a stream that is in an unknown position.
      priorE = t;
      // checkFooter below is expected to rethrow priorE; the explicit rethrow keeps this
      // catch block from completing normally so the field assignments above stay
      // definitely assigned for the compiler.
      throw t;
    } finally {
      CodecUtil.checkFooter(metaIn, priorE);
    }
  }

  indexInput = dir.openInput(IndexFileNames.segmentFileName(name, suffix, extensionPrefix + FIELDS_INDEX_EXTENSION_SUFFIX), IOContext.READ);
  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(indexInput, codecName + "Idx", VERSION_START, VERSION_CURRENT, id, suffix);
    CodecUtil.retrieveChecksum(indexInput);

    // Slice creation stays inside the guarded region: if the pointers read from the meta
    // file are unusable and slicing fails, the index input must still be closed.
    final RandomAccessInput docsSlice = indexInput.randomAccessSlice(docsStartPointer, docsEndPointer - docsStartPointer);
    final RandomAccessInput startPointersSlice = indexInput.randomAccessSlice(startPointersStartPointer, startPointersEndPointer - startPointersStartPointer);
    docs = DirectMonotonicReader.getInstance(docsMeta, docsSlice);
    startPointers = DirectMonotonicReader.getInstance(startPointersMeta, startPointersSlice);
    success = true;
  } finally {
    if (success == false) {
      indexInput.close();
    }
  }
}

Example 5

Source File: Lucene80NormsProducer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the norms of a segment.
 *
 * <p>The metadata file is read in full through {@code readFields} and its footer is checked;
 * the data file is then opened and kept open after its header version has been matched
 * against the metadata version and its footer structure has been validated.
 */
Lucene80NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  maxDoc = state.segmentInfo.maxDoc();
  final String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  int metaVersion = -1;

  // read in the entries from the metadata file.
  try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName, state.context)) {
    Throwable failure = null;
    try {
      metaVersion = CodecUtil.checkIndexHeader(metaIn, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      readFields(metaIn, state.fieldInfos);
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(metaIn, failure);
    }
  }

  final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  data = state.directory.openInput(dataName, state.context);
  boolean opened = false;
  try {
    final int dataVersion = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    final boolean sameVersion = metaVersion == dataVersion;
    if (sameVersion == false) {
      throw new CorruptIndexException("Format versions mismatch: meta=" + metaVersion + ",data=" + dataVersion, data);
    }

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(data);

    opened = true;
  } finally {
    if (opened == false) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}

Example 6

Source File: Lucene50CompoundReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the compound-file entry table.
 *
 * <p>Stores the header version in {@code version} as a side effect and returns an
 * unmodifiable map from entry id to its offset/length record.
 *
 * @throws CorruptIndexException if the same entry id occurs twice
 */
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
  Map<String,FileEntry> table = null;
  try (ChecksumIndexInput in = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
    Throwable failure = null;
    try {
      version = CodecUtil.checkIndexHeader(
          in,
          Lucene50CompoundFormat.ENTRY_CODEC,
          Lucene50CompoundFormat.VERSION_START,
          Lucene50CompoundFormat.VERSION_CURRENT,
          segmentID,
          "");
      final int entryCount = in.readVInt();
      table = new HashMap<>(entryCount);
      for (int remaining = entryCount; remaining > 0; remaining--) {
        final FileEntry entry = new FileEntry();
        final String id = in.readString();
        // Register first so a duplicate id is reported before its offset/length are read.
        if (table.put(id, entry) != null) {
          throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", in);
        }
        entry.offset = in.readLong();
        entry.length = in.readLong();
      }
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(in, failure);
    }
  }
  return Collections.unmodifiableMap(table);
}

Example 7

Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads the per-field term dictionaries of a segment.
 *
 * <p>The terms file is opened, its header and whole-file checksum are checked, the
 * postings reader is initialised from it, and one {@code TermsReader} per field is
 * registered in {@code fields}. The input is closed before the constructor returns,
 * whether or not loading succeeded.
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  this.postingsReader = postingsReader;

  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(
        in,
        FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START,
        FSTTermsWriter.TERMS_VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);
    this.postingsReader.init(in, state);
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
      final int fieldNumber = in.readVInt();
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      final long numTerms = in.readVLong();
      final long sumTotalTermFreq = in.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
      final long sumDocFreq;
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        sumDocFreq = sumTotalTermFreq;
      } else {
        sumDocFreq = in.readVLong();
      }
      final int docCount = in.readVInt();
      final TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      final TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(in);
    } else {
      IOUtils.close(in);
    }
  }
}

Example 8

Source File: Lucene50LiveDocsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Reads the live-docs bit set of a segment for its current deletion generation.
 *
 * @throws CorruptIndexException if the number of cleared bits differs from the
 *         deletion count recorded in {@code info}
 */
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  final long gen = info.getDelGen();
  final String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
  final int length = info.info.maxDoc();
  try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
    Throwable failure = null;
    try {
      CodecUtil.checkIndexHeader(
          input,
          CODEC_NAME,
          VERSION_START,
          VERSION_CURRENT,
          info.info.getId(),
          Long.toString(gen, Character.MAX_RADIX));

      final long[] words = new long[FixedBitSet.bits2words(length)];
      for (int w = 0; w < words.length; w++) {
        words[w] = input.readLong();
      }

      final FixedBitSet liveBits = new FixedBitSet(words, length);
      final int deletedBits = liveBits.length() - liveBits.cardinality();
      if (deletedBits != info.getDelCount()) {
        throw new CorruptIndexException("bits.deleted=" + deletedBits +
                                        " info.delcount=" + info.getDelCount(), input);
      }
      return liveBits.asReadOnlyBits();
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(input, failure);
    }
  }
  // Only reachable if checkFooter returned normally after a failure was recorded.
  throw new AssertionError();
}

Example 9

Source File: IDVersionPostingsReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the terms input starts with the header written by
 * {@code IDVersionPostingsWriter} for this segment and suffix.
 */
@Override
public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
  // Make sure we are talking to the matching postings writer
  final byte[] segmentId = state.segmentInfo.getId();
  final String suffix = state.segmentSuffix;
  CodecUtil.checkIndexHeader(termsIn, IDVersionPostingsWriter.TERMS_CODEC, IDVersionPostingsWriter.VERSION_START, IDVersionPostingsWriter.VERSION_CURRENT, segmentId, suffix);
}

Example 10

Source File: CompletionFieldsProducer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens the completion dictionary and index files of a segment.
 *
 * <p>The index file is read completely inside a try-with-resources block and is closed
 * when the constructor returns. The dictionary input ({@code dictIn}) is kept open on
 * success because it is handed to every {@code CompletionsTermsReader}; on failure it is
 * closed together with the delegate fields producer.
 *
 * @param codecName codec name expected in the headers of both files
 * @param state segment read state (directory, segment info, field infos, context)
 * @param fstLoadMode passed through to each {@code CompletionsTermsReader}
 * @throws IOException if a file cannot be read or fails header/footer validation
 */
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode) throws IOException {
  String indexFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, INDEX_EXTENSION);
  // Initialised up-front so the cleanup in the finally block can reference it even when
  // the failure happens before the delegate has been loaded.
  delegateFieldsProducer = null;
  boolean success = false;

  try (ChecksumIndexInput index = state.directory.openChecksumInput(indexFile, state.context)) {
    // open up dict file containing all fsts
    String dictFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
    dictIn = state.directory.openInput(dictFile, state.context);
    CodecUtil.checkIndexHeader(dictIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // just validate the footer for the dictIn
    CodecUtil.retrieveChecksum(dictIn);

    // open up index file (fieldNumber, offset)
    CodecUtil.checkIndexHeader(index, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // load delegate PF
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(index.readString());
    delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);

    // read suggest field numbers and their offsets in the terms file from index
    int numFields = index.readVInt();
    readers = new HashMap<>(numFields);
    for (int i = 0; i < numFields; i++) {
      // Per-field record: field number, dictionary offset, min/max weight, type byte.
      int fieldNumber = index.readVInt();
      long offset = index.readVLong();
      long minWeight = index.readVLong();
      long maxWeight = index.readVLong();
      byte type = index.readByte();
      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
      // we don't load the FST yet
      readers.put(fieldInfo.name, new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
    }
    // The whole index file has been consumed through the checksum input at this point.
    CodecUtil.checkFooter(index);
    success = true;
  } finally {
    if (success == false) {
      // index itself is closed by try-with-resources; these two are not.
      IOUtils.closeWhileHandlingException(delegateFieldsProducer, dictIn);
    }
  }
}

Example 11

Source File: BloomFilteringPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads the bloom filters of a segment together with the delegate fields producer.
 *
 * <p>The bloom file names the delegate postings format and then stores one
 * (field number, serialized filter) pair per bloom-filtered field. The file is read
 * completely, its footer is checked, and it is closed before the constructor returns.
 * On failure both the bloom input and the delegate producer are closed.
 */
public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
  final String bloomFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  ChecksumIndexInput bloomIn = null;
  boolean success = false;
  try {
    bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
    CodecUtil.checkIndexHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());
    // Load the delegate postings format
    final String delegateName = bloomIn.readString();
    final PostingsFormat delegatePostingsFormat = PostingsFormat.forName(delegateName);
    this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);

    final int numBlooms = bloomIn.readInt();
    for (int remaining = numBlooms; remaining > 0; remaining--) {
      final int fieldNum = bloomIn.readInt();
      final FuzzySet bloom = FuzzySet.deserialize(bloomIn);
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
      bloomsByFieldName.put(fieldInfo.name, bloom);
    }

    CodecUtil.checkFooter(bloomIn);
    IOUtils.close(bloomIn);
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}

Example 12

Source File: BaseCompoundFormatTestCase.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Packs a large number of tiny files into one compound file and checks that all of them
 * can be opened and read at the same time while the underlying directory reports a
 * single open file handle.
 */
public void testManySubFiles() throws IOException {
  final MockDirectoryWrapper dir = newMockFSDirectory(createTempDir("CFSManySubFiles"));
  
  final int FILE_COUNT = atLeast(500);
  
  // Create FILE_COUNT files, each holding: index header, one payload byte, footer.
  List<String> files = new ArrayList<>();
  SegmentInfo si = newSegmentInfo(dir, "_123");
  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    String file = "_123." + fileIdx;
    files.add(file);
    try (IndexOutput out = dir.createOutput(file, newIOContext(random()))) {
      CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
      // The payload is the file index truncated to a byte; it is read back below.
      out.writeByte((byte) fileIdx);
      CodecUtil.writeFooter(out);
    }
  }
  
  // Every output was closed by try-with-resources, so nothing should be open yet.
  assertEquals(0, dir.getFileHandleCount());
  
  // Write the compound file from the listed files and open a reader on it.
  si.setFiles(files);
  si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
  Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
  
  // Open every sub-file through the compound reader and validate its header.
  final IndexInput[] ins = new IndexInput[FILE_COUNT];
  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
    CodecUtil.checkIndexHeader(ins[fileIdx], "Foo", 0, 0, si.getId(), "suffix");
  }
  
  // All FILE_COUNT inputs are open, yet the directory must report one handle only.
  assertEquals(1, dir.getFileHandleCount());

  // Each input is positioned just after its header; the next byte is the payload.
  for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
    assertEquals((byte) fileIdx, ins[fileIdx].readByte());
  }
  
  // Reading must not have opened additional handles.
  assertEquals(1, dir.getFileHandleCount());
  
  for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
    ins[fileIdx].close();
  }
  cfs.close();
  
  dir.close();
}

Example 13

Source File: Lucene80DocValuesProducer.java From lucene-solr with Apache License 2.0

4 votes

/**
 * expert: instantiates a new reader.
 *
 * <p>Reads all field entries from the metadata file (checking its footer), then opens the
 * data file, requires its header version to equal the metadata version, and validates the
 * structure of its footer. The data input is closed again if any of those checks fail.
 */
Lucene80DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  final String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  this.maxDoc = state.segmentInfo.maxDoc();
  ramBytesUsed = RamUsageEstimator.shallowSizeOfInstance(getClass());

  // read in the entries from the metadata file.
  try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName, state.context)) {
    Throwable failure = null;
    try {
      version = CodecUtil.checkIndexHeader(
          metaIn,
          metaCodec,
          Lucene80DocValuesFormat.VERSION_START,
          Lucene80DocValuesFormat.VERSION_CURRENT,
          state.segmentInfo.getId(),
          state.segmentSuffix);
      readFields(metaIn, state.fieldInfos);
    } catch (Throwable t) {
      failure = t;
    } finally {
      CodecUtil.checkFooter(metaIn, failure);
    }
  }

  final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  this.data = state.directory.openInput(dataName, state.context);
  boolean opened = false;
  try {
    final int dataVersion = CodecUtil.checkIndexHeader(
        data,
        dataCodec,
        Lucene80DocValuesFormat.VERSION_START,
        Lucene80DocValuesFormat.VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    final boolean sameVersion = version == dataVersion;
    if (sameVersion == false) {
      throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + dataVersion, data);
    }

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(data);

    opened = true;
  } finally {
    if (opened == false) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}

Example 14

Source File: UniformSplitTermsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Opens the terms-block file and the dictionary file of a segment and builds the
 * field-to-terms map from the fields metadata stored in the block file.
 *
 * <p>Both inputs stay open on success (they are stored in fields of this reader); if
 * anything fails, both are closed before the exception propagates.
 *
 * @see #UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
 */
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
                                  boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
                                  String codecName, int versionStart, int versionCurrent,
                                  String termsBlocksExtension, String dictionaryExtension) throws IOException {
   // Locals shadow the fields of the same name until both files have been validated.
   IndexInput dictionaryInput = null;
   IndexInput blockInput = null;
   boolean success = false;
   try {
     this.postingsReader = postingsReader;
     String segmentName = state.segmentInfo.name;
     String termsName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, termsBlocksExtension);
     blockInput = state.directory.openInput(termsName, state.context);

     // The version found in the block file is the one the dictionary file must match.
     version = CodecUtil.checkIndexHeader(blockInput, codecName, versionStart,
         versionCurrent, state.segmentInfo.getId(), state.segmentSuffix);
     String indexName = IndexFileNames.segmentFileName(segmentName, state.segmentSuffix, dictionaryExtension);
     dictionaryInput = state.directory.openInput(indexName, state.context);

     // min == max == version: the dictionary header is only accepted at that exact version.
     CodecUtil.checkIndexHeader(dictionaryInput, codecName, version, version, state.segmentInfo.getId(), state.segmentSuffix);
     CodecUtil.checksumEntireFile(dictionaryInput);

     // The postings reader consumes its own header data from the block file first.
     postingsReader.init(blockInput, state);
     CodecUtil.retrieveChecksum(blockInput);

     seekFieldsMetadata(blockInput);
     Collection<FieldMetadata> fieldMetadataCollection =
         readFieldsMetadata(blockInput, blockDecoder, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());

     fieldToTermsMap = new HashMap<>();
     this.blockInput = blockInput;
     this.dictionaryInput = dictionaryInput;

     fillFieldMap(postingsReader, state, blockDecoder, dictionaryOnHeap, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);

     // Expose the field names as a sorted, unmodifiable list.
     List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet());
     Collections.sort(fieldNames);
     sortedFieldNames = Collections.unmodifiableList(fieldNames);

     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(blockInput, dictionaryInput);
     }
   }
 }

Example 15

Source File: Lucene84PostingsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Sole constructor.
 *
 * <p>Opens the doc file and, depending on what the segment's fields index, the position
 * and payload/offset files. The version found in the doc file header is the only version
 * accepted for the other two files. All inputs opened so far are closed if any step fails.
 */
public Lucene84PostingsReader(SegmentReadState state) throws IOException {
  IndexInput doc = null;
  IndexInput pos = null;
  IndexInput pay = null;
  boolean success = false;

  // NOTE: these data files are too costly to verify checksum against all the bytes on open,
  // but for now we at least verify proper structure of the checksum footer: which looks
  // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
  // such as file truncation.

  final String docName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene84PostingsFormat.DOC_EXTENSION);
  try {
    doc = state.directory.openInput(docName, state.context);
    version = CodecUtil.checkIndexHeader(doc, DOC_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.retrieveChecksum(doc);

    if (state.fieldInfos.hasProx()) {
      final String proxName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene84PostingsFormat.POS_EXTENSION);
      pos = state.directory.openInput(proxName, state.context);
      CodecUtil.checkIndexHeader(pos, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
      CodecUtil.retrieveChecksum(pos);

      // The payload file is only opened when positions exist as well.
      if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
        final String payName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene84PostingsFormat.PAY_EXTENSION);
        pay = state.directory.openInput(payName, state.context);
        CodecUtil.checkIndexHeader(pay, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
        CodecUtil.retrieveChecksum(pay);
      }
    }

    this.docIn = doc;
    this.posIn = pos;
    this.payIn = pay;
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(doc, pos, pay);
    }
  }
}

Example 16

Source File: FixedGapTermsIndexReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Loads the fixed-gap terms index of a segment.
 *
 * <p>The index file is opened, its header and whole-file checksum are checked, and one
 * {@code FieldIndexData} per field listed in the file's directory is registered in
 * {@code fields}. The input is closed before the constructor returns, and the paged term
 * bytes are frozen into {@code termBytesReader} on both the success and the failure path.
 *
 * @throws CorruptIndexException if a value read from the file is out of range or a field
 *         appears twice
 * @throws IOException if the file cannot be read
 */
public FixedGapTermsIndexReader(SegmentReadState state) throws IOException {
  final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
  
  String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, 
                                                   state.segmentSuffix, 
                                                   FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
  final IndexInput in = state.directory.openInput(fileName, state.context);
  
  boolean success = false;

  try {
    
    // Only the current version is accepted (min == max == VERSION_CURRENT).
    CodecUtil.checkIndexHeader(in, FixedGapTermsIndexWriter.CODEC_NAME,
                                     FixedGapTermsIndexWriter.VERSION_CURRENT, 
                                     FixedGapTermsIndexWriter.VERSION_CURRENT,
                                     state.segmentInfo.getId(), state.segmentSuffix);
    
    CodecUtil.checksumEntireFile(in);
    
    indexInterval = in.readVInt();
    if (indexInterval < 1) {
      throw new CorruptIndexException("invalid indexInterval: " + indexInterval, in);
    }
    packedIntsVersion = in.readVInt();
    blocksize = in.readVInt();
    
    seekDir(in);

    // Read directory
    final int numFields = in.readVInt();     
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, in);
    }
    for(int i=0;i<numFields;i++) {
      final int field = in.readVInt();
      final long numIndexTerms = in.readVInt(); // TODO: change this to a vLong if we fix writer to support > 2B index terms
      if (numIndexTerms < 0) {
        throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms, in);
      }
      final long termsStart = in.readVLong();
      final long indexStart = in.readVLong();
      final long packedIndexStart = in.readVLong();
      final long packedOffsetsStart = in.readVLong();
      if (packedIndexStart < indexStart) {
        // Each value is separated by a space so the message stays readable
        // (the label used to be fused onto the preceding indexStart value).
        throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + " numIndexTerms: " + numIndexTerms, in);
      }
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
      FieldIndexData previous = fields.put(fieldInfo.name, new FieldIndexData(in, termBytes, indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms));
      if (previous != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
      }
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
    // Assigned in finally so the reader field is set on every path out of the constructor.
    termBytesReader = termBytes.freeze(true);
  }
}

Example 17

Source File: Lucene86PointsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Sole constructor.
 *
 * <p>Opens the index and data files of the segment's points, then reads the meta file,
 * creating one {@code BKDReader} per field number found there. The meta file also stores
 * the expected lengths of the index and data files, which are passed to the final
 * footer checks. If anything fails, this reader is closed before the exception propagates.
 */
public Lucene86PointsReader(SegmentReadState readState) throws IOException {
  this.readState = readState;

  String metaFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
      readState.segmentSuffix,
      Lucene86PointsFormat.META_EXTENSION);
  String indexFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
      readState.segmentSuffix,
      Lucene86PointsFormat.INDEX_EXTENSION);
  String dataFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
      readState.segmentSuffix,
      Lucene86PointsFormat.DATA_EXTENSION);

  boolean success = false;
  try {
    // Index and data inputs are opened first because each BKDReader below is built
    // from all three inputs.
    indexIn = readState.directory.openInput(indexFileName, readState.context);
    CodecUtil.checkIndexHeader(indexIn,
        Lucene86PointsFormat.INDEX_CODEC_NAME,
        Lucene86PointsFormat.VERSION_START,
        Lucene86PointsFormat.VERSION_CURRENT,
        readState.segmentInfo.getId(),
        readState.segmentSuffix);

    dataIn = readState.directory.openInput(dataFileName, readState.context);
    CodecUtil.checkIndexHeader(dataIn,
        Lucene86PointsFormat.DATA_CODEC_NAME,
        Lucene86PointsFormat.VERSION_START,
        Lucene86PointsFormat.VERSION_CURRENT,
        readState.segmentInfo.getId(),
        readState.segmentSuffix);

    // -1 marks "not read yet"; both are overwritten from the meta file below.
    long indexLength = -1, dataLength = -1;
    try (ChecksumIndexInput metaIn = readState.directory.openChecksumInput(metaFileName, readState.context)) {
      Throwable priorE = null;
      try {
        CodecUtil.checkIndexHeader(metaIn,
            Lucene86PointsFormat.META_CODEC_NAME,
            Lucene86PointsFormat.VERSION_START,
            Lucene86PointsFormat.VERSION_CURRENT,
            readState.segmentInfo.getId(),
            readState.segmentSuffix);

        // Field entries follow the header; a field number of -1 terminates the list.
        while (true) {
          int fieldNumber = metaIn.readInt();
          if (fieldNumber == -1) {
            break;
          } else if (fieldNumber < 0) {
            throw new CorruptIndexException("Illegal field number: " + fieldNumber, metaIn);
          }
          BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
          readers.put(fieldNumber, reader);
        }
        // The expected lengths of the two other files are stored after the terminator.
        indexLength = metaIn.readLong();
        dataLength = metaIn.readLong();
      } catch (Throwable t) {
        priorE = t;
      } finally {
        CodecUtil.checkFooter(metaIn, priorE);
      }
    }
    // At this point, checksums of the meta file have been validated so we
    // know that indexLength and dataLength are very likely correct.
    CodecUtil.retrieveChecksum(indexIn, indexLength);
    CodecUtil.retrieveChecksum(dataIn, dataLength);
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(this);
    }
  }

}

Example 18

Source File: Lucene50PostingsReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Sole constructor.
 *
 * <p>Opens the doc file (also building {@code forUtil} from it) and, depending on what the
 * segment's fields index, the position and payload/offset files. The version read from
 * the doc file header is the only one accepted for the other files. Every input opened so
 * far is closed if any step fails.
 */
public Lucene50PostingsReader(SegmentReadState state) throws IOException {
  IndexInput doc = null;
  IndexInput pos = null;
  IndexInput pay = null;
  boolean success = false;

  // NOTE: these data files are too costly to verify checksum against all the bytes on open,
  // but for now we at least verify proper structure of the checksum footer: which looks
  // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
  // such as file truncation.

  final String docName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.DOC_EXTENSION);
  try {
    doc = state.directory.openInput(docName, state.context);
    version = CodecUtil.checkIndexHeader(doc, DOC_CODEC, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // ForUtil is constructed from the doc input right after the header, before the footer check.
    forUtil = new ForUtil(doc);
    CodecUtil.retrieveChecksum(doc);

    if (state.fieldInfos.hasProx()) {
      final String proxName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.POS_EXTENSION);
      pos = state.directory.openInput(proxName, state.context);
      CodecUtil.checkIndexHeader(pos, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
      CodecUtil.retrieveChecksum(pos);

      // The payload file is only opened when positions exist as well.
      if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
        final String payName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene50PostingsFormat.PAY_EXTENSION);
        pay = state.directory.openInput(payName, state.context);
        CodecUtil.checkIndexHeader(pay, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
        CodecUtil.retrieveChecksum(pay);
      }
    }

    this.docIn = doc;
    this.posIn = pos;
    this.payIn = pay;
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(doc, pos, pay);
    }
  }
}

Example 19

Source File: VariableGapTermsIndexReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Loads the variable-gap terms index of a segment.
 *
 * <p>The index file is opened, its header and whole-file checksum are checked, and one
 * {@code FieldIndexData} per field listed in the file's directory is registered in
 * {@code fields}. The input is closed before the constructor returns, whether or not
 * loading succeeded.
 *
 * @throws CorruptIndexException if the field count is negative or a field appears twice
 */
public VariableGapTermsIndexReader(SegmentReadState state) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(
      state.segmentInfo.name,
      state.segmentSuffix,
      VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
  final IndexInput in = state.directory.openInput(fileName, new IOContext(state.context, true));

  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(
        in,
        VariableGapTermsIndexWriter.CODEC_NAME,
        VariableGapTermsIndexWriter.VERSION_START,
        VariableGapTermsIndexWriter.VERSION_CURRENT,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);

    seekDir(in);

    // Read directory
    final int numFields = in.readVInt();
    if (numFields < 0) {
      throw new CorruptIndexException("invalid numFields: " + numFields, in);
    }

    for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
      final int fieldNumber = in.readVInt();
      final long indexStart = in.readVLong();
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
      final FieldIndexData replaced = fields.put(fieldInfo.name, new FieldIndexData(in, fieldInfo, indexStart));
      if (replaced != null) {
        throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
      }
    }
    success = true;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(in);
    } else {
      IOUtils.close(in);
    }
  }
}