Java Code Examples for org.apache.lucene.codecs.CodecUtil

The following examples show how to use org.apache.lucene.codecs.CodecUtil. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: ChecksumBlobStoreFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the blob stored under exactly the given name; the name is used as-is and is not
 * resolved through the {@link #blobName} method.
 * <p>
 * The whole blob is loaded into memory, its checksum and codec header are checked, and the
 * bytes located between the header and the footer are passed to {@code read}.
 *
 * @param blobContainer blob container
 * @param blobName blob name
 * @return the object produced by {@code read} from the blob payload
 * @throws IOException if the blob cannot be read
 */
public T readBlob(BlobContainer blobContainer, String blobName) throws IOException {
    try (InputStream blobStream = blobContainer.readBlob(blobName)) {
        final byte[] content = ByteStreams.toByteArray(blobStream);
        final String description = "ChecksumBlobStoreFormat.readBlob(blob=\"" + blobName + "\")";
        try (ByteArrayIndexInput input = new ByteArrayIndexInput(description, content)) {
            CodecUtil.checksumEntireFile(input);
            CodecUtil.checkHeader(input, codec, VERSION, VERSION);
            // The payload starts right after the header and ends where the footer begins.
            final long payloadStart = input.getFilePointer();
            final long payloadLength = input.length() - CodecUtil.footerLength() - payloadStart;
            final BytesReference payload = new BytesArray(content, (int) payloadStart, (int) payloadLength);
            return read(payload);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException corruption) {
            // rethrown as a dedicated exception type that carries the original as its cause
            throw new CorruptStateException(corruption);
        }
    }
}
 
Example 2
Source Project: Elasticsearch   Source File: ChecksumBlobStoreFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the blob under exactly the given name; the name is used as-is and is not resolved
 * through the {@link #blobName} method.
 * <p>
 * The serialized object is framed by a codec header and footer, buffered completely in memory,
 * and then handed to the container in a single {@code writeBlob} call.
 *
 * @param obj           object to be serialized
 * @param blobContainer blob container
 * @param blobName      blob name
 * @throws IOException if serialization or the write to the container fails
 */
protected void writeBlob(T obj, BlobContainer blobContainer, String blobName) throws IOException {
    final BytesReference serialized = write(obj);
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        final String description = "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")";
        try (OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput(description, buffer, BUFFER_SIZE)) {
            CodecUtil.writeHeader(indexOutput, codec, VERSION);
            try (OutputStream payloadStream = new IndexOutputOutputStream(indexOutput) {
                @Override
                public void close() throws IOException {
                    // Intentionally a no-op: some of the XContentBuilders write bytes on close,
                    // and the underlying index output must stay open so the footer can still
                    // be written after the payload.
                }
            }) {
                serialized.writeTo(payloadStream);
            }
            CodecUtil.writeFooter(indexOutput);
        }
        blobContainer.writeBlob(blobName, new BytesArray(buffer.toByteArray()));
    }
}
 
Example 3
Source Project: Elasticsearch   Source File: Completion090PostingsFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the delegate fields consumer and the suggest FST output for the given segment.
 * <p>
 * The output starts with the codec header followed by the names of the delegate postings
 * format and of the write provider. If anything fails after the delegate consumer has been
 * created, both the output and the delegate consumer are closed so that no file handles leak.
 *
 * @param state write state of the segment being written
 * @throws IOException if the output cannot be created or written
 */
public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
    this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexOutput output = null;
    boolean success = false;
    try {
        output = state.directory.createOutput(suggestFSTFile, state.context);
        CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
        /*
         * we write the delegate postings format name so we can load it
         * without getting an instance in the ctor
         */
        output.writeString(delegatePostingsFormat.getName());
        output.writeString(writeProvider.getName());
        this.suggestFieldsConsumer = writeProvider.consumer(output);
        success = true;
    } finally {
        if (!success) {
            // The delegate consumer was opened before the try block; release it together
            // with the (possibly null) output so a failed construction leaks nothing.
            IOUtils.closeWhileHandlingException(output, delegatesFieldsConsumer);
        }
    }
}
 
Example 4
Source Project: Elasticsearch   Source File: TranslogWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the given file through the channel factory, writes the translog header (codec header,
 * UUID length, UUID bytes), forces it to disk, writes a checkpoint and returns a writer of the
 * requested type over the channel.
 * <p>
 * On any failure the channel is closed before the failure is rethrown.
 */
public static TranslogWriter create(Type type, ShardId shardId, String translogUUID, long fileGeneration, Path file, Callback<ChannelReference> onClose, int bufferSize, ChannelFactory channelFactory) throws IOException {
    final BytesRef uuidBytes = new BytesRef(translogUUID);
    final int headerLength = getHeaderLength(uuidBytes.length);
    final FileChannel channel = channelFactory.open(file);
    try {
        // The data output below is deliberately left open: closing it would
        // close the FileChannel as well.
        final OutputStreamDataOutput headerOut = new OutputStreamDataOutput(java.nio.channels.Channels.newOutputStream(channel));
        CodecUtil.writeHeader(headerOut, TRANSLOG_CODEC, VERSION);
        headerOut.writeInt(uuidBytes.length);
        headerOut.writeBytes(uuidBytes.bytes, uuidBytes.offset, uuidBytes.length);
        channel.force(true);
        writeCheckpoint(headerLength, 0, file.getParent(), fileGeneration, StandardOpenOption.WRITE);
        final ChannelReference channelReference = new ChannelReference(file, fileGeneration, channel, onClose);
        return type.create(shardId, fileGeneration, channelReference, bufferSize);
    } catch (Throwable failure) {
        // if we fail to bake the file-generation into the checkpoint we stick with the file and once we recover and that
        // file exists we remove it. We only apply this logic to the checkpoint.generation+1 any other file with a higher generation is an error condition
        IOUtils.closeWhileHandlingException(channel);
        throw failure;
    }
}
 
Example 5
Source Project: Elasticsearch   Source File: Store.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the checksum of a Lucene file and records its metadata in {@code builder}.
 * <p>
 * When {@code readFileAsHash} is set, the file content is additionally hashed into the stored
 * metadata while the whole file is checksum-verified; otherwise only the checksum stored in the
 * footer is retrieved.
 *
 * @param directory      directory containing the file
 * @param file           name of the file to inspect
 * @param builder        receives the resulting {@code StoreFileMetaData}, keyed by file name
 * @param logger         logger used to report failures at debug level
 * @param version        version recorded in the metadata
 * @param readFileAsHash whether to hash the file content in addition to reading the checksum
 * @throws IOException if the file is shorter than a codec footer or cannot be read/verified
 */
private static void checksumFromLuceneFile(Directory directory, String file, ImmutableMap.Builder<String, StoreFileMetaData> builder, ESLogger logger, Version version, boolean readFileAsHash) throws IOException {
    final String checksum;
    final BytesRefBuilder fileHash = new BytesRefBuilder();
    try (final IndexInput in = directory.openInput(file, IOContext.READONCE)) {
        final long length;
        try {
            length = in.length();
            if (length < CodecUtil.footerLength()) {
                // truncated files trigger IAE if we seek negative... these files are really corrupted though
                throw new CorruptIndexException("Can't retrieve checksum from file: " + file + " file length must be >= " + CodecUtil.footerLength() + " but was: " + length, in);
            }
            if (readFileAsHash) {
                final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in); // additional safety we checksum the entire file we read the hash for...
                hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
                checksum = digestToString(verifyingIndexInput.verify());
            } else {
                checksum = digestToString(CodecUtil.retrieveChecksum(in));
            }

        } catch (Throwable ex) {
            // This branch is only reached on failure, so the message must say "Can't".
            logger.debug("Can't retrieve checksum from file [{}]", ex, file);
            throw ex;
        }
        builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
    }
}
 
Example 6
Source Project: Elasticsearch   Source File: Lucene.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code FindSegmentsFile} pass over the directory that validates the header of the
 * segments file and, for format versions of at least {@code SegmentInfos.VERSION_48},
 * checksums the entire file. Files that do not start with the codec magic are treated as
 * legacy and left unchecked.
 *
 * @param directory directory holding the segments file
 * @throws IOException if the file cannot be read or a check fails
 */
public static void checkSegmentInfoIntegrity(final Directory directory) throws IOException {
    new SegmentInfos.FindSegmentsFile(directory) {

        @Override
        protected Object doBody(String segmentFileName) throws IOException {
            try (IndexInput segmentsInput = directory.openInput(segmentFileName, IOContext.READ)) {
                final int magic = segmentsInput.readInt();
                if (magic != CodecUtil.CODEC_MAGIC) {
                    // legacy....
                    return null;
                }
                // 4.0+
                final int formatVersion = CodecUtil.checkHeaderNoMagic(segmentsInput, "segments", SegmentInfos.VERSION_40, Integer.MAX_VALUE);
                if (formatVersion >= SegmentInfos.VERSION_48) {
                    CodecUtil.checksumEntireFile(segmentsInput);
                }
                return null;
            }
        }
    }.run();
}
 
Example 7
Source Project: lucene-solr   Source File: Lucene84PostingsWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the codec footer to each open output (doc, pos, pay) and then closes them.
 * If writing a footer fails, the outputs are closed with close-time exceptions suppressed.
 * All three output references are cleared afterwards.
 */
@Override
public void close() throws IOException {
  // TODO: add a finish() at least to PushBase? DV too...?
  boolean footersWritten = false;
  try {
    for (IndexOutput stream : new IndexOutput[] {docOut, posOut, payOut}) {
      if (stream != null) {
        CodecUtil.writeFooter(stream);
      }
    }
    footersWritten = true;
  } finally {
    if (!footersWritten) {
      IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
    } else {
      IOUtils.close(docOut, posOut, payOut);
    }
    docOut = null;
    posOut = null;
    payOut = null;
  }
}
 
Example 8
Source Project: lucene-solr   Source File: BlockTermsWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the terms output for the segment, writes its index header and lets the postings
 * writer initialize itself on that output. The output is closed if initialization fails.
 *
 * @param termsIndexWriter writer for the terms index
 * @param state            write state of the segment
 * @param postingsWriter   postings writer that is initialized on the terms output
 * @throws IOException if the output cannot be created or written
 */
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
    SegmentWriteState state, PostingsWriterBase postingsWriter)
    throws IOException {
  this.termsIndexWriter = termsIndexWriter;
  maxDoc = state.segmentInfo.maxDoc();
  final String termsFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
  out = state.directory.createOutput(termsFileName, state.context);

  boolean initialized = false;
  try {
    fieldInfos = state.fieldInfos;
    CodecUtil.writeIndexHeader(out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    currentField = null;
    this.postingsWriter = postingsWriter;

    // have consumer write its format/header
    postingsWriter.init(out, state);
    initialized = true;
  } finally {
    if (initialized == false) {
      IOUtils.closeWhileHandlingException(out);
    }
  }
}
 
Example 9
Source Project: lucene-solr   Source File: TestOfflineSorter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a single 4-byte value but tells the sorter that values are {@code Long.BYTES} wide,
 * and expects the sort to be rejected with an {@link IllegalArgumentException}.
 */
public void testFixedLengthLiesLiesLies() throws Exception {
  // Make sure OfflineSorter catches me if I lie about the fixed value length:
  Directory dir = newDirectory();
  IndexOutput out = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
  try (ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(out)) {
    byte[] value = new byte[Integer.BYTES];
    random().nextBytes(value);
    writer.write(value);
    CodecUtil.writeFooter(out);
  }

  OfflineSorter sorter = new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(4), OfflineSorter.MAX_TEMPFILES, Long.BYTES, null, 0);
  IllegalArgumentException expected =
      expectThrows(IllegalArgumentException.class, () -> sorter.sort(out.getName()));
  assertEquals("value length is 4 but is supposed to always be 8", expected.getMessage());
  dir.close();
}
 
Example 10
Source Project: lucene-solr   Source File: FreeTextSuggester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads a previously stored model from {@code input}: checks the codec header, then reads the
 * count, separator, gram size, total token count and the FST, in that order.
 *
 * @param input source of the stored model
 * @return always {@code true}
 * @throws IOException if reading fails
 * @throws IllegalStateException if the stored separator or gram size differs from this
 *         instance's configuration
 */
@Override
public boolean load(DataInput input) throws IOException {
  CodecUtil.checkHeader(input, CODEC_NAME, VERSION_START, VERSION_START);
  count = input.readVLong();

  final byte storedSeparator = input.readByte();
  if (storedSeparator != separator) {
    throw new IllegalStateException("separator=" + separator + " is incorrect: original model was built with separator=" + storedSeparator);
  }

  final int storedGrams = input.readVInt();
  if (storedGrams != grams) {
    throw new IllegalStateException("grams=" + grams + " is incorrect: original model was built with grams=" + storedGrams);
  }

  totTokens = input.readVLong();
  fst = new FST<>(input, input, PositiveIntOutputs.getSingleton());
  return true;
}
 
Example 11
Source Project: lucene-solr   Source File: CompletionFieldsConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the delegate fields consumer and the dictionary output for the segment, and writes
 * the index header to the dictionary output. Both are closed if construction fails.
 *
 * @param codecName              codec name written into the dictionary header
 * @param delegatePostingsFormat postings format that supplies the delegate consumer
 * @param state                  write state of the segment
 * @throws IOException if the delegate or the output cannot be created or written
 */
CompletionFieldsConsumer(String codecName, PostingsFormat delegatePostingsFormat, SegmentWriteState state) throws IOException {
  this.codecName = codecName;
  this.delegatePostingsFormatName = delegatePostingsFormat.getName();
  this.state = state;
  final String dictFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
  boolean initialized = false;
  try {
    this.delegateFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    dictOut = state.directory.createOutput(dictFileName, state.context);
    CodecUtil.writeIndexHeader(dictOut, codecName, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    initialized = true;
  } finally {
    if (!initialized) {
      IOUtils.closeWhileHandlingException(dictOut, delegateFieldsConsumer);
    }
  }
}
 
Example 12
Source Project: lucene-solr   Source File: BinaryDictionaryWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the part-of-speech dictionary to {@code path}.
 * <p>
 * The file consists of the codec header, the number of entries, and then for each entry either
 * three zero bytes (for a {@code null} entry) or the three CSV fields as strings. Missing
 * parent directories are created first.
 *
 * @param path target file
 * @throws IOException if the file cannot be written
 * @throws IllegalArgumentException if an entry does not parse into exactly three fields
 */
private void writePosDict(Path path) throws IOException {
  // getParent() returns null for a path without a parent (e.g. a bare file name);
  // Files.createDirectories(null) would fail with a NullPointerException.
  final Path parent = path.getParent();
  if (parent != null) {
    Files.createDirectories(parent);
  }
  try (OutputStream os = Files.newOutputStream(path);
       OutputStream bos = new BufferedOutputStream(os)) {
    final DataOutput out = new OutputStreamDataOutput(bos);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String s : posDict) {
      if (s == null) {
        // placeholder for a missing entry: three zero bytes
        out.writeByte((byte)0);
        out.writeByte((byte)0);
        out.writeByte((byte)0);
      } else {
        String[] data = CSVUtil.parse(s);
        if (data.length != 3) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + s + "; expected 3 characters");
        }
        out.writeString(data[0]);
        out.writeString(data[1]);
        out.writeString(data[2]);
      }
    }
  }
}
 
Example 13
Source Project: lucene-solr   Source File: CharacterDefinitionWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the character definition data below {@code baseDir}, at a location derived from the
 * {@code CharacterDefinition} class name (dots replaced by slashes) plus the file name suffix.
 * <p>
 * The file holds the codec header, the character category map, and one flag byte per character
 * class (bit 0x01 from {@code invokeMap}, bit 0x02 from {@code groupMap}).
 *
 * @param baseDir directory under which the file is created
 * @throws IOException if the file cannot be written
 */
public void write(Path baseDir) throws IOException {
  final String relativeName =
      CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX;
  final Path target = baseDir.resolve(relativeName);
  Files.createDirectories(target.getParent());
  try (OutputStream stream = new BufferedOutputStream(Files.newOutputStream(target))) {
    final DataOutput out = new OutputStreamDataOutput(stream);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int classId = 0; classId < CharacterDefinition.CLASS_COUNT; classId++) {
      int flags = 0x00;
      if (invokeMap[classId]) {
        flags |= 0x01;
      }
      if (groupMap[classId]) {
        flags |= 0x02;
      }
      out.writeByte((byte) flags);
    }
  }
}
 
Example 14
Source Project: lucene-solr   Source File: CharacterDefinitionWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the character definition tables to a file under {@code baseDir}. The relative
 * file name is the {@code CharacterDefinition} class name with dots turned into slashes,
 * followed by the file name suffix; parent directories are created as needed.
 * <p>
 * Layout: codec header, character category map, then one byte per character class whose
 * bit 0x01 mirrors {@code invokeMap} and whose bit 0x02 mirrors {@code groupMap}.
 *
 * @param baseDir root directory for the output
 * @throws IOException if writing fails
 */
public void write(Path baseDir) throws IOException {
  final Path outputFile = baseDir.resolve(
      CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX);
  Files.createDirectories(outputFile.getParent());
  try (OutputStream buffered = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
    final DataOutput out = new OutputStreamDataOutput(buffered);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int cls = 0; cls < CharacterDefinition.CLASS_COUNT; cls++) {
      final int invokeBit = invokeMap[cls] ? 0x01 : 0x00;
      final int groupBit = groupMap[cls] ? 0x02 : 0x00;
      out.writeByte((byte) (invokeBit | groupBit));
    }
  }
}
 
Example 15
Source Project: lucene-solr   Source File: ConnectionCostsWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the connection cost matrix to a file under {@code baseDir}. The relative file name is
 * the {@code ConnectionCosts} class name with dots replaced by slashes, followed by the file
 * name suffix.
 * <p>
 * Layout: codec header, forward size, backward size, then every cost as the zig-zag encoded
 * difference from the previous cost (the first difference is taken against 0).
 *
 * @param baseDir root directory for the output
 * @throws IOException if the file cannot be written
 */
public void write(Path baseDir) throws IOException {
  Files.createDirectories(baseDir);
  String fileName = ConnectionCosts.class.getName().replace('.', '/') + ConnectionCosts.FILENAME_SUFFIX;
  final Path path = baseDir.resolve(fileName);
  // fileName contains package sub-directories, so creating baseDir alone is not enough:
  // the directory that will actually hold the file has to exist as well.
  final Path parent = path.getParent();
  if (parent != null) {
    Files.createDirectories(parent);
  }
  try (OutputStream os = Files.newOutputStream(path);
       OutputStream bos = new BufferedOutputStream(os)) {
    final DataOutput out = new OutputStreamDataOutput(bos);
    CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION);
    out.writeVInt(forwardSize);
    out.writeVInt(backwardSize);
    int last = 0;
    // costs holds 2-byte shorts, hence limit() / 2 entries at byte offset i * 2
    for (int i = 0; i < costs.limit() / 2; i++) {
      short cost = costs.getShort(i * 2);
      int delta = (int) cost - last;
      out.writeZInt(delta);
      last = cost;
    }
  }
}
 
Example 16
Source Project: lucene-solr   Source File: RAMOnlyPostingsFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the id stored in the segment's id file (after checking its codec header) and returns
 * the entry registered under that id in {@code state}. The id file is always closed before
 * the lookup.
 *
 * @param readState read state of the segment
 * @return the value stored in {@code state} for the id that was read
 * @throws IOException if the id file cannot be opened or read
 */
@Override
public FieldsProducer fieldsProducer(SegmentReadState readState)
  throws IOException {

  // Load our ID:
  final String idFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name, readState.segmentSuffix, ID_EXTENSION);
  final IndexInput idInput = readState.directory.openInput(idFileName, readState.context);
  final int id;
  boolean readOk = false;
  try {
    CodecUtil.checkHeader(idInput, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
    id = idInput.readVInt();
    readOk = true;
  } finally {
    if (readOk) {
      IOUtils.close(idInput);
    } else {
      IOUtils.closeWhileHandlingException(idInput);
    }
  }

  synchronized(state) {
    return state.get(id);
  }
}
 
Example 17
Source Project: lucene-solr   Source File: BaseCompoundFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a one-file segment into compound format, opens the compound reader and closes it
 * twice; the second close must not throw.
 */
public void testDoubleClose() throws IOException {
  final String fileName = "_123.test";

  Directory dir = newDirectory();
  SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
  try (IndexOutput output = dir.createOutput(fileName, IOContext.DEFAULT)) {
    CodecUtil.writeIndexHeader(output, "Foo", 0, segmentInfo.getId(), "suffix");
    output.writeInt(3);
    CodecUtil.writeFooter(output);
  }

  segmentInfo.setFiles(Collections.singleton(fileName));
  segmentInfo.getCodec().compoundFormat().write(dir, segmentInfo, IOContext.DEFAULT);
  Directory compound = segmentInfo.getCodec().compoundFormat().getCompoundReader(dir, segmentInfo, IOContext.DEFAULT);
  assertEquals(1, compound.listAll().length);
  compound.close();
  // second close should not throw exception
  compound.close();
  dir.close();
}
 
Example 18
Source Project: lucene-solr   Source File: BaseCompoundFormatTestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the compound format hands the caller's {@code IOContext} instance through to
 * {@code Directory.createOutput}: the wrapping directory asserts identity on every call.
 */
public void testPassIOContext() throws IOException {
  final String fileName = "_123.test";
  final IOContext expectedContext = new IOContext();

  Directory dir = new FilterDirectory(newDirectory()) {
    @Override
    public IndexOutput createOutput(String name, IOContext context) throws IOException {
      assertSame(expectedContext, context);
      return super.createOutput(name, context);
    }
  };
  SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
  try (IndexOutput output = dir.createOutput(fileName, expectedContext)) {
    CodecUtil.writeIndexHeader(output, "Foo", 0, segmentInfo.getId(), "suffix");
    output.writeInt(3);
    CodecUtil.writeFooter(output);
  }

  segmentInfo.setFiles(Collections.singleton(fileName));
  segmentInfo.getCodec().compoundFormat().write(dir, segmentInfo, expectedContext);
  dir.close();
}
 
Example 19
Source Project: lucene-solr   Source File: Lucene50LiveDocsFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the live-docs bits of a segment as a sequence of 64-bit words framed by an index
 * header and a footer, counting the cleared bits while doing so.
 *
 * @param bits        live docs; a set bit is written as 1, a cleared bit counts as deleted
 * @param dir         directory to write into
 * @param info        commit info supplying the segment name, id, next deletion generation
 *                    and current deletion count
 * @param newDelCount number of deletions expected in addition to {@code info.getDelCount()}
 * @param context     IO context for the output
 * @throws CorruptIndexException if the number of cleared bits differs from
 *         {@code info.getDelCount() + newDelCount}
 * @throws IOException if writing fails
 */
@Override
public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
  final long gen = info.getNextDelGen();
  final String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
  int clearedBits = 0;
  try (IndexOutput output = dir.createOutput(name, context)) {
    CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
    final int wordCount = FixedBitSet.bits2words(bits.length());
    for (int word = 0; word < wordCount; ++word) {
      final int firstBit = word << 6;
      final int lastBit = Math.min(firstBit + 63, bits.length() - 1);
      long packed = 0;
      for (int bit = firstBit; bit <= lastBit; ++bit) {
        if (!bits.get(bit)) {
          clearedBits += 1;
        } else {
          // long shifts use only the low 6 bits of the distance, i.e. bit mod 64
          packed |= 1L << bit;
        }
      }
      output.writeLong(packed);
    }
    CodecUtil.writeFooter(output);
  }
  final int expectedDeletes = info.getDelCount() + newDelCount;
  if (clearedBits != expectedDeletes) {
    throw new CorruptIndexException("bits.deleted=" + clearedBits + 
        " info.delcount=" + info.getDelCount() + " newdelcount=" + newDelCount, name);
  }
}
 
Example 20
Source Project: lucene-solr   Source File: TestOfflineSorter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sorts 1024*1024 random fixed-width 4-byte values with a 4 MB buffer and asserts that no
 * merge rounds were needed.
 */
@Nightly
public void testFixedLengthHeap() throws Exception {
  // Make sure the RAM accounting is correct, i.e. if we are sorting fixed width
  // ints (4 bytes) then the heap used is really only 4 bytes per value:
  Directory dir = newDirectory();
  IndexOutput out = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
  try (ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(out)) {
    final byte[] value = new byte[Integer.BYTES];
    int remaining = 1024 * 1024;
    while (remaining-- > 0) {
      random().nextBytes(value);
      writer.write(value);
    }
    CodecUtil.writeFooter(out);
  }

  ExecutorService executor = randomExecutorServiceOrNull();
  OfflineSorter sorter = new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(4), OfflineSorter.MAX_TEMPFILES, Integer.BYTES, executor, TestUtil.nextInt(random(), 1, 4));
  sorter.sort(out.getName());
  if (executor != null) {
    executor.shutdownNow();
  }
  // 1M ints with a 4 MB buffer allowed should have been sorted in a single heap partition:
  assertEquals(0, sorter.sortInfo.mergeRounds);
  dir.close();
}
 
Example 21
Source Project: lucene-solr   Source File: Lucene80NormsConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Finishes both outputs: the meta output gets an EOF marker ({@code -1}) followed by the
 * footer, the data output gets its footer. Both are then closed; if finishing failed, they
 * are closed with close-time exceptions suppressed. The references are cleared afterwards.
 */
@Override
public void close() throws IOException {
  boolean finished = false;
  try {
    if (meta != null) {
      // EOF marker first, then the checksum footer
      meta.writeInt(-1);
      CodecUtil.writeFooter(meta);
    }
    if (data != null) {
      // checksum footer
      CodecUtil.writeFooter(data);
    }
    finished = true;
  } finally {
    if (!finished) {
      IOUtils.closeWhileHandlingException(data, meta);
    } else {
      IOUtils.close(data, meta);
    }
    meta = null;
    data = null;
  }
}
 
Example 22
Source Project: crate   Source File: Store.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Marks this store as corrupted by writing a {@code corrupted_${uuid}} file that contains the
 * serialized exception, framed by a codec header and footer. Once such a file exists,
 * {@link #isMarkedCorrupted()} returns <code>true</code>. Nothing is written if the store is
 * already marked.
 * <p>
 * An {@link IOException} while writing the marker is only logged; the marker file name is
 * synced to the directory afterwards in either case.
 *
 * @param exception the failure to record in the marker file
 * @throws IOException if the sync fails or the store state checks fail
 */
public void markStoreCorrupted(IOException exception) throws IOException {
    ensureOpen();
    if (isMarkedCorrupted()) {
        return;
    }
    final String markerName = CORRUPTED + UUIDs.randomBase64UUID();
    try (IndexOutput marker = this.directory().createOutput(markerName, IOContext.DEFAULT)) {
        CodecUtil.writeHeader(marker, CODEC, VERSION);
        final BytesStreamOutput serialized = new BytesStreamOutput();
        serialized.writeException(exception);
        final BytesReference payload = serialized.bytes();
        // length-prefixed exception payload
        marker.writeVInt(payload.length());
        final BytesRef payloadRef = payload.toBytesRef();
        marker.writeBytes(payloadRef.bytes, payloadRef.offset, payloadRef.length);
        CodecUtil.writeFooter(marker);
    } catch (IOException ex) {
        logger.warn("Can't mark store as corrupted", ex);
    }
    directory().sync(Collections.singleton(markerName));
}
 
Example 23
Source Project: lucene-solr   Source File: Lucene80DocValuesConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes the EOF marker ({@code -1}) and footer to the meta output and the footer to the data
 * output, then closes both. When writing fails, the outputs are closed while suppressing
 * close-time exceptions. Both references are set to {@code null} at the end.
 */
@Override
public void close() throws IOException {
  boolean footersWritten = false;
  try {
    if (meta != null) {
      meta.writeInt(-1);            // EOF marker
      CodecUtil.writeFooter(meta);  // checksum
    }
    if (data != null) {
      CodecUtil.writeFooter(data);  // checksum
    }
    footersWritten = true;
  } finally {
    if (footersWritten == false) {
      IOUtils.closeWhileHandlingException(data, meta);
    } else {
      IOUtils.close(data, meta);
    }
    meta = null;
    data = null;
  }
}
 
Example 24
Source Project: Elasticsearch   Source File: Completion090PostingsFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the suggest FST file of the segment, validates its header, resolves the delegate
 * postings format and the completion lookup provider by the names stored in the file, and
 * (unless the segment is opened for merging) loads the lookup factory.
 * <p>
 * The input is always closed before the constructor returns; on failure the delegate producer
 * is closed as well. The header check runs inside the guarded region so that a corrupt or
 * incompatible header does not leak the open input.
 *
 * @param state read state of the segment
 * @throws IOException if the file cannot be opened, validated or read
 * @throws IllegalStateException if no provider is registered under the stored provider name
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
    String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
    FieldsProducer delegateProducer = null;
    boolean success = false;
    try {
        version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
        String providerName = input.readString();
        CompletionLookupProvider completionLookupProvider = providers.get(providerName);
        if (completionLookupProvider == null) {
            throw new IllegalStateException("no provider with name [" + providerName + "] registered");
        }
        // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
        delegateProducer = delegatePostingsFormat.fieldsProducer(state);
        /*
         * If we are merging we don't load the FSTs at all such that we
         * don't consume so much memory during merge
         */
        if (state.context.context != Context.MERGE) {
            // TODO: maybe we can do this in a fully lazy fashion based on some configuration
            // eventually we should have some kind of circuit breaker that prevents us from going OOM here
            // with some configuration
            this.lookupFactory = completionLookupProvider.load(input);
        } else {
            this.lookupFactory = null;
        }
        this.delegateProducer = delegateProducer;
        success = true;
    } finally {
        if (!success) {
            IOUtils.closeWhileHandlingException(delegateProducer, input);
        } else {
            IOUtils.close(input);
        }
    }
}
 
Example 25
Source Project: lucene-solr   Source File: Lucene84PostingsReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checksums every input (doc, pos, pay) that is currently open; inputs that are
 * {@code null} are skipped.
 *
 * @throws IOException if a checksum check fails or an input cannot be read
 */
@Override
public void checkIntegrity() throws IOException {
  for (IndexInput input : new IndexInput[] {docIn, posIn, payIn}) {
    if (input == null) {
      continue;
    }
    CodecUtil.checksumEntireFile(input);
  }
}
 
Example 26
Source Project: lucene-solr   Source File: Lucene80NormsProducer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the norms metadata file completely (header, fields, footer) and opens the data file,
 * checking that its header version equals the metadata version and that its footer is
 * structurally valid. The data input is closed if opening fails part-way.
 *
 * @param state         read state of the segment
 * @param dataCodec     codec name expected in the data file header
 * @param dataExtension extension of the data file
 * @param metaCodec     codec name expected in the metadata file header
 * @param metaExtension extension of the metadata file
 * @throws IOException if either file cannot be read or fails validation
 */
Lucene80NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  maxDoc = state.segmentInfo.maxDoc();
  final String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  int metaVersion = -1;

  // read in the entries from the metadata file.
  try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context)) {
    Throwable failure = null;
    try {
      metaVersion = CodecUtil.checkIndexHeader(metaIn, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
      readFields(metaIn, state.fieldInfos);
    } catch (Throwable t) {
      // remembered and passed to checkFooter below
      failure = t;
    } finally {
      CodecUtil.checkFooter(metaIn, failure);
    }
  }

  final String dataFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  data = state.directory.openInput(dataFileName, state.context);
  boolean opened = false;
  try {
    final int dataVersion = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    if (metaVersion != dataVersion) {
      throw new CorruptIndexException("Format versions mismatch: meta=" + metaVersion + ",data=" + dataVersion, data);
    }

    // NOTE: data file is too costly to verify checksum against all the bytes on open,
    // but for now we at least verify proper structure of the checksum footer: which looks
    // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
    // such as file truncation.
    CodecUtil.retrieveChecksum(data);

    opened = true;
  } finally {
    if (opened == false) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}
 
Example 27
Source Project: crate   Source File: Store.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the checksum of a Lucene file and stores its metadata in {@code builder}.
 * <p>
 * When {@code readFileAsHash} is set, the file content is additionally hashed into the stored
 * metadata while the whole file is checksum-verified; otherwise only the checksum stored in the
 * footer is retrieved.
 *
 * @param directory      directory containing the file
 * @param file           name of the file to inspect
 * @param builder        receives the resulting {@code StoreFileMetaData}, keyed by file name
 * @param logger         logger used to report failures at debug level
 * @param version        version recorded in the metadata
 * @param readFileAsHash whether to hash the file content in addition to reading the checksum
 * @throws IOException if the file is shorter than a codec footer or cannot be read/verified
 */
private static void checksumFromLuceneFile(Directory directory, String file, Map<String, StoreFileMetaData> builder,
        Logger logger, Version version, boolean readFileAsHash) throws IOException {
    final String checksum;
    final BytesRefBuilder fileHash = new BytesRefBuilder();
    try (IndexInput in = directory.openInput(file, IOContext.READONCE)) {
        final long length;
        try {
            length = in.length();
            if (length < CodecUtil.footerLength()) {
                // truncated files trigger IAE if we seek negative... these files are really corrupted though
                throw new CorruptIndexException("Can't retrieve checksum from file: " + file + " file length must be >= " + CodecUtil.footerLength() + " but was: " + length, in);
            }
            if (readFileAsHash) {
                final VerifyingIndexInput verifyingIndexInput = new VerifyingIndexInput(in); // additional safety we checksum the entire file we read the hash for...
                hashFile(fileHash, new InputStreamIndexInput(verifyingIndexInput, length), length);
                checksum = digestToString(verifyingIndexInput.verify());
            } else {
                checksum = digestToString(CodecUtil.retrieveChecksum(in));
            }

        } catch (Exception ex) {
            // This branch is only reached on failure, so the message must say "Can't".
            logger.debug(() -> new ParameterizedMessage("Can't retrieve checksum from file [{}]", file), ex);
            throw ex;
        }
        builder.put(file, new StoreFileMetaData(file, length, checksum, version, fileHash.get()));
    }
}
 
Example 28
Source Project: lucene-solr   Source File: FSTTermsWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the per-field summary, the trailer and the footer to the terms output, then closes
 * the output together with the postings writer. Does nothing when the output is already
 * {@code null}; the output reference is cleared once closing has been attempted.
 *
 * @throws IOException if writing or closing fails
 */
@Override
public void close() throws IOException {
  if (out == null) {
    return;
  }
  boolean summaryWritten = false;
  try {
    // write field summary
    final long summaryStart = out.getFilePointer();

    out.writeVInt(fields.size());
    for (FieldMetaData summary : fields) {
      out.writeVInt(summary.fieldInfo.number);
      out.writeVLong(summary.numTerms);
      final boolean docsOnly = summary.fieldInfo.getIndexOptions() == IndexOptions.DOCS;
      if (!docsOnly) {
        out.writeVLong(summary.sumTotalTermFreq);
      }
      out.writeVLong(summary.sumDocFreq);
      out.writeVInt(summary.docCount);
      summary.dict.save(out, out);
    }
    writeTrailer(out, summaryStart);
    CodecUtil.writeFooter(out);
    summaryWritten = true;
  } finally {
    if (!summaryWritten) {
      IOUtils.closeWhileHandlingException(out, postingsWriter);
    } else {
      IOUtils.close(out, postingsWriter);
    }
    out = null;
  }
}
 
Example 29
Source Project: lucene-solr   Source File: FSTTermsReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens the FST terms file of the segment, validates its header and checksum, initializes the
 * postings reader on it, and loads one {@code TermsReader} per field from the field summary.
 * The input is closed before the constructor returns, whether or not loading succeeded.
 *
 * @param state          read state of the segment
 * @param postingsReader postings reader that is initialized on the terms input
 * @throws IOException if the file cannot be opened, validated or read
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput termsIn = state.directory.openInput(termsFileName, state.context);

  boolean loaded = false;
  try {
    CodecUtil.checkIndexHeader(termsIn, FSTTermsWriter.TERMS_CODEC_NAME,
                                        FSTTermsWriter.TERMS_VERSION_START,
                                        FSTTermsWriter.TERMS_VERSION_CURRENT,
                                        state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.checksumEntireFile(termsIn);
    this.postingsReader.init(termsIn, state);
    seekDir(termsIn);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int fieldCount = termsIn.readVInt();
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
      final int fieldNumber = termsIn.readVInt();
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      final long numTerms = termsIn.readVLong();
      final long sumTotalTermFreq = termsIn.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
      final long sumDocFreq;
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        sumDocFreq = sumTotalTermFreq;
      } else {
        sumDocFreq = termsIn.readVLong();
      }
      final int docCount = termsIn.readVInt();
      final TermsReader current = new TermsReader(fieldInfo, termsIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      final TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, termsIn, current, previous);
    }
    loaded = true;
  } finally {
    if (!loaded) {
      IOUtils.closeWhileHandlingException(termsIn);
    } else {
      IOUtils.close(termsIn);
    }
  }
}
 
Example 30
Source Project: lucene-solr   Source File: OrdsBlockTreeTermsWriter.java    License: Apache License 2.0 5 votes vote down vote up
/** Create a new writer.  The number of items (terms or
 *  sub-blocks) per block will aim to be between
 *  minItemsInBlock and maxItemsInBlock, though in some
 *  cases the blocks may be smaller than the min.
 *  <p>
 *  Creates the terms and terms-index outputs, writes an
 *  index header to each, and lets the postings writer
 *  initialize itself on the terms output.  Both outputs
 *  are closed if any of this fails. */
public OrdsBlockTreeTermsWriter(
                                SegmentWriteState state,
                                PostingsWriterBase postingsWriter,
                                int minItemsInBlock,
                                int maxItemsInBlock)
  throws IOException
{
  BlockTreeTermsWriter.validateSettings(minItemsInBlock, maxItemsInBlock);

  maxDoc = state.segmentInfo.maxDoc();

  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
  out = state.directory.createOutput(termsFileName, state.context);

  IndexOutput termsIndexOut = null;
  boolean initialized = false;
  try {
    fieldInfos = state.fieldInfos;
    this.minItemsInBlock = minItemsInBlock;
    this.maxItemsInBlock = maxItemsInBlock;
    CodecUtil.writeIndexHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);

    final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    termsIndexOut = state.directory.createOutput(termsIndexFileName, state.context);
    CodecUtil.writeIndexHeader(termsIndexOut, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);

    this.postingsWriter = postingsWriter;

    // have consumer write its format/header
    postingsWriter.init(out, state);
    initialized = true;
  } finally {
    if (initialized == false) {
      IOUtils.closeWhileHandlingException(out, termsIndexOut);
    }
  }
  // published only after everything above succeeded
  this.indexOut = termsIndexOut;
}