Java Code Examples for org.apache.lucene.store.IndexInput#readString()

The following examples show how to use org.apache.lucene.store.IndexInput#readString(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the complete index header from the given {@link IndexInput} and
 * returns it as a byte array.
 *
 * <p>The input is first rewound to offset 0 and the leading int is compared
 * against {@code CODEC_MAGIC}. The codec name, one further int, the ID bytes
 * and the suffix-length byte are then consumed only to work out how long the
 * header is; finally the input is rewound again and that many bytes are
 * copied out.
 *
 * @param in the input to read the header from
 * @return the raw header bytes, starting at offset 0 of {@code in}
 * @throws CorruptIndexException if the leading int is not {@code CODEC_MAGIC},
 *         i.e. the file does not appear to be an index file
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }

  final String codecName = in.readString();
  // The next int is read only to step over it (presumably the version).
  in.readInt();
  // Jump over the ID bytes without reading them.
  final long positionAfterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(positionAfterId);
  // Suffix length is stored as a single unsigned byte.
  final int suffixLen = in.readByte() & 0xFF;

  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLen;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}
 
Example 2
Source File: CodecInfo.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the term string for every hit in {@code refs}.
 *
 * <p>For each hit the "term" input is positioned at the hit's {@code ref}
 * and a string is read from there. A new hit carrying the same positions,
 * ref and additional id/ref, with the term as its data, is appended to the
 * result in the same order as {@code refs}.
 *
 * @param refs
 *          the hits whose {@code ref} points at a term
 * @return a new list with one term-carrying hit per entry of {@code refs}
 * @throws IOException
 *           wrapping any exception raised while resolving the terms
 */
public ArrayList<MtasTreeHit<String>> getTerms(ArrayList<MtasTreeHit<?>> refs)
    throws IOException {
  try {
    final IndexInput termInput = indexInputList.get("term");
    final ArrayList<MtasTreeHit<String>> resolved = new ArrayList<>();
    for (MtasTreeHit<?> source : refs) {
      termInput.seek(source.ref);
      final String text = termInput.readString();
      resolved.add(new MtasTreeHit<String>(source.startPosition,
          source.endPosition, source.ref, source.additionalId,
          source.additionalRef, text));
    }
    return resolved;
  } catch (Exception e) {
    // Every failure, checked or unchecked, surfaces as an IOException.
    throw new IOException(e);
  }
}
 
Example 3
Source File: MtasFieldsProducer.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an index input under {@code name} and records the file position
 * that follows its leading postings-format-name string.
 *
 * <p>An input already registered under {@code name} is closed first. If
 * {@code in} is {@code null} nothing is registered. Otherwise a string is
 * read from {@code in}; when {@code postingsFormatName} is given, the string
 * must equal it.
 *
 * @param name the key to register the input under
 * @param in the input to register, may be {@code null}
 * @param postingsFormatName the expected postings format name, or
 *        {@code null} to accept whatever the input contains
 * @return the postings format name in effect, or {@code null} if {@code in}
 *         is {@code null}
 * @throws IOException if the name read from {@code in} differs from
 *         {@code postingsFormatName}, or if an I/O error occurs
 */
private String addIndexInputToList(String name, IndexInput in,
    String postingsFormatName) throws IOException {
  if (indexInputList.get(name) != null) {
    indexInputList.get(name).close();
  }
  if (in == null) {
    log.debug("no " + name + " registered");
    return null;
  }

  // The input always starts with the postings format name.
  final String storedName = in.readString();
  final String effectiveName;
  if (postingsFormatName == null) {
    effectiveName = storedName;
  } else if (storedName.equals(postingsFormatName)) {
    effectiveName = postingsFormatName;
  } else {
    throw new IOException("delegate codec " + name + " doesn't equal "
        + postingsFormatName);
  }

  indexInputList.put(name, in);
  // Remember where the payload begins, right after the name string.
  indexInputOffsetList.put(name, in.getFilePointer());
  return effectiveName;
}
 
Example 4
Source File: Completion090PostingsFormat.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the suggest file of the segment described by {@code state}, checks
 * its codec header, and sets up the delegate fields producer and, unless
 * the segment is being read for a merge, the lookup factory.
 *
 * <p>The opened input is always closed before this constructor returns,
 * both on success and on failure. On failure the delegate producer, if it
 * was already created, is closed as well.
 *
 * @param state the segment read state to open the suggest file from
 * @throws IOException if the file cannot be opened or read, or its header
 *         is rejected
 * @throws IllegalStateException if the provider name stored in the file is
 *         not registered
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
    String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
    FieldsProducer delegateProducer = null;
    boolean success = false;
    try {
        // Validate the header inside the try block: if it is rejected, the
        // finally clause below closes the input instead of leaking it.
        version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
        String providerName = input.readString();
        CompletionLookupProvider completionLookupProvider = providers.get(providerName);
        if (completionLookupProvider == null) {
            throw new IllegalStateException("no provider with name [" + providerName + "] registered");
        }
        // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
        delegateProducer = delegatePostingsFormat.fieldsProducer(state);
        /*
         * If we are merging we don't load the FSTs at all such that we
         * don't consume so much memory during merge
         */
        if (state.context.context != Context.MERGE) {
            // TODO: maybe we can do this in a fully lazy fashion based on some configuration
            // eventually we should have some kind of circuit breaker that prevents us from going OOM here
            // with some configuration
            this.lookupFactory = completionLookupProvider.load(input);
        } else {
            this.lookupFactory = null;
        }
        this.delegateProducer = delegateProducer;
        success = true;
    } finally {
        if (!success) {
            // Suppress secondary close failures so the original exception propagates.
            IOUtils.closeWhileHandlingException(delegateProducer, input);
        } else {
            IOUtils.close(input);
        }
    }
}
 
Example 5
Source File: CodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Expert: checks that the given {@link IndexInput} starts with an index
 * header whose segment ID equals {@code expectedID}, then writes that
 * header to the given {@link DataOutput}. Useful when building compound
 * files.
 *
 * <p>Codec name, version and suffix cannot be validated here and are
 * copied through unchanged.
 *
 * @param in Input stream, positioned where the index header was previously
 *        written (typically the beginning of the file).
 * @param out Output stream that receives the copied header.
 * @param expectedID Expected segment ID
 * @throws CorruptIndexException If the file is too small to hold a header
 *         and footer, if the first four bytes are not {@link #CODEC_MAGIC},
 *         or if the <code>expectedID</code> does not match.
 * @throws IOException If there is an I/O error reading from the underlying medium.
 *
 * @lucene.internal
 */
public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
  // A valid sub-file holds at least an empty-codec header plus a footer.
  if (in.length() < footerLength() + headerLength("")) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
  }

  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + magic + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
  }

  // Codec name and version are opaque to us; just carry them over.
  final String codecName = in.readString();
  final int codecVersion = in.readInt();

  // The segment ID is the one field that can be checked.
  checkIndexHeaderID(in, expectedID);

  // The suffix is opaque as well: an unsigned length byte, then the bytes.
  final int suffixLen = in.readByte() & 0xFF;
  final byte[] suffix = new byte[suffixLen];
  in.readBytes(suffix, 0, suffixLen);

  // Emit the header in the same field order it was read.
  out.writeInt(CodecUtil.CODEC_MAGIC);
  out.writeString(codecName);
  out.writeInt(codecVersion);
  out.writeBytes(expectedID, 0, expectedID.length);
  out.writeByte((byte) suffixLen);
  out.writeBytes(suffix, 0, suffixLen);
}
 
Example 6
Source File: CodecInfo.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the field references into memory and resets the prefix-reference
 * cache.
 *
 * <p>The "field" input is positioned at its recorded offset and records are
 * read one after another until a read throws an {@link IOException}, which
 * is logged at debug level and ends the loop.
 *
 * @throws IOException
 *           if positioning the "field" input fails
 */
private void init() throws IOException {
  // start at the recorded offset of the field data
  final IndexInput fieldInput = indexInputList.get("field");
  fieldInput.seek(indexInputOffsetList.get("field"));

  fieldReferences = new HashMap<>();
  while (true) {
    try {
      final String fieldName = fieldInput.readString();
      final long docRef = fieldInput.readVLong();
      final long docIdRef = fieldInput.readVLong();
      final int docCount = fieldInput.readVInt();
      fieldInput.readVLong(); // refTerm, not kept
      fieldInput.readVInt(); // numberOfTerms, not kept
      final long prefixRef = fieldInput.readVLong();
      final int prefixCount = fieldInput.readVInt();
      fieldReferences.put(fieldName, new FieldReferences(docRef, docIdRef,
          docCount, prefixRef, prefixCount));
    } catch (IOException e) {
      // a failed read ends the loop
      log.debug(e);
      break;
    }
  }

  // prefix references are filled lazily elsewhere; start empty
  prefixReferences = new HashMap<>();
}
 
Example 7
Source File: CodecInfo.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the prefix references of a field, loading and caching them on
 * first use.
 *
 * <p>Each entry maps a prefix string to the file position in the "prefix"
 * input at which that string starts. If loading fails, the error is logged
 * and an empty map is cached and returned.
 *
 * @param field
 *          the field
 * @return the prefix-to-position map in read order, or {@code null} if the
 *         field has no field references
 */
private LinkedHashMap<String, Long> getPrefixRefs(String field) {
  if (!fieldReferences.containsKey(field)) {
    return null;
  }
  if (prefixReferences.containsKey(field)) {
    return prefixReferences.get(field);
  }

  final FieldReferences fieldRef = fieldReferences.get(field);
  final LinkedHashMap<String, Long> loaded = new LinkedHashMap<>();
  try {
    final IndexInput prefixInput = indexInputList.get("prefix");
    prefixInput.seek(fieldRef.refPrefix);
    int index = 0;
    while (index < fieldRef.numberOfPrefixes) {
      // capture the position before the read advances it
      final Long position = prefixInput.getFilePointer();
      final String prefixText = prefixInput.readString();
      loaded.put(prefixText, position);
      index++;
    }
  } catch (Exception e) {
    log.error(e);
    // discard partial results
    loaded.clear();
  }
  prefixReferences.put(field, loaded);
  return loaded;
}
 
Example 8
Source File: MtasCodecPostingsFormat.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the term string stored at the given position of a term input.
 *
 * @param inTerm the input to read the term from
 * @param ref the file position at which the term string starts
 * @return the term read at {@code ref}
 * @throws IOException wrapping any exception raised while seeking or reading
 */
public static String getTerm(IndexInput inTerm, Long ref) throws IOException {
  final String term;
  try {
    inTerm.seek(ref);
    term = inTerm.readString();
  } catch (Exception e) {
    // Every failure, checked or unchecked, surfaces as an IOException.
    throw new IOException(e);
  }
  return term;
}
 
Example 9
Source File: Blur022SegmentInfoReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the segment info file of {@code segment} from {@code dir}.
 *
 * <p>After the codec header check, the file is read in this order: version
 * string, document count, compound-file flag, diagnostics map, attributes
 * map and file set. The whole file must be consumed by these reads. The
 * opened input is closed before returning, on success and on failure.
 *
 * @throws CorruptIndexException if the document count is negative or bytes
 *         are left unread
 * @throws IOException if opening or reading the file fails
 */
@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segment, "", Blur022SegmentInfoFormat.SI_EXTENSION);
  final IndexInput input = dir.openInput(fileName, context);
  boolean completed = false;
  try {
    CodecUtil.checkHeader(input, Blur022SegmentInfoFormat.CODEC_NAME, Blur022SegmentInfoFormat.VERSION_START,
        Blur022SegmentInfoFormat.VERSION_CURRENT);

    final String luceneVersion = input.readString();
    final int numDocs = input.readInt();
    if (numDocs < 0) {
      throw new CorruptIndexException("invalid docCount: " + numDocs + " (resource=" + input + ")");
    }
    final boolean compound = input.readByte() == SegmentInfo.YES;
    final Map<String, String> diagnosticsMap = input.readStringStringMap();
    final Map<String, String> attributeMap = input.readStringStringMap();
    final Set<String> segmentFiles = input.readStringSet();

    // Anything left over means the file does not have the expected layout.
    if (input.getFilePointer() != input.length()) {
      throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read "
          + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
    }

    final SegmentInfo info = new SegmentInfo(dir, luceneVersion, segment, numDocs, compound, null, diagnosticsMap,
        Collections.unmodifiableMap(attributeMap));
    info.setFiles(segmentFiles);

    completed = true;
    return info;
  } finally {
    if (completed) {
      input.close();
    } else {
      // Suppress close failures so the original exception propagates.
      IOUtils.closeWhileHandlingException(input);
    }
  }
}