Java Code Examples for org.apache.lucene.index.FieldInfo#getIndexOptions()

The following examples show how to use org.apache.lucene.index.FieldInfo#getIndexOptions(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the bits recording which documents have a value for {@code field}.
 *
 * <p>A {@link Bits.MatchNoBits} sized to {@code reader.maxDoc()} is returned without
 * consulting the cache when the field is unknown to the reader, or when the lookup would
 * have to go through postings (no doc values, parser is not a {@code PointParser}) but the
 * field was not indexed.
 *
 * @param reader leaf reader to inspect
 * @param field  name of the field
 * @param parser parser that forms part of the cache key; a {@code PointParser} selects the points path
 * @return the {@code bits} of the cached {@code BitsEntry}, or a match-nothing instance
 * @throws IOException declared by the underlying reader and cache lookups
 */
@Override
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException {
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    // field does not exist or has no value
    return new Bits.MatchNoBits(reader.maxDoc());
  }

  // Doc values and points need no extra validation here; only the postings
  // path has to confirm that the field was actually indexed.
  final boolean hasDocValues = info.getDocValuesType() != DocValuesType.NONE;
  final boolean postingsCase = !hasDocValues && !(parser instanceof PointParser);
  if (postingsCase && info.getIndexOptions() == IndexOptions.NONE) {
    return new Bits.MatchNoBits(reader.maxDoc());
  }

  final BitsEntry cached =
      (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser));
  return cached.bits;
}
 
Example 2
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns sorted doc values for {@code field}, preferring the reader's own instance and
 * otherwise going through the cache.
 *
 * @param reader                  leaf reader to inspect
 * @param field                   name of the field
 * @param acceptableOverheadRatio ratio that forms part of the cache key
 * @return the reader's sorted doc values if present; an empty instance if the field is
 *         unknown or not indexed; otherwise an iterator over the cached implementation
 * @throws IllegalStateException if the field carries doc values of some other type
 * @throws IOException declared by the underlying reader and cache lookups
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues existing = reader.getSortedDocValues(field);
  if (existing != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return existing;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptySorted();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // we don't try to build a sorted instance from numeric/binary doc
    // values because dedup can be very costly
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySorted();
  }

  final SortedDocValuesImpl cached =
      (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}
 
Example 3
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns binary doc values for {@code field}. The reader's binary doc values are tried
 * first, then its sorted doc values, and only then the cache.
 *
 * @param reader                  leaf reader to inspect
 * @param field                   name of the field
 * @param acceptableOverheadRatio ratio that forms part of the cache key
 * @return doc values supplied by the reader if present; an empty instance if the field is
 *         unknown or not indexed; otherwise an iterator over the cached implementation
 * @throws IllegalStateException if the field carries doc values of some other type
 * @throws IOException declared by the underlying reader and cache lookups
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  // Not cached here by FieldCacheImpl (cached instead
  // per-thread by SegmentReader):
  final BinaryDocValues binary = reader.getBinaryDocValues(field);
  if (binary != null) {
    return binary;
  }
  final BinaryDocValues sorted = reader.getSortedDocValues(field);
  if (sorted != null) {
    return sorted;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptyBinary();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }

  final BinaryDocValuesImpl cached =
      (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}
 
Example 4
Source File: DocumentField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link DocumentField} describing one field of one document.
 *
 * <p>Schema-level properties (index options, term vectors, payloads, norms, doc values
 * type, point configuration) are copied from {@code finfo}. The per-document norm is read
 * from {@code reader} when the field has norms. Stored-value properties are copied from
 * {@code field} when it is given.
 *
 * @param finfo  field metadata; must not be null
 * @param field  the field instance of the document, or null if there is none to describe
 * @param reader index reader used to look up the norm value; must not be null
 * @param docId  document id used to position the norms iterator
 * @return the populated {@code DocumentField}
 * @throws NullPointerException if {@code finfo} or {@code reader} is null
 * @throws IOException declared by the norms lookup
 */
static DocumentField of(FieldInfo finfo, IndexableField field, IndexReader reader, int docId)
    throws IOException {

  Objects.requireNonNull(finfo);
  Objects.requireNonNull(reader);

  DocumentField dfield = new DocumentField();

  dfield.name = finfo.name;
  dfield.idxOptions = finfo.getIndexOptions();
  dfield.hasTermVectors = finfo.hasVectors();
  dfield.hasPayloads = finfo.hasPayloads();
  dfield.hasNorms = finfo.hasNorms();

  if (finfo.hasNorms()) {
    NumericDocValues norms = MultiDocValues.getNormValues(reader, finfo.name);
    // getNormValues is allowed to return null (e.g. when the reader cannot supply norms
    // for this field), so guard before dereferencing; the norm is then simply left unset.
    if (norms != null && norms.advanceExact(docId)) {
      dfield.norm = norms.longValue();
    }
  }

  dfield.dvType = finfo.getDocValuesType();

  dfield.pointDimensionCount = finfo.getPointDimensionCount();
  dfield.pointNumBytes = finfo.getPointNumBytes();

  if (field != null) {
    dfield.isStored = field.fieldType().stored();
    dfield.stringValue = field.stringValue();
    // Read the binary value once so the null check and the copy see the same reference.
    BytesRef binary = field.binaryValue();
    if (binary != null) {
      dfield.binaryValue = BytesRef.deepCopyOf(binary);
    }
    dfield.numericValue = field.numericValue();
  }

  return dfield;
}
 
Example 5
Source File: FSTTermsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the terms file of a segment, validates it, and registers one {@code TermsReader}
 * per field listed in it.
 *
 * <p>The per-field values are read from the input in a fixed sequence, so the order of the
 * read calls in the loop below must not be changed.
 *
 * @param state          segment read state supplying the directory, segment info, suffix,
 *                       IO context and field infos
 * @param postingsReader postings reader that is stored on this instance and initialized
 *                       from the same input
 * @throws IOException declared by the file open, validation and read calls
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  // Only set to true once every field has been read; selects the close strategy in finally.
  boolean success = false;
  try {
    // Validate header (codec name, version range, segment id and suffix) and the file checksum
    // before trusting any of the content.
    CodecUtil.checkIndexHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
                                     FSTTermsWriter.TERMS_VERSION_START,
                                     FSTTermsWriter.TERMS_VERSION_CURRENT,
                                     state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);
    this.postingsReader.init(in, state);
    // NOTE(review): presumably positions the input at the start of the field summary section -- confirm in seekDir
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int i = 0; i < numFields; i++) {
      // Per-field record: field number, term count, sumTotalTermFreq, [sumDocFreq], doc count.
      int fieldNumber = in.readVInt();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      long numTerms = in.readVLong();
      long sumTotalTermFreq = in.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
      long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
      int docCount = in.readVInt();
      TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      // put() hands back any reader already registered under this field name; both are
      // passed to checkFieldSummary.
      TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    // The input is closed on both paths; the failure path uses closeWhileHandlingException
    // (presumably so that a failure while closing does not replace the original exception).
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }
}
 
Example 6
Source File: IDVersionPostingsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Switches this writer to {@code fieldInfo} after validating that the field is usable
 * with this postings format, then resets {@code lastState} to {@code emptyState}.
 *
 * @param fieldInfo the field about to be written
 * @throws IllegalArgumentException if the field is not indexed with
 *         {@code IndexOptions.DOCS_AND_FREQS_AND_POSITIONS}, or if it has term vectors
 */
@Override
public void setField(FieldInfo fieldInfo) {
  super.setField(fieldInfo);

  final boolean indexedWithPositions =
      fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  if (!indexedWithPositions) {
    throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
  }

  // LUCENE-5693: because CheckIndex cross-checks term vectors with postings even for deleted docs, and because our PF only indexes the
  // non-deleted documents on flush, CheckIndex will see this as corruption:
  final boolean storesTermVectors = fieldInfo.hasVectors();
  if (storesTermVectors) {
    throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
  }

  lastState = emptyState;
}
 
Example 7
Source File: PushPostingsWriterBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the current field for writing; called when the writing switches to another field.
 *
 * <p>Records the field and its index options, derives which posting components
 * (frequencies, positions, offsets, payloads) are written for it, and picks the matching
 * {@code PostingsEnum} flags.
 *
 * @param fieldInfo the field that subsequent writes belong to
 */
@Override
public void setField(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;
  indexOptions = fieldInfo.getIndexOptions();

  // Each option level includes the ones before it, hence the ordered comparisons.
  writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
  writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  writePayloads = fieldInfo.hasPayloads();

  if (!writeFreqs) {
    enumFlags = 0;
  } else if (!writePositions) {
    enumFlags = PostingsEnum.FREQS;
  } else if (writeOffsets) {
    enumFlags = writePayloads
        ? (PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS)
        : PostingsEnum.OFFSETS;
  } else {
    enumFlags = writePayloads ? PostingsEnum.PAYLOADS : PostingsEnum.POSITIONS;
  }
}
 
Example 8
Source File: PerFieldPostingsFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the field-name to postings-producer mapping for a segment.
 *
 * <p>For every indexed field that carries a format-name attribute, the named
 * {@code PostingsFormat} is looked up and one producer is opened per distinct segment
 * suffix; fields sharing a suffix share the producer. If anything fails part-way, the
 * producers opened so far are closed.
 *
 * @param readState read state of the segment being opened
 * @throws IllegalStateException if a field has a format name but no suffix attribute
 * @throws IOException declared by opening the per-format producers
 */
public FieldsReader(final SegmentReadState readState) throws IOException {

  // Read _X.per and init each format:
  boolean success = false;
  try {
    // Read field name -> format name
    for (FieldInfo fi : readState.fieldInfos) {
      if (fi.getIndexOptions() == IndexOptions.NONE) {
        continue;
      }
      final String fieldName = fi.name;
      final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY);
      if (formatName == null) {
        // null formatName means the field is in fieldInfos, but has no postings!
        continue;
      }
      final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY);
      if (suffix == null) {
        throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
      }
      final PostingsFormat format = PostingsFormat.forName(formatName);
      final String segmentSuffix = getSuffix(formatName, suffix);
      final boolean alreadyOpened = formats.containsKey(segmentSuffix);
      if (!alreadyOpened) {
        formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
      }
      fields.put(fieldName, formats.get(segmentSuffix));
    }
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(formats.values());
    }
  }

  this.segment = readState.segmentInfo.name;
}
 
Example 9
Source File: PerFieldMergeState.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a view over {@code src} that keeps every field but records which ones are in
 * {@code filterFields}, and pre-computes the aggregate "has X" flags over that subset.
 *
 * @param src          the field infos to wrap
 * @param filterFields names of the fields that make up the filtered subset
 */
FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
  // Copy all the input FieldInfo objects since the field numbering must be kept consistent
  super(toArray(src));

  this.filteredNames = new HashSet<>(filterFields);
  this.filtered = new ArrayList<>(filterFields.size());

  boolean anyVectors = false;
  boolean anyProx = false;
  boolean anyPayloads = false;
  boolean anyOffsets = false;
  boolean anyFreq = false;
  boolean anyNorms = false;
  boolean anyDocValues = false;
  boolean anyPointValues = false;

  for (FieldInfo info : src) {
    if (!this.filteredNames.contains(info.name)) {
      continue;
    }
    this.filtered.add(info);

    final IndexOptions options = info.getIndexOptions();
    anyVectors |= info.hasVectors();
    anyProx |= options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    anyFreq |= options != IndexOptions.DOCS;
    anyOffsets |= options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    anyNorms |= info.hasNorms();
    anyDocValues |= info.getDocValuesType() != DocValuesType.NONE;
    anyPayloads |= info.hasPayloads();
    anyPointValues |= (info.getPointDimensionCount() != 0);
  }

  this.filteredHasVectors = anyVectors;
  this.filteredHasProx = anyProx;
  this.filteredHasPayloads = anyPayloads;
  this.filteredHasOffsets = anyOffsets;
  this.filteredHasFreq = anyFreq;
  this.filteredHasNorms = anyNorms;
  this.filteredHasDocValues = anyDocValues;
  this.filteredHasPointValues = anyPointValues;
}
 
Example 10
Source File: CollapsingQParserPlugin.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code leafReader} and builds a replacement {@code FieldInfos} in which the field
 * named {@code field} is re-created with {@code DocValuesType.NONE}; every other
 * constructor argument is copied from the original, and all other fields are passed
 * through untouched.
 *
 * @param leafReader the reader to wrap
 * @param field      name of the field whose doc values type is replaced
 */
ReaderWrapper(LeafReader leafReader, String field) {
  super(leafReader);

  // TODO can we just do "field" and not bother with the other fields?
  final FieldInfos wrappedInfos = in.getFieldInfos();
  final List<FieldInfo> rewritten = new ArrayList<>(wrappedInfos.size());
  for (FieldInfo current : wrappedInfos) {
    if (!current.name.equals(field)) {
      rewritten.add(current);
      continue;
    }
    rewritten.add(new FieldInfo(current.name,
        current.number,
        current.hasVectors(),
        current.hasNorms(),
        current.hasPayloads(),
        current.getIndexOptions(),
        DocValuesType.NONE,
        current.getDocValuesGen(),
        current.attributes(),
        current.getPointDimensionCount(),
        current.getPointIndexDimensionCount(),
        current.getPointNumBytes(),
        current.isSoftDeletesField()));
  }
  this.fieldInfos = new FieldInfos(rewritten.toArray(new FieldInfo[0]));
}
 
Example 11
Source File: UninvertDocValuesMergePolicyFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides how a field should be uninverted, if at all.
 *
 * <p>A type is returned only when the schema knows the field and says it has doc values,
 * while the segment-level {@code fi} has no doc values but is indexed.
 *
 * @param fi segment-level field info
 * @return the uninversion type reported by the schema field's type, or null when the
 *         field does not qualify
 */
private UninvertingReader.Type getUninversionType(FieldInfo fi) {
  final SchemaField schemaField = schema.getFieldOrNull(fi.name);
  if (schemaField == null || !schemaField.hasDocValues()) {
    return null;
  }
  if (fi.getDocValuesType() != DocValuesType.NONE || fi.getIndexOptions() == IndexOptions.NONE) {
    return null;
  }
  return schemaField.getType().getUninversionType(schemaField);
}
 
Example 12
Source File: FSTTermOutputs.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
protected FSTTermOutputs(FieldInfo fieldInfo) {
  this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
}
 
Example 13
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns numeric doc values for {@code field}, preferring the reader's own instance and
 * otherwise going through the cache after validating that the field can be uninverted
 * with the given parser.
 *
 * @param reader leaf reader to inspect
 * @param field  name of the field
 * @param parser parser that forms part of the cache key; a {@code PointParser} selects the
 *               points path, anything else the postings path
 * @return the reader's numeric doc values if present; an empty instance if the field is
 *         unknown, has no points (points path) or is not indexed (postings path);
 *         otherwise an iterator over the cached values
 * @throws NullPointerException  if {@code parser} is null
 * @throws IllegalStateException if the field has doc values of another type, or on the
 *         points path is not one-dimensional or not single-valued
 * @throws IOException declared by the underlying reader and cache lookups
 */
@Override
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser) throws IOException {
  if (parser == null) {
    throw new NullPointerException();
  }

  final NumericDocValues existing = reader.getNumericDocValues(field);
  if (existing != null) {
    return existing;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptyNumeric();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }

  if (parser instanceof PointParser) {
    // points case
    final int dimensions = fieldInfo.getPointDimensionCount();
    if (dimensions == 0) {
      // no points in this segment
      return DocValues.emptyNumeric();
    }
    if (dimensions != 1) {
      throw new IllegalStateException("Type mismatch: " + field + " was indexed with dimensions=" + dimensions);
    }
    final PointValues points = reader.getPointValues(field);
    if (points == null || points.size() == 0) {
      // no actual points for this field (e.g. all points deleted)
      return DocValues.emptyNumeric();
    }
    if (points.size() != points.getDocCount()) {
      // not single-valued
      throw new IllegalStateException("Type mismatch: " + field + " was indexed with multiple values, numValues=" + points.size() + ",numDocs=" + points.getDocCount());
    }
  } else if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    // postings case, but the field is not indexed
    return DocValues.emptyNumeric();
  }

  final LongsFromArray cached =
      (LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser));
  return cached.iterator();
}
 
Example 14
Source File: UnifiedHighlighter.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Determine the offset source for the specified field.  The default algorithm is as follows:
 * <ol>
 * <li>This calls {@link #getFieldInfo(String)}. Note this returns null if there is no searcher or if the
 * field isn't found there; in that case {@link OffsetSource#ANALYSIS} is returned.</li>
 * <li>If there's a field info and it has
 * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} then {@link OffsetSource#POSTINGS} is
 * returned, or {@link OffsetSource#POSTINGS_WITH_TERM_VECTORS} if {@link FieldInfo#hasVectors()} is
 * also true.</li>
 * <li>Otherwise, if there's a field info and {@link FieldInfo#hasVectors()} then
 * {@link OffsetSource#TERM_VECTORS} is returned (note we can't check here if the TV has offsets; if
 * there isn't then an exception will get thrown down the line).</li>
 * <li>Fall-back: {@link OffsetSource#ANALYSIS} is returned.</li>
 * </ol>
 * <p>
 * Note that the highlighter sometimes switches to something else based on the query, such as if you have
 * {@link OffsetSource#POSTINGS_WITH_TERM_VECTORS} but in fact don't need term vectors.
 */
protected OffsetSource getOffsetSource(String field) {
  final FieldInfo info = getFieldInfo(field);
  if (info == null) {
    return OffsetSource.ANALYSIS;
  }

  final boolean hasTermVectors = info.hasVectors();
  if (info.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
    return hasTermVectors ? OffsetSource.POSTINGS_WITH_TERM_VECTORS : OffsetSource.POSTINGS;
  }
  // unfortunately we can't also check if the TV has offsets
  return hasTermVectors ? OffsetSource.TERM_VECTORS : OffsetSource.ANALYSIS;
}