org.apache.lucene.index.FieldInfo Java Examples

The following examples show how to use org.apache.lucene.index.FieldInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PointsWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Default merge implementation: checks the integrity of every incoming points reader, then
 * merges, one field at a time, each field that has point dimensions into this writer, and
 * finally calls {@code finish()}.
 */
public void merge(MergeState mergeState) throws IOException {
  // Verify every incoming reader up front; segments without a reader are skipped.
  for (PointsReader incoming : mergeState.pointsReaders) {
    if (incoming == null) {
      continue;
    }
    incoming.checkIntegrity();
  }
  // Merge field by field, ignoring fields that did not index points.
  for (FieldInfo info : mergeState.mergeFieldInfos) {
    if (info.getPointDimensionCount() == 0) {
      continue;
    }
    mergeOneField(mergeState, info);
  }
  finish();
}
 
Example #2
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns binary values for {@code field}.
 *
 * <p>Doc values exposed by the reader (binary first, then sorted) are returned directly. Otherwise
 * an empty instance is returned for an unknown or un-indexed field, and the cached
 * implementation is used for an indexed field without doc values.
 *
 * @throws IllegalStateException if the field has doc values of some other type
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  BinaryDocValues docValues = reader.getBinaryDocValues(field);
  if (docValues == null) {
    docValues = reader.getSortedDocValues(field);
  }
  if (docValues != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader).
    return docValues;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptyBinary();
  }
  if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  }
  if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }

  final BinaryDocValuesImpl cachedImpl =
      (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cachedImpl.iterator();
}
 
Example #3
Source File: DiskDocValuesProducer.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the sorted-set doc values for {@code field}, reusing a previously cached instance
 * (keyed by field number) when one exists.
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
  // First lookup happens without holding the lock.
  final SortedSetDocValues cached = _sortedSetDocValuesCache.get(field.number);
  if (cached != null) {
    return cached;
  }
  synchronized (_sortedSetDocValuesCache) {
    // Look again under the lock before creating a new instance.
    SortedSetDocValues values = _sortedSetDocValuesCache.get(field.number);
    if (values == null) {
      values = newSortedSetDocValues(field);
      // Only remember the new instance when caching is enabled and something was created.
      if (_cache && values != null) {
        _sortedSetDocValuesCache.put(field.number, values);
      }
    }
    return values;
  }
}
 
Example #4
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns sorted values for {@code field}.
 *
 * <p>Sorted doc values exposed by the reader are returned directly. Otherwise an empty instance
 * is returned for an unknown or un-indexed field, and the cached implementation is used for an
 * indexed field without doc values.
 *
 * @throws IllegalStateException if the field has doc values of some other type
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues docValues = reader.getSortedDocValues(field);
  if (docValues != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader).
    return docValues;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptySorted();
  }
  if (info.getDocValuesType() != DocValuesType.NONE) {
    // No attempt is made to build a sorted instance from numeric/binary doc values,
    // because de-duplicating them can be very costly.
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  }
  if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySorted();
  }

  final SortedDocValuesImpl cachedImpl =
      (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cachedImpl.iterator();
}
 
Example #5
Source File: RecoverySourcePruneMergePolicy.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the parent's stored fields reader so that the recovery source field is hidden from
 * visitors for every document not flagged in {@code recoverySourceToKeep}.
 */
@Override
public StoredFieldsReader getFieldsReader() {
    final StoredFieldsReader delegate = super.getFieldsReader();
    return new FilterStoredFieldsReader(delegate) {
        @Override
        public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException {
            final boolean keepRecoverySource = recoverySourceToKeep != null && recoverySourceToKeep.get(docID);
            if (keepRecoverySource) {
                // Document keeps its recovery source: visit it unfiltered.
                super.visitDocument(docID, visitor);
                return;
            }
            // Otherwise answer NO for the recovery source field and defer everything else.
            final FilterStoredFieldVisitor pruningVisitor = new FilterStoredFieldVisitor(visitor) {
                @Override
                public Status needsField(FieldInfo fieldInfo) throws IOException {
                    return recoverySourceField.equals(fieldInfo.name)
                        ? Status.NO
                        : super.needsField(fieldInfo);
                }
            };
            super.visitDocument(docID, pruningVisitor);
        }
    };
}
 
Example #6
Source File: Lucene84PostingsReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the impacts enum for a term: a slow wrapper over plain postings when the term has at
 * most {@code BLOCK_SIZE} docs, otherwise the cheapest block-based enum that can serve the
 * features requested in {@code flags} and present in the index.
 */
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
  if (state.docFreq <= BLOCK_SIZE) {
    // no skip data
    return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
  }

  final IndexOptions options = fieldInfo.getIndexOptions();
  final boolean indexHasPositions = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  final boolean indexHasOffsets = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  final boolean indexHasPayloads = fieldInfo.hasPayloads();

  final boolean positionsUsable = indexHasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
  if (!positionsUsable) {
    return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
  }

  // Positions are available and requested from here on; only offsets/payloads remain to decide.
  final boolean offsetsUsable = indexHasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
  final boolean payloadsUsable = indexHasPayloads && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
  if (!offsetsUsable && !payloadsUsable) {
    return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
  }

  return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
}
 
Example #7
Source File: DiskDocValuesConsumer.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a sorted-set field as three parts: the ord -> bytes dictionary (binary field), the
 * stream of ords (numeric field), and a monotonic index built from the running sum of the
 * per-document ord counts.
 */
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);

  // ord -> byte[] dictionary, stored as a binary field
  addBinaryField(field, values);

  // stream of ords, stored as a numeric field
  // NOTE: an iterator that delta-encodes these within a doc could be returned instead
  addNumericField(field, ords);

  // metadata entry for the doc -> ord-count index (absolute positions into the ord stream)
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter endOffsets = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long runningTotal = 0;
  for (Number ordCount : docToOrdCount) {
    runningTotal += ordCount.longValue();
    endOffsets.add(runningTotal);
  }
  endOffsets.finish();
}
 
Example #8
Source File: FacetFieldProcessorByHashDV.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
  super(fcontext, freq, sf);
  if (freq.mincount == 0) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" doesn't support mincount=0");
  }
  if (freq.prefix != null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" doesn't support prefix"); // yet, but it could
  }
  FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
  if (fieldInfo != null &&
      fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
      fieldInfo.getDocValuesType() != DocValuesType.SORTED &&
      fieldInfo.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" only support single valued number/string with docValues");
  }
}
 
Example #9
Source File: OrdsBlockTreeTermsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Captures the per-field statistics and pointers passed in by the caller.
 *
 * <p>With assertions enabled, {@code numTerms} must be positive and {@code rootCode} non-null.
 */
public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
                     long sumTotalTermFreq, long sumDocFreq, int docCount,
                     BytesRef minTerm, BytesRef maxTerm) {
  // Sanity checks first, then plain field capture.
  assert numTerms > 0;
  assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;

  // identity and index location
  this.fieldInfo = fieldInfo;
  this.rootCode = rootCode;
  this.indexStartFP = indexStartFP;

  // statistics
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;

  // term range
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
}
 
Example #10
Source File: FSTTermsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes every field that has terms: each term goes through the postings writer, and terms that
 * produced a state are recorded in the per-field terms writer together with the accumulated
 * statistics.
 */
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  for (String field : fields) {
    final Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }
    final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
    final boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    final TermsEnum termsEnum = terms.iterator();
    final TermsWriter termsWriter = new TermsWriter(fieldInfo);

    final FixedBitSet docsSeen = new FixedBitSet(maxDoc);
    long totalTermFreqSum = 0;
    long docFreqSum = 0;

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      final BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
      if (termState == null) {
        // The postings writer produced no state for this term; nothing to record.
        continue;
      }
      termsWriter.finishTerm(term, termState);
      totalTermFreqSum += termState.totalTermFreq;
      docFreqSum += termState.docFreq;
    }

    // -1 is passed as the total term frequency when the field does not index frequencies.
    final long reportedTotalTermFreq = hasFreq ? totalTermFreqSum : -1;
    termsWriter.finish(reportedTotalTermFreq, docFreqSum, docsSeen.cardinality());
  }
}
 
Example #11
Source File: SolrDocumentFetcher.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Replays the fields of a cached {@link Document} against a stored field visitor, dispatching
 * each accepted field to the binary, numeric or string callback that matches its value.
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
  for (IndexableField storedField : document) {
    final FieldInfo info = searcher.getFieldInfos().fieldInfo(storedField.name());
    final StoredFieldVisitor.Status status = visitor.needsField(info);
    if (status == StoredFieldVisitor.Status.STOP) {
      return;
    }
    if (status == StoredFieldVisitor.Status.NO) {
      continue;
    }

    final BytesRef bytes = storedField.binaryValue();
    if (bytes != null) {
      visitor.binaryField(info, toByteArrayUnwrapIfPossible(bytes));
      continue;
    }

    final Number number = storedField.numericValue();
    if (number == null) {
      // Neither binary nor numeric, so the value must be a String.
      if (storedField instanceof LargeLazyField) {
        // optimization to avoid premature string conversion
        visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) storedField).readBytes()));
      } else {
        visitor.stringField(info, storedField.stringValue());
      }
      continue;
    }

    if (number instanceof Double) {
      visitor.doubleField(info, number.doubleValue());
    } else if (number instanceof Integer) {
      visitor.intField(info, number.intValue());
    } else if (number instanceof Float) {
      visitor.floatField(info, number.floatValue());
    } else if (number instanceof Long) {
      visitor.longField(info, number.longValue());
    } else {
      throw new AssertionError();
    }
  }
}
 
Example #12
Source File: SecureAtomicReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether {@code fieldInfo} is read-masked, i.e. has an entry in
 * {@code _readMaskFieldsAndMessages}. A non-empty mask message is forwarded to the visitor as
 * the field's string value; an empty one is not forwarded.
 *
 * @return {@code true} if the field is masked, {@code false} otherwise
 */
private boolean checkReadMask(FieldInfo fieldInfo) throws IOException {
  final String maskMessage = _readMaskFieldsAndMessages.get(fieldInfo.name);
  if (maskMessage == null) {
    return false;
  }
  if (!maskMessage.isEmpty()) {
    _visitor.stringField(fieldInfo, maskMessage);
  }
  return true;
}
 
Example #13
Source File: BlockTreeTermsReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Captures the per-field statistics, decodes the root block file pointer from {@code rootCode},
 * and, when an index input is supplied, loads the field's FST from a clone of it positioned at
 * {@code indexStartFP}. Without an index input, {@code index} is left {@code null}.
 */
FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;
  this.indexStartFP = indexStartFP;
  this.rootCode = rootCode;

  // The root code is a vLong whose low OUTPUT_FLAGS_NUM_BITS bits are shifted out here.
  final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length);
  rootBlockFP = rootCodeIn.readVLong() >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

  if (indexIn == null) {
    index = null;
  } else {
    // Read from a clone so the caller's input position is left untouched.
    final IndexInput fstInput = indexIn.clone();
    fstInput.seek(indexStartFP);
    index = new FST<BytesRef>(fstInput, ByteSequenceOutputs.getSingleton());
  }
}
 
Example #14
Source File: STUniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Merges segments using the shared terms of each segment.
 *
 * <p>Falls back to the default merge when the index is sorted, or when any segment's terms are
 * not directly an {@code STUniformSplitTerms} instance.
 */
@Override
public void merge(MergeState mergeState, NormsProducer normsProducer) throws IOException {
  if (mergeState.needsIndexSort) {
    // This custom merging does not support sorted index.
    // Fall back to the default merge, which is inefficient for this postings format.
    super.merge(mergeState, normsProducer);
    return;
  }
  final FieldsProducer[] producers = mergeState.fieldsProducers;
  final List<TermIterator<SegmentTerms>> segmentTermsList = new ArrayList<>(producers.length);
  for (int seg = 0; seg < producers.length; seg++) {
    final FieldsProducer producer = producers[seg];
    // Use the FieldInfos from mergeState.fieldInfos because they may be filtered by
    // PerFieldMergeState. Only the *first* field with Terms is needed per segment.
    for (FieldInfo fieldInfo : mergeState.fieldInfos[seg]) {
      final Terms terms = producer.terms(fieldInfo.name);
      if (terms == null) {
        continue;
      }
      if (!(terms instanceof STUniformSplitTerms)) {
        // Terms is wrapped/filtered rather than a direct STUniformSplitTerms.
        // Fall back to the default merge, which is inefficient for this postings format.
        super.merge(mergeState, normsProducer);
        return;
      }
      final STUniformSplitTerms sharedTerms = (STUniformSplitTerms) terms;
      segmentTermsList.add(
          new SegmentTerms(seg, sharedTerms.createMergingBlockReader(), mergeState.docMaps[seg]));
      // This segment is handled; move on to the next one.
      break;
    }
  }
  writeSegment((blockWriter, dictionaryBuilder) -> mergeSegments(mergeState, normsProducer, segmentTermsList, blockWriter, dictionaryBuilder));
}
 
Example #15
Source File: Lucene60PointsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Finishes the data file with a footer and writes the index file, which lists for every written
 * field its field number and the file pointer recorded in {@code indexFPs}.
 *
 * @throws IllegalStateException if called twice, or if a written field is missing from FieldInfos
 */
@Override
public void finish() throws IOException {
  if (finished) {
    throw new IllegalStateException("already finished");
  }
  finished = true;
  CodecUtil.writeFooter(dataOut);

  final String indexFileName = IndexFileNames.segmentFileName(
      writeState.segmentInfo.name, writeState.segmentSuffix, Lucene60PointsFormat.INDEX_EXTENSION);

  // Write index file
  try (IndexOutput indexOut = writeState.directory.createOutput(indexFileName, writeState.context)) {
    CodecUtil.writeIndexHeader(
        indexOut,
        Lucene60PointsFormat.META_CODEC_NAME,
        Lucene60PointsFormat.INDEX_VERSION_CURRENT,
        writeState.segmentInfo.getId(),
        writeState.segmentSuffix);

    indexOut.writeVInt(indexFPs.size());
    for (Map.Entry<String,Long> entry : indexFPs.entrySet()) {
      final String fieldName = entry.getKey();
      final FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(fieldName);
      if (fieldInfo == null) {
        throw new IllegalStateException("wrote field=\"" + fieldName + "\" but that field doesn't exist in FieldInfos");
      }
      indexOut.writeVInt(fieldInfo.number);
      indexOut.writeVLong(entry.getValue());
    }
    CodecUtil.writeFooter(indexOut);
  }
}
 
Example #16
Source File: Lucene80DocValuesConsumer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a sorted-numeric field: the values themselves, the number of documents with the field,
 * and, only when some document has more than one value, a monotonic table of cumulative value
 * counts per document.
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  meta.writeInt(field.number);
  meta.writeByte(Lucene80DocValuesFormat.SORTED_NUMERIC);

  final long[] stats = writeValues(field, valuesProducer);
  final int numDocsWithField = Math.toIntExact(stats[0]);
  final long numValues = stats[1];
  assert numValues >= numDocsWithField;

  meta.writeInt(numDocsWithField);
  if (numValues <= numDocsWithField) {
    // No more values than documents: no address table is written.
    return;
  }

  final long addressesStart = data.getFilePointer();
  meta.writeLong(addressesStart);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

  final DirectMonotonicWriter addressesWriter =
      DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT);
  // The table starts at 0 and gains one cumulative count per document.
  long cumulativeCount = 0;
  addressesWriter.add(cumulativeCount);
  final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    cumulativeCount += values.docValueCount();
    addressesWriter.add(cumulativeCount);
  }
  addressesWriter.finish();
  meta.writeLong(data.getFilePointer() - addressesStart);
}
 
Example #17
Source File: DiskDocValuesProducer.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Builds doc values for fixed-length binary entries: the value for {@code id} is the
 * {@code bytes.maxLength} bytes found at {@code bytes.offset + id * bytes.maxLength}.
 *
 * <p>I/O failures during a lookup are rethrown wrapped in a {@link RuntimeException}.
 */
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
  final IndexInput data = this.data.clone();

  return new LongBinaryDocValues() {

    // Hands out clones of the input via ThreadValue — presumably one per thread; confirm
    // against ThreadValue's implementation.
    private final ThreadValue<IndexInput> inputHolder = new ThreadValue<IndexInput>() {
      @Override
      protected IndexInput initialValue() {
        return data.clone();
      }
    };

    @Override
    public void get(long id, BytesRef result) {
      final long position = bytes.offset + id * bytes.maxLength;
      try {
        final IndexInput input = inputHolder.get();
        input.seek(position);
        // A fresh array per call: a shared buffer would do, but various consumers
        // (e.g. FieldComparatorSource) assume they own the bytes afterwards.
        final byte[] value = new byte[bytes.maxLength];
        input.readBytes(value, 0, value.length);
        result.bytes = value;
        result.offset = 0;
        result.length = value.length;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
}
 
Example #18
Source File: AssertingNormsFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks all norms of {@code field} asserting that doc IDs are within {@code [0, maxDoc)} and
 * strictly increasing, then delegates the actual write to the wrapped consumer.
 */
@Override
public void addNormsField(FieldInfo field, NormsProducer valuesProducer) throws IOException {
  final NumericDocValues values = valuesProducer.getNorms(field);

  int previousDocID = -1;
  for (int docID = values.nextDoc(); docID != NO_MORE_DOCS; docID = values.nextDoc()) {
    assert docID >= 0 && docID < maxDoc;
    assert docID > previousDocID;
    previousDocID = docID;
    // The value is read but deliberately not used.
    values.longValue();
  }

  in.addNormsField(field, valuesProducer);
}
 
Example #19
Source File: PerFieldDocValuesFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Merges doc values by grouping the merged fields per consumer instance and letting each
 * consumer merge only its own fields. The per-field merge state is always reset afterwards.
 */
@Override
public void merge(MergeState mergeState) throws IOException {
  // Consumers are keyed by identity, not equals().
  final Map<DocValuesConsumer, Collection<String>> fieldsByConsumer = new IdentityHashMap<>();

  // Group each consumer by the fields it handles
  for (FieldInfo fi : mergeState.mergeFieldInfos) {
    if (fi.getDocValuesType() != DocValuesType.NONE) {
      // merge should ignore current format for the fields being merged
      final DocValuesConsumer consumer = getInstance(fi, true);
      fieldsByConsumer.computeIfAbsent(consumer, unused -> new ArrayList<String>()).add(fi.name);
    }
  }

  // Delegate the merge to the appropriate consumer
  final PerFieldMergeState perFieldState = new PerFieldMergeState(mergeState);
  try {
    for (Map.Entry<DocValuesConsumer, Collection<String>> group : fieldsByConsumer.entrySet()) {
      final DocValuesConsumer consumer = group.getKey();
      consumer.merge(perFieldState.apply(group.getValue()));
    }
  } finally {
    perFieldState.reset();
  }
}
 
Example #20
Source File: FieldReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Captures the per-field statistics, decodes the root block file pointer from {@code rootCode},
 * and loads the field's FST off-heap from a clone of {@code indexIn} positioned at
 * {@code indexStartFP}.
 */
FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
            long indexStartFP, IndexInput metaIn, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  this.parent = parent;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;
  this.rootCode = rootCode;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;

  // The root code is a vLong whose low OUTPUT_FLAGS_NUM_BITS bits are shifted out here.
  final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length);
  rootBlockFP = rootCodeIn.readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

  // Initialize FST always off-heap.
  final IndexInput fstIn = indexIn.clone();
  fstIn.seek(indexStartFP);
  // When meta and index are the same input (only true before Lucene 8.6), the FST metadata
  // is read from the positioned clone as well.
  final IndexInput fstMetaIn = (metaIn == indexIn) ? fstIn : metaIn;
  index = new FST<>(fstMetaIn, fstIn, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
}
 
Example #21
Source File: IDVersionPostingsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Switches to a new field after validating it: the field must be indexed with exactly
 * {@code DOCS_AND_FREQS_AND_POSITIONS} and must not have term vectors.
 *
 * @throws IllegalArgumentException if either requirement is violated
 */
@Override
public void setField(FieldInfo fieldInfo) {
  super.setField(fieldInfo);

  final IndexOptions indexOptions = fieldInfo.getIndexOptions();
  if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
    throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
  }

  // LUCENE-5693: CheckIndex cross-checks term vectors with postings even for deleted docs,
  // while this postings format only indexes the non-deleted documents on flush, so CheckIndex
  // would see the mismatch as corruption.
  final boolean hasTermVectors = fieldInfo.hasVectors();
  if (hasTermVectors) {
    throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
  }

  lastState = emptyState;
}
 
Example #22
Source File: FieldsVisitor.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the field bytes as UTF-8 and either sets {@code uid} (for the uid field) or records
 * the value under the field's name.
 */
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
    final String decoded = new String(bytes, StandardCharsets.UTF_8);
    final boolean isUidField = UidFieldMapper.NAME.equals(fieldInfo.name);
    if (!isUidField) {
        addValue(fieldInfo.name, decoded);
        return;
    }
    uid = Uid.createUid(decoded);
}
 
Example #23
Source File: StoredFieldsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Points this instance at {@code field} and clears any previously held value.
 */
void reset(FieldInfo field) {
  // When a remapper is present the field numbers are not aligned, so the field is looked up
  // by name to obtain the one carrying the new field number.
  currentField = (remapper == null) ? field : remapper.fieldInfo(field.name);
  numericValue = null;
  stringValue = null;
  binaryValue = null;
}
 
Example #24
Source File: Lucene60PointsReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the underlying {@link BKDReader} registered for the given field.
 *
 * @throws IllegalArgumentException if the field is unknown or did not index point values
 * @lucene.internal
 */
@Override
public PointValues getValues(String fieldName) {
  final FieldInfo info = readState.fieldInfos.fieldInfo(fieldName);
  if (info == null) {
    throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
  }

  final boolean hasPoints = info.getPointDimensionCount() != 0;
  if (!hasPoints) {
    throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index point values");
  }

  // Readers are keyed by field number.
  return readers.get(info.number);
}
 
Example #25
Source File: IndexManager.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the row id stored with a document.
 *
 * <p>Only the {@code ROW_ID} stored field is loaded. The previous version first ran a visitor
 * that accepted no field at all (so that pass produced nothing and was discarded) and then
 * loaded every stored field of the document just to read this one.
 *
 * @param reader the reader to load the document from
 * @param docId the document to read
 * @return the first stored string value of the {@code ROW_ID} field, or {@code null} if the
 *         document has no such field
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if reading the stored fields fails
 */
private static String getRowId(IndexReader reader, int docId) throws CorruptIndexException, IOException {
  // Fully qualified so that no new import is required in the enclosing file.
  return reader.document(docId, java.util.Collections.singleton(ROW_ID)).get(ROW_ID);
}
 
Example #26
Source File: FieldReadCallback.java    From deprecated-security-advanced-modules with Apache License 2.0 5 votes vote down vote up
/**
 * Records a read of a string field: the raw bytes are decoded as UTF-8 and passed on, unless
 * {@code recordField} rejects the field.
 *
 * <p>Failures are logged rather than propagated. The caught exception is handed to the logger
 * as the trailing argument so that its cause and stack trace are not lost.
 *
 * @param fieldInfo the field that was read
 * @param fieldValue the raw UTF-8 encoded value of the field
 */
public void stringFieldRead(final FieldInfo fieldInfo, final byte[] fieldValue) {
    try {
        if(!recordField(fieldInfo.name, true)) {
            return;
        }
        fieldRead0(fieldInfo.name, new String(fieldValue, StandardCharsets.UTF_8));
    } catch (Exception e) {
        // Best effort: a failure here is reported but not rethrown.
        log.error("Unexpected error reading string field '{}' in index '{}'", fieldInfo.name, index.getName(), e);
    }
}
 
Example #27
Source File: Lucene84PostingsWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the metadata of one term, as deltas against the previously encoded term (or against
 * the empty state when {@code absolute} is set).
 */
@Override
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  final IntBlockTermState state = (IntBlockTermState)_state;
  if (absolute) {
    lastState = emptyState;
    assert lastState.docStartFP == 0;
  }

  final boolean consecutiveSingletons =
      lastState.singletonDocID != -1 && state.singletonDocID != -1 && state.docStartFP == lastState.docStartFP;
  if (consecutiveSingletons) {
    // With runs of rare values such as ID fields, the increment of pointers in the docs file
    // is often 0. Some ID schemes (auto-increment IDs, Flake IDs) are also monotonic, so the
    // delta between consecutive doc IDs is encoded to save space. The low bit is set to 1.
    final long docIdDelta = (long) state.singletonDocID - lastState.singletonDocID;
    out.writeVLong((BitUtil.zigZagEncode(docIdDelta) << 1) | 0x01);
  } else {
    // File pointer delta with the low bit left at 0, followed by the doc ID for a singleton.
    out.writeVLong((state.docStartFP - lastState.docStartFP) << 1);
    if (state.singletonDocID != -1) {
      out.writeVInt(state.singletonDocID);
    }
  }

  if (writePositions) {
    out.writeVLong(state.posStartFP - lastState.posStartFP);
    if (writePayloads || writeOffsets) {
      out.writeVLong(state.payStartFP - lastState.payStartFP);
    }
    if (state.lastPosBlockOffset != -1) {
      out.writeVLong(state.lastPosBlockOffset);
    }
  }

  if (state.skipOffset != -1) {
    out.writeVLong(state.skipOffset);
  }
  lastState = state;
}
 
Example #28
Source File: FieldMetadata.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs field metadata for reading or writing.
 *
 * @param fieldInfo The field info kept on this instance.
 * @param maxDoc The total number of documents in the segment being written. With assertions
 *               enabled it must be 0 when {@code isMutable} is false.
 * @param isMutable Set true if this FieldMetadata is created for writing the index. Set false
 *                  if it is used for reading the index.
 */
protected FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
  assert isMutable || maxDoc == 0;
  this.fieldInfo = fieldInfo;
  this.isMutable = isMutable;
  if (isMutable) {
    this.docsSeen = new FixedBitSet(maxDoc);
  } else {
    // An immutable instance is used for reading the index: docsSeen must not be set.
    this.docsSeen = null;
  }
  // All file pointers start out as -1.
  this.dictionaryStartFP = -1;
  this.firstBlockStartFP = -1;
  this.lastBlockStartFP = -1;
}
 
Example #29
Source File: BloomFilteringPostingsFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes the bloom filter for a field, using the downsized version offered by the factory
 * when there is one and the original filter otherwise.
 */
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
    FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
  final FuzzySet downsized = bloomFilterFactory.downsize(fieldInfo, bloomFilter);
  // A null result from the factory means the filter is written as is.
  final FuzzySet toSerialize = (downsized != null) ? downsized : bloomFilter;
  toSerialize.serialize(bloomOutput);
}
 
Example #30
Source File: Lucene50FieldInfosFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the field infos file for a segment: an index header, the number of fields, one record
 * per field, and a footer.
 *
 * <p>Each record holds, in order: name, number, a flag byte (term vectors / omit norms /
 * payloads), the index options byte, the doc values type byte, the doc values generation, and
 * the attribute map.
 */
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
  try (IndexOutput out = directory.createOutput(fileName, context)) {
    CodecUtil.writeIndexHeader(out, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
    out.writeVInt(infos.size());
    for (FieldInfo info : infos) {
      info.checkConsistency();

      out.writeString(info.name);
      out.writeVInt(info.number);

      // Boolean properties are packed into a single flag byte.
      byte flags = 0x0;
      if (info.hasVectors()) {
        flags |= STORE_TERMVECTOR;
      }
      if (info.omitsNorms()) {
        flags |= OMIT_NORMS;
      }
      if (info.hasPayloads()) {
        flags |= STORE_PAYLOADS;
      }
      out.writeByte(flags);

      out.writeByte(indexOptionsByte(info.getIndexOptions()));

      // doc values type as one byte, then its generation and the field attributes
      out.writeByte(docValuesByte(info.getDocValuesType()));
      out.writeLong(info.getDocValuesGen());
      out.writeMapOfStrings(info.attributes());
    }
    CodecUtil.writeFooter(out);
  }
}