org.apache.lucene.index.Fields Java Examples

The following examples show how to use org.apache.lucene.index.Fields. Each example comes from an open-source project; the originating project, source file, and license are noted above each snippet.
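Most of the examples below share one traversal pattern: a Fields instance is an Iterable over field names, fields.terms(field) returns the Terms for one field (or null when the field has none), and Terms.iterator() yields a TermsEnum for walking the terms themselves. The following minimal sketch of that pattern is illustrative rather than taken from any of the projects below; the FieldsWalker and walkFields names are hypothetical, and some older examples on this page use the legacy Lucene 4.x signature terms.iterator(null) instead of terms.iterator().

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class FieldsWalker {
  // Hypothetical helper: visits every term of every field in the given Fields.
  static void walkFields(Fields fields) throws IOException {
    for (String field : fields) {          // Fields implements Iterable<String>
      Terms terms = fields.terms(field);   // may be null (so-called "ghost" fields)
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        // Process the term, e.g. term.utf8ToString()
      }
    }
  }
}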
Example #1
Source File: STUniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0
private Collection<FieldMetadata> writeSingleSegment(Fields fields, NormsProducer normsProducer, STBlockWriter blockWriter, IndexDictionary.Builder dictionaryBuilder) throws IOException {
  List<FieldMetadata> fieldMetadataList = createFieldMetadataList(new FieldsIterator(fields, fieldInfos), maxDoc);
  TermIteratorQueue<FieldTerms> fieldTermsQueue = createFieldTermsQueue(fields, fieldMetadataList);
  List<TermIterator<FieldTerms>> groupedFieldTerms = new ArrayList<>(fieldTermsQueue.size());
  List<FieldMetadataTermState> termStates = new ArrayList<>(fieldTermsQueue.size());

  while (fieldTermsQueue.size() != 0) {
    TermIterator<FieldTerms> topFieldTerms = fieldTermsQueue.popTerms();
    BytesRef term = BytesRef.deepCopyOf(topFieldTerms.term);
    groupByTerm(fieldTermsQueue, topFieldTerms, groupedFieldTerms);
    writePostingLines(term, groupedFieldTerms, normsProducer, termStates);
    blockWriter.addLine(term, termStates, dictionaryBuilder);
    nextTermForIterators(groupedFieldTerms, fieldTermsQueue);
  }
  return fieldMetadataList;
}
 
Example #2
Source File: DfsOnlyRequest.java    From Elasticsearch with Apache License 2.0
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
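Note that termVectorsFields.terms(fieldName) may still return null even for a field name reported by the Fields iterator (so-called ghost fields; see the comment in Example #25), so production code typically null-checks the Terms before calling iterator(), as most of the other examples on this page do.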
 
Example #3
Source File: MutatableAction.java    From incubator-retired-blur with Apache License 2.0
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  BytesRef rowIdRef = new BytesRef(rowId);
  List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      continue;
    }
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(rowIdRef, true)) {
      continue;
    }
    // need atomic read as well...
    possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
  }
  if (possibleRowIds.isEmpty()) {
    return null;
  }
  return new IterableRow(rowId, getRecords(possibleRowIds));
}
 
Example #4
Source File: IndexImporter.java    From incubator-retired-blur with Apache License 2.0
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
    BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
    AtomicReader atomicReader) throws IOException {
  MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.ROW_ID);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef ref = null;
    while ((ref = termsEnum.next()) != null) {
      key.set(ref.bytes, ref.offset, ref.length);
      int partition = blurPartitioner.getPartition(key, null, numberOfShards);
      if (shardId != partition) {
        throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
            + "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
      }
      if (emitDeletes) {
        lookup.lookup(ref, action);
      }
    }
  }
}
 
Example #5
Source File: TokenSources.java    From lucene-solr with Apache License 2.0
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of discovering that there are no term vectors in the index
 * is minimal (1000 invocations still register 0 ms), so this "lazy" approach
 * to coding is acceptable.
 * 
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(reader, docId, field, analyzer);
  }
  return ts;
}
 
Example #6
Source File: MtasFieldsConsumer.java    From mtas with Apache License 2.0
@Override
public void merge(MergeState mergeState) throws IOException {
  final List<Fields> fields = new ArrayList<>();
  final List<ReaderSlice> slices = new ArrayList<>();

  int docBase = 0;

  for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
    final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

    final int maxDoc = mergeState.maxDocs[readerIndex];
    f.checkIntegrity();
    slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
    fields.add(f);
    docBase += maxDoc;
  }

  Fields mergedFields = new MappedMultiFields(mergeState,
      new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
          slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(mergedFields);
}
 
Example #7
Source File: IndexSizeEstimator.java    From lucene-solr with Apache License 2.0
private void estimateTermVectors(Map<String, Object> result) throws IOException {
  log.info("- estimating term vectors...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext leafReaderContext : reader.leaves()) {
    LeafReader leafReader = leafReaderContext.reader();
    Bits liveDocs = leafReader.getLiveDocs();
    for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
      if (liveDocs != null && !liveDocs.get(docId)) {
        continue;
      }
      Fields termVectors = leafReader.getTermVectors(docId);
      if (termVectors == null) {
        continue;
      }
      for (String field : termVectors) {
        Terms terms = termVectors.terms(field);
        if (terms == null) {
          continue;
        }
        estimateTermStats(field, terms, stats, true);
      }
    }
  }
  result.put(TERM_VECTORS, stats);
}
 
Example #8
Source File: TokenSources.java    From lucene-solr with Apache License 2.0
/**
 * A convenience method that first tries to get a {@link TokenStreamFromTermVector} for the
 * specified docId, then falls back to using the passed-in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document but would prefer to use
 * the vector first.
 *
 * @param reader The {@link org.apache.lucene.index.IndexReader} from which to try
 *        to get the vector
 * @param docId The docId to retrieve
 * @param field The field to retrieve from the document
 * @param document The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Document document, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(document, field, analyzer);
  }
  return ts;
}
 
Example #9
Source File: TokenSources.java    From lucene-solr with Apache License 2.0
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors.  If the field has no termvectors or offsets
 * are not included in the termvector, return null.  See {@link #getTokenStream(org.apache.lucene.index.Terms)}
 * for an explanation of what happens when positions aren't present.
 *
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if offsets are not available
 * @throws IOException If there is a low-level I/O error
 *
 * @see #getTokenStream(org.apache.lucene.index.Terms)
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {

  Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }

  Terms vector = vectors.terms(field);
  if (vector == null) {
    return null;
  }

  if (!vector.hasOffsets()) {
    return null;
  }
  
  return getTokenStream(vector);
}
 
Example #10
Source File: SearchTravRetHighlightTask.java    From lucene-solr with Apache License 2.0
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  highlighter.setFragmentScorer(new QueryScorer(q));
  // highlighter.setTextFragmenter();  unfortunately no sentence mechanism, not even regex. Default here is trivial
  for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
    Document document = reader.document(scoreDoc.doc, hlFields);
    Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
    for (IndexableField indexableField : document) {
      TokenStream tokenStream;
      if (termVecs) {
        tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
            indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
      } else {
        tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
      }
      // will close TokenStream:
      String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
      preventOptimizeAway = fragments.length;
    }
  }
}
 
Example #11
Source File: FieldsConsumer.java    From lucene-solr with Apache License 2.0
/** Merges in the fields from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
  final List<Fields> fields = new ArrayList<>();
  final List<ReaderSlice> slices = new ArrayList<>();

  int docBase = 0;

  for(int readerIndex=0;readerIndex<mergeState.fieldsProducers.length;readerIndex++) {
    final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

    final int maxDoc = mergeState.maxDocs[readerIndex];
    f.checkIntegrity();
    slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
    fields.add(f);
    docBase += maxDoc;
  }

  Fields mergedFields = new MappedMultiFields(mergeState, 
                                              new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                                              slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(mergedFields, norms);
}
 
Example #12
Source File: BlockTermsWriter.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  for(String field : fields) {

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }

      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example #13
Source File: UniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer normsProducer) throws IOException {
  BlockWriter blockWriter = new BlockWriter(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
  ByteBuffersDataOutput fieldsOutput = new ByteBuffersDataOutput();
  int fieldsNumber = 0;
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
      fieldsNumber += writeFieldTerms(blockWriter, fieldsOutput, termsEnum, fieldInfo, normsProducer);
    }
  }
  writeFieldsMetadata(fieldsNumber, fieldsOutput);
  CodecUtil.writeFooter(dictionaryOutput);
}
 
Example #14
Source File: PerFieldPostingsFormat.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);

  // Write postings
  boolean success = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
      PostingsFormat format = ent.getKey();
      final FieldsGroup group = ent.getValue();

      // Exposes only the fields from this group:
      Fields maskedFields = new FilterFields(fields) {
        @Override
        public Iterator<String> iterator() {
          return group.fields.iterator();
        }
      };

      FieldsConsumer consumer = format.fieldsConsumer(group.state);
      toClose.add(consumer);
      consumer.write(maskedFields, norms);
    }
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}
 
Example #15
Source File: CustomSpellCheckListner.java    From customized-symspell with MIT License
/**
 * Reload method of the spellcheck listener.
 *
 * @param newSearcher the new index searcher to read terms from
 * @param checker the spell checker whose dictionary is populated
 * @throws IOException
 * @throws SpellCheckException
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {

  DirectoryReader productsIndexReader = newSearcher.getIndexReader();
  Fields fields = MultiFields.getFields(productsIndexReader);
  IndexSchema schema = newSearcher.getCore().getLatestSchema();
  long time = System.currentTimeMillis();
  for (String field : fields) {
    if (!fieldArr.contains(field)) {
      continue;
    }
    FieldType type = schema.getField(field).getType();
    int insertionsCount = 0;
    for (TermsEnum iterator = fields.terms(field).iterator(); iterator.next() != null; ) {
      BytesRef term = iterator.term();
      CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
      type.indexedToReadable(term, charsRefBuilder);
      insertionsCount++;
      checker.getDataHolder().addItem(
          new DictionaryItem(charsRefBuilder.toString().trim(), (double) iterator.totalTermFreq(),
              0.0));
    }
    log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
        insertionsCount);
  }
  log.info("Data for SpellChecker  was populated. Time={} ms",
      (System.currentTimeMillis() - time));
}
 
Example #16
Source File: BlurUtil.java    From incubator-retired-blur with Apache License 2.0
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs) throws IOException {
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.FAMILY);
  TermsEnum iterator = terms.iterator(null);
  BytesRef text = new BytesRef(family);
  int lastDocId = primeDocRowId + numberOfDocsInRow;
  if (iterator.seekExact(text, true)) {
    DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    int doc = primeDocRowId;
    while ((doc = docs.advance(doc)) < lastDocId) {
      bits.set(doc - primeDocRowId);
    }
  }
}
 
Example #17
Source File: TestMultiThreadTermVectors.java    From lucene-solr with Apache License 2.0
private void testTermVectors() throws Exception {
  // check:
  int numDocs = reader.numDocs();
  for (int docId = 0; docId < numDocs; docId++) {
    Fields vectors = reader.getTermVectors(docId);      
    // verify vectors result
    verifyVectors(vectors, docId);
    Terms vector = vectors.terms("field");
    verifyVector(vector.iterator(), docId);
  }
}
 
Example #18
Source File: DocumentVisibilityFilter.java    From incubator-retired-blur with Apache License 2.0
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  AtomicReader reader = context.reader();
  List<DocIdSet> list = new ArrayList<DocIdSet>();

  Fields fields = reader.fields();
  Terms terms = fields.terms(_fieldName);
  if (terms == null) {
    // if field is not present then show nothing.
    return DocIdSet.EMPTY_DOCIDSET;
  }
  TermsEnum iterator = terms.iterator(null);
  BytesRef bytesRef;
  DocumentVisibilityEvaluator visibilityEvaluator = new DocumentVisibilityEvaluator(_authorizations);
  while ((bytesRef = iterator.next()) != null) {
    if (isVisible(visibilityEvaluator, bytesRef)) {
      DocIdSet docIdSet = _filterCacheStrategy.getDocIdSet(_fieldName, bytesRef, reader);
      if (docIdSet != null) {
        list.add(docIdSet);
      } else {
        // Do not use acceptDocs because we want the acl cache to be version
        // agnostic.
        DocsEnum docsEnum = iterator.docs(null, null);
        list.add(buildCache(reader, docsEnum, bytesRef));
      }
    }
  }
  return getLogicalOr(list);
}
 
Example #19
Source File: DefaultSolrHighlighter.java    From lucene-solr with Apache License 2.0
@Override
public Fields getTermVectors(int docID) throws IOException {
  if (docID != lastDocId) {
    lastDocId = docID;
    tvFields = in.getTermVectors(docID);
  }
  return tvFields;
}
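This override memoizes the term vectors of the most recently requested document, so highlighting several fields of the same document triggers only a single (relatively expensive) getTermVectors call on the underlying reader.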
 
Example #20
Source File: TestMultiThreadTermVectors.java    From lucene-solr with Apache License 2.0
static void verifyVectors(Fields vectors, int num) throws IOException {
  for (String field : vectors) {
    Terms terms = vectors.terms(field);
    assert terms != null;
    verifyVector(terms.iterator(), num);
  }
}
 
Example #21
Source File: BlockTreeTermsWriter.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);

  String lastField = null;
  for(String field : fields) {
    assert lastField == null || lastField.compareTo(field) < 0;
    lastField = field;

    //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();
    TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
    while (true) {
      BytesRef term = termsEnum.next();
      //if (DEBUG) System.out.println("BTTW: next term " + term);

      if (term == null) {
        break;
      }

      //if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();

    //if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
  }
}
 
Example #22
Source File: CrankyPostingsFormat.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from FieldsConsumer.write()");
  }  
  delegate.write(fields, norms);
}
 
Example #23
Source File: VersionBlockTreeTermsWriter.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  String lastField = null;
  for(String field : fields) {
    assert lastField == null || lastField.compareTo(field) < 0;
    lastField = field;

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example #24
Source File: SecureAtomicReaderTestBase.java    From incubator-retired-blur with Apache License 2.0
private int getTermWithSeekCount(Fields fields, String field) throws IOException {
  Terms terms = fields.terms(field);
  TermsEnum termsEnum = terms.iterator(null);
  SeekStatus seekStatus = termsEnum.seekCeil(new BytesRef(""));
  if (seekStatus == SeekStatus.END) {
    return 0;
  }
  System.out.println(termsEnum.term().utf8ToString());
  int count = 1;
  while (termsEnum.next() != null) {
    count++;
  }
  return count;
}
 
Example #25
Source File: CompletionFieldsConsumer.java    From lucene-solr with Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  delegateFieldsConsumer.write(fields, norms);

  for (String field : fields) {
    CompletionTermWriter termWriter = new CompletionTermWriter();
    Terms terms = fields.terms(field);
    if (terms == null) {
      // this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
      continue;
    }
    TermsEnum termsEnum = terms.iterator();

    // write terms
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      termWriter.write(term, termsEnum);
    }

    // store lookup, if needed
    long filePointer = dictOut.getFilePointer();
    if (termWriter.finish(dictOut)) {
      seenFields.put(field, new CompletionMetaData(filePointer,
          termWriter.minWeight,
          termWriter.maxWeight,
          termWriter.type));
    }
  }
}
 
Example #26
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  for (String fieldName : fieldNames) {
    final Fields vectors = ir.getTermVectors(docNum);
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info
    if (vector == null) {
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}
 
Example #27
Source File: STUniformSplitTermsWriter.java    From lucene-solr with Apache License 2.0
private TermIteratorQueue<FieldTerms> createFieldTermsQueue(Fields fields, List<FieldMetadata> fieldMetadataList) throws IOException {
  TermIteratorQueue<FieldTerms> fieldQueue = new TermIteratorQueue<>(fieldMetadataList.size());
  for (FieldMetadata fieldMetadata : fieldMetadataList) {
    Terms terms = fields.terms(fieldMetadata.getFieldInfo().name);
    if (terms != null) {
      FieldTerms fieldTerms = new FieldTerms(fieldMetadata, terms.iterator());
      if (fieldTerms.nextTerm()) {
        // There is at least one term for the field.
        fieldQueue.add(fieldTerms);
      }
    }
  }
  return fieldQueue;
}
 
Example #28
Source File: SecureAtomicReaderTestBase.java    From incubator-retired-blur with Apache License 2.0
private int getTermCount(Fields fields, String field) throws IOException {
  Terms terms = fields.terms(field);
  TermsEnum termsEnum = terms.iterator(null);
  int count = 0;
  while (termsEnum.next() != null) {
    count++;
  }
  return count;
}
 
Example #29
Source File: TermFreq.java    From SourcererCC with GNU General Public License v3.0
private void dummy() throws IOException {
    Fields fields = MultiFields.getFields(this.reader);
    Terms terms = fields.terms("field");
    TermsEnum iterator = terms.iterator(null);
    BytesRef byteRef = null;
    while ((byteRef = iterator.next()) != null) {
        // Decode the term bytes as UTF-8 rather than with the platform default charset.
        String term = byteRef.utf8ToString();
        // Look the term up in the same field that is being enumerated.
        Term termInstance = new Term("field", term);
        long termFreq = this.reader.totalTermFreq(termInstance);
        this.TermFreqMap.put(term, termFreq);
        System.out.println(termFreq);
    }
}
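MultiFields.getFields merges the per-segment Fields into a single index-wide view; the method was removed in later Lucene releases, where MultiTerms.getTerms(reader, field) is the per-field replacement.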
 
Example #30
Source File: BaseReadMaskFieldTypeDefinitionTest.java    From incubator-retired-blur with Apache License 2.0
private void checkTerms(IndexSearcher searcher, String fieldName) throws IOException {
  IndexReader reader = searcher.getIndexReader();
  for (AtomicReaderContext context : reader.leaves()) {
    AtomicReader atomicReader = context.reader();
    Fields fields = atomicReader.fields();
    Terms terms = fields.terms(fieldName);
    TermsEnum iterator = terms.iterator(null);
    BytesRef bytesRef = iterator.next();
    if (bytesRef != null) {
      System.out.println(bytesRef.utf8ToString());
      fail("There are only restricted terms for this field [" + fieldName + "]");
    }
  }
}