Java Code Examples for org.apache.lucene.index.Fields

The following examples show how to use org.apache.lucene.index.Fields. These examples are extracted from open source projects; the source project, source file, and license are noted above each example.
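As background for the snippets below, here is a minimal, self-contained sketch of the Fields contract: a Fields instance is an Iterable over field names, terms(field) returns the Terms for a given field (or null if the field has none), and size() reports the number of fields. The index path, document id, and class name are hypothetical.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class FieldsTour {
  public static void main(String[] args) throws IOException {
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
      Fields fields = reader.getTermVectors(0); // term vectors for doc 0, or null if none were stored
      if (fields == null) {
        return;
      }
      for (String field : fields) {            // Fields is Iterable<String> over its field names
        Terms terms = fields.terms(field);     // may be null per the Fields contract
        if (terms == null) {
          continue;
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
          System.out.println(field + " -> " + term.utf8ToString());
        }
      }
    }
  }
}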
Example 1
Source Project: Elasticsearch   Source File: DfsOnlyRequest.java    License: Apache License 2.0
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Example 2
Source Project: lucene-solr   Source File: TokenSources.java    License: Apache License 2.0
/**
 * A convenience method that tries to first get a {@link TokenStreamFromTermVector} for the
 * specified docId, then, falls back to using the passed in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 *
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param document The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Document document, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(document, field, analyzer);
  }
  return ts;
}
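However the stream is obtained, it is consumed through the standard TokenStream lifecycle. A minimal sketch, assuming a TokenStream ts such as the one returned by getAnyTokenStream above (the helper name is hypothetical):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

static void printTokens(TokenStream ts) throws IOException {
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  ts.reset();                     // required before the first incrementToken()
  while (ts.incrementToken()) {
    System.out.println(termAtt.toString());
  }
  ts.end();                       // records the final token state
  ts.close();
}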
 
Example 3
Source Project: lucene-solr   Source File: TokenSources.java    License: Apache License 2.0
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of discovering that there are no term vectors in the index
 * is minimal (1000 invocations still register 0 ms), so this "lazy"
 * (flexible?) approach to coding is probably acceptable.
 * 
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(reader, docId, field, analyzer);
  }
  return ts;
}
 
Example 4
Source Project: lucene-solr   Source File: TokenSources.java    License: Apache License 2.0
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors.  If the field has no termvectors or offsets
 * are not included in the termvector, return null.  See {@link #getTokenStream(org.apache.lucene.index.Terms)}
 * for an explanation of what happens when positions aren't present.
 *
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if offsets are not available
 * @throws IOException If there is a low-level I/O error
 *
 * @see #getTokenStream(org.apache.lucene.index.Terms)
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {

  Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }

  Terms vector = vectors.terms(field);
  if (vector == null) {
    return null;
  }

  if (!vector.hasOffsets()) {
    return null;
  }
  
  return getTokenStream(vector);
}
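Because this method returns a stream only when offsets are stored, the result can drive offset-based highlighting directly. A hedged variant of the consumption loop above that also reads each token's offsets (the helper name is hypothetical):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

static void printTokensWithOffsets(TokenStream ts) throws IOException {
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    // offsets are character positions into the original field value
    System.out.println(termAtt + " [" + offsetAtt.startOffset() + ", " + offsetAtt.endOffset() + ")");
  }
  ts.end();
  ts.close();
}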
 
Example 5
Source Project: lucene-solr   Source File: SearchTravRetHighlightTask.java    License: Apache License 2.0
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  highlighter.setFragmentScorer(new QueryScorer(q));
  // highlighter.setTextFragmenter();  unfortunately no sentence mechanism, not even regex. Default here is trivial
  for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
    Document document = reader.document(scoreDoc.doc, hlFields);
    Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
    for (IndexableField indexableField : document) {
      TokenStream tokenStream;
      if (termVecs) {
        tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
            indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
      } else {
        tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
      }
      // will close TokenStream:
      String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
      preventOptimizeAway = fragments.length;
    }
  }
}
 
Example 6
Source Project: lucene-solr   Source File: BlockTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  for(String field : fields) {

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }

      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example 7
Source Project: lucene-solr   Source File: UniformSplitTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer normsProducer) throws IOException {
  BlockWriter blockWriter = new BlockWriter(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
  ByteBuffersDataOutput fieldsOutput = new ByteBuffersDataOutput();
  int fieldsNumber = 0;
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
      fieldsNumber += writeFieldTerms(blockWriter, fieldsOutput, termsEnum, fieldInfo, normsProducer);
    }
  }
  writeFieldsMetadata(fieldsNumber, fieldsOutput);
  CodecUtil.writeFooter(dictionaryOutput);
}
 
Example 8
Source Project: lucene-solr   Source File: STUniformSplitTermsWriter.java    License: Apache License 2.0
private Collection<FieldMetadata> writeSingleSegment(Fields fields, NormsProducer normsProducer, STBlockWriter blockWriter, IndexDictionary.Builder dictionaryBuilder) throws IOException {
  List<FieldMetadata> fieldMetadataList = createFieldMetadataList(new FieldsIterator(fields, fieldInfos), maxDoc);
  TermIteratorQueue<FieldTerms> fieldTermsQueue = createFieldTermsQueue(fields, fieldMetadataList);
  List<TermIterator<FieldTerms>> groupedFieldTerms = new ArrayList<>(fieldTermsQueue.size());
  List<FieldMetadataTermState> termStates = new ArrayList<>(fieldTermsQueue.size());

  while (fieldTermsQueue.size() != 0) {
    TermIterator<FieldTerms> topFieldTerms = fieldTermsQueue.popTerms();
    BytesRef term = BytesRef.deepCopyOf(topFieldTerms.term);
    groupByTerm(fieldTermsQueue, topFieldTerms, groupedFieldTerms);
    writePostingLines(term, groupedFieldTerms, normsProducer, termStates);
    blockWriter.addLine(term, termStates, dictionaryBuilder);
    nextTermForIterators(groupedFieldTerms, fieldTermsQueue);
  }
  return fieldMetadataList;
}
 
Example 9
Source Project: lucene-solr   Source File: FieldsConsumer.java    License: Apache License 2.0
/** Merges in the fields from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
  final List<Fields> fields = new ArrayList<>();
  final List<ReaderSlice> slices = new ArrayList<>();

  int docBase = 0;

  for(int readerIndex=0;readerIndex<mergeState.fieldsProducers.length;readerIndex++) {
    final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

    final int maxDoc = mergeState.maxDocs[readerIndex];
    f.checkIntegrity();
    slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
    fields.add(f);
    docBase += maxDoc;
  }

  Fields mergedFields = new MappedMultiFields(mergeState, 
                                              new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                                              slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(mergedFields, norms);
}
 
Example 10
Source Project: lucene-solr   Source File: IndexSizeEstimator.java    License: Apache License 2.0
private void estimateTermVectors(Map<String, Object> result) throws IOException {
  log.info("- estimating term vectors...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext leafReaderContext : reader.leaves()) {
    LeafReader leafReader = leafReaderContext.reader();
    Bits liveDocs = leafReader.getLiveDocs();
    for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
      if (liveDocs != null && !liveDocs.get(docId)) {
        continue;
      }
      Fields termVectors = leafReader.getTermVectors(docId);
      if (termVectors == null) {
        continue;
      }
      for (String field : termVectors) {
        Terms terms = termVectors.terms(field);
        if (terms == null) {
          continue;
        }
        estimateTermStats(field, terms, stats, true);
      }
    }
  }
  result.put(TERM_VECTORS, stats);
}
 
Example 11
Source Project: mtas   Source File: MtasFieldsConsumer.java    License: Apache License 2.0
@Override
public void merge(MergeState mergeState) throws IOException {
  final List<Fields> fields = new ArrayList<>();
  final List<ReaderSlice> slices = new ArrayList<>();

  int docBase = 0;

  for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
    final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

    final int maxDoc = mergeState.maxDocs[readerIndex];
    f.checkIntegrity();
    slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
    fields.add(f);
    docBase += maxDoc;
  }

  Fields mergedFields = new MappedMultiFields(mergeState,
      new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
          slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(mergedFields);
}
 
Example 12
Source Project: incubator-retired-blur   Source File: IndexImporter.java    License: Apache License 2.0
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
    BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
    AtomicReader atomicReader) throws IOException {
  MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.ROW_ID);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef ref = null;
    while ((ref = termsEnum.next()) != null) {
      key.set(ref.bytes, ref.offset, ref.length);
      int partition = blurPartitioner.getPartition(key, null, numberOfShards);
      if (shardId != partition) {
        throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
            + "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
      }
      if (emitDeletes) {
        lookup.lookup(ref, action);
      }
    }
  }
}
 
Example 13
Source Project: incubator-retired-blur   Source File: MutatableAction.java    License: Apache License 2.0
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  BytesRef rowIdRef = new BytesRef(rowId);
  List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      continue;
    }
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(rowIdRef, true)) {
      continue;
    }
    // need atomic read as well...
    possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
  }
  if (possibleRowIds.isEmpty()) {
    return null;
  }
  return new IterableRow(rowId, getRecords(possibleRowIds));
}
 
Example 14
@Override
public Fields getTermVectors(final int docID) throws IOException {
    final Fields fields = in.getTermVectors(docID);

    if (!flsEnabled || fields == null) {
        return fields;
    }

    return new Fields() {

        @Override
        public Iterator<String> iterator() {
            return Iterators.<String> filter(fields.iterator(), new Predicate<String>() {

                @Override
                public boolean apply(final String input) {
                    return isFls(input);
                }
            });
        }

        @Override
        public Terms terms(final String field) throws IOException {

            if (!isFls(field)) {
                return null;
            }

            return wrapTerms(field, in.terms(field));

        }

        @Override
        public int size() {
            return flsFieldInfos.size();
        }

    };
}
 
Example 15
Source Project: customized-symspell   Source File: CustomSpellCheckListner.java    License: MIT License
/**
 * Reload method of the spellcheck listener: repopulates the spell checker's
 * dictionary from the terms of the newly opened searcher.
 * @param newSearcher the newly opened searcher
 * @param checker the spell checker to populate
 * @throws IOException if there is a low-level I/O error
 * @throws SpellCheckException if adding items to the dictionary fails
 */
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
    throws IOException, SpellCheckException {

  DirectoryReader productsIndexReader = newSearcher.getIndexReader();
  Fields fields = MultiFields.getFields(productsIndexReader);
  IndexSchema schema = newSearcher.getCore().getLatestSchema();
  long time = System.currentTimeMillis();
  for (String field : fields) {
    if (!fieldArr.contains(field)) {
      continue;
    }
    FieldType type = schema.getField(field).getType();
    int insertionsCount = 0;
    for (TermsEnum iterator = fields.terms(field).iterator(); iterator.next() != null; ) {
      BytesRef term = iterator.term();
      CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
      type.indexedToReadable(term, charsRefBuilder);
      insertionsCount++;
      checker.getDataHolder().addItem(
          new DictionaryItem(charsRefBuilder.toString().trim(), (double) iterator.totalTermFreq(),
              0.0));
    }
    log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
        insertionsCount);
  }
  log.info("Data for SpellChecker  was populated. Time={} ms",
      (System.currentTimeMillis() - time));
}
 
Example 16
Source Project: Elasticsearch   Source File: MoreLikeThisFetchService.java    License: Apache License 2.0
public static Fields[] getFieldsFor(MultiTermVectorsResponse responses) throws IOException {
    List<Fields> likeFields = new ArrayList<>();

    for (MultiTermVectorsItemResponse response : responses) {
        if (response.isFailed()) {
            continue;
        }
        TermVectorsResponse getResponse = response.getResponse();
        if (!getResponse.isExists()) {
            continue;
        }
        likeFields.add(getResponse.getFields());
    }
    return likeFields.toArray(Fields.EMPTY_ARRAY);
}
 
Example 17
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0
public Fields getFields() throws IOException {
    if (hasTermVectors() && isExists()) {
        if (!sourceCopied) { // make the bytes safe
            headerRef = headerRef.copyBytesArray();
            termVectors = termVectors.copyBytesArray();
        }
        TermVectorsFields termVectorsFields = new TermVectorsFields(headerRef, termVectors);
        hasScores = termVectorsFields.hasScores;
        return termVectorsFields;
    } else {
        return new Fields() {
            @Override
            public Iterator<String> iterator() {
                return Collections.emptyIterator();
            }

            @Override
            public Terms terms(String field) throws IOException {
                return null;
            }

            @Override
            public int size() {
                return 0;
            }
        };
    }
}
 
Example 18
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
 
Example 19
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0
public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs,
                      TermVectorsFilter termVectorsFilter) throws IOException {
    TermVectorsWriter tvw = new TermVectorsWriter(this);

    if (termVectorsByField != null) {
        tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields, dfs, termVectorsFilter);
    }
}
 
Example 20
Source Project: lucene-solr   Source File: TermVectorLeafReader.java    License: Apache License 2.0
public TermVectorLeafReader(String field, Terms terms) {
  fields = new Fields() {
    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(field).iterator();
    }

    @Override
    public Terms terms(String fld) throws IOException {
      if (!field.equals(fld)) {
        return null;
      }
      return terms;
    }

    @Override
    public int size() {
      return 1;
    }
  };

  IndexOptions indexOptions;
  if (!terms.hasFreqs()) {
    indexOptions = IndexOptions.DOCS;
  } else if (!terms.hasPositions()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS;
  } else if (!terms.hasOffsets()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  FieldInfo fieldInfo = new FieldInfo(field, 0,
                                      true, true, terms.hasPayloads(),
                                      indexOptions, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
  fieldInfos = new FieldInfos(new FieldInfo[]{fieldInfo});
}
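A hedged usage sketch for this class: fetch one document's term vector for a single field and wrap it as a one-document LeafReader. The reader, docId, "body" field, and helper name are assumptions.

import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.highlight.TermVectorLeafReader;

static LeafReader wrapTermVector(IndexReader reader, int docId) throws IOException {
  Fields tvFields = reader.getTermVectors(docId);                   // may be null
  Terms terms = (tvFields == null) ? null : tvFields.terms("body"); // "body" is hypothetical
  if (terms == null) {
    return null; // no term vector stored for this field
  }
  // The wrapper exposes exactly one document (docID 0) containing just this field.
  return new TermVectorLeafReader("body", terms);
}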
 
Example 21
Source Project: lucene-solr   Source File: TermVectorLeafReader.java    License: Apache License 2.0
@Override
public Fields getTermVectors(int docID) throws IOException {
  if (docID != 0) {
    return null;
  }
  return fields;
}
 
Example 22
Source Project: lucene-solr   Source File: UnifiedHighlighter.java    License: Apache License 2.0
@Override
public Fields getTermVectors(int docID) throws IOException {
  if (docID != lastDocId) {
    lastDocId = docID;
    tvFields = in.getTermVectors(docID);
  }
  return tvFields;
}
 
Example 23
Source Project: lucene-solr   Source File: TestUnifiedHighlighterTermVec.java    License: Apache License 2.0
@Override
public LeafReader wrap(LeafReader reader) {
  return new FilterLeafReader(reader) {
    BitSet seenDocIDs = new BitSet();

    @Override
    public Fields getTermVectors(int docID) throws IOException {
      // if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see LUCENE-6868
      if (callStackContains(ParallelLeafReader.class) == false
          && callStackContains(CheckIndex.class) == false) {
        assertFalse("Should not request TVs for doc more than once.", seenDocIDs.get(docID));
        seenDocIDs.set(docID);
      }

      return super.getTermVectors(docID);
    }

    @Override
    public CacheHelper getCoreCacheHelper() {
      return null;
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }
  };
}
 
Example 24
Source Project: lucene-solr   Source File: FSTTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  for(String field : fields) {
    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }
    FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
    boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    TermsEnum termsEnum = terms.iterator();
    TermsWriter termsWriter = new TermsWriter(fieldInfo);

    long sumTotalTermFreq = 0;
    long sumDocFreq = 0;
    FixedBitSet docsSeen = new FixedBitSet(maxDoc);

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
          
      BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
      if (termState != null) {
        termsWriter.finishTerm(term, termState);
        sumTotalTermFreq += termState.totalTermFreq;
        sumDocFreq += termState.docFreq;
      }
    }

    termsWriter.finish(hasFreq ? sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality());
  }
}
 
Example 25
Source Project: lucene-solr   Source File: OrdsBlockTreeTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  String lastField = null;
  for(String field : fields) {
    assert lastField == null || lastField.compareTo(field) < 0;
    lastField = field;

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example 26
Source Project: lucene-solr   Source File: STUniformSplitTermsWriter.java    License: Apache License 2.0
private TermIteratorQueue<FieldTerms> createFieldTermsQueue(Fields fields, List<FieldMetadata> fieldMetadataList) throws IOException {
  TermIteratorQueue<FieldTerms> fieldQueue = new TermIteratorQueue<>(fieldMetadataList.size());
  for (FieldMetadata fieldMetadata : fieldMetadataList) {
    Terms terms = fields.terms(fieldMetadata.getFieldInfo().name);
    if (terms != null) {
      FieldTerms fieldTerms = new FieldTerms(fieldMetadata, terms.iterator());
      if (fieldTerms.nextTerm()) {
        // There is at least one term for the field.
        fieldQueue.add(fieldTerms);
      }
    }
  }
  return fieldQueue;
}
 
Example 27
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  for (String fieldName : fieldNames) {
    final Fields vectors = ir.getTermVectors(docNum);
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info
    if (vector == null) {
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}
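For context, retrieveTerms(int) is the private workhorse behind MoreLikeThis.like(int). A minimal driver sketch; the "body" field, analyzer choice, tuning values, and helper name are assumptions:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

static TopDocs findSimilar(IndexReader reader, int docNum) throws IOException {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new StandardAnalyzer());    // used when a field has no term vectors
  mlt.setFieldNames(new String[] {"body"});   // hypothetical field name
  mlt.setMinTermFreq(1);                      // loosen defaults for small indexes
  mlt.setMinDocFreq(1);
  Query query = mlt.like(docNum);             // calls retrieveTerms(docNum) internally
  return new IndexSearcher(reader).search(query, 10);
}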
 
Example 28
Source Project: lucene-solr   Source File: CompletionFieldsConsumer.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  delegateFieldsConsumer.write(fields, norms);

  for (String field : fields) {
    CompletionTermWriter termWriter = new CompletionTermWriter();
    Terms terms = fields.terms(field);
    if (terms == null) {
      // this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
      continue;
    }
    TermsEnum termsEnum = terms.iterator();

    // write terms
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      termWriter.write(term, termsEnum);
    }

    // store lookup, if needed
    long filePointer = dictOut.getFilePointer();
    if (termWriter.finish(dictOut)) {
      seenFields.put(field, new CompletionMetaData(filePointer,
          termWriter.minWeight,
          termWriter.maxWeight,
          termWriter.type));
    }
  }
}
 
Example 29
Source Project: lucene-solr   Source File: VersionBlockTreeTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {

  String lastField = null;
  for(String field : fields) {
    assert lastField == null || lastField.compareTo(field) < 0;
    lastField = field;

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example 30
Source Project: lucene-solr   Source File: CrankyPostingsFormat.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from FieldsConsumer.write()");
  }  
  delegate.write(fields, norms);
}