Java Code Examples for org.apache.lucene.index.Terms#hasOffsets()

The following examples show how to use org.apache.lucene.index.Terms#hasOffsets(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: TermVectorsResponse.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the per-occurrence details of the current term (position,
 * start/end offset, payload) into a "tokens" array on the builder. Writes
 * nothing when the term vector stores none of those statistics.
 *
 * @param builder  XContent output to append to
 * @param curTerms term-vector metadata describing which statistics are stored
 * @param termFreq number of occurrences of the current term in this document
 * @throws IOException if writing to the builder fails
 */
private void buildValues(XContentBuilder builder, Terms curTerms, int termFreq) throws IOException {
    if (!(curTerms.hasPayloads() || curTerms.hasOffsets() || curTerms.hasPositions())) {
        return;
    }

    builder.startArray(FieldStrings.TOKENS);
    for (int i = 0; i < termFreq; i++) {
        builder.startObject();
        if (curTerms.hasPositions()) {
            builder.field(FieldStrings.POS, currentPositions[i]);
        }
        if (curTerms.hasOffsets()) {
            builder.field(FieldStrings.START_OFFSET, currentStartOffset[i]);
            builder.field(FieldStrings.END_OFFSET, currentEndOffset[i]);
        }
        // initValues() stores null for occurrences that carry no payload, so
        // guard against NPE before checking the length.
        if (curTerms.hasPayloads() && currentPayloads[i] != null && (currentPayloads[i].length() > 0)) {
            builder.field(FieldStrings.PAYLOAD, currentPayloads[i]);
        }
        builder.endObject();
    }
    builder.endArray();
}
 
Example 2
Source File: TermVectorsResponse.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Fills the reusable per-occurrence buffers (positions, offsets, payloads)
 * for the current term by stepping the postings enum {@code termFreq} times.
 *
 * @param curTerms term-vector metadata describing which statistics are stored
 * @param posEnum  postings positioned on the current term's single document
 * @param termFreq number of occurrences to read
 * @throws IOException if advancing the postings enum fails
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    final boolean storePositions = curTerms.hasPositions();
    final boolean storeOffsets = curTerms.hasOffsets();
    final boolean storePayloads = curTerms.hasPayloads();
    for (int occ = 0; occ < termFreq; occ++) {
        // nextPosition() is invoked once per occurrence regardless of whether
        // positions are stored, so the enum always advances in lock-step.
        final int position = posEnum.nextPosition();
        if (storePositions) {
            currentPositions[occ] = position;
        }
        if (storeOffsets) {
            currentStartOffset[occ] = posEnum.startOffset();
            currentEndOffset[occ] = posEnum.endOffset();
        }
        if (storePayloads) {
            final BytesRef payload = posEnum.getPayload();
            // NOTE(review): copies from index 0 rather than payload.offset —
            // verify the enum always returns zero-offset refs here.
            currentPayloads[occ] = (payload == null)
                    ? null
                    : new BytesArray(payload.bytes, 0, payload.length);
        }
    }
}
 
Example 3
Source File: TokenSources.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TokenStream} with positions and offsets from the stored
 * term vector of {@code field} in document {@code docId}. Returns null when
 * the document has no term vectors, the field has none, or the vector was
 * not indexed with offsets. See {@link #getTokenStream(org.apache.lucene.index.Terms)}
 * for how missing positions are handled.
 *
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if offsets are not available
 * @throws IOException If there is a low-level I/O error
 *
 * @see #getTokenStream(org.apache.lucene.index.Terms)
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {
  final Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }
  final Terms vector = vectors.terms(field);
  if (vector == null || !vector.hasOffsets()) {
    return null;
  }
  return getTokenStream(vector);
}
 
Example 4
Source File: TermVectorsResponse.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Ensures the reusable per-occurrence buffers can hold {@code termFreq}
 * entries before they are filled for the current term.
 *
 * @param curTerms term-vector metadata describing which statistics are stored
 * @param termFreq required capacity for the current term
 */
private void initMemory(Terms curTerms, int termFreq) {
    // Primitive buffers are grown in place (never shrunk) and reused
    // across terms to avoid churn.
    if (curTerms.hasPositions()) {
        currentPositions = ArrayUtil.grow(currentPositions, termFreq);
    }
    if (curTerms.hasOffsets()) {
        currentStartOffset = ArrayUtil.grow(currentStartOffset, termFreq);
        currentEndOffset = ArrayUtil.grow(currentEndOffset, termFreq);
    }
    // The payload array is reallocated each call; this also drops any stale
    // object references left over from the previous term.
    if (curTerms.hasPayloads()) {
        currentPayloads = new BytesArray[termFreq];
    }
}
 
Example 5
Source File: TermVectorLeafReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Exposes a single document's term vector for {@code field} as a one-field
 * leaf-reader view: {@code fields} answers only for that field, and
 * {@code fieldInfos} advertises index options matching exactly the
 * statistics the supplied {@link Terms} actually stores.
 */
public TermVectorLeafReader(String field, Terms terms) {
  // Anonymous single-field Fields: iteration yields only `field`, and
  // lookups for any other name return null (field absent).
  fields = new Fields() {
    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(field).iterator();
    }

    @Override
    public Terms terms(String fld) throws IOException {
      if (!field.equals(fld)) {
        return null;
      }
      return terms;
    }

    @Override
    public int size() {
      return 1;
    }
  };

  // Derive the richest IndexOptions the vector can support by probing its
  // stored statistics from weakest (docs only) to strongest (offsets).
  IndexOptions indexOptions;
  if (!terms.hasFreqs()) {
    indexOptions = IndexOptions.DOCS;
  } else if (!terms.hasPositions()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS;
  } else if (!terms.hasOffsets()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  // NOTE(review): FieldInfo args are positional (field number 0, then two
  // leading booleans, payloads flag from the vector, no doc values, no
  // points, not a soft-deletes field) — verify the order against the
  // FieldInfo constructor of the Lucene version in use when upgrading.
  FieldInfo fieldInfo = new FieldInfo(field, 0,
                                      true, true, terms.hasPayloads(),
                                      indexOptions, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
  fieldInfos = new FieldInfos(new FieldInfo[]{fieldInfo});
}
 
Example 6
Source File: TokenSources.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a pre-parsed term vector into a {@link TokenStream} suitable for
 * feeding a highlighter. The {@link Terms} must have offsets available. When
 * positions are absent, tokens get position increments derived from
 * adjacency, with coincident increments for terms sharing a start offset.
 * If stopwords were filtered at index time, ensure term vectors include
 * positions so phrase queries cannot match across the removed words.
 *
 * @throws IllegalArgumentException if no offsets are available
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getTokenStream(final Terms tpv) throws IOException {
  // TokenStreamFromTermVector can cope with positions-only vectors, but
  // highlighters require offsets, so insist on them up front.
  if (tpv.hasOffsets() == false) {
    throw new IllegalArgumentException("Highlighting requires offsets from the TokenStream.");
  }
  // TODO propagate maxStartOffset; see LUCENE-6445
  return new TokenStreamFromTermVector(tpv, -1);
}
 
Example 7
Source File: TokenSources.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Un-inverts a document's term vector for {@code field} into a token stream.
 * Returns null when {@code tvFields} is null, the field has no term vector,
 * or the vector lacks offsets. Positions are recommended on the term vector
 * but not strictly required.
 *
 * @param field The field to get term vectors from.
 * @param tvFields from {@link IndexReader#getTermVectors(int)}. Possibly null. For performance,
 *                 re-use the same instance for one document (e.g. when highlighting multiple fields).
 * @param maxStartOffset Terms with a startOffset greater than this aren't returned. Use -1 for no limit.
 *                       Suggest using {@link Highlighter#getMaxDocCharsToAnalyze()} - 1
 * @return a token stream built from term vectors; null if no term vectors with the right options.
 */
public static TokenStream getTermVectorTokenStreamOrNull(String field, Fields tvFields, int maxStartOffset)
    throws IOException {
  if (tvFields == null) {
    return null;
  }
  final Terms tvTerms = tvFields.terms(field);
  if (tvTerms == null) {
    return null;
  }
  if (!tvTerms.hasOffsets()) {
    return null;
  }
  return new TokenStreamFromTermVector(tvTerms, maxStartOffset);
}