Java Code Examples for org.apache.lucene.analysis.Analyzer#getPositionIncrementGap()

The following examples show how to use org.apache.lucene.analysis.Analyzer#getPositionIncrementGap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TransportAnalyzeAction.java    From Elasticsearch (Apache License 2.0)
/**
 * Runs the given analyzer over every text value in the request and collects the
 * produced tokens, keeping positions and offsets continuous across values by
 * carrying the analyzer's position-increment and offset gaps between them.
 */
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    List<AnalyzeResponse.AnalyzeToken> result = new ArrayList<>();
    int position = -1;
    int offsetBase = 0;
    for (String text : request.text()) {
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            stream.reset();
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncrAttr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);

            while (stream.incrementToken()) {
                // An increment of zero means this token stacks on the previous position.
                int inc = posIncrAttr.getPositionIncrement();
                if (inc > 0) {
                    position += inc;
                }
                result.add(new AnalyzeResponse.AnalyzeToken(termAttr.toString(), position,
                    offsetBase + offsetAttr.startOffset(), offsetBase + offsetAttr.endOffset(), typeAttr.type(), null));
            }
            stream.end();
            // Fold the end-state offset/increment plus the analyzer's gaps into the
            // running totals so the next value's tokens line up after this one.
            offsetBase += offsetAttr.endOffset();
            position += posIncrAttr.getPositionIncrement();
            position += analyzer.getPositionIncrementGap(field);
            offsetBase += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return result;
}
 
Example 2
Source File: TransportAnalyzeAction.java    From Elasticsearch (Apache License 2.0)
/**
 * Consumes the token stream and appends one AnalyzeToken per token to the
 * instance-level {@code tokens} list, offsetting positions/offsets by the
 * running {@code lastPosition}/{@code lastOffset} fields and then advancing
 * those fields by the analyzer's gaps. The stream is always closed.
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncrAttr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            // Zero-increment tokens (e.g. synonyms) share the previous position.
            int inc = posIncrAttr.getPositionIncrement();
            if (inc > 0) {
                lastPosition += inc;
            }
            int start = lastOffset + offsetAttr.startOffset();
            int end = lastOffset + offsetAttr.endOffset();
            tokens.add(new AnalyzeResponse.AnalyzeToken(termAttr.toString(), lastPosition, start, end,
                typeAttr.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        // Carry the end state plus the analyzer's gaps forward for the next stream.
        lastOffset += offsetAttr.endOffset();
        lastPosition += posIncrAttr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Example 3
Source File: TransportExtendedAnalyzeAction.java    From elasticsearch-extended-analyze (Apache License 2.0)
/**
 * Consumes the token stream and appends one ExtendedAnalyzeToken per token to
 * the instance-level {@code tokens} list. Positions and offsets are shifted by
 * the running {@code lastPosition}/{@code lastOffset} fields, which are then
 * advanced by the analyzer's gaps; {@code shortAttrName} controls how extended
 * attribute names are rendered. The stream is always closed.
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes, boolean shortAttrName) {
    try {
        stream.reset();
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncrAttr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            // Zero-increment tokens (e.g. synonyms) share the previous position.
            int inc = posIncrAttr.getPositionIncrement();
            if (inc > 0) {
                lastPosition += inc;
            }
            int start = lastOffset + offsetAttr.startOffset();
            int end = lastOffset + offsetAttr.endOffset();
            tokens.add(new ExtendedAnalyzeResponse.ExtendedAnalyzeToken(termAttr.toString(), lastPosition, start, end,
                typeAttr.type(), extractExtendedAttributes(stream, includeAttributes, shortAttrName)));
        }
        stream.end();
        // Carry the end state plus the analyzer's gaps forward for the next stream.
        lastOffset += offsetAttr.endOffset();
        lastPosition += posIncrAttr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Example 4
Source File: MemoryIndex.java    From lucene-solr (Apache License 2.0)
/**
 * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer.
 * Also stores doc values based on {@link IndexableFieldType#docValuesType()} if set,
 * and point values when the field type declares point dimensions.
 *
 * @param field the field to add
 * @param analyzer the analyzer to use for term analysis; may be {@code null},
 *                 in which case the field's own token stream is used with
 *                 default gaps (offset gap 1, position increment gap 0)
 */
public void addField(IndexableField field, Analyzer analyzer) {

  final String fieldName = field.name();
  Info info = getInfo(fieldName, field.fieldType());

  final int offsetGap;
  final int positionIncrementGap;
  final TokenStream tokenStream;
  if (analyzer == null) {
    offsetGap = 1;
    positionIncrementGap = 0;
    tokenStream = field.tokenStream(null, null);
  } else {
    offsetGap = analyzer.getOffsetGap(fieldName);
    positionIncrementGap = analyzer.getPositionIncrementGap(fieldName);
    tokenStream = field.tokenStream(analyzer, null);
  }
  // Fields without a token stream (e.g. pure doc-values fields) index no terms.
  if (tokenStream != null) {
    storeTerms(info, tokenStream, positionIncrementGap, offsetGap);
  }

  final DocValuesType docValuesType = field.fieldType().docValuesType();
  final Object docValuesValue;
  switch (docValuesType) {
    case NONE:
      docValuesValue = null;
      break;
    case BINARY:
    case SORTED:
    case SORTED_SET:
      docValuesValue = field.binaryValue();
      break;
    case NUMERIC:
    case SORTED_NUMERIC:
      docValuesValue = field.numericValue();
      break;
    default:
      throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
  }
  if (docValuesValue != null) {
    storeDocValues(info, docValuesType, docValuesValue);
  }

  if (field.fieldType().pointDimensionCount() > 0) {
    storePointValues(info, field.binaryValue());
  }

}