Java Code Examples for org.apache.lucene.analysis.tokenattributes.OffsetAttribute#setOffset()

The following examples show how to use org.apache.lucene.analysis.tokenattributes.OffsetAttribute#setOffset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AutoPhrasingTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Publishes {@code tokenChars} through the term, offset and position-increment
 * attributes, when each of them is available.
 *
 * <p>If {@code replaceWhitespaceWith} is set, the characters are first passed
 * through {@code replaceWhiteSpace}. The resulting array is remembered in
 * {@code lastEmitted}.
 *
 * @param tokenChars the characters of the token to emit
 */
private void emit(char[] tokenChars) {
    char[] token = tokenChars;
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace(token);
    }
    CharTermAttribute termAttr = getTermAttribute();
    if (termAttr != null) {
        // copyBuffer overwrites the term with the array contents directly, so
        // no intermediate StringBuilder has to be allocated per token.
        termAttr.copyBuffer(token, 0, token.length);
    }
    OffsetAttribute offAttr = getOffsetAttribute();
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        // Keep the current end offset and derive the start from the emitted length.
        int end = offAttr.endOffset();
        offAttr.setOffset(end - token.length, end);
    }
    PositionIncrementAttribute pia = getPositionIncrementAttribute();
    if (pia != null) {
        // positionIncr is bumped before use, so the incremented value is what gets set.
        pia.setPositionIncrement(++positionIncr);
    }
    lastEmitted = token;
}
 
Example 2
Source File: AutoPhrasingTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Emits the characters of {@code token} and then, when the token carries a
 * non-negative start and an end strictly after it, sets the offset attribute
 * to that start/end pair.
 *
 * @param token the token whose {@code tok}, {@code startPos} and {@code endPos} are used
 */
private void emit(Token token) {
    emit(token.tok);
    OffsetAttribute offsets = getOffsetAttribute();
    if (offsets == null) {
        return;
    }
    boolean hasUsableRange = token.endPos > token.startPos && token.startPos >= 0;
    if (hasUsableRange) {
        offsets.setOffset(token.startPos, token.endPos);
    }
}
 
Example 3
Source File: BaseTermVectorsFormatTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Copies this attribute's {@code start} and {@code end} onto {@code target}.
 *
 * @param target destination attribute; it is cast to {@link OffsetAttribute}
 */
@Override
public void copyTo(AttributeImpl target) {
  ((OffsetAttribute) target).setOffset(start, end);
}
 
Example 4
Source File: SimplePreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a captured attribute state for one pre-analyzed token.
 *
 * <p>The attribute source is cleared, populated from {@code state}, captured,
 * and cleared again before returning. Recognized attribute keys:
 * {@code i} (position increment), {@code s} (start offset), {@code e} (end
 * offset), {@code y} (type), {@code f} (flags, hexadecimal) and {@code p}
 * (payload, hexadecimal). Any other key is ignored.
 *
 * @param a the attribute source used as scratch space
 * @param state the parsed token text and its key/value attributes
 * @param tokenEnd default end offset; the default start offset is this value
 *     minus the token length
 * @return the captured state
 */
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) {
  a.clearAttributes();
  CharTermAttribute term = a.addAttribute(CharTermAttribute.class);
  char[] termChars = state.token.toString().toCharArray();
  term.copyBuffer(termChars, 0, termChars.length);

  // Default start offset; an explicit "s" entry below overrides it.
  int tokenStart = tokenEnd - state.token.length();

  for (Entry<String, String> entry : state.attr.entrySet()) {
    String value = entry.getValue();
    switch (entry.getKey()) {
      case "i": {
        // position increment
        int increment = Integer.parseInt(value);
        a.addAttribute(PositionIncrementAttribute.class).setPositionIncrement(increment);
        break;
      }
      case "s":
        tokenStart = Integer.parseInt(value);
        break;
      case "e":
        tokenEnd = Integer.parseInt(value);
        break;
      case "y":
        a.addAttribute(TypeAttribute.class).setType(value);
        break;
      case "f":
        // flags are encoded as hexadecimal
        a.addAttribute(FlagsAttribute.class).setFlags(Integer.parseInt(value, 16));
        break;
      case "p": {
        PayloadAttribute payload = a.addAttribute(PayloadAttribute.class);
        byte[] bytes = hexToBytes(value);
        if (bytes != null && bytes.length > 0) {
          payload.setPayload(new BytesRef(bytes));
        }
        break;
      }
      default:
        // unknown attribute
        break;
    }
  }

  // Offsets are applied last so that "s"/"e" overrides are taken into account.
  a.addAttribute(OffsetAttribute.class).setOffset(tokenStart, tokenEnd);
  State captured = a.captureState();
  a.clearAttributes();
  return captured;
}
 
Example 5
Source File: MtasPreAnalyzedParser.java    From mtas with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the complete content of {@code reader}, hands it to a
 * {@code MtasUpdateRequestProcessorResultReader}, and converts every item it
 * yields into a captured attribute state.
 *
 * <p>When the result has no items, the returned {@code ParseResult} has
 * {@code str} and {@code bin} set to {@code null} and no states are added.
 * An {@link IOException} raised while the result reader is open is logged at
 * debug level and not rethrown; whatever was collected so far is returned.
 *
 * @param reader source of the serialized result
 * @param parent attribute source used as scratch space; it is cleared
 *     before the first item and after each one
 * @return the stored values plus one captured state per item
 * @throws IOException if reading from {@code reader} fails
 */
@Override
public ParseResult parse(Reader reader, AttributeSource parent)
    throws IOException {
  ParseResult res = new ParseResult();

  // get MtasUpdateRequestProcessorResult: drain the reader into a single string
  StringBuilder sb = new StringBuilder();
  char[] buf = new char[128];
  int cnt;
  while ((cnt = reader.read(buf)) > 0) {
    sb.append(buf, 0, cnt);
  }

  try (MtasUpdateRequestProcessorResultReader result =
      new MtasUpdateRequestProcessorResultReader(sb.toString())) {
    Iterator<MtasUpdateRequestProcessorResultItem> iterator = result.getIterator();
    if (iterator == null || !iterator.hasNext()) {
      // Nothing to convert. try-with-resources closes result on the way out,
      // so no explicit close() is needed (it would close the reader twice).
      res.str = null;
      res.bin = null;
      return res;
    }
    res.str = result.getStoredStringValue();
    res.bin = result.getStoredBinValue();

    parent.clearAttributes();
    while (iterator.hasNext()) {
      MtasUpdateRequestProcessorResultItem item = iterator.next();
      if (item.tokenTerm != null) {
        CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
        catt.append(item.tokenTerm);
      }
      if (item.tokenFlags != null) {
        FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
        flags.setFlags(item.tokenFlags);
      }
      if (item.tokenPosIncr != null) {
        PositionIncrementAttribute patt = parent
            .addAttribute(PositionIncrementAttribute.class);
        patt.setPositionIncrement(item.tokenPosIncr);
      }
      if (item.tokenPayload != null) {
        PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
        p.setPayload(new BytesRef(item.tokenPayload));
      }
      // Offsets are only set when both ends are present.
      if (item.tokenOffsetStart != null && item.tokenOffsetEnd != null) {
        OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
        offset.setOffset(item.tokenOffsetStart, item.tokenOffsetEnd);
      }
      // capture state and add to result
      State state = parent.captureState();
      res.states.add(state.clone());
      // reset for reuse
      parent.clearAttributes();
    }
  } catch (IOException e) {
    // Best effort: keep what was parsed so far and only log the failure.
    log.debug(e);
  }
  return res;
}