org.apache.lucene.analysis.CharacterUtils Java Examples

The following examples show how to use org.apache.lucene.analysis.CharacterUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NGramTokenizer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the gram sizes, stores the configuration and allocates the working buffers.
 *
 * @param minGram smallest gram size; must be at least 1
 * @param maxGram largest gram size; must not be smaller than {@code minGram}
 * @param edgesOnly stored unchanged in the {@code edgesOnly} field
 * @throws IllegalArgumentException if {@code minGram < 1} or {@code minGram > maxGram}
 */
private void init(int minGram, int maxGram, boolean edgesOnly) {
  if (minGram <= 0) {
    throw new IllegalArgumentException("minGram must be greater than zero");
  }
  if (maxGram < minGram) {
    throw new IllegalArgumentException("minGram must not be greater than maxGram");
  }

  this.minGram = minGram;
  this.maxGram = maxGram;
  this.edgesOnly = edgesOnly;

  // Worst case: every code point of a gram needs two chars.
  final int maxTermChars = 2 * maxGram;

  // The extra 1024 chars are read-ahead so the Reader is not polled constantly.
  charBuffer = CharacterUtils.newCharacterBuffer(maxTermChars + 1024);
  buffer = new int[charBuffer.getBuffer().length];

  // Make sure the term attribute can hold the largest possible gram.
  termAtt.resizeBuffer(maxTermChars);
}
 
Example #2
Source File: UpperCaseFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Advances the wrapped stream and upper-cases the current term in place.
 *
 * @return {@code true} if the wrapped stream produced a token, {@code false} otherwise
 * @throws IOException if the wrapped stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }
  CharacterUtils.toUpperCase(termAtt.buffer(), 0, termAtt.length());
  return true;
}
 
Example #3
Source File: TestConditionalTokenFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Advances the wrapped stream and lower-cases the current term in place.
 *
 * @return {@code true} if the wrapped stream produced a token, {@code false} otherwise
 * @throws IOException if the wrapped stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
  final boolean hasToken = input.incrementToken();
  if (hasToken) {
    CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
  }
  return hasToken;
}
 
Example #4
Source File: CharBufferReader.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 4 votes vote down vote up
/**
 * Creates a reader over {@code input} backed by a character buffer of the given size.
 *
 * @param input source of characters
 * @param bufferSize size passed to {@code CharacterUtils.newCharacterBuffer}; also remembered
 *     for later buffer re-allocation
 */
public CharBufferReader(Reader input, int bufferSize) {
    this.input = input;
    this.bufferSize = bufferSize;
    this.charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
 
Example #5
Source File: CharBufferReader.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 4 votes vote down vote up
/**
 * Refills {@code charBuffer} from {@code input} and moves the read cursor to the buffer's offset.
 *
 * @return {@code true} if the buffer's length is greater than its offset after the fill
 * @throws IOException if reading from {@code input} fails
 */
private boolean readToBuffer() throws IOException {
    CharacterUtils.fill(charBuffer, input);

    final int start = charBuffer.getOffset();
    readCursor = start;

    return start < charBuffer.getLength();
}
 
Example #6
Source File: CharBufferReader.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 4 votes vote down vote up
/**
 * Points this reader at a new input, rewinds the read cursor and replaces the character
 * buffer with a fresh one of the configured size.
 *
 * @param input the new source of characters
 */
public void reset(Reader input) {
    this.input = input;
    this.readCursor = 0;
    this.charBuffer = CharacterUtils.newCharacterBuffer(this.bufferSize);
}
 
Example #7
Source File: ICUNormalizer2CharFilter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a char filter over {@code in} with a temporary character buffer of the given size.
 *
 * @param in reader handed to the superclass constructor
 * @param normalizer normalizer to store; must not be {@code null}
 * @param bufferSize size passed to {@code CharacterUtils.newCharacterBuffer}
 * @throws NullPointerException if {@code normalizer} is {@code null}
 */
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
  super(in);
  Objects.requireNonNull(normalizer);
  this.normalizer = normalizer;
  this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
 
Example #8
Source File: NameTokenizer.java    From HongsCORE with MIT License 4 votes vote down vote up
/**
 * Produces the next token by reading one code point from the buffered input.
 *
 * <p>Each accepted code point (one for which {@code filterToken} returns a non-zero value) is
 * written into the term buffer at index {@code offset}, and the term length is set to
 * {@code offset} plus the number of chars written. A code point that {@code filterToken} maps
 * to {@code 0x0} resets {@code offset} to zero and is skipped.
 *
 * @return {@code true} when a token was emitted; {@code false} when a refill of the buffer
 *     yields zero chars
 * @throws IOException if reading from {@code input} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();
    // NOTE(review): the term is rebuilt by setLength(len + offset) below, which presumably
    // relies on clearAttributes() leaving the earlier chars in this buffer — confirm.
    char[] buf = termAttr.buffer();
    int    bgn, end, len, chr, cnt, bgx;

    while (true) {
        // Check whether the current buffer has been fully consumed; if so, refill it.
        if (bufferIndex >= bufferShift) {
            CharacterUtils.fill(buffer , input);
            // Add the length of the buffer just consumed to the running base offset.
            offsetShift += bufferShift ;
            bufferShift  = buffer.getLength();
            bufferIndex  = 0;
            // Nothing was read: record the final offset, reset the term position and stop.
            if (bufferShift == 0) {
                endset = correctOffset(offsetShift);
                offset =  0 ;
                return false;
            }
        }

        // Start position: absolute position of the current char minus the chars already
        // accumulated in the term (offset).
        bgn = bufferIndex + offsetShift - offset;

        // NOTE(review): the two-argument codePointAt is bounded by the array length, not by
        // buffer.getLength() — confirm a high surrogate at the end of the valid data cannot
        // pair with a stale char.
        chr = Character.codePointAt(buffer.getBuffer(), bufferIndex);
        cnt = Character.charCount(chr);
        bufferIndex += cnt;

        chr = filterToken(chr);
        // filterToken returned 0x0: drop this code point and restart the term from index 0.
        if (chr == 0x0) {
            buf = termAttr.buffer();
            offset = 0;
            continue;
        }

        // Write the (possibly transformed) code point after the chars accumulated so far.
        len = Character.toChars(chr, buf, offset);
        // NOTE(review): end is bgn + len, not bgn + offset + len, so the reported end offset
        // does not grow with the term length — confirm this is intended.
        end = bgn + len;

        termAttr.setLength(len + offset);
        bgx    = correctOffset(bgn);
        endset = correctOffset(end);
        ofstAttr.setOffset(bgx , endset);

        // NOTE(review): advances by the source char count (cnt), not the written count (len);
        // these differ if filterToken changes the code point's char width — confirm.
        offset += cnt;
        return  true;
    }
}