Java Code Examples for org.apache.lucene.analysis.CharacterUtils

The following examples show how to use org.apache.lucene.analysis.CharacterUtils. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: NGramTokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Validates the gram sizes, stores the configuration and allocates the working buffers.
 *
 * @param minGram smallest gram size; must be at least 1 and not larger than {@code maxGram}
 * @param maxGram largest gram size
 * @param edgesOnly stored unchanged in the {@code edgesOnly} field
 * @throws IllegalArgumentException if {@code minGram < 1} or {@code minGram > maxGram}
 */
private void init(int minGram, int maxGram, boolean edgesOnly) {
  if (minGram < 1) {
    throw new IllegalArgumentException("minGram must be greater than zero");
  } else if (minGram > maxGram) {
    throw new IllegalArgumentException("minGram must not be greater than maxGram");
  }

  this.edgesOnly = edgesOnly;
  this.maxGram = maxGram;
  this.minGram = minGram;

  // 2 * maxGram covers the case where every code point needs two chars; the extra
  // 1024 chars are read-ahead so the Reader is not polled over and over.
  final int charCapacity = 2 * maxGram + 1024;
  charBuffer = CharacterUtils.newCharacterBuffer(charCapacity);
  // One int slot per char slot of the freshly allocated character buffer.
  buffer = new int[charBuffer.getBuffer().length];

  // Grow the term attribute up front to 2 * maxGram chars.
  termAtt.resizeBuffer(2 * maxGram);
}
 
Example 2
Source Project: lucene-solr   Source File: UpperCaseFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Advances the wrapped input and upper-cases the current term buffer in place.
 *
 * @return {@code true} if the wrapped input produced a token, {@code false} otherwise
 * @throws IOException propagated from the wrapped input
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }
  final char[] termChars = termAtt.buffer();
  final int termLength = termAtt.length();
  CharacterUtils.toUpperCase(termChars, 0, termLength);
  return true;
}
 
Example 3
Source Project: lucene-solr   Source File: TestConditionalTokenFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Advances the wrapped input and lower-cases the current term buffer in place.
 *
 * @return {@code true} if the wrapped input produced a token, {@code false} otherwise
 * @throws IOException propagated from the wrapped input
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }
  final char[] termChars = termAtt.buffer();
  final int termLength = termAtt.length();
  CharacterUtils.toLowerCase(termChars, 0, termLength);
  return true;
}
 
Example 4
/**
 * Creates a reader over {@code input} and allocates its character buffer.
 *
 * @param input source the characters are read from
 * @param bufferSize size passed to {@code CharacterUtils.newCharacterBuffer}; also
 *     remembered in the {@code bufferSize} field
 */
public CharBufferReader(Reader input, int bufferSize) {
    this.bufferSize = bufferSize;
    this.input = input;
    charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
 
Example 5
/**
 * Refills {@code charBuffer} from {@code input} and moves the read cursor to the
 * buffer's offset.
 *
 * @return {@code true} if the buffer's length is greater than its offset after the fill
 * @throws IOException propagated from {@code CharacterUtils.fill}
 */
private boolean readToBuffer() throws IOException {
    CharacterUtils.fill(charBuffer, input);
    final int start = charBuffer.getOffset();
    readCursor = start;
    return start < charBuffer.getLength();
}
 
Example 6
/**
 * Points this object at a new {@code input}, rewinds the read cursor to zero and
 * replaces the character buffer with a freshly allocated one of {@code bufferSize}.
 *
 * @param input the new source to read from
 */
public void reset(Reader input) {
    readCursor = 0;
    this.input = input;
    // Allocation stays last, after both field updates.
    charBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
 
Example 7
Source Project: lucene-solr   Source File: ICUNormalizer2CharFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a char filter over {@code in} that keeps the given normalizer and a
 * temporary character buffer.
 *
 * @param in reader handed to the superclass constructor
 * @param normalizer normalizer to store; must not be {@code null}
 * @param bufferSize size passed to {@code CharacterUtils.newCharacterBuffer}
 * @throws NullPointerException if {@code normalizer} is {@code null}
 */
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
  super(in);
  // Null check comes before the buffer allocation.
  Objects.requireNonNull(normalizer);
  this.normalizer = normalizer;
  this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
 
Example 8
Source Project: HongsCORE   Source File: NameTokenizer.java    License: MIT License 4 votes vote down vote up
/**
 * Produces the next token by consuming one code point from the buffered input.
 *
 * <p>Each accepted code point is written into the term buffer at {@code offset}, and the
 * term length is set to {@code offset} plus the chars just written, so the emitted term
 * covers everything accumulated since {@code offset} was last reset to zero. A code
 * point for which {@code filterToken} returns {@code 0x0} emits nothing and resets
 * {@code offset} to zero.
 *
 * @return {@code true} when a token was emitted; {@code false} when refilling the
 *     buffer yields no more characters
 * @throws IOException propagated from {@code CharacterUtils.fill}
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();
    char[] buf = termAttr.buffer();
    int    bgn, end, len, chr, cnt, bgx;

    while (true) {
        // Check whether the buffered input has been fully consumed; if so, refill it.
        if (bufferIndex >= bufferShift) {
            CharacterUtils.fill(buffer , input);
            // Add the size of the chunk just consumed to the running offset
            // before bufferShift is overwritten with the new chunk's length.
            offsetShift += bufferShift ;
            bufferShift  = buffer.getLength();
            bufferIndex  = 0;
            // Nothing was read: record the final offset, reset the accumulated
            // term position and signal that there are no more tokens.
            if (bufferShift == 0) {
                endset = correctOffset(offsetShift);
                offset =  0 ;
                return false;
            }
        }

        // Start offset of the term: the current input position minus the chars
        // already accumulated in the term buffer.
        bgn = bufferIndex + offsetShift - offset;

        // Decode one code point and step past however many chars it occupies.
        // NOTE(review): codePointAt is called without a limit; presumably fill()
        // never leaves a split surrogate pair at the end of the chunk — confirm.
        chr = Character.codePointAt(buffer.getBuffer(), bufferIndex);
        cnt = Character.charCount(chr);
        bufferIndex += cnt;

        // filterToken may map or reject the code point; 0x0 means "skip it".
        chr = filterToken(chr);
        if (chr == 0x0) {
            buf = termAttr.buffer();
            offset = 0;
            continue;
        }

        // Append the (possibly mapped) code point after what is already in the term.
        // NOTE(review): the term buffer is not resized here; verify that it cannot
        // overflow when many code points are accepted in a row.
        len = Character.toChars(chr, buf, offset);
        end = bgn + len;

        termAttr.setLength(len + offset);
        bgx    = correctOffset(bgn);
        endset = correctOffset(end);
        ofstAttr.setOffset(bgx , endset);

        // NOTE(review): offset advances by the source char count (cnt), while len
        // chars were written; these differ if filterToken changes the code point's
        // char width — confirm this is intended.
        offset += cnt;
        return  true;
    }
}