org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl Java Examples

The following examples show how to use org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MLTokenDuplicator.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Advances this stream by one token.
 *
 * <p>Clears the current attributes, fetches the next token via {@code next()} and,
 * when one is available, copies its term text, offsets, type and position increment
 * into this stream's attributes.
 *
 * @return {@code true} if a token was published, {@code false} when {@code next()} returned {@code null}
 * @throws IOException if {@code next()} fails
 */
@Override
public final boolean incrementToken() throws IOException
{
    clearAttributes();

    final PackedTokenAttributeImpl pending = next();
    final boolean hasToken = pending != null;
    if (hasToken)
    {
        // Publish the fetched token through the attribute instances of this stream.
        termAtt.copyBuffer(pending.buffer(), 0, pending.length());
        offsetAtt.setOffset(pending.startOffset(), pending.endOffset());
        typeAtt.setType(pending.type());
        posIncAtt.setPositionIncrement(pending.getPositionIncrement());
    }
    return hasToken;
}
 
Example #2
Source File: StandardnumberTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Emits either a queued variant token or the next token from the wrapped input.
 *
 * <p>While {@code tokens} is non-empty, each call restores the captured state, replaces the
 * term text with the next queued token and sets the position increment to 0. Otherwise the
 * input is advanced, {@code detect()} is run, and the state is captured if anything was queued.
 *
 * @return {@code true} if a token is available, {@code false} when the input is exhausted
 * @throws IOException if the wrapped input fails
 * @throws IllegalArgumentException if tokens are queued but no state was captured
 */
@Override
public final boolean incrementToken() throws IOException {
    if (tokens.isEmpty()) {
        // Nothing queued: pull a fresh token from upstream.
        if (!input.incrementToken()) {
            return false;
        }
        detect();
        if (!tokens.isEmpty()) {
            // Remember the attribute state so queued variants can be emitted on top of it.
            current = captureState();
        }
        return true;
    }

    if (current == null) {
        throw new IllegalArgumentException("current is null");
    }
    final PackedTokenAttributeImpl variant = tokens.removeFirst();
    restoreState(current);
    termAtt.setEmpty().append(variant);
    posIncAtt.setPositionIncrement(0);
    return true;
}
 
Example #3
Source File: PathTokenFilter.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Returns the next token from the iterator {@code it}, building the token list and
 * iterator first if {@code it} has not been set yet.
 *
 * @return the next token, or {@code null} when the iterator has no more elements
 * @throws IOException if building the token list fails
 */
public PackedTokenAttributeImpl next() throws IOException
{
    if (it == null)
    {
        buildTokenListAndIterator();
    }
    return it.hasNext() ? it.next() : null;
}
 
Example #4
Source File: PathTokenFilter.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Advances this stream by one token.
 *
 * <p>Clears the current attributes, obtains the next token from {@code next()} and copies
 * its term text, type and position increment into this stream's attributes. Start and end
 * offsets are passed through {@code correctOffset} before being set.
 *
 * @return {@code true} if a token was published, {@code false} when {@code next()} returned {@code null}
 * @throws IOException if {@code next()} fails
 */
@Override
public final boolean incrementToken() throws IOException
{
    clearAttributes();

    final PackedTokenAttributeImpl pending = next();
    final boolean hasToken = pending != null;
    if (hasToken)
    {
        termAtt.copyBuffer(pending.buffer(), 0, pending.length());
        final int correctedStart = correctOffset(pending.startOffset());
        final int correctedEnd = correctOffset(pending.endOffset());
        offsetAtt.setOffset(correctedStart, correctedEnd);
        typeAtt.setType(pending.type());
        posIncAtt.setPositionIncrement(pending.getPositionIncrement());
    }
    return hasToken;
}
 
Example #5
Source File: SymbolnameTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Emits either a queued token or the next token from the wrapped input.
 *
 * <p>While {@code tokens} is non-empty, each call restores the captured state, replaces the
 * term text with the next queued token and sets the position increment to 0. Otherwise the
 * input is advanced, {@code process()} is run, and the state is captured if anything was queued.
 *
 * @return {@code true} if a token is available, {@code false} when the input is exhausted
 * @throws IOException if the wrapped input fails
 * @throws IllegalArgumentException if tokens are queued but no state was captured
 */
@Override
public final boolean incrementToken() throws IOException {
    if (tokens.isEmpty()) {
        // Nothing queued: pull a fresh token from upstream.
        if (!input.incrementToken()) {
            return false;
        }
        process();
        if (!tokens.isEmpty()) {
            // Remember the attribute state so queued tokens can be emitted on top of it.
            current = captureState();
        }
        return true;
    }

    if (current == null) {
        throw new IllegalArgumentException("current is null");
    }
    final PackedTokenAttributeImpl queued = tokens.removeFirst();
    restoreState(current);
    termAtt.setEmpty().append(queued);
    posIncAtt.setPositionIncrement(0);
    return true;
}
 
Example #6
Source File: AnalyzerTest.java    From mmseg4j-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Prints the length and content of {@code txt}, then every token the analyzer
 * produces for it (field name {@code "text"}), one per line, to standard output.
 *
 * <p>The token stream is consumed following the Lucene consumer workflow:
 * {@code reset()}, iterate, {@code end()}, {@code close()}. The stream is closed
 * even when analysis throws.
 *
 * @param txt      text to analyze
 * @param analyzer analyzer used to create the token stream
 * @throws IOException if creating or consuming the token stream fails
 */
public static void printlnToken(String txt, Analyzer analyzer) throws IOException {
	System.out.println("---------"+txt.length()+"\n"+txt);
	// try-with-resources guarantees close() runs even if reset()/nextToken()/end() throws.
	try (TokenStream ts = analyzer.tokenStream("text", new StringReader(txt))) {
		ts.reset();
		for(PackedTokenAttributeImpl t= new PackedTokenAttributeImpl(); (t=TokenUtils.nextToken(ts, t)) !=null;) {
			System.out.println(t);
		}
		// Signal end-of-stream to the analysis chain before closing.
		ts.end();
	}
}
 
Example #7
Source File: MLTokenDuplicator.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Returns the next token, moving on to a freshly built iterator whenever the
 * current one is exhausted.
 *
 * <p>Implemented as a loop rather than by self-recursion so that a long run of
 * empty iterators cannot grow the call stack.
 *
 * @return the next token, or {@code null} once {@code buildIterator()} returns {@code null}
 * @throws IOException if {@code buildIterator()} fails
 */
private PackedTokenAttributeImpl next() throws IOException
{
    while (true)
    {
        if (it == null)
        {
            it = buildIterator();
        }
        if (it == null)
        {
            // No further iterator can be built: nothing left to return.
            return null;
        }
        if (it.hasNext())
        {
            return it.next();
        }
        // Current iterator is drained; drop it and try the next one.
        it = null;
    }
}
 
Example #8
Source File: CutLetterDigitFilter.java    From mmseg4j-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the filter on top of {@code input}, registering the term, offset and type
 * attributes and allocating the reusable token instance.
 *
 * @param input the token stream passed to the superclass constructor
 */
public CutLetterDigitFilter(TokenStream input) {
	super(input);

	// Attribute registration order is kept: term, offset, type.
	this.termAtt = addAttribute(CharTermAttribute.class);
	this.offsetAtt = addAttribute(OffsetAttribute.class);
	this.typeAtt = addAttribute(TypeAttribute.class);
	this.reusableToken = new PackedTokenAttributeImpl();
}
 
Example #9
Source File: BaseformTokenFilter.java    From elasticsearch-analysis-baseform with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the current term in {@code dictionary} and, if a non-empty result is
 * returned, queues it in {@code tokens} as a new token.
 *
 * @throws CharacterCodingException declared by this method; presumably raised by the dictionary lookup
 */
protected void baseform() throws CharacterCodingException {
    final CharSequence current = new String(termAtt.buffer(), 0, termAtt.length());
    final CharSequence base = dictionary.lookup(current);
    if (base == null || base.length() == 0) {
        // No usable base form: leave the queue untouched.
        return;
    }
    final PackedTokenAttributeImpl baseToken = new PackedTokenAttributeImpl();
    baseToken.append(base);
    tokens.add(baseToken);
}
 
Example #10
Source File: StandardnumberTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up variants of the current term via {@code service} and queues every
 * non-null variant in {@code tokens} as a new token.
 *
 * @throws CharacterCodingException declared by this method; presumably raised by the lookup
 */
private void detect() throws CharacterCodingException {
    final CharSequence current = new String(termAtt.buffer(), 0, termAtt.length());
    final Collection<CharSequence> found = service.lookup(settings, current);
    for (CharSequence variant : found) {
        if (variant == null) {
            continue;
        }
        final PackedTokenAttributeImpl variantToken = new PackedTokenAttributeImpl();
        variantToken.append(variant);
        tokens.add(variantToken);
    }
}
 
Example #11
Source File: SymbolnameTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Passes the current term text to {@code process(String)} and queues every
 * non-null result in {@code tokens} as a new token.
 *
 * @throws CharacterCodingException declared by this method; presumably raised by {@code process(String)}
 */
protected void process() throws CharacterCodingException {
    final String current = new String(termAtt.buffer(), 0, termAtt.length());
    for (CharSequence produced : process(current)) {
        if (produced == null) {
            continue;
        }
        final PackedTokenAttributeImpl producedToken = new PackedTokenAttributeImpl();
        producedToken.append(produced);
        tokens.add(producedToken);
    }
}
 
Example #12
Source File: CutLetterDigitFilter.java    From mmseg4j-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Advances this stream by one token.
 *
 * <p>Clears the current attributes, asks {@code nextToken(reusableToken)} for the next
 * token and copies its term text, offsets and type into this stream's attributes.
 *
 * @return {@code true} if a token was published, {@code false} when no token is available
 * @throws IOException if {@code nextToken} fails
 */
public final boolean incrementToken() throws IOException {
	clearAttributes();
	final PackedTokenAttributeImpl emitted = nextToken(reusableToken);
	if(emitted == null) {
		return false;
	}
	termAtt.copyBuffer(emitted.buffer(), 0, emitted.length());
	offsetAtt.setOffset(emitted.startOffset(), emitted.endOffset());
	typeAtt.setType(emitted.type());
	return true;
}
 
Example #13
Source File: CutLetterDigitFilter.java    From mmseg4j-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a sub-token of {@code oriToken} via {@code TokenUtils.subToken}, tags it as
 * digit or letter, and offers it to {@code tokenQueue}.
 *
 * @param oriToken         token the sub-token is derived from
 * @param termBufferOffset offset argument forwarded to {@code TokenUtils.subToken}
 * @param termBufferLength length argument forwarded to {@code TokenUtils.subToken}
 * @param type             character category; {@link Character#DECIMAL_DIGIT_NUMBER} selects
 *                         the digit type, anything else the letter type
 */
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
	final PackedTokenAttributeImpl part = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);
	final boolean isDigit = type == Character.DECIMAL_DIGIT_NUMBER;
	part.setType(isDigit ? Word.TYPE_DIGIT : Word.TYPE_LETTER);
	tokenQueue.offer(part);
}
 
Example #14
Source File: Solr4QueryParserTest.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Verifies that a single sequence of sequentially shifted tokens is turned into one flat,
 * in-order {@code SpanNearQuery} with slop 0 whose clauses are the token terms.
 */
@Test
public void testFlatQueryShouldBeGeneratedFromSequentiallyShiftedTokens() throws Exception
{
    // Arrange: four tokens cut out of TEST_QUERY, described by their [start, end) offsets.
    final int[][] offsets = { { 0, 4 }, { 5, 6 }, { 6, 10 }, { 10, 11 } };
    LinkedList<PackedTokenAttributeImpl> shiftedTokens = new LinkedList<>();
    for (int[] range : offsets)
    {
        shiftedTokens.add(getTokenAttribute(TEST_QUERY.substring(range[0], range[1]), range[0], range[1]));
    }

    assertTrue("All tokens in test data must be sequentially shifted",
            parser.isAllTokensSequentiallyShifted(shiftedTokens));
    assertTrue(parser.getEnablePositionIncrements());

    LinkedList<LinkedList<PackedTokenAttributeImpl>> sequences = new LinkedList<>();
    sequences.add(shiftedTokens);

    // Act
    SpanQuery q = parser.generateSpanOrQuery(TEST_FIELD, sequences);

    // Assert: one ordered, zero-slop near query ...
    assertNotNull(q);
    assertTrue(q instanceof SpanNearQuery);
    SpanNearQuery nearQuery = (SpanNearQuery) q;
    assertEquals("Slop between term must be 0", 0, nearQuery.getSlop());
    assertTrue("Terms must be in order", nearQuery.isInOrder());

    // ... with exactly one term clause per input token, in the same order.
    SpanQuery[] clauses = nearQuery.getClauses();
    assertEquals("Flat query must be generated (Query: " + q + ")", shiftedTokens.size(), clauses.length);
    for (int idx = 0; idx < clauses.length; idx++)
    {
        SpanQuery clause = clauses[idx];
        assertTrue(clause instanceof SpanTermQuery);
        String expectedText = shiftedTokens.get(idx).toString();
        assertEquals("All tokens must become spanQuery terms",
                expectedText, ((SpanTermQuery) clause).getTerm().text());
    }
}
 
Example #15
Source File: Solr4QueryParserTest.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Builds a token holding {@code text} with the given offsets.
 *
 * @param text        term text of the token
 * @param startOffset start offset to set on the token
 * @param endOffset   end offset to set on the token
 * @return the newly created token
 */
private PackedTokenAttributeImpl getTokenAttribute(String text, int startOffset, int endOffset)
{
	final PackedTokenAttributeImpl result = new PackedTokenAttributeImpl();
	result.setEmpty();
	result.append(text);
	result.setOffset(startOffset, endOffset);
	return result;
}
 
Example #16
Source File: MLTokenDuplicator.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Expands one token into one token per entry of {@code prefixes}.
 *
 * <p>Each generated token carries {@code prefix + term text}, and the offsets and type of
 * the source token. Only the first generated token keeps the source position increment;
 * all following ones get an increment of 0.
 *
 * @param token the source token, may be {@code null}
 * @return an iterator over the generated tokens, or {@code null} if {@code token} is {@code null}
 */
public Iterator<PackedTokenAttributeImpl> buildIterator(PackedTokenAttributeImpl token)
{
    if (token == null)
    {
        return null;
    }

    final ArrayList<PackedTokenAttributeImpl> expanded = new ArrayList<PackedTokenAttributeImpl>(prefixes.size());
    for (String prefix : prefixes)
    {
        final PackedTokenAttributeImpl prefixed = new PackedTokenAttributeImpl();
        prefixed.setEmpty().append(prefix + termText(token));
        prefixed.setOffset(token.startOffset(), token.endOffset());
        prefixed.setType(token.type());
        // Only the first token advances the position; the rest share it.
        prefixed.setPositionIncrement(expanded.isEmpty() ? token.getPositionIncrement() : 0);
        expanded.add(prefixed);
    }
    return expanded.iterator();
}
 
Example #17
Source File: MLTokenDuplicator.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Pulls the next token from {@code source}, snapshots it into a
 * {@code PackedTokenAttributeImpl} and delegates to {@code buildIterator(token)}.
 *
 * <p>Term text and offsets are always copied. Type and position increment are copied
 * only when the source exposes the corresponding attribute. Once the source is
 * exhausted, {@code done} is set and {@code buildIterator(null)} is returned.
 *
 * @return the result of {@code buildIterator(token)} for the next source token, or of
 *         {@code buildIterator(null)} when the source has no more tokens
 * @throws IOException if advancing the source fails
 */
private Iterator<PackedTokenAttributeImpl> buildIterator() throws IOException
{
    // TODO: use incrementToken() somehow?
    if(!done && source.incrementToken())
    {
        CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = null;
        if(source.hasAttribute(TypeAttribute.class))
        {
            typeAtt = source.getAttribute(TypeAttribute.class);
        }
        PositionIncrementAttribute posIncAtt = null;
        if(source.hasAttribute(PositionIncrementAttribute.class))
        {
            posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
        }
        PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        // Copy only the valid part of the term buffer; the backing array may be
        // longer than the term, so converting the whole array to a String is wasteful.
        token.copyBuffer(cta.buffer(), 0, cta.length());
        token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        if(typeAtt != null)
        {
            token.setType(typeAtt.type());
        }
        if(posIncAtt != null)
        {
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
        }
        return buildIterator(token);
    }
    else
    {
        done = true;
        return buildIterator(null);
    }
}
 
Example #18
Source File: CutLetterDigitFilter.java    From mmseg4j-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the next token, splitting mixed letter/digit tokens into runs of the same
 * character class.
 *
 * <p>Sub-tokens queued by a previous call are returned first. Otherwise the next token is
 * read from {@code input}; if its type is {@code Word.TYPE_LETTER_OR_DIGIT} or
 * {@code Word.TYPE_DIGIT_OR_LETTER} (case-insensitive), it is cut wherever the character
 * class changes, the pieces are queued via {@code addToken}, and the first piece is returned.
 * Every {@link Character#getType(char)} value up to {@link Character#MODIFIER_LETTER} is
 * treated as the same class as {@link Character#LOWERCASE_LETTER}.
 *
 * @param reusableToken token instance handed to {@code TokenUtils.nextToken}; must not be {@code null}
 * @return the next token, or {@code null} when no token is available
 * @throws IOException if reading from {@code input} fails
 */
private PackedTokenAttributeImpl nextToken(PackedTokenAttributeImpl reusableToken) throws IOException {
	assert reusableToken != null;

	// Serve sub-tokens left over from the previous split first.
	PackedTokenAttributeImpl nextToken = tokenQueue.poll();
	if(nextToken != null) {
		return nextToken;
	}

	nextToken = TokenUtils.nextToken(input, reusableToken);

	// An empty token has nothing to split; returning it unchanged avoids reading a stale
	// buffer[0] and avoids returning null while the input may still have tokens.
	if(nextToken != null && nextToken.length() > 0 &&
			(Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
				|| Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))
			) {
		final char[] buffer = nextToken.buffer();
		final int length = nextToken.length();
		// Class of the previous character. It is normalized exactly like the per-character
		// type below; otherwise a leading uppercase/titlecase/modifier letter would differ
		// from its own normalized type and enqueue a zero-length sub-token.
		byte lastType = (byte) Character.getType(buffer[0]);
		if(lastType <= Character.MODIFIER_LETTER) {
			lastType = Character.LOWERCASE_LETTER;
		}
		int termBufferOffset = 0;
		int termBufferLength = 0;
		for(int i=0;i<length;i++) {
			byte type = (byte) Character.getType(buffer[i]);
			if(type <= Character.MODIFIER_LETTER) {
				type = Character.LOWERCASE_LETTER;
			}
			if(type != lastType) {	// class differs from the previous character: close the current run
				addToken(nextToken, termBufferOffset, termBufferLength, lastType);

				termBufferOffset += termBufferLength;
				termBufferLength = 0;

				lastType = type;
			}

			termBufferLength++;
		}
		if(termBufferLength > 0) {	// flush the final run
			addToken(nextToken, termBufferOffset, termBufferLength, lastType);
		}
		nextToken = tokenQueue.poll();
	}

	return nextToken;
}
 
Example #19
Source File: PathTokenFilter.java    From SearchServices with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Returns the term text of {@code token} as a new {@code String}, built from the first
 * {@code token.length()} characters of its buffer.
 *
 * @param token token whose term text is extracted
 * @return the term text
 */
private String termText(PackedTokenAttributeImpl token)
{
    final char[] chars = token.buffer();
    final int count = token.length();
    return new String(chars, 0, count);
}
 
Example #20
Source File: MLTokenDuplicator.java    From SearchServices with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Copies the valid portion of the token's character buffer into a {@code String}.
 *
 * @param token token to read the term text from
 * @return a new string containing the characters {@code [0, token.length())} of the buffer
 */
private String termText(PackedTokenAttributeImpl token)
{
    final char[] termBuffer = token.buffer();
    final int termLength = token.length();
    return new String(termBuffer, 0, termLength);
}