Java Code Examples for org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl

The following examples show how to use org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl. These examples are extracted from open source projects; where available, the project, source file, and license are noted above each example.
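Before the project code, the short self-contained sketch below (not taken from any of the projects on this page; the class name and literal values are illustrative) shows how a PackedTokenAttributeImpl is typically populated and read back. The examples that follow use the same calls inside TokenStream and TokenFilter implementations.

import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;

public class PackedTokenExample {
    public static void main(String[] args) {
        // PackedTokenAttributeImpl bundles term text, offsets, type and
        // position increment into a single attribute implementation.
        PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.setEmpty().append("example");   // term text
        token.setOffset(0, 7);                // start/end character offsets
        token.setType("word");                // token type
        token.setPositionIncrement(1);        // position increment

        // Read the packed state back, as the filters below do when copying
        // it into a TokenStream's attributes.
        String term = new String(token.buffer(), 0, token.length());
        System.out.println(term + " [" + token.startOffset() + "," + token.endOffset()
                + "] type=" + token.type() + " posInc=" + token.getPositionIncrement());
    }
}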
Example 1
private PackedTokenAttributeImpl next() throws IOException
{
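    // Return the next expanded token, lazily building the iterator and
    // recursing to the next source token once the current iterator is exhausted.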
    PackedTokenAttributeImpl t = null;
    if (it == null)
    {
        it = buildIterator();
    }
    if (it == null)
    {
        return null;
    }
    if (it.hasNext())
    {
        t = it.next();
        
        return t;
    }
    else
    {
        it = null;
        t = this.next();
        return t;
    }
}
 
Example 2
@Override
public final boolean incrementToken() throws IOException
{
    clearAttributes();
    
    PackedTokenAttributeImpl next = next();
    if (next == null)
    {
        return false;
    }
    
    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(next.startOffset(), next.endOffset());
    typeAtt.setType(next.type());
    posIncAtt.setPositionIncrement(next.getPositionIncrement());
    return true;
}
 
Example 3
public PackedTokenAttributeImpl next() throws IOException
{
    PackedTokenAttributeImpl nextToken;
    if (it == null)
    {
        buildTokenListAndIterator();
    }
    if (it.hasNext())
    {
        nextToken = it.next();
    }
    else
    {
        nextToken = null;
    }
    return nextToken;
}
 
Example 4
@Override
public final boolean incrementToken() throws IOException
{
    clearAttributes();
    
    PackedTokenAttributeImpl next = next();
    if (next == null)
    {
        return false;
    }
    
    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(correctOffset(next.startOffset()), correctOffset(next.endOffset()));
    typeAtt.setType(next.type());
    posIncAtt.setPositionIncrement(next.getPositionIncrement());
    return true;
}
 
Example 5
Source Project: mmseg4j-solr   Source File: AnalyzerTest.java    License: Apache License 2.0
public static void printlnToken(String txt, Analyzer analyzer) throws IOException {
	System.out.println("---------"+txt.length()+"\n"+txt);
	TokenStream ts = analyzer.tokenStream("text", new StringReader(txt));
	/*// Lucene 2.9 and below
	for(Token t= new Token(); (t=ts.next(t)) !=null;) {
		System.out.println(t);
	}*/
	/*while(ts.incrementToken()) {
		TermAttribute termAtt = (TermAttribute)ts.getAttribute(TermAttribute.class);
		OffsetAttribute offsetAtt = (OffsetAttribute)ts.getAttribute(OffsetAttribute.class);
		TypeAttribute typeAtt = (TypeAttribute)ts.getAttribute(TypeAttribute.class);

		System.out.println("("+termAtt.term()+","+offsetAtt.startOffset()+","+offsetAtt.endOffset()+",type="+typeAtt.type()+")");
	}*/
	ts.reset();
	for(PackedTokenAttributeImpl t= new PackedTokenAttributeImpl(); (t=TokenUtils.nextToken(ts, t)) !=null;) {
		System.out.println(t);
	}
	ts.close();
}
 
Example 6
@Override
public final boolean incrementToken() throws IOException {
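    // First drain any tokens buffered by process(); they are emitted at the
    // same position as the original token (position increment 0).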
    if (!tokens.isEmpty()) {
        if (current == null) {
            throw new IllegalArgumentException("current is null");
        }
        PackedTokenAttributeImpl token = tokens.removeFirst();
        restoreState(current);
        termAtt.setEmpty().append(token);
        posIncAtt.setPositionIncrement(0);
        return true;
    }
    if (input.incrementToken()) {
        process();
        if (!tokens.isEmpty()) {
            current = captureState();
        }
        return true;
    } else {
        return false;
    }
}
 
Example 7
@Override
public final boolean incrementToken() throws IOException {
    if (!tokens.isEmpty()) {
        if (current == null) {
            throw new IllegalArgumentException("current is null");
        }
        PackedTokenAttributeImpl token = tokens.removeFirst();
        restoreState(current);
        termAtt.setEmpty().append(token);
        posIncAtt.setPositionIncrement(0);
        return true;
    }
    if (input.incrementToken()) {
        detect();
        if (!tokens.isEmpty()) {
            current = captureState();
        }
        return true;
    } else {
        return false;
    }
}
 
Example 8
private Iterator<PackedTokenAttributeImpl> buildIterator() throws IOException
{
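    // Advance the wrapped source stream once and copy its term, offset, type
    // and position-increment attributes into a standalone PackedTokenAttributeImpl.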
    // TODO: use incrementToken() somehow?
    if(!done && source.incrementToken())
    {
        CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
        TypeAttribute typeAtt = null;
        if(source.hasAttribute(TypeAttribute.class))
        {
            typeAtt = source.getAttribute(TypeAttribute.class);
        }
        PositionIncrementAttribute posIncAtt = null;
        if(source.hasAttribute(PositionIncrementAttribute.class))
        {
            posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
        }
        PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
        token.setEmpty().append(new String(cta.buffer()), 0, cta.length());
        token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        if(typeAtt != null)
        {
            token.setType(typeAtt.type());
        }
        if(posIncAtt != null)
        {
            token.setPositionIncrement(posIncAtt.getPositionIncrement());
        }
        return buildIterator(token);
    }
    else
    {
        done = true;
        return buildIterator(null);
    }
}
 
Example 9
public Iterator<PackedTokenAttributeImpl> buildIterator(PackedTokenAttributeImpl token)
{
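    // Expand one source token into a prefixed token per configured prefix;
    // only the first copy keeps the original position increment, the rest
    // are stacked at the same position.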
    if (token == null)
    {
        return null;
    }

    ArrayList<PackedTokenAttributeImpl> tokens = new ArrayList<PackedTokenAttributeImpl>(prefixes.size());
    for (String prefix : prefixes)
    {
        PackedTokenAttributeImpl newToken = new PackedTokenAttributeImpl();
        newToken.setEmpty().append(prefix + termText(token));
        newToken.setOffset(token.startOffset(), token.endOffset());
        newToken.setType(token.type());
        if (tokens.size() == 0)
        {
            newToken.setPositionIncrement(token.getPositionIncrement());
        }
        else
        {
            newToken.setPositionIncrement(0);
        }
        tokens.add(newToken);
    }
    return tokens.iterator();
}
 
Example 10
private PackedTokenAttributeImpl getTokenAttribute(String text, int startOffset, int endOffset)
{
	PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
	token.setEmpty().append(text);
	token.setOffset(startOffset, endOffset);
	return token;
}
 
Example 11
@Test
public void testFlatQueryShouldBeGeneratedFromSequentiallyShiftedTokens() throws Exception
{
    // prepare test data
    LinkedList<PackedTokenAttributeImpl> tokenSequenceWithRepeatedGroup = new LinkedList<PackedTokenAttributeImpl>();
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(0, 4), 0, 4));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(5, 6), 5, 6));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(6, 10), 6, 10));
    tokenSequenceWithRepeatedGroup.add(getTokenAttribute(TEST_QUERY.substring(10, 11), 10, 11));
    
    assertTrue("All tokens in test data must be sequentially shifted",
            parser.isAllTokensSequentiallyShifted(tokenSequenceWithRepeatedGroup));
    assertTrue(parser.getEnablePositionIncrements());
    
    LinkedList<LinkedList<PackedTokenAttributeImpl>> fixedTokenSequences = new LinkedList<LinkedList<PackedTokenAttributeImpl>>();
    fixedTokenSequences.add(tokenSequenceWithRepeatedGroup);
    
    // call method to test
    SpanQuery q = parser.generateSpanOrQuery(TEST_FIELD, fixedTokenSequences);
    
    // check results
    assertNotNull(q);
    assertTrue(q instanceof SpanNearQuery);
    SpanNearQuery spanNearQuery = (SpanNearQuery) q;
    assertEquals("Slop between term must be 0", 0, spanNearQuery.getSlop());
    assertTrue("Terms must be in order", spanNearQuery.isInOrder());
    
    SpanQuery[] termClauses = spanNearQuery.getClauses();
    assertEquals("Flat query must be generated (Query: " + q + ")", tokenSequenceWithRepeatedGroup.size(), termClauses.length);
    for (int i = 0; i < termClauses.length; i++)
    {
        assertTrue(termClauses[i] instanceof SpanTermQuery);
        assertEquals("All tokens must become spanQuery terms",
                tokenSequenceWithRepeatedGroup.get(i).toString(), ((SpanTermQuery) termClauses[i]).getTerm().text());
    }
}
 
Example 12
Source Project: mmseg4j-solr   Source File: CutLetterDigitFilter.java    License: Apache License 2.0
public CutLetterDigitFilter(TokenStream input) {
	super(input);

	reusableToken = new PackedTokenAttributeImpl();
	termAtt = addAttribute(CharTermAttribute.class);
	offsetAtt = addAttribute(OffsetAttribute.class);
	typeAtt = addAttribute(TypeAttribute.class);
}
 
Example 13
Source Project: mmseg4j-solr   Source File: CutLetterDigitFilter.java    License: Apache License 2.0
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
	PackedTokenAttributeImpl token = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);

	if(type == Character.DECIMAL_DIGIT_NUMBER) {
		token.setType(Word.TYPE_DIGIT);
	} else {
		token.setType(Word.TYPE_LETTER);
	}

	tokenQueue.offer(token);
}
 
Example 14
Source Project: mmseg4j-solr   Source File: CutLetterDigitFilter.java    License: Apache License 2.0
public final boolean incrementToken() throws IOException {
	clearAttributes();
	PackedTokenAttributeImpl token = nextToken(reusableToken);
	if(token != null) {
		termAtt.copyBuffer(token.buffer(), 0, token.length());
		offsetAtt.setOffset(token.startOffset(), token.endOffset());
		typeAtt.setType(token.type());
		return true;
	} else {
		return false;
	}
}
 
Example 15
protected void process() throws CharacterCodingException {
    String term = new String(termAtt.buffer(), 0, termAtt.length());
    for (CharSequence charSequence : process(term)) {
        if (charSequence != null) {
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.append(charSequence);
            tokens.add(token);
        }
    }
}
 
Example 16
private void detect() throws CharacterCodingException {
    CharSequence term = new String(termAtt.buffer(), 0, termAtt.length());
    Collection<CharSequence> variants = service.lookup(settings, term);
    for (CharSequence ch : variants) {
        if (ch != null) {
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.append(ch);
            tokens.add(token);
        }
    }
}
 
Example 17
protected void baseform() throws CharacterCodingException {
    CharSequence term = new String(termAtt.buffer(), 0, termAtt.length());
    CharSequence s = dictionary.lookup(term);
    if (s != null && s.length() > 0) {
        PackedTokenAttributeImpl impl = new PackedTokenAttributeImpl();
        impl.append(s);
        tokens.add(impl);
    }
}
 
Example 18
private String termText(PackedTokenAttributeImpl token)
{
    return new String(token.buffer(), 0, token.length());
}
 
Example 19
private String termText(PackedTokenAttributeImpl token)
{
    return new String(token.buffer(), 0, token.length());
}
 
Example 20
Source Project: mmseg4j-solr   Source File: CutLetterDigitFilter.java    License: Apache License 2.0
private PackedTokenAttributeImpl nextToken(PackedTokenAttributeImpl reusableToken) throws IOException {
	assert reusableToken != null;

	// First use any tokens left over from the previous call.
	PackedTokenAttributeImpl nextToken = tokenQueue.poll();
	if(nextToken != null) {
		return nextToken;
	}

	nextToken = TokenUtils.nextToken(input, reusableToken);

	if(nextToken != null &&
			(Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
				|| Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))
			) {
		final char[] buffer = nextToken.buffer();
		final int length = nextToken.length();
		byte lastType = (byte) Character.getType(buffer[0]);	// whether the character is of the same type as the previous one
		int termBufferOffset = 0;
		int termBufferLength = 0;
		for(int i=0;i<length;i++) {
			byte type = (byte) Character.getType(buffer[i]);
			if(type <= Character.MODIFIER_LETTER) {
				type = Character.LOWERCASE_LETTER;
			}
			if(type != lastType) {	// different from the previous type
				addToken(nextToken, termBufferOffset, termBufferLength, lastType);

				termBufferOffset += termBufferLength;
				termBufferLength = 0;

				lastType = type;
			}

			termBufferLength++;
		}
		if(termBufferLength > 0) {	// the last segment
			addToken(nextToken, termBufferOffset, termBufferLength, lastType);
		}
		nextToken = tokenQueue.poll();
	}

	return nextToken;
}