package org.ansj.solr;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

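/**
 * Quick manual test: segments a sample sentence with ansj directly and via the
 * Solr/Lucene AnsjTokenizer, printing each token's text, offsets and position increment.
 */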
public class TestAnsj {
	
	public static void main(String[] args) throws IOException {
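		// Segment a sample sentence with ansj's standard ToAnalysis and print the raw Term list.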
		List<Term> parse = ToAnalysis.parse("天天向上,媒体打打。《回家真好》");
		System.out.println(parse);
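
		// Run the same text through the Solr/Lucene AnsjTokenizer and register the
		// attributes (term text, offsets, position increments) we want to inspect.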
		Tokenizer tokenizer = new AnsjTokenizer(new StringReader("天天向上,媒体打打。《回家真好》"), 0, true);
		CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
		OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
		PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);

		// TokenStream contract: call reset() before the first incrementToken().
		tokenizer.reset();
		while (tokenizer.incrementToken()) {
			// Print each token as "term startOffset-endOffset-positionIncrement/".
			System.out.print(termAtt.toString());
			System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
			System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
		}
		System.out.println();
		// Complete the TokenStream lifecycle: end() then close().
		tokenizer.end();
		tokenizer.close();
	}
}