org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer Java Examples

The following examples show how to use org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BlogServiceImpl.java    From newblog with Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds the autocomplete (suggestion) index from scratch.
 *
 * <p>Deletes the directory at {@code AUTOCOMPLETEPATH}, then builds a new
 * {@code AnalyzingInfixSuggester} index in that location from every blog
 * returned by {@code blogMapper.getAllBlog()}. I/O failures are logged and
 * not propagated to the caller.
 */
@Override
public void ajaxbuild() {
    try {
        FileUtils.deleteDirectory(new File(AUTOCOMPLETEPATH));
        logger.info("delete autocomplete file success");
        // Resources are closed in reverse declaration order: suggester first,
        // then analyzer, then directory. Closing the suggester is what releases
        // its underlying index writer, so it must not be skipped.
        try (Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer)) {
            // Feed every blog into the suggester.
            List<Blog> blogs = blogMapper.getAllBlog();
            suggester.build(new BlogIterator(blogs.iterator()));
        }
    } catch (IOException e) {
        // Keep the cause; the previous bare "Error!" on stderr lost it.
        logger.error("failed to rebuild autocomplete index", e);
    }
}
 
Example #2
Source File: BlogServiceImpl.java    From newblog with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up autocomplete suggestions for a keyword.
 *
 * <p>Suggestions come from the sibling {@code lookup} helper, are ordered
 * shortest first, de-duplicated while keeping that order, and then passed to
 * {@code ssubSet(set, 7)} (presumably trimming the set to 7 entries —
 * confirm against that helper).
 *
 * @param keyword the text to find suggestions for
 * @return the ordered, de-duplicated suggestions, or {@code null} if the
 *         suggestion index could not be read
 */
@Override
public Set<String> ajaxsearch(String keyword) {
    // try-with-resources releases the suggester, analyzer and directory on
    // every path, including failures inside lookup().
    try (Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
         SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
         AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer)) {
        List<String> list = lookup(suggester, keyword);
        // Integer.compare returns 0 for equal lengths. The previous comparator
        // returned -1 in that case, which violates the Comparator contract and
        // can make List.sort throw IllegalArgumentException.
        list.sort((o1, o2) -> Integer.compare(o1.length(), o2.length()));
        Set<String> set = new LinkedHashSet<>(list);
        ssubSet(set, 7);
        return set;
    } catch (IOException e) {
        // Include the exception so the failure is diagnosable.
        System.err.println("Error! " + e);
        return null;
    }
}
 
Example #3
Source File: SearchBuilder.java    From taoshop with Apache License 2.0 5 votes vote down vote up
/**
 * Searches the "tcontent" field of the index at {@code indexDir} and prints
 * the top 10 hits to standard output.
 *
 * <p>For each hit the stored "title" and "tcontent" values are printed,
 * followed by the best highlighted fragment of "tcontent" when that field is
 * present.
 *
 * @param indexDir path of the Lucene index directory
 * @param queryStr query text, parsed with {@code QueryParser}
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if {@code queryStr} cannot be parsed
 * @throws InvalidTokenOffsetsException if highlighting fails
 */
public static void doSearch(String indexDir , String queryStr) throws IOException, ParseException, InvalidTokenOffsetsException {
    // try-with-resources closes reader, analyzer and directory even when
    // parsing, searching or highlighting throws.
    try (Directory directory = FSDirectory.open(Paths.get(indexDir));
         DirectoryReader reader = DirectoryReader.open(directory);
         Analyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("tcontent", analyzer);
        Query query = parser.parse(queryStr);

        long startTime = System.currentTimeMillis();
        TopDocs docs = searcher.search(query, 10);

        System.out.println("查找"+queryStr+"所用时间:"+(System.currentTimeMillis()-startTime));
        System.out.println("查询到"+docs.totalHits+"条记录");

        // Highlighting: wrap matched terms in bold red markup.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>");
        QueryScorer scorer = new QueryScorer(query); // scores fragments against the query
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); // splits text into candidate fragments
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);

        // Print every hit.
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String tcontent = doc.get("tcontent"); // looked up once, reused below
            System.out.println(doc.get("title"));
            System.out.println(tcontent);
            if (tcontent != null) {
                TokenStream tokenStream = analyzer.tokenStream("tcontent", new StringReader(tcontent));
                String summary = highlighter.getBestFragment(tokenStream, tcontent);
                System.out.println(summary);
            }
        }
    }
}
 
Example #4
Source File: SmartcnUDF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the stop-word set to use.
 *
 * @param array stop words; {@code null} selects the analyzer's default set,
 *        an empty array selects no stop words at all
 * @return the default stop set, the empty set, or a case-insensitive set
 *         built from {@code array}
 */
@Nonnull
private static CharArraySet stopWords(@Nullable final String[] array)
        throws UDFArgumentException {
    if (array != null) {
        if (array.length > 0) {
            return new CharArraySet(Arrays.asList(array), true /* ignoreCase */);
        }
        return CharArraySet.EMPTY_SET;
    }
    return SmartChineseAnalyzer.getDefaultStopSet();
}
 
Example #5
Source File: LuceneIndex.java    From jeesuite-libs with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an IndexWriter over the on-disk index, analyzing text with
 * SmartChineseAnalyzer.
 *
 * @return a new IndexWriter
 * @throws Exception if the directory or the writer cannot be opened
 */
private IndexWriter getWriter()throws Exception{
	// The index lives at a hard-coded location on the C: drive;
	// change the path to suit your environment.
	dir= FSDirectory.open(Paths.get("C://lucene"));
	return new IndexWriter(dir, new IndexWriterConfig(new SmartChineseAnalyzer()));
}
 
Example #6
Source File: LuceneIndex.java    From cjs_ssms with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates an IndexWriter over the configured index directory, analyzing
 * text with SmartChineseAnalyzer.
 *
 * @return a new IndexWriter
 * @throws Exception if the directory or the writer cannot be opened
 */
private IndexWriter getWriter() throws Exception {
  // The index location is configured in env-config.properties.
  dir = FSDirectory.open(Paths.get(PropertiesUtil.getValue(EnvEnum.LUCENE_INDEX_PATH.val())));
  IndexWriterConfig writerConfig = new IndexWriterConfig(new SmartChineseAnalyzer());
  return new IndexWriter(dir, writerConfig);
}
 
Example #7
Source File: SearcherTest.java    From cjs_ssms with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Searches the "desc" field of the index at {@code indexDir} and prints the
 * top 10 hits to standard output.
 *
 * <p>For each hit the stored "city" and "desc" values are printed, followed
 * by the best highlighted fragment of "desc" when that field is present.
 *
 * @param indexDir path of the Lucene index directory
 * @param q query text, parsed with {@code QueryParser}
 * @throws Exception if the index cannot be read, the query cannot be parsed,
 *         or highlighting fails
 */
public static void search(String indexDir, String q) throws Exception {
  // try-with-resources closes reader, analyzer and directory on every path;
  // previously the reader leaked on any exception and the other two were
  // never closed.
  try (Directory dir = FSDirectory.open(Paths.get(indexDir));
       IndexReader reader = DirectoryReader.open(dir);
       SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
    IndexSearcher is = new IndexSearcher(reader);
    QueryParser parser = new QueryParser("desc", analyzer);
    Query query = parser.parse(q);

    long start = System.currentTimeMillis();
    TopDocs hits = is.search(query, 10);
    long end = System.currentTimeMillis();
    System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

    // Highlighting: wrap matched terms in bold red markup.
    QueryScorer scorer = new QueryScorer(query);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    for (ScoreDoc scoreDoc : hits.scoreDocs) {
      Document doc = is.doc(scoreDoc.doc);
      String desc = doc.get("desc"); // looked up once, reused below
      System.out.println(doc.get("city"));
      System.out.println(desc);
      if (desc != null) {
        TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
        System.out.println(highlighter.getBestFragment(tokenStream, desc));
      }
    }
  }
}
 
Example #8
Source File: IndexerTest.java    From cjs_ssms with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates an IndexWriter over the already-assigned {@code dir}, analyzing
 * text with SmartChineseAnalyzer.
 *
 * @return a new IndexWriter
 * @throws Exception if the writer cannot be opened
 */
private IndexWriter getWriter() throws Exception {
  // SmartChineseAnalyzer is used here in place of StandardAnalyzer.
  IndexWriterConfig writerConfig = new IndexWriterConfig(new SmartChineseAnalyzer());
  return new IndexWriter(dir, writerConfig);
}
 
Example #9
Source File: IndexSearchSharderManager.java    From fiery with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the on-disk index at {@code folderpath} for reading and registers
 * its analyzer, directory and reader under {@code foldername}.
 *
 * @param foldername key under which the opened resources are registered
 * @param folderpath file-system path of the index
 * @return {@code true} if the index was opened and registered;
 *         {@code false} if it does not exist or could not be opened
 */
private boolean openIndex(String foldername, String folderpath) {
    Analyzer analyzer = null;
    FSDirectory diskDirectory = null;
    DirectoryReader diskReader = null;
    try {
        analyzer = new SmartChineseAnalyzer();
        diskDirectory = FSDirectory.open(Paths.get(folderpath));
        diskReader = DirectoryReader.open(diskDirectory);

        // All three were just created successfully, so the old null checks
        // (and their unreachable "Load Index Fail" branch) are unnecessary.
        analyzerList.put(foldername, analyzer);
        directorList.put(foldername, diskDirectory);
        readerList.put(foldername, diskReader);

        log.info("Load Index Success:" + foldername + " path:" + folderpath);
        return true;
    } catch (org.apache.lucene.index.IndexNotFoundException xe) {
        log.error("Load Index Not Found:" + foldername);
    } catch (Exception xxe) {
        // Log with the cause attached instead of printStackTrace().
        log.error("load index Exception:" + xxe.getMessage(), xxe);
    }

    // Failure path: release whatever was opened before the error, so a missing
    // or broken index does not leak a directory or analyzer.
    for (java.io.Closeable resource : new java.io.Closeable[] {diskReader, diskDirectory, analyzer}) {
        if (resource == null) {
            continue;
        }
        try {
            resource.close();
        } catch (Exception closeFailure) {
            log.error("load index cleanup Exception:" + closeFailure.getMessage(), closeFailure);
        }
    }
    return false;
}
 
Example #10
Source File: BlogLuceneIndexManager.java    From BlogSystem with Apache License 2.0 5 votes vote down vote up
/**
 * Searches blog posts.
 *
 * <p>The keyword is matched against the title, keywords, summary and content
 * fields; a hit in any one of them is enough (OR semantics).
 *
 * @param word  keyword to search for
 * @param count maximum number of results to return
 * @return ids of the matching posts in result order, or {@code null} when
 *         {@code word} is empty, {@code count} is not positive, or nothing
 *         matches
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException if {@code word} cannot be parsed as a query
 */
public int[] search(String word, int count) throws IOException, ParseException {
    if (StringUtils.isEmpty(word) || count <= 0) return null;

    String path = propertiesManager.getLuceneIndexDir();

    // try-with-resources closes reader, analyzer and directory on every exit
    // path. Previously the "no hits" early return and any exception skipped
    // the close() calls.
    try (Directory dir = FSDirectory.open(Paths.get(path));
         IndexReader reader = DirectoryReader.open(dir);
         SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher is = new IndexSearcher(reader);
        BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();

        // OR query across all searchable fields.
        booleanQuery.add((new QueryParser(INDEX_BLOG_TITLE, analyzer)).parse(word), BooleanClause.Occur.SHOULD);
        booleanQuery.add((new QueryParser(INDEX_BLOG_KEY_WORDS, analyzer)).parse(word), BooleanClause.Occur.SHOULD);
        booleanQuery.add((new QueryParser(INDEX_BLOG_SUMMARY, analyzer)).parse(word), BooleanClause.Occur.SHOULD);
        booleanQuery.add((new QueryParser(INDEX_BLOG_CONTENT, analyzer)).parse(word), BooleanClause.Occur.SHOULD);

        // Run the search.
        TopDocs top = is.search(booleanQuery.build(), count);
        ScoreDoc[] hits = top.scoreDocs;
        if (hits.length == 0) return null;

        // Size the result by the actual number of hits rather than allocating
        // a boxed scratch array of `count` elements and copying it.
        int[] rs = new int[hits.length];
        for (int i = 0; i < hits.length; i++) {
            Document document = is.doc(hits[i].doc);
            rs[i] = Integer.parseInt(document.get(INDEX_BLOG_ID));
        }
        return rs;
    }
}
 
Example #11
Source File: BlogLuceneIndexManager.java    From BlogSystem with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an IndexWriter over the configured Lucene index directory.
 *
 * @return a new IndexWriter that analyzes text with SmartChineseAnalyzer
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getWriter() throws IOException {
    // Location of the index on disk.
    Directory indexLocation = FSDirectory.open(Paths.get(propertiesManager.getLuceneIndexDir()));

    // The analyzer tokenizes document text before it is indexed; it is handed
    // to the writer through its configuration.
    IndexWriterConfig writerConfig = new IndexWriterConfig(new SmartChineseAnalyzer());

    // The IndexWriter is the object through which documents are added to the
    // index; it is not used for reading or searching.
    IndexWriter writer = new IndexWriter(indexLocation, writerConfig);
    return writer;
}
 
Example #12
Source File: BlogIndex.java    From newblog with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an IndexWriter over the index stored at {@code BASE_PATH},
 * analyzing text with SmartChineseAnalyzer.
 *
 * @return a new IndexWriter
 * @throws Exception if the directory or the writer cannot be opened
 */
private IndexWriter getWriter() throws Exception {
    dir = FSDirectory.open(Paths.get(BASE_PATH));
    return new IndexWriter(dir, new IndexWriterConfig(new SmartChineseAnalyzer()));
}
 
Example #13
Source File: BlogServiceImpl.java    From newblog with Apache License 2.0 5 votes vote down vote up
/**
 * Manual smoke test: opens the autocomplete index at
 * {@code AUTOCOMPLETEPATH} and runs a lookup for the prefix "jav".
 *
 * @param args unused
 */
public static void main(String[] args) {
    // try-with-resources releases the suggester, analyzer and directory.
    // The unused RAMDirectory and service instance that used to be created
    // here were removed.
    try (Directory dir = FSDirectory.open(Paths.get(AUTOCOMPLETEPATH));
         SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
         AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, analyzer)) {
        lookup(suggester, "jav");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #14
Source File: IndexUtil.java    From everywhere with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an IndexWriter on the index at {@code indexPath}, analyzing text
 * with SmartChineseAnalyzer.
 *
 * @param indexPath file-system path of the index directory
 * @param create {@code true} to open in CREATE mode, {@code false} to open
 *        in CREATE_OR_APPEND mode
 * @return a new IndexWriter
 * @throws IOException if the directory or the writer cannot be opened
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    // The policy was previously configured but never handed to the writer
    // configuration, so these settings had no effect.
    iwc.setMergePolicy(mergePolicy);

    iwc.setOpenMode(create
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, iwc);
}
 
Example #15
Source File: SearchBuilder.java    From taoshop with Apache License 2.0 5 votes vote down vote up
/**
 * Searches the "tcontent" field of the index at {@code indexDir} and prints
 * the top 10 hits to standard output.
 *
 * <p>For each hit the stored "title" is printed, followed by the best
 * highlighted fragment of "tcontent" when that field is present.
 *
 * @param indexDir path of the Lucene index directory
 * @param queryStr query text, parsed with {@code QueryParser}
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if {@code queryStr} cannot be parsed
 * @throws InvalidTokenOffsetsException if highlighting fails
 */
public static void doSearch(String indexDir , String queryStr) throws IOException, ParseException, InvalidTokenOffsetsException {
    // try-with-resources closes reader, analyzer and directory even when
    // parsing, searching or highlighting throws.
    try (Directory directory = FSDirectory.open(Paths.get(indexDir));
         DirectoryReader reader = DirectoryReader.open(directory);
         Analyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("tcontent", analyzer);
        Query query = parser.parse(queryStr);

        long startTime = System.currentTimeMillis();
        TopDocs docs = searcher.search(query, 10);

        System.out.println("查找"+queryStr+"所用时间:"+(System.currentTimeMillis()-startTime));
        System.out.println("查询到"+docs.totalHits+"条记录");

        // Highlighting: wrap matched terms in bold red markup.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>");
        QueryScorer scorer = new QueryScorer(query); // scores fragments against the query
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); // splits text into candidate fragments
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);

        // Print every hit.
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            System.out.println(doc.get("title"));

            String tcontent = doc.get("tcontent");
            if (tcontent != null) {
                TokenStream tokenStream = analyzer.tokenStream("tcontent", new StringReader(tcontent));
                String summary = highlighter.getBestFragment(tokenStream, tcontent);
                System.out.println(summary);
            }
        }
    }
}
 
Example #16
Source File: LuceneIndexer.java    From taoshop with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes three hard-coded sample documents into the index at
 * {@code indexDir} and prints the document count and elapsed time.
 *
 * @param indexDir file-system path of the index directory
 * @return always {@code true}; failures surface as exceptions
 * @throws IOException if the index cannot be opened or written
 */
public boolean createIndex(String indexDir) throws IOException{
	// Static sample data for testing.
	int[] ids = {1 , 2 , 3};
	String[] titles = {"标题1" , "标题2" , "标题3"};
	String[] tcontents = {
			"内容1内容啊哈哈哈",
			"内容2内容啊哈哈哈",
			"内容3内容啊哈哈哈"
	};

	long startTime = System.currentTimeMillis(); // indexing start time

	// try-with-resources closes writer, directory and analyzer (in that
	// order) even when adding or committing a document fails.
	try (Analyzer analyzer = new SmartChineseAnalyzer();
			Directory directory = FSDirectory.open(Paths.get(indexDir));
			IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
		for (int i = 0; i < ids.length; i++) {
			Document doc = new Document();
			// All three fields are stored so they can be read back from hits.
			doc.add(new TextField("id", String.valueOf(ids[i]), Field.Store.YES)); // document id
			doc.add(new TextField("title", titles[i], Field.Store.YES)); // title
			doc.add(new TextField("tcontent", tcontents[i], Field.Store.YES)); // body text
			indexWriter.addDocument(doc);
		}

		indexWriter.commit();
		System.out.println("共索引了"+indexWriter.numDocs()+"个文件");
	}
	System.out.println("创建索引所用时间:"+(System.currentTimeMillis()-startTime)+"毫秒");

	return true;
}
 
Example #17
Source File: LuceneIndexer.java    From taoshop with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes three hard-coded sample documents into the index at
 * {@code indexDir} and prints the document count and elapsed time.
 *
 * @param indexDir file-system path of the index directory
 * @return always {@code true}; failures surface as exceptions
 * @throws IOException if the index cannot be opened or written
 */
public boolean createIndex(String indexDir) throws IOException{
    // Static sample data for testing.
    int[] ids = {1 , 2 , 3};
    String[] titles = {"标题1" , "标题2" , "标题3"};
    String[] tcontents = {
            "内容1内容啊哈哈哈",
            "内容2内容啊哈哈哈",
            "内容3内容啊哈哈哈"
    };

    long startTime = System.currentTimeMillis(); // indexing start time

    // try-with-resources closes writer, directory and analyzer (in that
    // order) even when adding or committing a document fails.
    try (Analyzer analyzer = new SmartChineseAnalyzer();
         Directory directory = FSDirectory.open(Paths.get(indexDir));
         IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        for (int i = 0; i < ids.length; i++) {
            Document doc = new Document();
            // All three fields are stored so they can be read back from hits.
            doc.add(new TextField("id", String.valueOf(ids[i]), Field.Store.YES)); // document id
            doc.add(new TextField("title", titles[i], Field.Store.YES)); // title
            doc.add(new TextField("tcontent", tcontents[i], Field.Store.YES)); // body text
            indexWriter.addDocument(doc);
        }

        indexWriter.commit();
        System.out.println("共索引了"+indexWriter.numDocs()+"个文件");
    }
    System.out.println("创建索引所用时间:"+(System.currentTimeMillis()-startTime)+"毫秒");

    return true;
}
 
Example #18
Source File: IndexWriterShaderManager.java    From fiery with Apache License 2.0 4 votes vote down vote up
/**
 * Ensures that the write-side index resources for {@code dbname} exist.
 *
 * <p>If any of the per-database maps already has an entry for
 * {@code dbname}, nothing is done. Otherwise an analyzer, writer
 * configuration, input queue, directory and writer are created and
 * registered, and an initial commit is issued. If opening the directory or
 * writer (or the commit) fails, every entry registered for {@code dbname}
 * is removed again, closeable resources are closed, and the error is logged.
 *
 * @param dbname name of the index; also the sub-folder under the configured
 *        index path
 */
public synchronized void CheckTheWriterIndex(String dbname) {

    // Already (possibly partially) registered: do not create again.
    if (analyzerList.containsKey(dbname) || diskConfigList.containsKey(dbname) ||
            diskDirectoryList.containsKey(dbname) || diskWriterList.containsKey(dbname) ||
            IndexInputQueueList.containsKey(dbname)) {
        return;
    }
    log.info("Create Write Index:" + dbname);

    analyzerList.put(dbname, new SmartChineseAnalyzer());
    diskConfigList.put(dbname, new IndexWriterConfig(analyzerList.get(dbname)));
    diskConfigList.get(dbname).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexInputQueueList.put(dbname, new ConcurrentLinkedQueue<MetaLog>());

    try {
        // Open the directory.
        diskDirectoryList.put(dbname, FSDirectory.open(Paths.get(fieryConfig.getIndexpath() + "/" + dbname)));

        // Open the writer.
        diskWriterList.put(dbname, new IndexWriter(diskDirectoryList.get(dbname), diskConfigList.get(dbname)));

        // Initial commit so the index folder is initialised on disk.
        diskWriterList.get(dbname).commit();

    } catch (Exception e) {
        // Roll back the registration. remove() returns the previous value (or
        // null), so each resource can be closed as it is unregistered. The
        // writer goes before the directory it writes to. Previously these
        // were dropped from the maps without being closed.
        closeAfterFailedInit(diskWriterList.remove(dbname));
        closeAfterFailedInit(diskDirectoryList.remove(dbname));
        closeAfterFailedInit(analyzerList.remove(dbname));
        diskConfigList.remove(dbname);
        IndexInputQueueList.remove(dbname);

        log.error("init Exception:" + e.getMessage(), e);
    }
}

/** Closes a resource left over from a failed index initialisation, logging (not rethrowing) close failures. */
private void closeAfterFailedInit(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (Exception closeFailure) {
        log.error("init cleanup Exception:" + closeFailure.getMessage(), closeFailure);
    }
}
 
Example #19
Source File: PostSearchServiceImpl.java    From mblog with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Runs a fuzzy full-text search over the "title", "summary" and "tags"
 * fields of posts and returns one page of results, with matched terms in
 * the title and summary wrapped in a red {@code <span>}.
 *
 * @param pageable paging information (offset and page size)
 * @param term     the search term
 * @return the requested page of posts, with highlighted title/summary where
 *         a highlight could be produced
 * @throws Exception if the query cannot be built or executed
 */
@Override
@PostStatusFilter
public Page<PostVO> search(Pageable pageable, String term) throws Exception {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder builder = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Post.class).get();

    Query luceneQuery = builder
            .keyword()
            .fuzzy()
            .withEditDistanceUpTo(1)
            .withPrefixLength(1)
            .onFields("title", "summary", "tags")
            .matching(term).createQuery();

    FullTextQuery query = fullTextEntityManager.createFullTextQuery(luceneQuery, Post.class);
    query.setFirstResult((int) pageable.getOffset());
    query.setMaxResults(pageable.getPageSize());

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    QueryScorer scorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, scorer);

    List<Post> list = query.getResultList();
    List<PostVO> rets;
    // The stream is fully collected inside the try block, so the analyzer can
    // be closed safely once highlighting is done.
    try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
        rets = list.stream().map(po -> {
            PostVO post = BeanMapUtils.copy(po);

            try {
                // Highlight each field only when it has text, and apply it
                // immediately. Previously a null summary made the highlighter
                // throw before either setter ran, so the title highlight was
                // lost as well.
                String rawTitle = post.getTitle();
                if (StringUtils.isNotEmpty(rawTitle)) {
                    String title = highlighter.getBestFragment(analyzer, "title", rawTitle);
                    if (StringUtils.isNotEmpty(title)) {
                        post.setTitle(title);
                    }
                }

                String rawSummary = post.getSummary();
                if (StringUtils.isNotEmpty(rawSummary)) {
                    String summary = highlighter.getBestFragment(analyzer, "summary", rawSummary);
                    if (StringUtils.isNotEmpty(summary)) {
                        post.setSummary(summary);
                    }
                }
            } catch (Exception e) {
                // Highlighting is best-effort: keep the un-highlighted post.
                log.error(e.getMessage(), e);
            }
            return post;
        }).collect(Collectors.toList());
    }
    buildUsers(rets);
    return new PageImpl<>(rets, pageable, query.getResultSize());
}