Java Code Examples for org.apache.lucene.analysis.standard.StandardAnalyzer

The following are the top-voted examples showing how to use org.apache.lucene.analysis.standard.StandardAnalyzer. These examples are extracted from open-source projects. You can vote up the examples you like, and your votes will be used by our system to surface better examples.
Example 1
Project: Mastering-Java-EE-Development-with-WildFly   File: SearchTestCase.java   View source code 6 votes vote down vote up
/**
 * Runs a full-text search for posts and returns at most the first 15 hits.
 *
 * <p>The query requires (MUST) all of: {@code POST_TEXT} parsed against the
 * {@code message.text} and {@code topic.subject} fields, forum id "0", category id "0",
 * poster user id "root", plus whatever {@code addPostTimeQuery} contributes. Results are
 * ordered by {@code getSort()}.
 *
 * @return the matching posts, or {@code null} when {@code POST_TEXT} cannot be parsed
 */
private List<Post> findPosts() {
	try {
		FullTextSession fullTextSession = getFullTextSession((Session) entityManager.getDelegate());
		Builder builder = new Builder();
		String[] fields = new String[] { "message.text", "topic.subject" };
		MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
		builder.add(parser.parse(POST_TEXT), MUST);
		builder.add(new TermQuery(new Term("topic.forum.id", "0")), MUST);
		builder.add(new TermQuery(new Term("topic.forum.category.id", "0")), MUST);
		builder.add(new WildcardQuery(new Term("poster.userId", "root")), MUST);
		addPostTimeQuery(builder);
		FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
		fullTextQuery.setSort(getSort());
		fullTextQuery.setFirstResult(0);
		fullTextQuery.setMaxResults(15);
		@SuppressWarnings("unchecked")
		List<Post> posts = fullTextQuery.list();
		return posts;
	} catch (ParseException e) {
		// Include the cause: a bare "error" gives no hint about what failed to parse.
		logger.severe("error: " + e);
		return null;
	}
}
 
Example 2
Project: elasticsearch_my   File: CustomUnifiedHighlighterTests.java   View source code 6 votes vote down vote up
/**
 * Indexes one document and checks that a {@code MultiPhrasePrefixQuery} for
 * "quick brown fo" matches it and that the highlighter marks "quick", "brown" and the
 * prefix-matched "fox".
 */
public void testMultiPhrasePrefixQuery() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    String value = "The quick brown fox.";
    // try-with-resources closes the reader and then the directory even when an assertion fails.
    try (Directory dir = newDirectory();
         IndexReader ir = indexOneDoc(dir, "text", value, analyzer)) {
        MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
        query.add(new Term("text", "quick"));
        query.add(new Term("text", "brown"));
        query.add(new Term("text", "fo"));
        IndexSearcher searcher = newSearcher(ir);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertThat(topDocs.totalHits, equalTo(1));
        int docId = topDocs.scoreDocs[0].doc;
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
            passageFormatter, null, value, false);
        Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
        assertThat(snippets.length, equalTo(1));
        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
    }
}
 
Example 3
Project: elasticsearch_my   File: CustomUnifiedHighlighterTests.java   View source code 6 votes vote down vote up
/**
 * Indexes one document into the "all" field and checks that an {@code AllTermQuery} for
 * "fox" matches it and that only "fox" is highlighted.
 */
public void testAllTermQuery() throws IOException {
    String value = "The quick brown fox.";
    Analyzer analyzer = new StandardAnalyzer();
    // try-with-resources closes the reader and then the directory even when an assertion fails.
    try (Directory dir = newDirectory();
         IndexReader ir = indexOneDoc(dir, "all", value, analyzer)) {
        AllTermQuery query = new AllTermQuery(new Term("all", "fox"));
        IndexSearcher searcher = newSearcher(ir);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertThat(topDocs.totalHits, equalTo(1));
        int docId = topDocs.scoreDocs[0].doc;
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
            passageFormatter, null, value, false);
        Snippet[] snippets = highlighter.highlightField("all", query, docId, 5);
        assertThat(snippets.length, equalTo(1));
        assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));
    }
}
 
Example 4
Project: elasticsearch_my   File: CustomUnifiedHighlighterTests.java   View source code 6 votes vote down vote up
/**
 * Indexes one document and checks that a {@code CommonTermsQuery} over "quick", "brown"
 * and "fox" matches it and that all three terms are highlighted.
 */
public void testCommonTermsQuery() throws IOException {
    String value = "The quick brown fox.";
    Analyzer analyzer = new StandardAnalyzer();
    // try-with-resources closes the reader and then the directory even when an assertion fails.
    try (Directory dir = newDirectory();
         IndexReader ir = indexOneDoc(dir, "text", value, analyzer)) {
        CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
        query.add(new Term("text", "quick"));
        query.add(new Term("text", "brown"));
        query.add(new Term("text", "fox"));
        IndexSearcher searcher = newSearcher(ir);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertThat(topDocs.totalHits, equalTo(1));
        int docId = topDocs.scoreDocs[0].doc;
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
            passageFormatter, null, value, false);
        Snippet[] snippets = highlighter.highlightField("text", query, docId, 5);
        assertThat(snippets.length, equalTo(1));
        assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));
    }
}
 
Example 5
Project: elasticsearch_my   File: AnalysisModuleTests.java   View source code 6 votes vote down vote up
/**
 * Configures two aliased analyzers on a randomly chosen 2.x index version and checks which
 * analyzer class ends up behind "default" and "default_search", plus the two deprecation
 * warnings emitted for the alias settings.
 */
public void testAnalyzerAliasReferencesAlias() throws IOException {
    final Settings aliasSettings = Settings.builder()
        .put("index.analysis.analyzer.foobar.alias","default")
        .put("index.analysis.analyzer.foobar.type", "german")
        .put("index.analysis.analyzer.foobar_search.alias","default_search")
        .put("index.analysis.analyzer.foobar_search.type", "default")
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        // aliases for analyzers are accepted on 2.x indices only
        .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_5))
        .build();
    final AnalysisRegistry registry = getNewRegistry(aliasSettings);
    final IndexAnalyzers analyzers = getIndexAnalyzers(registry, aliasSettings);

    assertThat(analyzers.get("default").analyzer(), is(instanceOf(GermanAnalyzer.class)));
    // the analyzer type is bound before aliases get resolved
    assertThat(analyzers.get("default_search").analyzer(), is(instanceOf(StandardAnalyzer.class)));

    // Both warnings share the same tail; only the setting name differs.
    final String warningTail = "] is only allowed on index [test] because it was created before "
        + "5.x; analyzer aliases can no longer be created on new indices.";
    assertWarnings(
        "setting [index.analysis.analyzer.foobar.alias" + warningTail,
        "setting [index.analysis.analyzer.foobar_search.alias" + warningTail);
}
 
Example 6
Project: elasticsearch_my   File: AnalysisModuleTests.java   View source code 6 votes vote down vote up
/**
 * Loads analysis settings from test1.yml for an index created on V_2_0_0 and checks the
 * Lucene version reported by analyzers obtained from the registry versus those obtained
 * from the index-level analyzers.
 */
public void testVersionedAnalyzers() throws Exception {
    final String resource = "/org/elasticsearch/index/analysis/test1.yml";
    final Settings indexSettings = Settings.builder()
            .loadFromStream(resource, getClass().getResourceAsStream(resource))
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_0_0)
            .build();
    final AnalysisRegistry registry = getNewRegistry(indexSettings);
    final IndexAnalyzers analyzers = getIndexAnalyzers(registry, indexSettings);

    // analyzers taken straight from the registry report the current version
    assertThat(registry.getAnalyzer("default"), is(instanceOf(NamedAnalyzer.class)));
    final NamedAnalyzer registryDefault = (NamedAnalyzer) registry.getAnalyzer("default");
    assertThat(registryDefault.analyzer(), is(instanceOf(StandardAnalyzer.class)));
    assertEquals(Version.CURRENT.luceneVersion, registryDefault.analyzer().getVersion());

    // index-level analyzers report the version the index was created with
    final NamedAnalyzer standard = analyzers.get("standard");
    assertThat(standard.analyzer(), is(instanceOf(StandardAnalyzer.class)));
    assertEquals(Version.V_2_0_0.luceneVersion, standard.analyzer().getVersion());
    assertEquals(Version.V_2_0_0.luceneVersion, analyzers.get("thai").analyzer().getVersion());

    final NamedAnalyzer custom7 = analyzers.get("custom7");
    assertThat(custom7.analyzer(), is(instanceOf(StandardAnalyzer.class)));
    assertEquals(org.apache.lucene.util.Version.fromBits(3,6,0), custom7.analyzer().getVersion());
}
 
Example 7
Project: elasticsearch_my   File: PlainHighlighterTests.java   View source code 6 votes vote down vote up
/**
 * Checks that highlighting a boolean query that combines the given geo query with a term
 * query on "failure" still highlights the term, both before and after the boolean query is
 * rewritten.
 *
 * @param geoQuery the geo query to OR (SHOULD) with the term query
 */
public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    // Typed map instead of a raw Map: avoids unchecked puts into the analyzer registry.
    Map<String, Analyzer> analysers = new HashMap<>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);

    final String text = "Arbitrary text field which should not cause a failure";
    final String expected = "Arbitrary text field which should not cause a <B>failure</B>";

    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
        .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();

    org.apache.lucene.search.highlight.Highlighter highlighter =
        new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", text), text);
    assertThat(fragment, equalTo(expected));

    // Same check against the rewritten form of the query.
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(fieldNameAnalyzer.tokenStream("text", text), text);
    assertThat(fragment, equalTo(expected));
}
 
Example 8
Project: elasticsearch_my   File: SimpleQueryParserTests.java   View source code 6 votes vote down vote up
/**
 * With wildcard analysis enabled, parses "first foo-bar-foobar* last" under every default
 * operator and checks that the hyphenated wildcard token becomes a nested boolean query of
 * two term queries and one prefix query.
 */
public void testAnalyzeWildcard() {
    final SimpleQueryParser.Settings parserSettings = new SimpleQueryParser.Settings();
    parserSettings.analyzeWildcard(true);

    final Map<String, Float> fieldWeights = new HashMap<>();
    fieldWeights.put("field1", 1.0f);

    final SimpleQueryParser parser = new MockSimpleQueryParser(new StandardAnalyzer(), fieldWeights, -1, parserSettings);
    for (Operator operator : Operator.values()) {
        final BooleanClause.Occur occur = operator.toBooleanClauseOccur();
        parser.setDefaultOperator(occur);
        final Query actual = parser.parse("first foo-bar-foobar* last");

        // Expected expansion of the analyzed wildcard token "foo-bar-foobar*".
        final BooleanQuery.Builder wildcardPart = new BooleanQuery.Builder();
        wildcardPart.add(new BooleanClause(new TermQuery(new Term("field1", "foo")), occur));
        wildcardPart.add(new BooleanClause(new TermQuery(new Term("field1", "bar")), occur));
        wildcardPart.add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")), occur));

        final BooleanQuery.Builder whole = new BooleanQuery.Builder();
        whole.add(new BooleanClause(new TermQuery(new Term("field1", "first")), occur));
        whole.add(wildcardPart.build(), occur);
        whole.add(new BooleanClause(new TermQuery(new Term("field1", "last")), occur));
        final Query expected = whole.build();

        assertThat(actual, equalTo(expected));
    }
}
 
Example 9
Project: elasticsearch_my   File: ParentFieldMapperTests.java   View source code 6 votes vote down vote up
/**
 * Merges a mapping for "some_type" that declares no parent and checks that the mapper
 * service exposes a "_parent" field name but no "_parent#null" field name.
 */
public void testNoParentNullFieldCreatedIfNoParentSpecified() throws Exception {
    final Index testIndex = new Index("_index", "testUUID");
    final IndexSettings settings = IndexSettingsModule.newIndexSettings(testIndex, Settings.EMPTY);

    // One StandardAnalyzer-backed analyzer serves as all three default analyzers.
    final NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
    final IndexAnalyzers analyzers = new IndexAnalyzers(settings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer,
        Collections.emptyMap(), Collections.emptyMap());
    final SimilarityService similarities = new SimilarityService(settings, Collections.emptyMap());
    final MapperService mappers = new MapperService(settings, analyzers, xContentRegistry(), similarities,
        new IndicesModule(emptyList()).getMapperRegistry(), () -> null);

    // Mapping body: {"some_type": {"properties": {}}}
    final XContentBuilder mapping = jsonBuilder()
        .startObject()
            .startObject("some_type")
                .startObject("properties")
                .endObject()
            .endObject()
        .endObject();
    mappers.merge("some_type", new CompressedXContent(mapping.string()), MergeReason.MAPPING_UPDATE, false);

    final Set<String> fieldNames = new HashSet<>(mappers.simpleMatchToIndexNames("*"));
    assertTrue(fieldNames.contains("_parent"));
    assertFalse(fieldNames.contains("_parent#null"));
}
 
Example 10
Project: nitrite-database   File: LuceneService.java   View source code 6 votes vote down vote up
public LuceneService() {
    try {
        this.keySerializer = new ObjectMapper();
        keySerializer.setVisibility(
                keySerializer.getSerializationConfig().
                        getDefaultVisibilityChecker().
                        withFieldVisibility(JsonAutoDetect.Visibility.ANY).
                        withGetterVisibility(JsonAutoDetect.Visibility.NONE).
                        withIsGetterVisibility(JsonAutoDetect.Visibility.NONE)
        );

        indexDirectory = new RAMDirectory();
        analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        indexWriter = new IndexWriter(indexDirectory, iwc);
        commit();
    } catch (IOException e) {
        throw new IndexingException(errorMessage(
                "could not create full-text index", 0), e);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
 
Example 11
Project: Java-Data-Science-Cookbook   File: SearchFiles.java   View source code 6 votes vote down vote up
/**
 * Searches the index at {@code INDEX_DIRECTORY} for "shakespeare" in the
 * {@code FIELD_CONTENTS} field and prints the total hit count followed by the path and
 * score of up to five top hits.
 *
 * @param args unused
 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
 */
public static void main(String[] args) throws Exception {
	// try-with-resources: the reader was previously never closed, leaking the open index files.
	try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIRECTORY)))) {
		IndexSearcher indexSearcher = new IndexSearcher(reader);

		Analyzer analyzer = new StandardAnalyzer();
		QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
		String searchString = "shakespeare";
		Query query = queryParser.parse(searchString);

		TopDocs results = indexSearcher.search(query, 5);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		for (int i = 0; i < hits.length; ++i) {
			int docId = hits[i].doc;
			Document d = indexSearcher.doc(docId);
			System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
		}
	}
}
 
Example 12
Project: freemoz   File: Searcher.java   View source code 6 votes vote down vote up
/**
 * Parses {@code queryString} against the field named by {@code index} and runs a paged
 * search over the configured index location.
 *
 * @param index       name of the field the query parser uses as its default field
 * @param queryString query text in Lucene query-parser syntax
 * @param page        page number handed to {@code doPagingSearch}
 * @return the search result, or {@code null} if opening the index, parsing or searching failed
 */
public SearchResult search(String index, String queryString, int page) {
    SearchResult searchResult = null;

    // try-with-resources closes the reader even when parsing or searching throws;
    // previously the reader leaked on every failure.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(
            Properties.getProperties().getProperty(Values.INDEX_LOCATION, Values.DEFAULT_INDEX_LOCATION))))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        // Search over the titles only for the moment
        QueryParser parser = new QueryParser(index, analyzer);
        Query query = parser.parse(queryString);

        searchResult = this.doPagingSearch(reader, searcher, query, queryString, page);
    } catch (Exception ex) {
        // Still best-effort (callers keep getting null), but no longer silent.
        System.err.println("Search failed for query [" + queryString + "]: " + ex);
    }

    return searchResult;
}
 
Example 13
Project: JLink   File: LuceneRetrieval.java   View source code 6 votes vote down vote up
/**
 * Searches the "surfaceFormTokens" field for the escaped search text and records, per
 * "conceptID", the highest score seen so far in {@code result}.
 *
 * @param search         raw search text; query-parser special characters are escaped first
 * @param numbOfResults  maximum number of hits to request
 * @param minLuceneScore hits scoring below this value are ignored
 * @param result         map from concept id to best score, updated in place
 * @param searcher       searcher to run the query against
 */
private void synTokenQuery(String search, final int numbOfResults, final double minLuceneScore,
		Map<String, Float> result, IndexSearcher searcher) throws ParseException, IOException {

	final StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
	final QueryParser parser = new QueryParser(Version.LUCENE_46, "surfaceFormTokens", analyzer);

	// Note from the original author: a FuzzyQuery on this term works only for String fields,
	// which is why the escaped text goes through the query parser instead.
	final Query query = parser.parse(QueryParser.escape(search));

	final TopDocs hits = searcher.search(query, numbOfResults);
	for (ScoreDoc hit : hits.scoreDocs) {
		if (!(hit.score >= minLuceneScore)) {
			continue;
		}
		final String conceptId = searcher.doc(hit.doc).get("conceptID");
		// Overwrite only when this hit beats the best score recorded so far.
		if (result.getOrDefault(conceptId, 0f) < hit.score) {
			result.put(conceptId, hit.score);
		}
	}
}
 
Example 14
Project: NGB-master   File: FeatureIndexDao.java   View source code 6 votes vote down vote up
/**
 * Removes the index entries of the given feature files from a project's index.
 *
 * <p>Does nothing when {@code fileIds} is null or empty, or when the project has no index.
 * An {@code IOException} raised while deleting is logged and not rethrown.
 *
 * @param projectId id of the project whose index is modified
 * @param fileIds   pairs of feature type and file id identifying the entries to delete;
 *                  for gene file entries pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty()) {
        return;
    }
    if (!fileManager.indexForProjectExists(projectId)) {
        return;
    }

    try (
        StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
        Directory projectIndex = fileManager.getIndexForProject(projectId);
        IndexWriter indexWriter = new IndexWriter(projectIndex,
                new IndexWriterConfig(standardAnalyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        // Existence is checked again once the writer is open, as in the original flow.
        if (!fileManager.indexForProjectExists(projectId)) {
            return;
        }
        for (Pair<FeatureType, Long> typeAndFileId : fileIds) {
            deleteDocumentByTypeAndId(typeAndFileId.getKey(), typeAndFileId.getValue(), indexWriter);
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
 
Example 15
Project: TextHIN   File: FbEntitySearcher.java   View source code 6 votes vote down vote up
/**
 * Opens the Lucene index in {@code indexDir} and prepares a query parser for it.
 *
 * @param indexDir          filesystem directory containing the index
 * @param numOfDocs         stored as-is in {@code this.numOfDocs}
 * @param searchingStrategy "exact" (uses a KeywordAnalyzer) or "inexact" (uses a StandardAnalyzer)
 * @throws RuntimeException if {@code searchingStrategy} is neither "exact" nor "inexact"
 * @throws IOException      if the index cannot be opened
 */
public FbEntitySearcher(String indexDir, int numOfDocs, String searchingStrategy) throws IOException {

    LogInfo.begin_track("Constructing Searcher");
    final boolean exact = searchingStrategy.equals("exact");
    if (!(exact || searchingStrategy.equals("inexact"))) {
      throw new RuntimeException("Bad searching strategy: " + searchingStrategy);
    }
    this.searchStrategy = searchingStrategy;

    if (exact) {
      queryParser = new QueryParser(Version.LUCENE_44, FbIndexField.TEXT.fieldName(), new KeywordAnalyzer());
    } else {
      queryParser = new QueryParser(Version.LUCENE_44, FbIndexField.TEXT.fieldName(),
          new StandardAnalyzer(Version.LUCENE_44));
    }

    LogInfo.log("Opening index dir: " + indexDir);
    final IndexReader reader = DirectoryReader.open(SimpleFSDirectory.open(new File(indexDir)));
    indexSearcher = new IndexSearcher(reader);
    LogInfo.log("Opened index with " + reader.numDocs() + " documents.");

    this.numOfDocs = numOfDocs;
    LogInfo.end_track();
}
 
Example 16
Project: Elasticsearch   File: StandardAnalyzerProvider.java   View source code 6 votes vote down vote up
/**
 * Builds the {@code StandardAnalyzer} for this provider from the analyzer settings.
 *
 * <p>The default stop-word set depends on the version the index was created with: empty
 * from {@code V_1_0_0_Beta1} on, the English stop words before that. The "max_token_length"
 * setting falls back to {@code StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH}.
 */
public StandardAnalyzerProvider(Index index, Settings indexSettings, Environment env, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.esVersion = Version.indexCreated(indexSettings);

    final CharArraySet defaultStopwords = esVersion.onOrAfter(Version.V_1_0_0_Beta1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    final CharArraySet configuredStopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    final int tokenLengthLimit = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(configuredStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}
 
Example 17
Project: parabuild-ci   File: IndexFiles.java   View source code 6 votes vote down vote up
/**
 * Indexes the directory tree given as the first argument into a new index named "index"
 * and prints the elapsed time in milliseconds.
 *
 * <p>Prints a usage message and exits with status 1 when no argument is given. An
 * {@code IOException} during indexing is reported on standard output rather than rethrown.
 *
 * @param args {@code args[0]} is the root directory to index
 */
public static void main(String[] args) throws IOException {
  String usage = "java " + IndexFiles.class + " <root_directory>";
  if (args.length == 0) {
    System.err.println("Usage: " + usage);
    System.exit(1);
  }

  Date start = new Date();
  try {
    IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true);
    try {
      indexDocs(writer, new File(args[0]));
      writer.optimize();
    } finally {
      // Always release the writer: previously it stayed open when indexing or optimize failed.
      writer.close();
    }

    Date end = new Date();

    System.out.print(end.getTime() - start.getTime());
    System.out.println(" total milliseconds");

  } catch (IOException e) {
    System.out.println(" caught a " + e.getClass() +
     "\n with message: " + e.getMessage());
  }
}
 
Example 18
Project: dacapobench   File: Index.java   View source code 6 votes vote down vote up
/**
 * Index all text files under the given directories.
 *
 * <p>Each entry of {@code args} is treated as a document directory; it is indexed and the
 * index is optimized before the next directory is processed.
 *
 * @param INDEX_DIR location of the index to create
 * @param args      document directories to index
 * @throws IOException if a document directory does not exist or is not readable, or if
 *                     indexing fails
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
  try {
    for (int arg = 0; arg < args.length; arg++) {
      final File docDir = new File(args[arg]);
      if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
        throw new IOException("Cannot read from document directory");
      }

      indexDocs(writer, docDir);
      System.out.println("Optimizing...");
      writer.optimize();
    }
  } finally {
    // Close the writer on every path, including the unreadable-directory exception above,
    // which previously left it open.
    writer.close();
  }
}
 
Example 19
Project: as-full-text-search-server   File: LuceneBasicFlowExample.java   View source code 6 votes vote down vote up
/**
 * Search sample: looks up "title:Solr" and prints the hit count followed by the id,
 * title, author and score values of up to ten matching documents.
 *
 * @param directory the index directory.
 * @throws IOException in case of I/O failure.
 * @throws ParseException in case of Query parse exception.
 */
public static void search(Directory directory) throws IOException, ParseException {
	// The reader is opened here, so it is closed here too; previously it was never closed.
	try (DirectoryReader reader = DirectoryReader.open(directory)) {
		IndexSearcher searcher = new IndexSearcher(reader);

		Query query = new QueryParser("title", new StandardAnalyzer()).parse("title:Solr");
		TopDocs matches = searcher.search(query, 10);

		System.out.println("Search returned " + matches.totalHits + " matches.");
		Arrays.stream(matches.scoreDocs)
			.map(scoreDoc -> luceneDoc(scoreDoc, searcher))
			.forEach(doc -> {
				System.out.println("-------------------------------------");
				System.out.println("ID:\t" + doc.get("id"));
				System.out.println("TITLE:\t" + doc.get("title"));
				System.out.println("AUTHOR:\t" + doc.get("author"));
				System.out.println("SCORE:\t" + doc.get("score"));
			});
	}
}
 
Example 20
Project: as-full-text-search-server   File: LuceneBasicFlowExampleTestCase.java   View source code 6 votes vote down vote up
/**
 * Searches the "title" field for "Solr" and expects exactly the documents with ids
 * "1", "2" and "3".
 *
 * @throws Exception never, otherwise the test fails.
 */
@Test
public void findAll() throws Exception {
	// Close the reader when the test ends, also when an assertion fails.
	try (DirectoryReader reader = DirectoryReader.open(directory)) {
		IndexSearcher searcher = new IndexSearcher(reader);

		Query query = new QueryParser("title", new StandardAnalyzer()).parse("Solr");
		TopDocs matches = searcher.search(query, 10);

		assertEquals(3, matches.totalHits);

		Set<String> expected = new HashSet<String>();
		expected.add("1");
		expected.add("2");
		expected.add("3");

		Set<String> result = Arrays.stream(matches.scoreDocs)
			.map(scoreDoc -> luceneDoc(scoreDoc.doc, searcher))
			.map(doc -> doc.get("id"))
			.collect(Collectors.toCollection(HashSet::new));

		assertEquals(expected, result);
	}
}
 
Example 21
Project: as-full-text-search-server   File: LuceneBasicFlowExampleTestCase.java   View source code 6 votes vote down vote up
/**
 * Searches the "author" field for "Gazzarini" and expects a single hit whose id is "1".
 *
 * @throws Exception never, otherwise the test fails.
 */
@Test
public void findByAuthorSurname() throws Exception {
	// Close the reader when the test ends, also when an assertion fails.
	try (DirectoryReader reader = DirectoryReader.open(directory)) {
		IndexSearcher searcher = new IndexSearcher(reader);

		Query query = new QueryParser("author", new StandardAnalyzer()).parse("Gazzarini");
		TopDocs matches = searcher.search(query, 10);

		assertEquals(1, matches.totalHits);

		// Safe to call get(): the assertion above guarantees one hit.
		final String id = Arrays.stream(matches.scoreDocs)
			.map(scoreDoc -> luceneDoc(scoreDoc.doc, searcher))
			.map(doc -> doc.get("id"))
			.findFirst()
			.get();

		assertEquals("1", id);
	}
}
 
Example 22
Project: elasticsearch-full   File: CreateIndexDemo.java   View source code 6 votes vote down vote up
/**
 * Creates (or overwrites) an index in the relative "index" directory and writes a single
 * document with "id", "title" and "sellPoint" fields.
 */
@Test
public void test() throws Exception {
    Path path = FileSystems.getDefault().getPath("", "index");
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // try-with-resources closes the writer and then the directory even if indexing fails.
    try (Directory directory = FSDirectory.open(path);
         IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
        Document document = new Document();
        document.add(new LegacyLongField("id", 5499, Field.Store.YES));
        document.add(new Field("title", "小米6", TYPE_STORED));
        document.add(new Field("sellPoint", "骁龙835,6G内存,双摄!", TYPE_STORED));
        // A dangling "document." that stood here made the method uncompilable; it was removed.
        indexWriter.addDocument(document);
        indexWriter.commit();
    }
}
 
Example 23
Project: NGB-master   File: FeatureIndexDao.java   View source code 6 votes vote down vote up
/**
 * Writes the features of one feature file into the Lucene index of the given project.
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId id of the feature file the entries belong to
 * @param projectId id of the project whose index is written
 * @param entries the FeatureIndexEntry items to add to the index
 * @throws IOException if creating, writing or closing the index fails
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
                                       final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (
        StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
        Directory projectIndex = fileManager.createIndexForProject(projectId);
        IndexWriter indexWriter = new IndexWriter(projectIndex,
                new IndexWriterConfig(standardAnalyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        // Map the chromosome-id dimension onto its dedicated facet field.
        final String chromosomeField = FeatureIndexFields.CHR_ID.getFieldName();
        final String chromosomeFacetField = FeatureIndexFields.FACET_CHR_ID.getFieldName();
        final FacetsConfig facets = new FacetsConfig();
        facets.setIndexFieldName(chromosomeField, chromosomeFacetField);

        for (FeatureIndexEntry featureEntry : entries) {
            final Document doc = new Document();
            addCommonDocumentFields(doc, featureEntry, featureFileId);

            // VCF entries get their additional fields on top of the common ones.
            if (featureEntry instanceof VcfIndexEntry) {
                addVcfDocumentFields(doc, featureEntry);
            }

            indexWriter.addDocument(facets.build(doc));
        }
    }
}
 
Example 24
Project: marathonv5   File: IndexSearcher.java   View source code 5 votes vote down vote up
public IndexSearcher() {
    try {
        searcher = new org.apache.lucene.search.IndexSearcher(new ClasspathDirectory());
    } catch (IOException e) {
        e.printStackTrace();
    }
    analyzer = new StandardAnalyzer(Version.LUCENE_31);
    parser = new MultiFieldQueryParser(Version.LUCENE_31, new String[]{"name","description"}, analyzer);
}
 
Example 25
Project: marathonv5   File: IndexSearcher.java   View source code 5 votes vote down vote up
public IndexSearcher() {
    try {
        searcher = new org.apache.lucene.search.IndexSearcher(new ClasspathDirectory());
    } catch (IOException e) {
        e.printStackTrace();
    }
    analyzer = new StandardAnalyzer(Version.LUCENE_31);
    parser = new MultiFieldQueryParser(Version.LUCENE_31, new String[]{"name","description"}, analyzer);
}
 
Example 26
Project: newblog   File: SuggesterTest.java   View source code 5 votes vote down vote up
/**
 * Builds an in-memory infix suggester from four sample products and runs one lookup
 * for the prefix "Gu" with the "US" argument.
 *
 * @param args unused
 */
public static void main(String[] args) {
    RAMDirectory indexDir = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    // try-with-resources: the suggester was previously never closed.
    try (AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(indexDir, analyzer)) {
        // Create the Product test data
        ArrayList<Product> products = new ArrayList<Product>();
        products.add(new Product("Electric Guitar",
                "http://images.example/electric-guitar.jpg", new String[]{
                "US", "CA"}, 100));
        products.add(new Product("Electric Train",
                "http://images.example/train.jpg", new String[]{"US",
                "CA"}, 100));
        products.add(new Product("Acoustic Guitar",
                "http://images.example/acoustic-guitar.jpg", new String[]{
                "US", "ZA"}, 80));
        products.add(new Product("Guarana Soda",
                "http://images.example/soda.jpg",
                new String[]{"ZA", "IE"}, 130));

        // Build the test index
        suggester.build(new ProductIterator(products.iterator()));

        // Run the search
        lookup(suggester, "Gu", "US");
//            lookup(suggester, "Gu", "ZA");
//            lookup(suggester, "Gui", "CA");
//            lookup(suggester, "Electric guit", "US");
    } catch (IOException e) {
        // Report the cause instead of a bare "Error!".
        System.err.println("Error! " + e);
    }
}
 
Example 27
Project: elasticsearch_my   File: StandardAnalyzerProvider.java   View source code 5 votes vote down vote up
/**
 * Builds the {@code StandardAnalyzer} for this provider. Stop words come from the settings
 * with an empty set as the default; "max_token_length" falls back to
 * {@code StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH}.
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet configuredStopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    final int tokenLengthLimit = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(configuredStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}
 
Example 28
Project: elasticsearch_my   File: ChineseAnalyzerProvider.java   View source code 5 votes vote down vote up
/**
 * Backs the "chinese" analyzer with a plain {@code StandardAnalyzer} set to this
 * provider's {@code version}.
 */
public ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    // Best effort for old indices: a StandardAnalyzer stands in here.
    this.analyzer = new StandardAnalyzer();
    this.analyzer.setVersion(version);
}
 
Example 29
Project: elasticsearch_my   File: SamplerAggregatorTests.java   View source code 5 votes vote down vote up
/**
 * Runs a sampler aggregation with shard size 3 and a nested min aggregation on the "int"
 * field, and expects the minimum over the sampled documents to be 5.
 */
public void testSampler() throws IOException {
    final TextFieldType textType = new TextFieldType();
    textType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    final MappedFieldType intType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    intType.setName("int");

    final IndexWriterConfig writerConfig = newIndexWriterConfig();
    writerConfig.setMaxBufferedDocs(100);
    // large buffers: the test relies on a single segment with predictable docIds
    writerConfig.setRAMBufferSizeMB(100);

    final long[] values = {7, 3, -10, -6, 5, 50};
    try (Directory directory = newDirectory();
            IndexWriter writer = new IndexWriter(directory, writerConfig)) {
        for (long value : values) {
            // The text field holds "good " repeated `value` times (empty for non-positive values).
            final StringBuilder repeated = new StringBuilder();
            for (long remaining = value; remaining > 0; remaining--) {
                repeated.append("good ");
            }
            final Document document = new Document();
            document.add(new Field("text", repeated.toString(), textType));
            document.add(new SortedNumericDocValuesField("int", value));
            writer.addDocument(document);
        }

        final SamplerAggregationBuilder samplerBuilder = new SamplerAggregationBuilder("sampler")
                .shardSize(3)
                .subAggregation(new MinAggregationBuilder("min")
                        .field("int"));
        try (IndexReader indexReader = DirectoryReader.open(writer)) {
            assertEquals("test expects a single segment", 1, indexReader.leaves().size());
            final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            final Sampler sampled = searchAndReduce(indexSearcher, new TermQuery(new Term("text", "good")),
                    samplerBuilder, textType, numericFieldTypeOf(intType));
            final Min minimum = sampled.getAggregations().get("min");
            assertEquals(5.0, minimum.getValue(), 0);
        }
    }
}
 
Example 30
Project: elasticsearch_my   File: FunctionScoreTests.java   View source code 5 votes vote down vote up
/**
 * Builds the per-test index: one committed document carrying the {@code FIELD} text and a
 * {@code _uid} of "1", then opens a reader and searcher over it.
 */
@Before
public void initSearcher() throws IOException {
    dir = newDirectory();
    w = new IndexWriter(
            dir,
            newIndexWriterConfig(new StandardAnalyzer()));

    // Single document holding the text under test plus its uid.
    Document document = new Document();
    document.add(new TextField(FIELD, TEXT, Field.Store.YES));
    document.add(new TextField("_uid", "1", Field.Store.YES));
    w.addDocument(document);
    w.commit();

    // Reader is opened from the writer; the searcher wraps that reader.
    reader = DirectoryReader.open(w);
    searcher = newSearcher(reader);
}
 
Example 31
Project: elasticsearch_my   File: AbstractFieldDataTestCase.java   View source code 5 votes vote down vote up
/**
 * Creates the "test" index on a random 2.x version and wires up the mapper service,
 * field-data services and an in-memory {@link IndexWriter} used by the tests.
 */
@Before
public void setup() throws Exception {
    // we need 2.x so that fielddata is allowed on string fields
    Version createdVersion = VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_0);
    Settings indexCreationSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
            .build();

    indexService = createIndex("test", indexCreationSettings);
    mapperService = indexService.mapperService();
    indicesFieldDataCache = getInstanceFromNode(IndicesService.class).getIndicesFieldDataCache();
    ifdService = indexService.fieldData();

    // LogByteSizeMP to preserve doc ID order
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setMergePolicy(new LogByteSizeMergePolicy());
    writer = new IndexWriter(ramDirectory, writerConfig);
}
 
Example 32
Project: elasticsearch_my   File: InternalEngineTests.java   View source code 5 votes vote down vote up
/**
 * Creates a translog handler for a fixed test shard and builds the {@code mapperService}
 * for the given index name, using a {@link StandardAnalyzer} for every default analyzer role.
 *
 * @param xContentRegistry registry handed to the mapper service
 * @param indexName        name of the index the mapper service is built for
 * @param logger           logger passed to the superclass
 */
public TranslogHandler(NamedXContentRegistry xContentRegistry, String indexName, Logger logger) {
    super(new ShardId("test", "_na_", 0), null, logger);

    Settings versionSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();
    Index targetIndex = new Index(indexName, "_na_");
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(targetIndex, versionSettings);

    // The same named analyzer instance is used for all three default-analyzer arguments.
    NamedAnalyzer standard = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
    IndexAnalyzers analyzers = new IndexAnalyzers(
            idxSettings, standard, standard, standard, Collections.emptyMap(), Collections.emptyMap());

    SimilarityService similarity = new SimilarityService(idxSettings, Collections.emptyMap());
    MapperRegistry registry = new IndicesModule(Collections.emptyList()).getMapperRegistry();

    mapperService = new MapperService(
            idxSettings, analyzers, xContentRegistry, similarity, registry, () -> null);
}
 
Example 33
Project: elasticsearch_my   File: AnalysisRegistryTests.java   View source code 5 votes vote down vote up
/**
 * With no analyzers configured, the default index, search and search-quote analyzers
 * are all expected to be {@link StandardAnalyzer} instances.
 */
public void testDefaultAnalyzers() throws IOException {
    Version createdVersion = VersionUtils.randomVersion(random());
    Settings nodeSettings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", nodeSettings);

    // Registry built with no custom analysis components at all.
    AnalysisRegistry emptyRegistry = new AnalysisRegistry(new Environment(nodeSettings),
            emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap());
    IndexAnalyzers built = emptyRegistry.build(indexSettings);

    assertThat(built.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(built.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(built.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
}
 
Example 34
Project: elasticsearch_my   File: AnalysisRegistryTests.java   View source code 5 votes vote down vote up
/**
 * For an index created on a version up to the one preceding 5.0.0-alpha1, a
 * {@code default_index} analyzer overrides only the default index analyzer and triggers
 * a deprecation warning.
 */
public void testBackCompatOverrideDefaultIndexAnalyzer() {
    final String analyzerKey = "default_index";
    final String expectedWarning =
            "setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] "
            + "instead for index [index]";

    Version createdVersion = VersionUtils.randomVersionBetween(
            random(),
            VersionUtils.getFirstVersion(),
            VersionUtils.getPreviousVersion(Version.V_5_0_0_alpha1));
    Settings indexCreationSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
            .build();

    IndexAnalyzers built = registry.build(
            IndexSettingsModule.newIndexSettings("index", indexCreationSettings),
            singletonMap(analyzerKey, analyzerProvider(analyzerKey)),
            emptyMap(), emptyMap(), emptyMap(), emptyMap());

    // Only the index-time default is replaced; both search-time defaults stay standard.
    assertThat(built.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(built.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(built.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertWarnings(expectedWarning);
}
 
Example 35
Project: elasticsearch_my   File: AnalysisRegistryTests.java   View source code 5 votes vote down vote up
/**
 * A {@code default_search} analyzer is expected to replace both the default search and
 * default search-quote analyzers while leaving the default index analyzer standard.
 */
public void testOverrideDefaultSearchAnalyzer() {
    final String analyzerKey = "default_search";

    Version createdVersion = VersionUtils.randomVersion(random());
    Settings indexCreationSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, createdVersion)
            .build();

    IndexAnalyzers built = registry.build(
            IndexSettingsModule.newIndexSettings("index", indexCreationSettings),
            singletonMap(analyzerKey, analyzerProvider(analyzerKey)),
            emptyMap(), emptyMap(), emptyMap(), emptyMap());

    assertThat(built.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class));
    assertThat(built.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
    assertThat(built.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class));
}
 
Example 36
Project: elasticsearch_my   File: TermVectorsUnitTests.java   View source code 5 votes vote down vote up
/**
 * Indexes a single document whose only field ({@code id}) uses {@code StringField.TYPE_STORED},
 * then copies that document's term vectors into {@code outResponse} and marks it as existing.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * released even when indexing or searching throws.
 *
 * @param outResponse response that receives the term-vector fields and the exists flag
 * @throws IOException if writing to or reading from the index fails
 */
private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setOpenMode(OpenMode.CREATE);

        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            Document d = new Document();
            d.add(new Field("id", "abc", StringField.TYPE_STORED));

            writer.updateDocument(new Term("id", "abc"), d);
            writer.commit();
        }

        try (DirectoryReader dr = DirectoryReader.open(dir)) {
            IndexSearcher s = new IndexSearcher(dr);
            TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            int doc = scoreDocs[0].doc;

            // The response must be populated while the reader is still open.
            Fields fields = dr.getTermVectors(doc);
            EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
            outResponse.setFields(fields, null, flags, fields);
            outResponse.setExists(true);
        }
    }
}
 
Example 37
Project: elasticsearch_my   File: TermVectorsUnitTests.java   View source code 5 votes vote down vote up
/**
 * Indexes a single document with an {@code id} field plus {@code title} and {@code desc}
 * fields that store term vectors with positions and offsets (no payloads), then copies that
 * document's term vectors into {@code outResponse}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * released even when indexing or searching throws.
 *
 * @param outResponse response that receives the term-vector fields
 * @throws IOException if writing to or reading from the index fails
 */
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setOpenMode(OpenMode.CREATE);

        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // Stored text field type with term vectors, positions and offsets enabled.
            FieldType type = new FieldType(TextField.TYPE_STORED);
            type.setStoreTermVectorOffsets(true);
            type.setStoreTermVectorPayloads(false);
            type.setStoreTermVectorPositions(true);
            type.setStoreTermVectors(true);
            type.freeze();

            Document d = new Document();
            d.add(new Field("id", "abc", StringField.TYPE_STORED));
            d.add(new Field("title", "the1 quick brown fox jumps over  the1 lazy dog", type));
            d.add(new Field("desc", "the1 quick brown fox jumps over  the1 lazy dog", type));

            writer.updateDocument(new Term("id", "abc"), d);
            writer.commit();
        }

        try (DirectoryReader dr = DirectoryReader.open(dir)) {
            IndexSearcher s = new IndexSearcher(dr);
            TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            int doc = scoreDocs[0].doc;

            // The response must be populated while the reader is still open.
            Fields termVectors = dr.getTermVectors(doc);
            EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
            outResponse.setFields(termVectors, null, flags, termVectors);
        }
    }
}
 
Example 38
Project: clearwsd   File: LuceneWrapper.java   View source code 5 votes vote down vote up
/**
 * Opens the Lucene index stored under {@code indexDir}, creates the searcher and analyzer,
 * and logs how long initialization took.
 *
 * @param indexDir directory containing the Lucene index
 * @throws RuntimeException wrapping the {@link IOException} if the index cannot be opened
 */
private void initialize(File indexDir) {
    try {
        Stopwatch timer = Stopwatch.createStarted();

        IndexReader indexReader = IndexReader.open(FSDirectory.open(indexDir));
        indexSearcher = new IndexSearcher(indexReader);
        analyzer = new StandardAnalyzer(Version.LUCENE_36);

        log.info("Initialized lucene index at {} ({})", indexDir.getPath(), timer.stop());
    } catch (IOException e) {
        throw new RuntimeException("Unable to locate Lucene index.", e);
    }
}
 
Example 39
Project: nitrite-database   File: LuceneService.java   View source code 5 votes vote down vote up
/**
 * Replaces the current index with a fresh in-memory directory, analyzer and writer,
 * then commits.
 *
 * @throws IndexingException if creating the writer or committing fails with an I/O error
 */
@Override
public void drop() {
    try {
        indexDirectory = new RAMDirectory();
        analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        indexWriter = new IndexWriter(indexDirectory, writerConfig);
        commit();
    } catch (IOException e) {
        String message = errorMessage("could not drop full-text index", 0);
        throw new IndexingException(message, e);
    }
}
 
Example 40
Project: fiery   File: SearchPage.java   View source code 5 votes vote down vote up
/**
 * Handles {@code GET /search}: parses {@code keyword} as a multi-field Lucene query, runs it
 * sorted by the {@code time} field in reverse order, and exposes the results to the view.
 *
 * @param model   view model; receives {@code resultlist} and {@code keyword} on success,
 *                or {@code msg} when the keyword cannot be parsed
 * @param keyword optional query string entered by the user
 * @return the name of the view to render, always {@code "search"}
 */
@RequestMapping(value = "/search", method = RequestMethod.GET)
public String searchPage(
        Model model,
        @RequestParam(value = "keyword", required = false) String keyword) {

    // Fields searched by default. "httpcode" was previously misspelled "httpcod" here, which
    // did not match the "httpcode" key used for the boosts below.
    String[] fieldList = {"uid", "rpcid", "traceid", "rt_type", "url", "param", "ip", "httpcode", "project"};

    // Every searched field gets the same boost; deriving the map from fieldList keeps the two
    // in sync. "urlraw" carries a boost entry as well although it is not in the searched list.
    Map<String, Float> boosts = new HashMap<>();
    for (String field : fieldList) {
        boosts.put(field, 1.0f);
    }
    boosts.put("urlraw", 1.0f);

    MultiFieldQueryParser mulFieldQueryParser = new MultiFieldQueryParser(fieldList, new StandardAnalyzer(), boosts);
    Query query;
    try {
        query = mulFieldQueryParser.parse(keyword);
    } catch (Exception e) {
        // Broad catch kept on purpose: any failure to parse the keyword is reported to the
        // user the same way. NOTE(review): a missing keyword presumably ends up here too - confirm.
        model.addAttribute("msg", "query parser error");
        return "search";
    }

    Sort sort = new Sort(new SortField("time", SortField.Type.DOUBLE, true));

    ResponseJson result = indexHelper.searchByQuery(DateTimeHelper.getCurrentTime(), query, 0, 1000, sort);
    model.addAttribute("resultlist", result.getResult());
    model.addAttribute("keyword", keyword);
    return "search";
}