Java Code Examples for org.apache.lucene.analysis.standard.StandardAnalyzer

The following examples show how to use org.apache.lucene.analysis.standard.StandardAnalyzer. These examples are extracted from open source projects; the source project, source file, and license are noted above each example.
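
Before the project-specific examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the most common StandardAnalyzer pattern: obtaining a TokenStream and reading the emitted terms. The field name "content" and the class name StandardAnalyzerDemo are arbitrary choices for illustration, and whether the no-argument constructor removes English stop words depends on the Lucene version in use.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardAnalyzerDemo {

    // Tokenize a piece of text with StandardAnalyzer and return the emitted terms.
    static List<String> tokenize(String text) throws IOException {
        List<String> terms = new ArrayList<>();
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             TokenStream stream = analyzer.tokenStream("content", text)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                  // required before the first incrementToken()
            while (stream.incrementToken()) {
                terms.add(term.toString());  // tokens are lower-cased by the analyzer
            }
            stream.end();                    // consume end-of-stream state
        }
        return terms;
    }

    public static void main(String[] args) throws IOException {
        // Prints the extracted tokens, e.g. [quick, brown, fox, jumped];
        // whether "the" is dropped depends on the default stop-word set of your Lucene version.
        System.out.println(tokenize("The QUICK brown fox jumped."));
    }
}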
Example 1
Source Project: yuzhouwan   Source File: LuceneExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
    // index
    try (Directory index = new NIOFSDirectory(Paths.get("/tmp/index"))) {
        // add
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("blog", "yuzhouwan.com", Field.Store.YES));
            doc.add(new StringField("github", "asdf2014", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
        // search
        try (DirectoryReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("blog", new StandardAnalyzer());
            Query query = parser.parse("yuzhouwan.com");
            ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
            for (ScoreDoc hit : hits) {
                Document hitDoc = searcher.doc(hit.doc);
                System.out.println(hitDoc.get("blog"));
            }
        }
    }
}
 
Example 2
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromSortOrderAsc() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("sort");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("order")).thenReturn("asc");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  final ArgumentCaptor<SortingParams> argument = ArgumentCaptor.forClass(SortingParams.class);
  verify(jedisMock).sort(eq("simpleKey"), argument.capture());
  Assert.assertEquals(getSortingParamString(new SortingParams().asc()), getSortingParamString(argument.getValue()));
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 3
Source Project: JPPF   Source File: CrawlerTask.java    License: Apache License 2.0
/**
 * Search for the user-specified query expression in the current page.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
  final Query q = parser.parse(query);

  final MemoryIndex index = new MemoryIndex();
  final Link link = new Link(url);
  final PageData pageData = new SimpleHttpClientParser().load(link);
  index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
  final IndexSearcher searcher = index.createSearcher();
  final Hits hits = searcher.search(q);
  @SuppressWarnings("rawtypes")
  final Iterator it = hits.iterator();
  float relevance = 0f;
  if (it.hasNext()) {
    while (it.hasNext()) {
      final Hit hit = (Hit) it.next();
      relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
    }
    matchedLinks.add(new LinkMatch(url, relevance));
  }
}
 
Example 4
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSunion() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("sunion");
  when(localParamsMock.get("key")).thenReturn("key1");
  when(localParamsMock.get("key1")).thenReturn("key2");
  when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "key1").iterator());
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.sunion(anyString(), anyString())).thenReturn(new HashSet<String>());
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).sunion("key1", "key2");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(0, terms.size());
}
 
Example 5
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldDeflateGzipAndParseJsonTermsFromRedisOnGetCommand() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("get");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("compression")).thenReturn("gzip");
  when(localParamsMock.get("serialization")).thenReturn("json");

  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.get(any(byte[].class))).thenReturn(Compressor.compressGzip("[100,200,300]".getBytes()));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).get("simpleKey".getBytes());
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(3, terms.size());
}
 
Example 6
Source Project: Clusion   Source File: IEX2LevAMAZON.java    License: GNU General Public License v3.0
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
	String line = value.toString();

	CharArraySet noise = EnglishAnalyzer.getDefaultStopSet();
	// We use a standard tokenizer that eliminates stop words.
	// A stemming tokenizer such as Porter could be used instead.
	// A set of English noise (stop) words is used to eliminate
	// words such as "the", "a", etc.
	Analyzer analyzer = new StandardAnalyzer(noise);
	List<String> token = Tokenizer.tokenizeString(analyzer, line);
	Iterator<String> it = token.iterator();
	while (it.hasNext()) {
		word.set(it.next());
		fileName.set(key);
		if (!mapTable.containsKey(fileName.toString() + word.toString())) {
			context.write(fileName, word);
			mapTable.put(fileName.toString() + word.toString(), new IntWritable(1));
		}
	}
}
 
Example 7
@Override
String[] getExtraProperties() {
	String analyzer = ConfigurableAnalyzerFactory.Options.ANALYZER;
	return new String[]{
	FullTextIndex.Options.ANALYZER_FACTORY_CLASS, ConfigurableAnalyzerFactory.class.getName(),
	analyzer+"_."+AnalyzerOptions.LIKE, "x-empty",
	analyzer+"x-empty."+AnalyzerOptions.ANALYZER_CLASS, EmptyAnalyzer.class.getName(),
	analyzer+"x-terms."+AnalyzerOptions.PATTERN, "\\W+",
	analyzer+"x-splits."+AnalyzerOptions.ANALYZER_CLASS, TermCompletionAnalyzer.class.getName(),
	analyzer+"x-splits."+AnalyzerOptions.STOPWORDS, AnalyzerOptions.STOPWORDS_VALUE_NONE,
	analyzer+"x-splits."+AnalyzerOptions.WORD_BOUNDARY, " ",
	analyzer+"x-splits."+AnalyzerOptions.SUB_WORD_BOUNDARY, "(?<!\\p{L}|\\p{N})(?=\\p{L}|\\p{N})|(?<!\\p{Lu})(?=\\p{Lu})|(?<=\\p{N})(?=\\p{L})",
	analyzer+"x-hyphen."+AnalyzerOptions.SUB_WORD_BOUNDARY, "[-.]",
	analyzer+"x-hyphen."+AnalyzerOptions.SOFT_HYPHENS, "-",
	analyzer+"x-hyphen."+AnalyzerOptions.WORD_BOUNDARY, " ",
	analyzer+"x-hyphen."+AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS, "false",
	analyzer+"x-hyphen2."+AnalyzerOptions.SUB_WORD_BOUNDARY, "[-.]",
	analyzer+"x-hyphen2."+AnalyzerOptions.SOFT_HYPHENS, "-",
	analyzer+"x-hyphen2."+AnalyzerOptions.WORD_BOUNDARY, " ",
	analyzer+"x-hyphen2."+AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS, "true",
	analyzer+"x-keywords."+AnalyzerOptions.ANALYZER_CLASS, KeywordAnalyzer.class.getName(),
	analyzer+"en-x-de."+AnalyzerOptions.ANALYZER_CLASS, StandardAnalyzer.class.getName(),
	analyzer+"en-x-de."+AnalyzerOptions.STOPWORDS, GermanAnalyzer.class.getName(),
	};
}
 
Example 8
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromRedisOnLrangeCommandCustomMinAndMax() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("lrange");
  when(localParamsMock.get("min")).thenReturn("2");
  when(localParamsMock.get("max")).thenReturn("3");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).lrange("simpleKey", 2, 3);
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 9
Source Project: swellrt   Source File: LucenePerUserWaveViewHandlerImpl.java    License: Apache License 2.0
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
                                        ReadableWaveletDataProvider waveletProvider,
                                        @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
                                        @IndexExecutor Executor executor) {
  this.waveletProvider = waveletProvider;
  this.executor = executor;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);
  try {
    IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException ex) {
    throw new IndexException(ex);
  }

  nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}
 
Example 10
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSinter() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("sinter");
  when(localParamsMock.get("key")).thenReturn("key1");
  when(localParamsMock.get("key1")).thenReturn("key2");
  when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "key1").iterator());
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.sinter(anyString(), anyString())).thenReturn(new HashSet<String>());
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).sinter("key1", "key2");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(0, terms.size());
}
 
Example 11
Source Project: Lottery   Source File: LuceneContentSvcImpl.java    License: GNU General Public License v2.0
@Transactional(readOnly = true)
public Integer createIndex(Integer siteId, Integer channelId,
		Date startDate, Date endDate, Integer startId, Integer max,
		Directory dir) throws IOException, ParseException {
	boolean exist = IndexReader.indexExists(dir);
	IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
			Version.LUCENE_30), !exist, IndexWriter.MaxFieldLength.LIMITED);
	try {
		if (exist) {
			LuceneContent.delete(siteId, channelId, startDate, endDate,
					writer);
		}
		Integer lastId = luceneContentDao.index(writer, siteId, channelId,
				startDate, endDate, startId, max);
		writer.optimize();
		return lastId;
	} finally {
		writer.close();
	}
}
 
Example 12
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromRedisOnZrangeCommandWithCustomRange() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("zrange");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("range_start")).thenReturn("1");
  when(localParamsMock.get("range_end")).thenReturn("100");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.zrangeWithScores(anyString(), anyLong(), anyLong()))
      .thenReturn(new HashSet<>(Arrays.asList(new Tuple("123", (double) 1.0f), new Tuple("321", (double) 1.0f))));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).zrangeWithScores("simpleKey", 1, 100);
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 13
Source Project: bioasq   Source File: LuceneInMemoryPassageScorer.java    License: Apache License 2.0
@Override
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
  super.initialize(aSpecifier, aAdditionalParams);
  hits = Integer.class.cast(getParameterValue("hits"));
  // query constructor
  String stoplistPath = String.class.cast(getParameterValue("stoplist-path"));
  try {
    stoplist = Resources.readLines(getClass().getResource(stoplistPath), UTF_8).stream()
            .map(String::trim).collect(toSet());
  } catch (IOException e) {
    throw new ResourceInitializationException(e);
  }
  analyzer = new StandardAnalyzer();
  parser = new QueryParser("text", analyzer);
  return true;
}
 
Example 14
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromRedisOnLrangeCommandEmptyMinAndMaxFallsBackToDefault() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("lrange");
  when(localParamsMock.get("min")).thenReturn("");
  when(localParamsMock.get("max")).thenReturn("");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).lrange("simpleKey", 0, -1);
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 15
Source Project: bioasq   Source File: LogRegDocumentReranker.java    License: Apache License 2.0
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);
  hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100);
  analyzer = UimaContextHelper.createObjectFromConfigParameter(context, "query-analyzer",
          "query-analyzer-params", StandardAnalyzer.class, Analyzer.class);
  queryStringConstructor = UimaContextHelper.createObjectFromConfigParameter(context,
          "query-string-constructor", "query-string-constructor-params",
          LuceneQueryStringConstructor.class, QueryStringConstructor.class);
  parser = new QueryParser("text", analyzer);
  // load parameters
  String param = UimaContextHelper.getConfigParameterStringValue(context, "doc-logreg-params");
  try {
    docFeatWeights = Resources.readLines(getClass().getResource(param), UTF_8).stream().limit(1)
            .map(line -> line.split("\t")).flatMap(Arrays::stream)
            .mapToDouble(Double::parseDouble).toArray();
  } catch (IOException e) {
    throw new ResourceInitializationException(e);
  }
}
 
Example 16
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldReturnEmptyQueryOnEmptyListOfHget() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("hget");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("field")).thenReturn("f1");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.hget(anyString(), anyString())).thenReturn(null);
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).hget("simpleKey", "f1");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(0, terms.size());
}
 
Example 17
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromSort() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("sort");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  final ArgumentCaptor<SortingParams> argument = ArgumentCaptor.forClass(SortingParams.class);
  verify(jedisMock).sort(eq("simpleKey"), argument.capture());
  Assert.assertEquals(getSortingParamString(new SortingParams()), getSortingParamString(argument.getValue()));
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 18
Source Project: lucene-solr   Source File: SearchPanelProvider.java    License: Apache License 2.0
private void doMLTSearch() {
  if (Objects.isNull(mltDocFTF.getValue())) {
    throw new LukeException("Doc num is not set.");
  }
  int docNum = (int) mltDocFTF.getValue();
  MLTConfig mltConfig = operatorRegistry.get(MLTTabOperator.class)
      .map(MLTTabOperator::getConfig)
      .orElse(new MLTConfig.Builder().build());
  Analyzer analyzer = operatorRegistry.get(AnalysisTabOperator.class)
      .map(AnalysisTabOperator::getCurrentAnalyzer)
      .orElse(new StandardAnalyzer());
  Query query = searchModel.mltQuery(docNum, mltConfig, analyzer);
  Set<String> fieldsToLoad = operatorRegistry.get(FieldValuesTabOperator.class)
      .map(FieldValuesTabOperator::getFieldsToLoad)
      .orElse(Collections.emptySet());
  SearchResults results = searchModel.search(query, new SimilarityConfig.Builder().build(), fieldsToLoad, DEFAULT_PAGE_SIZE, false);

  TableUtils.setupTable(resultsTable, ListSelectionModel.SINGLE_SELECTION, new SearchResultsTableModel(), null,
      SearchResultsTableModel.Column.DOCID.getColumnWidth(),
      SearchResultsTableModel.Column.SCORE.getColumnWidth());
  populateResults(results);

  messageBroker.clearStatusMessage();
}
 
Example 19
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromRedisOnHmgetCommand() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("hmget");
  when(localParamsMock.get("key")).thenReturn("hash");
  when(localParamsMock.get("field")).thenReturn("field1");
  when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "field").iterator());
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.hmget(anyString(), anyString())).thenReturn(Arrays.asList("123"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).hmget("hash", "field1");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(1, terms.size());
}
 
Example 20
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromSortOffset() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("sort");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("offset")).thenReturn("100");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  final ArgumentCaptor<SortingParams> argument = ArgumentCaptor.forClass(SortingParams.class);
  verify(jedisMock).sort(eq("simpleKey"), argument.capture());
  Assert.assertEquals(getSortingParamString(new SortingParams().limit(100, 0)),
      getSortingParamString(argument.getValue()));
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 21
Source Project: uyuni   Source File: NGramQueryParserTest.java    License: GNU General Public License v2.0
public void testFreeFormQueryParse() throws Exception {
    String queryString = "name:spell -description:another";
    log.info("Original query: "  + queryString);

    NGramQueryParser parser = new NGramQueryParser("name",
            new NGramAnalyzer(min_ngram, max_ngram), true);
    Query q = parser.parse(queryString);
    log.info("NGramQueryParser parsed query:  " + q.toString());

    QueryParser origParser = new QueryParser("name", new StandardAnalyzer());
    q = origParser.parse(queryString);
    log.info("QueryParser parsed query = " + q.toString());
}
 
Example 22
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSrandmember() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("srandmember");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.srandmember(anyString(), anyInt())).thenReturn(new ArrayList<String>());
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).srandmember("simpleKey", 1);
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(0, terms.size());
}
 
Example 23
Source Project: Lottery   Source File: LuceneContentSvcImpl.java    License: GNU General Public License v2.0
@Transactional(readOnly = true)
public void deleteIndex(Integer contentId, Directory dir)
		throws IOException, ParseException {
	boolean exist = IndexReader.indexExists(dir);
	if (exist) {
		IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), false,
				IndexWriter.MaxFieldLength.LIMITED);
		try {
			LuceneContent.delete(contentId, writer);
		} finally {
			writer.close();
		}
	}
}
 
Example 24
Source Project: clue   Source File: BuildSampleIndex.java    License: Apache License 2.0
/**
 * @param args
 */
public static void main(String[] args) throws Exception{
  if (args.length != 2) {
    System.out.println("usage: source_file index_dir");
    System.exit(1);
  }
  File f = new File(args[0]);
  BufferedReader reader = new BufferedReader(new FileReader(f));

  ObjectMapper mapper = new ObjectMapper();

  IndexWriterConfig idxWriterConfig = new IndexWriterConfig(new StandardAnalyzer());
  Directory dir = FSDirectory.open(FileSystems.getDefault().getPath(args[1]));
  IndexWriter writer = new IndexWriter(dir, idxWriterConfig);
  int count = 0;
  while (true) {
    String line = reader.readLine();
    if (line == null) break;
    JsonNode json = mapper.readTree(line);
    Document doc = buildDoc(json);
    writer.addDocument(doc);
    count++;
    if (count % 100 == 0) {
      System.out.print(".");
    }
  }
  
  System.out.println(count+" docs indexed");
  
  reader.close();
  writer.commit();
  writer.close();
}
 
Example 25
Source Project: solr-redis   Source File: TestRedisQParser.java    License: Apache License 2.0
@Test
public void shouldAddTermsFromRedisOnLrangeCommand() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("lrange");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).lrange("simpleKey", 0, -1);
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(2, terms.size());
}
 
Example 26
Source Project: SONDY   Source File: Indexer.java    License: GNU General Public License v3.0
public HashMap<String,Short> indexFile(int i, String filePath) {
    HashMap<String,Short> map = new HashMap<>();
    try {
        List<String> lines = FileUtils.readLines(new File(filePath));
        Analyzer analyzer = new StandardAnalyzer();
        int messageCountFile = 0;
        for(String line : lines){
            if(!mention || (mention && line.contains("@"))){
                messageCountFile++;
                String cleanLine = line.toLowerCase();
                List<String> strings = Tokenizer.tokenizeString(analyzer, cleanLine);
                for(String string : strings){
                    if(string.length()>=minWordLength){
                        Short count = map.get(string);
                        if(count == null){
                            count = 0;
                        }
                        count++;
                        map.put(string,count);
                    }
                }
            }
        }
        messageCountDistribution.put(i,messageCountFile);
        messageCount += messageCountFile;
    } catch (IOException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return map;
}
 
Example 27
Source Project: tutorials   Source File: LuceneInMemorySearchIntegrationTest.java    License: MIT License
@Test
public void givenPrefixQueryWhenFetchedDocumentThenCorrect() {
    InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer());
    inMemoryLuceneIndex.indexDocument("article", "Lucene introduction");
    inMemoryLuceneIndex.indexDocument("article", "Introduction to Lucene");

    Term term = new Term("body", "intro");
    Query query = new PrefixQuery(term);

    List<Document> documents = inMemoryLuceneIndex.searchIndex(query);
    Assert.assertEquals(2, documents.size());
}
 
Example 28
Source Project: marathonv5   Source File: IndexSearcher.java    License: Apache License 2.0
public IndexSearcher() {
    try {
        searcher = new org.apache.lucene.search.IndexSearcher(new ClasspathDirectory());
    } catch (IOException e) {
        e.printStackTrace();
    }
    analyzer = new StandardAnalyzer(Version.LUCENE_31);
    parser = new MultiFieldQueryParser(Version.LUCENE_31, new String[]{"name","description"}, analyzer);
}
 
Example 29
Source Project: lucene-solr   Source File: TestDemo.java    License: Apache License 2.0
public void testDemo() throws IOException {
  String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
  String text = "This is the text to be indexed. " + longTerm;

  Path indexPath = Files.createTempDirectory("tempIndex");
  try (Directory dir = FSDirectory.open(indexPath)) {
    Analyzer analyzer = new StandardAnalyzer();
    try (IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
      Document doc = new Document();
      doc.add(newTextField("fieldname", text, Field.Store.YES));
      iw.addDocument(doc);
    }

    // Now search the index.
    try (IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = newSearcher(reader);

      assertEquals(1, searcher.count(new TermQuery(new Term("fieldname", longTerm))));

      Query query = new TermQuery(new Term("fieldname", "text"));
      TopDocs hits = searcher.search(query, 1);
      assertEquals(1, hits.totalHits.value);

      // Iterate through the results.
      for (int i = 0; i < hits.scoreDocs.length; i++) {
        Document hitDoc = searcher.doc(hits.scoreDocs[i].doc);
        assertEquals(text, hitDoc.get("fieldname"));
      }

      // Test simple phrase query.
      PhraseQuery phraseQuery = new PhraseQuery("fieldname", "to", "be");
      assertEquals(1, searcher.count(phraseQuery));
    }
  }

  IOUtils.rm(indexPath);
}
 
Example 30
Source Project: aedict   Source File: Main.java    License: GNU General Public License v3.0
private void indexWithLucene() throws IOException {
    System.out.println("Deleting old Lucene index");
    FileUtils.deleteDirectory(new File(LUCENE_INDEX));
    System.out.println("Indexing with Lucene");
    final BufferedReader dictionary = config.newReader();
    try {
        final Directory directory = FSDirectory.open(new File(LUCENE_INDEX));
        try {
            final IndexWriter luceneWriter = new IndexWriter(directory,
                    new StandardAnalyzer(LuceneSearch.LUCENE_VERSION), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                final IDictParser parser = config.fileType.newParser(config);
                indexWithLucene(dictionary, luceneWriter, parser);
                System.out.println("Optimizing Lucene index");
                luceneWriter.optimize();
            } finally {
                luceneWriter.close();
            }
        } finally {
            closeQuietly(directory);
        }
    } finally {
        IOUtils.closeQuietly(dictionary);
    }
    System.out.println("Finished Lucene indexing");
}