Java Code Examples for org.apache.lucene.analysis.core.WhitespaceAnalyzer

The following examples show how to use org.apache.lucene.analysis.core.WhitespaceAnalyzer. They are extracted from open source projects.
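Most of the examples below use one of two constructor forms: newer Lucene releases use the no-argument new WhitespaceAnalyzer(), while older releases required a Version argument such as new WhitespaceAnalyzer(Version.LUCENE_47). As a baseline, here is a minimal, self-contained sketch of the analysis pattern most of these examples build on. It is an illustration for recent Lucene versions, not taken from any of the projects below:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        try (Analyzer analyzer = new WhitespaceAnalyzer();
             TokenStream stream = analyzer.tokenStream("field", "foo bar  baz")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                          // required before incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString()); // prints foo, bar, baz
            }
            stream.end();                            // required after the last token
        }
    }
}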
Example 1
Project: RedisDirectory   File: TestLucene.java
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
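Both RedisDirectory tests call an addDocument(int) helper whose body is not part of this extract. A plausible sketch, inferred from the TermQuery(new Term("key1", "key" + i)) lookups in the test; the field name comes from that query, while the Store choice is an assumption:

    // Hypothetical helper -- the real RedisDirectory version is not shown.
    // Uses org.apache.lucene.document.{Document, Field, StringField}.
    private Document addDocument(int i) {
        Document document = new Document();
        // StringField is indexed un-analyzed, so the exact-match TermQuery in the test finds it
        document.add(new StringField("key1", "key" + i, Field.Store.YES));
        return document;
    }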
 
Example 2
Project: RedisDirectory   File: TestLucene.java
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // FSDirectory.open picks the best implementation for the platform,
    // typically MMapDirectory on 64-bit JVMs -- hence the test name.
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
 
Example 3
Project: elasticsearch_my   File: PercolateQueryBuilderTests.java
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }

    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
 
Example 4
Project: elasticsearch_my   File: SmoothingModelTestCase.java
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
 
Example 5
Project: fastcatsearch3   File: BasicAnalysisPlugin.java
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
	// extract entire word
	registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
	//lucene StandardAnalyzer
	registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "csv", "Comma separated value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));

       registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
 
Example 6
Project: flipper-reverse-image-search   File: LireBuilder.java
/**
 * Index a picture.
 * @param source the raw image bytes
 * @param picture_id the UUID identifying the picture
 * @param conf the Lucene IndexWriterConfig, or null to build a default one
 * @throws IOException if reading the image or writing the index fails
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException
{
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);

    // Creating a Lucene IndexWriter
    log.debug("Is Lucene configured? " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(), DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(), DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(), DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(), DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);

}
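A hypothetical call site for index(...) above; the file path and UUID are illustrative, and passing null lets the method build its default CREATE_OR_APPEND configuration:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.UUID;

public class IndexOnePicture {
    public static void main(String[] args) throws Exception {
        byte[] imageBytes = Files.readAllBytes(Paths.get("/tmp/picture.jpg")); // illustrative path
        LireBuilder.index(imageBytes, UUID.randomUUID(), null); // null -> default config
    }
}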
 
Example 7
Project: flipper-reverse-image-search   File: LireBuilder.java
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {

    File file = getPath(prefix);

    // Creating a Lucene IndexWriter
    log.debug("Is Lucene configured: " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);

    iw.deleteDocuments(term);

    iw.close();
}
 
Example 8
Project: search   File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  
  // Create PerfRunData
  Config config = new Config(props);
  
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
 
Example 9
Project: search   File: ExpressionAggregationFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  Document doc = new Document();
  doc.add(new TextField("c", "foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 5L));
  doc.add(new FacetField("A", "B"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  doc = new Document();
  doc.add(new TextField("c", "foo foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 3L));
  doc.add(new FacetField("A", "C"));
  indexWriter.addDocument(config.build(taxoWriter, doc));
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 10
Project: search   File: RangeFacetsExample.java
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Add documents with a fake timestamp, 1000 sec before
  // "now", 2000 sec before "now", ...:
  for(int i=0;i<100;i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongField("timestamp", then, Field.Store.NO));
    indexWriter.addDocument(doc);
  }

  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
  indexWriter.close();
}
 
Example 11
Project: search   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  
  // Chinese text value: WhitespaceAnalyzer splits only on whitespace, so the unbroken CJK string yields a single token
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 12
Project: search   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 13
Project: search   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer();
  TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer();
    TokenStream ts2 = a2.tokenStream("", v);
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 14
Project: edits   File: RulesIndexGenerator.java
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);
    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses())
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    writer.addDocument(doc);
    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
 
Example 15
Project: tri   File: KeywordFinder.java
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    // try-with-resources so the reader is closed even if an exception is thrown
    try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
        while (reader.ready()) {
            String keyword = reader.readLine().toLowerCase().trim();
            if (keyword.length() > 0) {
                Document doc = new Document();
                doc.add(new TextField("keyword", keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "), Field.Store.YES));
                writer.addDocument(doc);
            }
        }
    }
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
 
Example 16
Project: tri   File: TriWhitespaceTokenizer.java
@Override
public List<String> getTokens(Reader reader) throws IOException {
    List<String> tokens = new ArrayList<>();
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("text", reader);
    tokenStream.reset();
    CharTermAttribute cattr = tokenStream.addAttribute(CharTermAttribute.class);
    while (tokenStream.incrementToken()) {
        String token = cattr.toString();
        tokens.add(token);
    }
    tokenStream.end();
    if (finder != null) {
        return finder.process(tokens);
    } else {
        return tokens;
    }
}
 
Example 17
Project: lire   File: Indexor.java
public void run() {
    // do it ...
    try {
//        IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
            File inputFile = iterator.next();
            if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose) System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 18
Project: lire   File: VisualWordsTest.java
public void testIndexingAndSearchSift() throws IOException {
    // Creating a Lucene IndexWriter
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata\\ferrari"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        iw.addDocument(siftBuilder.createDocument(
                ImageIO.read(imgFile), imgFile.getPath()));
        count++;
        if (count > 100 && count % 500 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
    IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
    SiftFeatureHistogramBuilder sfh = new SiftFeatureHistogramBuilder(ir, 1000, 500);
    sfh.index();
}
 
Example 19
Project: lire   File: MserTest.java
public void testExtendedIndexMSER() throws IOException {
    MSERDocumentBuilder builder = new MSERDocumentBuilder();
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("D:\\DataSets\\WIPO\\CA\\sample"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        BufferedImage img = ImageIO.read(imgFile);
        if (Math.max(img.getWidth(), img.getHeight()) < 800) {
            // scale image ...
            img = ImageUtils.scaleImage(img, 800);
        }
        iw.addDocument(builder.createDocument(img, imgFile.getPath()));
        count++;
        if (count > 2 && count % 25 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
}
 
Example 20
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(3, terms.size()); // "123 124" analyzes to two tokens; "321" adds the third
}
 
Example 21
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
  when(localParamsMock.get("retries")).thenReturn("2");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
  when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
  when(jedisMock.smembers("simpleKey")).thenReturn(new HashSet<String>(Collections.singletonList("value")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock,
          new RetryingCommandHandler(jedisPoolMock, 1));
  final Query query = redisQParser.parse();
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(1, terms.size());
}
 
Example 22
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("ignoreScore")).thenReturn("true");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321", "322", "323", "324",
          "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  Query rewrittenQuery = searcher.rewrite(query);
  assertTrue(rewrittenQuery instanceof TermsQuery);
}
 
Example 23
Project: pyramid   File: PhraseCountQueryBuilder.java
protected Query doToQuery(QueryShardContext context) throws IOException {
//    Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        // lower-case the whitespace-separated tokens before building span terms
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example 24
Project: NYBC   File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  
  // Create PerfRunData
  Config config = new Config(props);
  
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
 
Example 25
Project: NYBC   File: MultiCategoryListsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter, indexingParams);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");
  
  indexWriter.close();
  taxoWriter.close();
}
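This example and the next both call an add(...) helper that the extract omits. A plausible reconstruction, consistent with the CategoryPath-based facet API these NYBC extracts use, but not the project's verbatim code:

// Hypothetical helper -- uses org.apache.lucene.facet.taxonomy.CategoryPath
// and the FacetFields instance created in the method above.
private void add(IndexWriter indexWriter, FacetFields facetFields, String... categoryPaths) throws IOException {
    Document doc = new Document();
    List<CategoryPath> paths = new ArrayList<>();
    for (String categoryPath : categoryPaths) {
        paths.add(new CategoryPath(categoryPath, '/')); // "Author/Bob" -> Author > Bob
    }
    facetFields.addFields(doc, paths);
    indexWriter.addDocument(doc);
}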
 
Example 26
Project: NYBC   File: SimpleFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 27
Project: NYBC   File: AssociationsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
  
  for (int i = 0; i < CATEGORIES.length; i++) {
    Document doc = new Document();
    CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
    for (int j = 0; j < CATEGORIES[i].length; j++) {
      associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
    }
    facetFields.addFields(doc, associations);
    indexWriter.addDocument(doc);
  }
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 28
Project: NYBC   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
  
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 29
Project: NYBC   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 30
Project: NYBC   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 31
Project: lucure-core   File: RestrictedFieldTest.java
@Test
public void testColumnVisibilityPayload() throws Exception {
    String visibility = "U";
    String value = "value";
    RestrictedField restrictedField = new RestrictedField(new StringField(
      "field", value, Field.Store.NO), new FieldVisibility(visibility));
    try(TokenStream tokenStream = restrictedField.tokenStream(
      new WhitespaceAnalyzer(), null)) {
        CharTermAttribute charTermAttribute = tokenStream
          .getAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = tokenStream
          .getAttribute(PayloadAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            assertEquals(value, new String(charTermAttribute.buffer(), 0, charTermAttribute.length()));
            assertEquals(visibility, new String(payloadAttribute.getPayload().bytes));
        }
    }
}
 
Example 32
Project: incubator-blur   File: Blur024CodecTest.java
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);

  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);

  writer.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));

  reader.close();
}
 
Example 33
Project: incubator-blur   File: SuperParserTest.java
@Test
public void test5() throws ParseException, IOException {
  parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
      ScoreType.SUPER, new Term("_primedoc_"));
  Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>");

  BooleanQuery booleanQuery1 = new BooleanQuery();
  booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
  booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD);
  booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD);
  // std analyzer took the "<" out
  booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD);

  BooleanQuery booleanQuery2 = new BooleanQuery();
  booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD);
  booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD);

  SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_"));
  SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_"));

  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(superQuery1, Occur.SHOULD);
  booleanQuery.add(superQuery2, Occur.MUST_NOT);

  assertQuery(booleanQuery, query);
}
 
Example 34
Project: search-core   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
  
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 35
Project: search-core   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 36
Project: search-core   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 37
Project: meresco-lucene   File: LuceneSettings.java
private static Analyzer getAnalyzer(JsonObject analyzer) {
    switch (analyzer.getString("type")) {
    case "MerescoDutchStemmingAnalyzer":
        JsonArray jsonFields = analyzer.getJsonArray("stemmingFields");
        String[] fields = new String[jsonFields.size()];
        for (int i = 0; i < jsonFields.size(); i++) {
            fields[i] = jsonFields.getString(i);
        }
        return new MerescoDutchStemmingAnalyzer(fields);
    case "MerescoStandardAnalyzer":
        return new MerescoStandardAnalyzer();
    case "WhitespaceAnalyzer":
        return new WhitespaceAnalyzer();
    }
    return null;
}
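A sketch of the configuration objects this switch expects, built with javax.json. The "type" and "stemmingFields" keys come from the code above; the field values are illustrative:

import javax.json.Json;
import javax.json.JsonObject;

public class AnalyzerConfigDemo {
    public static void main(String[] args) {
        // Illustrative: select WhitespaceAnalyzer by name.
        JsonObject whitespaceConfig = Json.createObjectBuilder()
                .add("type", "WhitespaceAnalyzer")
                .build();

        // Illustrative: select the Dutch stemming analyzer with two stemming fields.
        JsonObject stemmingConfig = Json.createObjectBuilder()
                .add("type", "MerescoDutchStemmingAnalyzer")
                .add("stemmingFields", Json.createArrayBuilder().add("title").add("abstract"))
                .build();

        System.out.println(whitespaceConfig); // {"type":"WhitespaceAnalyzer"}
        System.out.println(stemmingConfig);
    }
}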
 
Example 38
Project: elasticsearch-analysis-german   File: ComboAnalyzerTests.java
@Test
public void testCascadeCombo() throws IOException {
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
            new ComboAnalyzer(TEST_VERSION_CURRENT,
                    new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                    new KeywordAnalyzer()
            ),
            new StandardAnalyzer(TEST_VERSION_CURRENT),
            new KeywordAnalyzer()
    );
    for (int i = 0 ; i < 3 ; i++)
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                // expected terms (each nested sub-analyzer contributes its own tokens)
                new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                // start offsets
                new int[]{ 0,  0,  0,  0,  5,  7,  7, 14, 14, 19, 19},
                // end offsets
                new int[]{ 4,  4, 20, 20,  6, 13, 13, 18, 18, 20, 20},
                // position increments
                new int[]{ 1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  0});
}