Java Code Examples for org.apache.lucene.analysis.core.WhitespaceAnalyzer

The following examples show how to use org.apache.lucene.analysis.core.WhitespaceAnalyzer. They are extracted from open source projects.
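Most of the examples below use one of two constructor forms: newer Lucene releases use the no-argument new WhitespaceAnalyzer(), while older releases required a Version argument such as new WhitespaceAnalyzer(Version.LUCENE_47). As a baseline, here is a minimal, self-contained sketch of the analysis pattern most of these examples build on. It is an illustration for recent Lucene versions, not taken from any of the projects below:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        try (Analyzer analyzer = new WhitespaceAnalyzer();
             TokenStream stream = analyzer.tokenStream("field", "foo bar  baz")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                          // required before incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString()); // prints foo, bar, baz
            }
            stream.end();                            // required after the last token
        }
    }
}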
Example 1
Project: RedisDirectory   File: TestLucene.java
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
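Both RedisDirectory tests call an addDocument(int) helper whose body is not part of this extract. A plausible sketch, inferred from the TermQuery(new Term("key1", "key" + i)) lookups in the test; the field name comes from that query, while the Store choice is an assumption:

    // Hypothetical helper -- the real RedisDirectory version is not shown.
    // Uses org.apache.lucene.document.{Document, Field, StringField}.
    private Document addDocument(int i) {
        Document document = new Document();
        // StringField is indexed un-analyzed, so the exact-match TermQuery in the test finds it
        document.add(new StringField("key1", "key" + i, Field.Store.YES));
        return document;
    }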
 
Example 2
Project: RedisDirectory   File: TestLucene.java
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // FSDirectory.open picks the best implementation for the platform,
    // typically MMapDirectory on 64-bit JVMs -- hence the test name.
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
 
Example 3
Project: elasticsearch_my   File: PercolateQueryBuilderTests.java
public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }

    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));

    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
 
Example 4
Project: elasticsearch_my   File: SmoothingModelTestCase.java
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
 
Example 5
Project: fastcatsearch3   File: BasicAnalysisPlugin.java
@Override
protected void loadAnalyzerFactory(Map<String, AnalyzerInfo> analyzerFactoryMap) {
	// extract entire word
	registerAnalyzer(analyzerFactoryMap, "keyword", "Keyword Analyzer", new DefaultAnalyzerFactory(KeywordAnalyzer.class));
	//lucene StandardAnalyzer
	registerAnalyzer(analyzerFactoryMap, "standard", "Standard Analyzer", new DefaultAnalyzerFactory(StandardAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "ngram", "NGram Analyzer", new DefaultAnalyzerFactory(NGramWordAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "primary", "Primary Word Analyzer", new DefaultAnalyzerFactory(PrimaryWordAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "whitespace", "Whitespace Analyzer", new DefaultAnalyzerFactory(WhitespaceAnalyzer.class));
	
	registerAnalyzer(analyzerFactoryMap, "csv", "Comma separated value Analyzer", new DefaultAnalyzerFactory(CSVAnalyzer.class));

       registerAnalyzer(analyzerFactoryMap, "autocomplete", "Autocomplete Analyzer", new DefaultAnalyzerFactory(AutocompleteAnalyzer.class));
}
 
Example 6
Project: flipper-reverse-image-search   File: LireBuilder.java
/**
 * Index a picture.
 * @param source the raw image bytes
 * @param picture_id the UUID identifying the picture
 * @param conf the Lucene IndexWriterConfig, or null to build a default one
 * @throws IOException if reading the image or writing the index fails
 */
public static void index(byte[] source, UUID picture_id, IndexWriterConfig conf) throws IOException
{
    ByteArrayInputStream in = new ByteArrayInputStream(source);
    BufferedImage image = ImageIO.read(in);

    // Creating a Lucene IndexWriter
    log.debug("Is Lucene configured? " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    luceneIndexer(image, picture_id, FeatureEnumerate.AutoColorCorrelogram.getText(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.CEDD.getText(), DocumentBuilderFactory.getCEDDDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorLayout.getText(), DocumentBuilderFactory.getColorLayoutBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.EdgeHistogram.getText(), DocumentBuilderFactory.getEdgeHistogramBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.ColorHistogram.getText(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), conf);
    luceneIndexer(image, picture_id, FeatureEnumerate.PHOG.getText(), DocumentBuilderFactory.getPHOGDocumentBuilder(), conf);

}
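A hypothetical call site for index(...) above; the file path and UUID are illustrative, and passing null lets the method build its default CREATE_OR_APPEND configuration:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.UUID;

public class IndexOnePicture {
    public static void main(String[] args) throws Exception {
        byte[] imageBytes = Files.readAllBytes(Paths.get("/tmp/picture.jpg")); // illustrative path
        LireBuilder.index(imageBytes, UUID.randomUUID(), null); // null -> default config
    }
}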
 
Example 7
Project: flipper-reverse-image-search   File: LireBuilder.java
private static void deleteFromFeature(UUID pictureId, Term term, String prefix, IndexWriterConfig conf) throws IOException {

    File file = getPath(prefix);

    // Creating a Lucene IndexWriter
    log.debug("Is Lucene configured: " + (conf != null));
    if(conf == null) {
        conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    IndexWriter iw = new IndexWriter(FSDirectory.open(file), conf);

    iw.deleteDocuments(term);

    iw.close();
}
 
Example 8
Project: search   File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  
  // Create PerfRunData
  Config config = new Config(props);
  
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
 
Example 9
Project: search   File: ExpressionAggregationFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

  Document doc = new Document();
  doc.add(new TextField("c", "foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 5L));
  doc.add(new FacetField("A", "B"));
  indexWriter.addDocument(config.build(taxoWriter, doc));

  doc = new Document();
  doc.add(new TextField("c", "foo foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 3L));
  doc.add(new FacetField("A", "C"));
  indexWriter.addDocument(config.build(taxoWriter, doc));
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 10
Project: search   File: RangeFacetsExample.java
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer()));

  // Add documents with a fake timestamp, 1000 sec before
  // "now", 2000 sec before "now", ...:
  for(int i=0;i<100;i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongField("timestamp", then, Field.Store.NO));
    indexWriter.addDocument(doc);
  }

  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
  indexWriter.close();
}
 
Example 11
Project: search   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  
  // Chinese text value: WhitespaceAnalyzer splits only on whitespace, so the unbroken CJK string yields a single token
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 12
Project: search   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 13
Project: search   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer();
  TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer();
    TokenStream ts2 = a2.tokenStream("", v);
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 14
Project: edits   File: RulesIndexGenerator.java
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
    conf.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), conf);
    Document doc = new Document();
    doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String u : rulesSource.uses())
        doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
    writer.addDocument(doc);
    start(aps.iterator());
    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
 
Example 15
Project: tri   File: KeywordFinder.java
public KeywordFinder(File inputFile) throws IOException {
    RAMDirectory ramdir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(ramdir, conf);
    // try-with-resources so the reader is closed even if an exception is thrown
    try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
        while (reader.ready()) {
            String keyword = reader.readLine().toLowerCase().trim();
            if (keyword.length() > 0) {
                Document doc = new Document();
                doc.add(new TextField("keyword", keyword.replace("-", " ").replace("_", " ").replace("\\", " ").replace("/", " "), Field.Store.YES));
                writer.addDocument(doc);
            }
        }
    }
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(ramdir));
}
 
Example 16
Project: tri   File: TriWhitespaceTokenizer.java
@Override
public List<String> getTokens(Reader reader) throws IOException {
    List<String> tokens = new ArrayList<>();
    Analyzer analyzer = new WhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("text", reader);
    tokenStream.reset();
    CharTermAttribute cattr = tokenStream.addAttribute(CharTermAttribute.class);
    while (tokenStream.incrementToken()) {
        String token = cattr.toString();
        tokens.add(token);
    }
    tokenStream.end();
    if (finder != null) {
        return finder.process(tokens);
    } else {
        return tokens;
    }
}
 
Example 17
Project: lire   File: Indexor.java
public void run() {
    // do it ...
    try {
//        IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
            File inputFile = iterator.next();
            if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose) System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 18
Project: lire   File: VisualWordsTest.java
public void testIndexingAndSearchSift() throws IOException {
    // Creating a Lucene IndexWriter
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata\\ferrari"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        iw.addDocument(siftBuilder.createDocument(
                ImageIO.read(imgFile), imgFile.getPath()));
        count++;
        if (count > 100 && count % 500 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
    IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
    SiftFeatureHistogramBuilder sfh = new SiftFeatureHistogramBuilder(ir, 1000, 500);
    sfh.index();
}
 
Example 19
Project: lire   File: MserTest.java
public void testExtendedIndexMSER() throws IOException {
    MSERDocumentBuilder builder = new MSERDocumentBuilder();
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), conf);
    long ms = System.currentTimeMillis();
    int count = 0;
    ArrayList<File> files = FileUtils.getAllImageFiles(new File("D:\\DataSets\\WIPO\\CA\\sample"), true);
    for (Iterator<File> i = files.iterator(); i.hasNext(); ) {
        File imgFile = i.next();
        BufferedImage img = ImageIO.read(imgFile);
        if (Math.max(img.getWidth(), img.getHeight()) < 800) {
            // scale image ...
            img = ImageUtils.scaleImage(img, 800);
        }
        iw.addDocument(builder.createDocument(img, imgFile.getPath()));
        count++;
        if (count > 2 && count % 25 == 0) {
            System.out.println(count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file");
        }

    }
    iw.close();
}
 
Example 20
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(3, terms.size()); // "123 124" analyzes to two tokens; "321" adds the third
}
 
Example 21
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
  when(localParamsMock.get("retries")).thenReturn("2");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
  when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
  when(jedisMock.smembers("simpleKey")).thenReturn(new HashSet<String>(Collections.singletonList("value")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock,
          new RetryingCommandHandler(jedisPoolMock, 1));
  final Query query = redisQParser.parse();
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(1, terms.size());
}
 
Example 22
Project: solr-redis   File: TestRedisQParser.java
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("ignoreScore")).thenReturn("true");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321", "322", "323", "324",
          "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  Query rewrittenQuery = searcher.rewrite(query);
  assertTrue(rewrittenQuery instanceof TermsQuery);
}
 
Example 23
Project: pyramid   File: PhraseCountQueryBuilder.java
protected Query doToQuery(QueryShardContext context) throws IOException {
//    Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        // lower-case the whitespace-separated tokens before building span terms
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example 24
Project: NYBC   File: DocMakerTest.java
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "RAMDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  
  // Create PerfRunData
  Config config = new Config(props);
  
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
 
Example 25
Project: NYBC   File: MultiCategoryListsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter, indexingParams);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");
  
  indexWriter.close();
  taxoWriter.close();
}
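This example and the next both call an add(...) helper that the extract omits. A plausible reconstruction, consistent with the CategoryPath-based facet API these NYBC extracts use, but not the project's verbatim code:

// Hypothetical helper -- uses org.apache.lucene.facet.taxonomy.CategoryPath
// and the FacetFields instance created in the method above.
private void add(IndexWriter indexWriter, FacetFields facetFields, String... categoryPaths) throws IOException {
    Document doc = new Document();
    List<CategoryPath> paths = new ArrayList<>();
    for (String categoryPath : categoryPaths) {
        paths.add(new CategoryPath(categoryPath, '/')); // "Author/Bob" -> Author > Bob
    }
    facetFields.addFields(doc, paths);
    indexWriter.addDocument(doc);
}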
 
Example 26
Project: NYBC   File: SimpleFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new FacetFields(taxoWriter);

  add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
  add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
  add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
  add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 27
Project: NYBC   File: AssociationsFacetsExample.java
/** Build the example index. */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, 
      new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

  // Reused across documents, to add the necessary facet fields
  FacetFields facetFields = new AssociationsFacetFields(taxoWriter);
  
  for (int i = 0; i < CATEGORIES.length; i++) {
    Document doc = new Document();
    CategoryAssociationsContainer associations = new CategoryAssociationsContainer();
    for (int j = 0; j < CATEGORIES[i].length; j++) {
      associations.setAssociation(CATEGORIES[i][j], ASSOCIATIONS[i][j]);
    }
    facetFields.addFields(doc, associations);
    indexWriter.addDocument(doc);
  }
  
  indexWriter.close();
  taxoWriter.close();
}
 
Example 28
Project: NYBC   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
  
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 29
Project: NYBC   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 30
Project: NYBC   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 31
Project: lucure-core   File: RestrictedFieldTest.java
@Test
public void testColumnVisibilityPayload() throws Exception {
    String visibility = "U";
    String value = "value";
    RestrictedField restrictedField = new RestrictedField(new StringField(
      "field", value, Field.Store.NO), new FieldVisibility(visibility));
    try(TokenStream tokenStream = restrictedField.tokenStream(
      new WhitespaceAnalyzer(), null)) {
        CharTermAttribute charTermAttribute = tokenStream
          .getAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = tokenStream
          .getAttribute(PayloadAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            assertEquals(value, new String(charTermAttribute.buffer(), 0, charTermAttribute.length()));
            assertEquals(visibility, new String(payloadAttribute.getPayload().bytes));
        }
    }
}
 
Example 32
Project: incubator-blur   File: Blur024CodecTest.java
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);

  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);

  writer.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));

  reader.close();
}
 
Example 33
Project: incubator-blur   File: SuperParserTest.java
@Test
public void test5() throws ParseException, IOException {
  parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
      ScoreType.SUPER, new Term("_primedoc_"));
  Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>");

  BooleanQuery booleanQuery1 = new BooleanQuery();
  booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
  booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD);
  booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD);
  // std analyzer took the "<" out
  booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD);

  BooleanQuery booleanQuery2 = new BooleanQuery();
  booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD);
  booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD);

  SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_"));
  SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_"));

  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(superQuery1, Occur.SHOULD);
  booleanQuery.add(superQuery2, Occur.MUST_NOT);

  assertQuery(booleanQuery, query);
}
 
Example 34
Project: search-core   File: SpellingQueryConverterTest.java
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
  
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());

  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
 
Example 35
Project: search-core   File: SpellingQueryConverterTest.java
@Test
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));

  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());

  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
 
Example 36
Project: search-core   File: HighlighterTest.java
@Test
public void testTermOffsetsTokenStream() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
  TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
  tokenStream.reset();

  TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
      tokenStream);
  for( String v : multivalued ){
    TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
    Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    TokenStream ts2 = a2.tokenStream("", new StringReader(v));
    ts2.reset();

    while (ts1.incrementToken()) {
      assertTrue(ts2.incrementToken());
      assertEquals(ts1, ts2);
    }
    assertFalse(ts2.incrementToken());
  }
}
 
Example 37
Project: meresco-lucene   File: LuceneSettings.java
private static Analyzer getAnalyzer(JsonObject analyzer) {
    switch (analyzer.getString("type")) {
    case "MerescoDutchStemmingAnalyzer":
        JsonArray jsonFields = analyzer.getJsonArray("stemmingFields");
        String[] fields = new String[jsonFields.size()];
        for (int i = 0; i < jsonFields.size(); i++) {
            fields[i] = jsonFields.getString(i);
        }
        return new MerescoDutchStemmingAnalyzer(fields);
    case "MerescoStandardAnalyzer":
        return new MerescoStandardAnalyzer();
    case "WhitespaceAnalyzer":
        return new WhitespaceAnalyzer();
    }
    return null;
}
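A sketch of the configuration objects this switch expects, built with javax.json. The "type" and "stemmingFields" keys come from the code above; the field values are illustrative:

import javax.json.Json;
import javax.json.JsonObject;

public class AnalyzerConfigDemo {
    public static void main(String[] args) {
        // Illustrative: select WhitespaceAnalyzer by name.
        JsonObject whitespaceConfig = Json.createObjectBuilder()
                .add("type", "WhitespaceAnalyzer")
                .build();

        // Illustrative: select the Dutch stemming analyzer with two stemming fields.
        JsonObject stemmingConfig = Json.createObjectBuilder()
                .add("type", "MerescoDutchStemmingAnalyzer")
                .add("stemmingFields", Json.createArrayBuilder().add("title").add("abstract"))
                .build();

        System.out.println(whitespaceConfig); // {"type":"WhitespaceAnalyzer"}
        System.out.println(stemmingConfig);
    }
}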
 
Example 38
Project: elasticsearch-analysis-german   File: ComboAnalyzerTests.java
@Test
public void testCascadeCombo() throws IOException {
    ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
            new ComboAnalyzer(TEST_VERSION_CURRENT,
                    new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                    new KeywordAnalyzer()
            ),
            new StandardAnalyzer(TEST_VERSION_CURRENT),
            new KeywordAnalyzer()
    );
    for (int i = 0 ; i < 3 ; i++)
        assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                // expected terms (each nested sub-analyzer contributes its own tokens)
                new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                // start offsets
                new int[]{ 0,  0,  0,  0,  5,  7,  7, 14, 14, 19, 19},
                // end offsets
                new int[]{ 4,  4, 20, 20,  6, 13, 13, 18, 18, 20, 20},
                // position increments
                new int[]{ 1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  0});
}