org.elasticsearch.analysis.common.CommonAnalysisPlugin Java Examples

The following examples show how to use org.elasticsearch.analysis.common.CommonAnalysisPlugin. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EsEmbeddedServer.java    From datashare with GNU Affero General Public License v3.0 6 votes vote down vote up
public EsEmbeddedServer(String clusterName, String homePath, String dataPath, String httpPort) {
    Settings settings = Settings.builder()
            .put("transport.type", "netty4")
            .put("http.type", "netty4")
            .put("path.home", homePath)
            .put("path.data", dataPath)
            .put("http.port", httpPort)
            .put("cluster.name", clusterName).build();

    node = new PluginConfigurableNode(settings, asList(
            Netty4Plugin.class,
            ParentJoinPlugin.class,
            CommonAnalysisPlugin.class,
            PainlessPlugin.class,
            ReindexPlugin.class
    ));
}
 
Example #2
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Runs a German phrase containing "C++" through the analyzer registered as
 * "default" (settings loaded from {@code unstemmed.json}) and compares the
 * resulting tokens with the expected list.
 */
public void testSix() throws Exception {
    String configName = "unstemmed.json";
    Settings.Builder builder = Settings.builder();
    builder.loadFromStream(configName, getClass().getResourceAsStream(configName), true);
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "Programmieren in C++ für Einsteiger";
    String[] expectedTokens = {
            "programmieren", "programmi",
            "c++",
            "einsteiger", "einsteig"
    };
    StringReader reader = new StringReader(input);
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, reader), expectedTokens);
}
 
Example #3
Source File: DecompoundTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Analyzes a German sentence with the "with_subwords_only" analyzer from
 * {@code keywords_analysis.json} and compares the tokens with the expected list.
 */
public void testWithSubwordsOnly() throws Exception {
    String configName = "keywords_analysis.json";
    Settings analysisSettings = Settings.builder()
            .loadFromStream(configName, getClass().getResourceAsStream(configName), true)
            .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());

    // The analyzer must be present before its token stream is inspected.
    Analyzer subwordAnalyzer = testAnalysis.indexAnalyzers.get("with_subwords_only");
    assertNotNull(subwordAnalyzer);

    String input = "Das ist ein Schlüsselwort, ein Bindestrichwort";
    String[] expectedTokens = {
            "Da", "ist", "ein",
            "Schlüssel", "wort",
            "ein",
            "Bindestrich", "wort"
    };
    assertTokenStreamContents(subwordAnalyzer.tokenStream("test-field", input), expectedTokens);
}
 
Example #4
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Analyzes a personal name containing an umlaut with the "unstemmed" analyzer
 * from {@code unstemmed.json} and compares the tokens with the expected list.
 */
public void testFour() throws Exception {
    String configName = "unstemmed.json";
    Settings.Builder builder = Settings.builder();
    builder.loadFromStream(configName, getClass().getResourceAsStream(configName), true);
    Settings analysisSettings = builder.build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());
    Analyzer unstemmed = testAnalysis.indexAnalyzers.get("unstemmed");

    String input = "Prante, Jörg";
    String[] expectedTokens = {"prante", "jorg"};
    StringReader reader = new StringReader(input);
    assertTokenStreamContents(unstemmed.tokenStream("test", reader), expectedTokens);
}
 
Example #5
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Analyzes a German sentence with hyphenated and apostrophized words using the
 * "default" analyzer from {@code unstemmed.json} and compares the tokens with
 * the expected list.
 */
public void testTwo() throws Exception {
    String configName = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
            .loadFromStream(configName, getClass().getResourceAsStream(configName), true)
            .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
    String[] expectedTokens = {
            "wird's",
            "elasticsearch-buch", "elasticsearchbuch",
            "erscheint",
            "o'reilly-verlag", "o'reillyverlag"
    };
    assertTokenStreamContents(
            defaultAnalyzer.tokenStream(null, new StringReader(input)),
            expectedTokens);
}
 
Example #6
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Analyzes a hyphenated number string with the "default" analyzer from
 * {@code unstemmed.json}; the expected tokens are the hyphenated form and the
 * same digits without hyphens.
 */
public void testThree() throws Exception {
    String configName = "unstemmed.json";
    Settings.Builder builder = Settings.builder();
    builder.loadFromStream(configName, getClass().getResourceAsStream(configName), true);
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "978-1-4493-5854-9";
    String[] expectedTokens = {"978-1-4493-5854-9", "9781449358549"};
    StringReader reader = new StringReader(input);
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, reader), expectedTokens);
}
 
Example #7
Source File: LocalNode.java    From core-ng-project with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the fixed list of plugin classes.
 *
 * @return an unmodifiable list of plugin classes, in declaration order
 */
private static List<Class<? extends Plugin>> plugins() {
    List<Class<? extends Plugin>> required = List.of(
            ReindexPlugin.class,
            Netty4Plugin.class,
            // needed for the scaled_float type
            MapperExtrasPlugin.class,
            PainlessPlugin.class,
            // needed for stemmer analysis
            CommonAnalysisPlugin.class);
    return required;
}
 
Example #8
Source File: CreateAnalyzerAnalyzerTest.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the SQL executor with the common analysis plugin and default tables
 * enabled, then derives the planner context from the current cluster state.
 */
@Before
public void prepare() throws IOException {
    var executorBuilder = SQLExecutor.builder(
        clusterService,
        1,
        Randomness.get(),
        List.of(new CommonAnalysisPlugin()));
    e = executorBuilder.enableDefaultTables().build();
    plannerContext = e.getPlannerContext(clusterService.state());
}
 
Example #9
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Analyzes the name "Schroeder" with the "unstemmed" analyzer from
 * {@code unstemmed.json} and expects the single token "schroder".
 */
public void testFive() throws Exception {
    String configName = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
            .loadFromStream(configName, getClass().getResourceAsStream(configName), true)
            .build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());
    Analyzer unstemmed = testAnalysis.indexAnalyzers.get("unstemmed");

    String input = "Schroeder";
    String[] expectedTokens = {"schroder"};
    assertTokenStreamContents(
            unstemmed.tokenStream("test", new StringReader(input)),
            expectedTokens);
}
 
Example #10
Source File: UnstemmedGermanNormalizationTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Analyzes a German sentence containing umlauts, an accented word, a hyphenated
 * compound and an ISBN with the "default" analyzer from {@code unstemmed.json},
 * and compares the tokens with the expected list.
 */
public void testOne() throws Exception {
    String configName = "unstemmed.json";
    Settings.Builder builder = Settings.builder();
    builder.loadFromStream(configName, getClass().getResourceAsStream(configName), true);
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3";
    String[] expectedTokens = {
            "tag",
            "koln",
            "cafe", "caf",
            "strassenecke", "strasseneck",
            "standard-nummer", "standardnummer", "standard-numm", "standardnumm",
            "isbn",
            "1-4493-5854-3", "1449358543",
            "978-1-4493-5854-9", "9781449358549"
    };
    StringReader reader = new StringReader(input);
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, reader), expectedTokens);
}
 
Example #11
Source File: FstDecompoundTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Defines a custom analyzer (standard tokenizer, then the {@code fst_decompound}
 * and {@code unique} filters) and checks its output for a German sentence with
 * several compound words.
 */
public void testDecompound() throws Exception {
    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.analyzer.myanalyzer.type", "custom");
    builder.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    builder.put("index.analysis.analyzer.myanalyzer.filter.0", "fst_decompound");
    builder.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    String[] expectedTokens = {
            "Die",
            "Jahresfeier", "jahres", "feier",
            "der",
            "Rechtsanwaltskanzleien", "rechts", "anwalts", "kanzleien",
            "auf",
            "dem",
            "Donaudampfschiff", "donau", "dampf", "schiff",
            "hat",
            "viel",
            "Ökosteuer", "ökos", "teuer",
            "gekostet"
    };
    assertAnalyzesTo(customAnalyzer, input, expectedTokens);
}
 
Example #12
Source File: DecompoundTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Feeds a German sentence through the "standard" tokenizer and the "decomp"
 * token filter configured in {@code decompound_analysis.json}, then compares
 * the filtered tokens with the expected list.
 */
public void test() throws Exception {
    String configName = "decompound_analysis.json";
    Settings analysisSettings = Settings.builder()
            .loadFromStream(configName, getClass().getResourceAsStream(configName), true)
            .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    TokenFilterFactory decompFilter = testAnalysis.tokenFilter.get("decomp");

    String input = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    String[] expectedTokens = {
            "Die", "Die",
            "Jahresfeier", "Jahr", "feier",
            "der", "der",
            "Rechtsanwaltskanzleien", "Recht", "anwalt", "kanzlei",
            "auf", "auf",
            "dem", "dem",
            "Donaudampfschiff", "Donau", "dampf", "schiff",
            "hat", "hat",
            "viel", "viel",
            "Ökosteuer", "Ökosteuer",
            "gekostet", "gekosten"
    };

    // The tokenizer needs its reader set before the filter wraps it.
    Tokenizer standardTokenizer = testAnalysis.tokenizer.get("standard").create();
    standardTokenizer.setReader(new StringReader(input));
    assertTokenStreamContents(decompFilter.create(standardTokenizer), expectedTokens);
}
 
Example #13
Source File: LemmatizeSearchTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Supplies the plugin classes that should be added to the node.
 *
 * @return {@code BundlePlugin} followed by {@code CommonAnalysisPlugin}
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> nodePluginClasses =
            Arrays.asList(BundlePlugin.class, CommonAnalysisPlugin.class);
    return nodePluginClasses;
}
 
Example #14
Source File: LemmatizeTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Defines a custom analyzer (standard tokenizer, then the {@code lemmatize} and
 * {@code unique} filters) and checks its output for an English sentence.
 */
public void testLemmatizer() throws Exception {
    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.analyzer.myanalyzer.type", "custom");
    builder.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    builder.put("index.analysis.analyzer.myanalyzer.filter.0", "lemmatize");
    builder.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "While these texts were previously only available to users of academic libraries " +
            "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
            "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
            "While", "this", "text", "be", "previously", "only", "available",
            "to", "user", "of", "academic", "library",
            "participate", "in", "the", "partnership",
            "at", "end", "first", "phase",
            "EEBO", "TCP", "current", "25,000",
            "have", "now", "release", "into", "public", "domain"
    };
    assertAnalyzesTo(customAnalyzer, input, expectedTokens);
}
 
Example #15
Source File: LemmatizeTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Configures a {@code lemmatize} filter with {@code lemma_only} set to "false",
 * uses it (followed by {@code unique}) in a custom analyzer, and checks the
 * analyzer output for an English sentence against the expected token list.
 */
public void testFull() throws Exception {
    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.filter.myfilter.type", "lemmatize");
    builder.put("index.analysis.filter.myfilter.lemma_only", "false");
    builder.put("index.analysis.analyzer.myanalyzer.type", "custom");
    builder.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    builder.put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter");
    builder.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "While these texts were previously only available to users of academic libraries " +
            "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
            "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
            "While",
            "these", "this",
            "texts", "text",
            "were", "be",
            "previously", "only", "available", "to",
            "users", "user",
            "of", "academic",
            "libraries", "library",
            "participating", "participate",
            "in", "the", "partnership", "at", "end", "first", "phase",
            "EEBO", "TCP", "current", "25,000",
            "have", "now", "been",
            "released", "release",
            "into", "public", "domain"
    };
    assertAnalyzesTo(customAnalyzer, input, expectedTokens);
}
 
Example #16
Source File: LemmatizeTokenFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Configures a {@code lemmatize} filter with language "de", uses it in a custom
 * analyzer with the standard tokenizer, and checks the analyzer output for a
 * German text against the expected token list.
 */
public void testGermanLemmatizer() throws Exception {
    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.filter.myfilter.type", "lemmatize");
    builder.put("index.analysis.filter.myfilter.language", "de");
    builder.put("index.analysis.analyzer.myanalyzer.type", "custom");
    builder.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    builder.put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter");
    Settings analysisSettings = builder.build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "Die Würde des Menschen ist unantastbar. " +
            "Sie zu achten und zu schützen ist Verpflichtung aller staatlichen Gewalt. " +
            "Das Deutsche Volk bekennt sich darum zu unverletzlichen und unveräußerlichen Menschenrechten " +
            "als Grundlage jeder menschlichen Gemeinschaft, des Friedens und der Gerechtigkeit in der Welt.";
    // Entries marked "?" were flagged as questionable by the original author.
    String[] expectedTokens = {
            "Die", "Würde", "der", "Mensch",
            "mein",  // ? (expected at the position of "ist")
            "unantastbar",
            "Sie", "zu", "achten", "und", "zu", "schützen",
            "mein",  // ? (expected at the position of "ist")
            "Verpflichtung", "all", "staatlich", "Gewalt",
            "Das", "deutsch", "Volk", "bekennen", "sich", "darum", "zu",
            "unverletzlichen", // ? (same form as in the input)
            "und",
            "unveräußerlichen", // ? (same form as in the input)
            "Menschenrechten", // ? (same form as in the input)
            "als", "Grundlage", "jed", "menschlich", "Gemeinschaft",
            "der", "Friede", "und", "der", "Gerechtigkeit", "in", "der", "Welt"
    };
    assertAnalyzesTo(customAnalyzer, input, expectedTokens);
}
 
Example #17
Source File: GNDReferenceMappingTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Supplies the plugin classes for this test.
 *
 * @return {@code BundlePlugin} followed by {@code CommonAnalysisPlugin}
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> pluginClasses =
            Arrays.asList(BundlePlugin.class, CommonAnalysisPlugin.class);
    return pluginClasses;
}
 
Example #18
Source File: DecompoundQueryTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Supplies the plugin classes for the test nodes.
 *
 * @return {@code CommonAnalysisPlugin}, {@code Netty4Plugin} and {@code BundlePlugin}, in that order
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> pluginClasses = Arrays.asList(
            CommonAnalysisPlugin.class,
            Netty4Plugin.class,
            BundlePlugin.class);
    return pluginClasses;
}
 
Example #19
Source File: FulltextAnalyzerResolverTest.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the parent's plugin classes and appends {@code CommonAnalysisPlugin}.
 *
 * @return a new collection holding the inherited plugins plus the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}
 
Example #20
Source File: FulltextITest.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the inherited plugin classes with {@code CommonAnalysisPlugin} added at the end.
 *
 * @return a fresh copy of the parent's plugins, extended by the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> extended = new ArrayList<>(super.nodePlugins());
    extended.add(CommonAnalysisPlugin.class);
    return extended;
}
 
Example #21
Source File: CommonAnalyzerITest.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Builds on the parent's plugin classes by adding {@code CommonAnalysisPlugin}.
 *
 * @return a new collection containing the inherited plugins and the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> allPlugins = new ArrayList<>(super.nodePlugins());
    allPlugins.add(CommonAnalysisPlugin.class);
    return allPlugins;
}