/* * Copyright 2013 Jun Ohtani * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package info.johtani.elasticsearch.indices.extended.analyze; import info.johtani.elasticsearch.action.admin.indices.extended.analyze.ExtendedAnalyzeAction; import info.johtani.elasticsearch.action.admin.indices.extended.analyze.ExtendedAnalyzeRequest; import info.johtani.elasticsearch.action.admin.indices.extended.analyze.ExtendedAnalyzeRequestBuilder; import info.johtani.elasticsearch.action.admin.indices.extended.analyze.ExtendedAnalyzeResponse; import info.johtani.elasticsearch.plugin.extended.analyze.ExtendedAnalyzePlugin; import info.johtani.elasticsearch.rest.action.admin.indices.analyze.RestExtendedAnalyzeAction; import org.elasticsearch.client.Client; import org.elasticsearch.client.IndicesAdminClient; import org.elasticsearch.common.Priority; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.node.Node; import org.hamcrest.core.IsNull; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.util.Map; import static org.elasticsearch.common.settings.Settings.*; import static org.elasticsearch.node.NodeBuilder.*; import static org.hamcrest.MatcherAssert.*; import static org.hamcrest.Matchers.*; import static org.junit.Assert.fail; /** * TODO : currently, simple test only. */ public class ExtendedAnalyzeActionTests { private Node node; @Before public void setupServer() { node = nodeBuilder().settings(settingsBuilder() .put("path.home", "target") .put("path.data", "target/data") .put("cluster.name", "test-cluster-extended-analyze") .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F") .put("index.analysis.analyzer.test_analyzer.type", "custom") .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100") .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard") .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping") .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball") .put("plugin.types", ExtendedAnalyzePlugin.class.getName()) ).node(); } @After public void closeServer() { node.close(); } @Test public void analyzeUsingAnalyzerWithNoIndex() throws Exception { ExtendedAnalyzeResponse analyzeResponse = prepareAnalyze(node.client().admin().indices(), "THIS IS A TEST").setAnalyzer("simple").execute().actionGet(); assertThat(analyzeResponse.tokenizer(), IsNull.nullValue()); assertThat(analyzeResponse.tokenfilters(), IsNull.nullValue()); assertThat(analyzeResponse.charfilters(), IsNull.nullValue()); assertThat(analyzeResponse.analyzer().getName(), equalTo("simple")); assertThat(analyzeResponse.analyzer().getTokens().size(), equalTo(4)); } @Test public void analyzeUsingCustomAnalyzerWithNoIndex() throws Exception { ExtendedAnalyzeResponse analyzeResponse = prepareAnalyze(node.client().admin().indices(), "THIS IS A TEST").setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").execute().actionGet(); assertThat(analyzeResponse.analyzer(), IsNull.nullValue()); //charfilters // global charfilter is not change text. assertThat(analyzeResponse.charfilters().size(), equalTo(1)); assertThat(analyzeResponse.charfilters().get(0).getName(), equalTo("html_strip")); assertThat(analyzeResponse.charfilters().get(0).getTexts().size(), equalTo(1)); assertThat(analyzeResponse.charfilters().get(0).getTexts().get(0), equalTo("THIS IS A TEST")); //tokenizer assertThat(analyzeResponse.tokenizer().getName(), equalTo("keyword")); assertThat(analyzeResponse.tokenizer().getTokens().size(), equalTo(1)); assertThat(analyzeResponse.tokenizer().getTokens().get(0).getTerm(), equalTo("THIS IS A TEST")); //tokenfilters assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("lowercase")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(0).getTerm(), equalTo("this is a test")); //check other attributes analyzeResponse = prepareAnalyze(node.client().admin().indices(), "This is troubled").setTokenizer("standard").setTokenFilters("snowball").execute().actionGet(); assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("snowball")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(3)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getTerm(), equalTo("troubl")); String[] expectedAttributesKey = { "org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute#bytes", "org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength", "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword"}; assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().size(), equalTo(expectedAttributesKey.length)); Map<String, Object> extendedAttribute = null; for (int i = 0; i < expectedAttributesKey.length; i++) { String attClassName = expectedAttributesKey[i].substring(0, expectedAttributesKey[i].indexOf("#")); String key = expectedAttributesKey[i].substring(expectedAttributesKey[i].indexOf("#") + 1); extendedAttribute = analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().get(attClassName); assertThat(extendedAttribute, notNullValue()); assertThat(extendedAttribute.size(), equalTo(1)); assertThat(extendedAttribute.containsKey(key), equalTo(true)); } } @Test public void analyzeSpecifyAttributes() throws Exception { ExtendedAnalyzeResponse analyzeResponse = prepareAnalyze(node.client().admin().indices(), "This is troubled") .setTokenizer("standard").setTokenFilters("snowball").setAttributes("KeywordAttribute").execute().actionGet(); assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("snowball")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(3)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getTerm(), equalTo("troubl")); String[] expectedAttributesKey = { "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword"}; assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().size(), equalTo(expectedAttributesKey.length)); Map<String, Object> extendedAttribute = null; for (int i = 0; i < expectedAttributesKey.length; i++) { String attClassName = expectedAttributesKey[i].substring(0, expectedAttributesKey[i].indexOf("#")); String key = expectedAttributesKey[i].substring(expectedAttributesKey[i].indexOf("#") + 1); extendedAttribute = analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().get(attClassName); assertThat(extendedAttribute, notNullValue()); assertThat(extendedAttribute.size(), equalTo(1)); assertThat(extendedAttribute.containsKey(key), equalTo(true)); } } private ExtendedAnalyzeRequestBuilder prepareAnalyzeNoText(IndicesAdminClient client, String index) { return new ExtendedAnalyzeRequestBuilder(client, ExtendedAnalyzeAction.INSTANCE, index); } private ExtendedAnalyzeRequestBuilder prepareAnalyze(IndicesAdminClient client, String text) { return new ExtendedAnalyzeRequestBuilder(client, ExtendedAnalyzeAction.INSTANCE, null, text); } private ExtendedAnalyzeRequestBuilder prepareAnalyze(IndicesAdminClient client, String index, String text) { return new ExtendedAnalyzeRequestBuilder(client, ExtendedAnalyzeAction.INSTANCE, index, text); } private Client client() { return node.client(); } @Test public void simpleAnalyzerTests() throws Exception { try { client().admin().indices().prepareDelete("test").execute().actionGet(); } catch (Exception e) { // ignore } client().admin().indices().prepareCreate("test").execute().actionGet(); client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); for (int i = 0; i < 10; i++) { ExtendedAnalyzeResponse analyzeResponse = prepareAnalyze(client().admin().indices(), "test", "THIS IS A PHISH").setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").execute().actionGet(); assertThat(analyzeResponse.analyzer(), IsNull.nullValue()); //charfilters // global charfilter is not change text. assertThat(analyzeResponse.charfilters().size(), equalTo(1)); assertThat(analyzeResponse.charfilters().get(0).getName(), equalTo("my_mapping")); assertThat(analyzeResponse.charfilters().get(0).getTexts().size(), equalTo(1)); assertThat(analyzeResponse.charfilters().get(0).getTexts().get(0), equalTo("THIS IS A FISH")); //tokenizer assertThat(analyzeResponse.tokenizer().getName(), equalTo("keyword")); assertThat(analyzeResponse.tokenizer().getTokens().size(), equalTo(1)); assertThat(analyzeResponse.tokenizer().getTokens().get(0).getTerm(), equalTo("THIS IS A FISH")); assertThat(analyzeResponse.tokenizer().getTokens().get(0).getStartOffset(), equalTo(0)); assertThat(analyzeResponse.tokenizer().getTokens().get(0).getEndOffset(), equalTo(15)); //tokenfilters assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("lowercase")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(0).getTerm(), equalTo("this is a fish")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(0).getPosition(), equalTo(0)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(0).getStartOffset(), equalTo(0)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(0).getEndOffset(), equalTo(15)); } } @Test public void analyzeSpecifyAttributesWithShortName() throws Exception { ExtendedAnalyzeResponse analyzeResponse = prepareAnalyze(node.client().admin().indices(), "This is troubled") .setTokenizer("standard").setTokenFilters("snowball").setAttributes("KeywordAttribute").setShortAttributeName(true).execute().actionGet(); assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("snowball")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(3)); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getTerm(), equalTo("troubl")); String[] expectedAttributesKey = { "KeywordAttribute#keyword"}; assertThat(analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().size(), equalTo(expectedAttributesKey.length)); Map<String, Object> extendedAttribute = null; for (int i = 0; i < expectedAttributesKey.length; i++) { String attClassName = expectedAttributesKey[i].substring(0, expectedAttributesKey[i].indexOf("#")); String key = expectedAttributesKey[i].substring(expectedAttributesKey[i].indexOf("#") + 1); extendedAttribute = analyzeResponse.tokenfilters().get(0).getTokens().get(2).getExtendedAttributes().get(attClassName); assertThat(extendedAttribute, notNullValue()); assertThat(extendedAttribute.size(), equalTo(1)); assertThat(extendedAttribute.containsKey(key), equalTo(true)); } } @Test public void testParseXContentForExtendedAnalyzeReuqest() throws Exception { BytesReference content = XContentFactory.jsonBuilder() .startObject() .field("text", "THIS IS A TEST") .field("tokenizer", "keyword") .array("filters", "lowercase") .endObject().bytes(); ExtendedAnalyzeRequest analyzeRequest = new ExtendedAnalyzeRequest("for test"); RestExtendedAnalyzeAction.buildFromContent(content, analyzeRequest); assertThat(analyzeRequest.text()[0], equalTo("THIS IS A TEST")); assertThat(analyzeRequest.tokenizer(), equalTo("keyword")); assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"})); } @Test public void testParseXContentForExtendedAnalyzeRequestWithInvalidJsonThrowsException() throws Exception { ExtendedAnalyzeRequest analyzeRequest = new ExtendedAnalyzeRequest("for test"); BytesReference invalidContent = XContentFactory.jsonBuilder().startObject().value("invalid_json").endObject().bytes(); try { RestExtendedAnalyzeAction.buildFromContent(invalidContent, analyzeRequest); fail("shouldn't get here"); } catch (Exception e) { assertThat(e, instanceOf(IllegalArgumentException.class)); assertThat(e.getMessage(), equalTo("Failed to parse request body")); } } @Test public void testParseXContentForExtendedAnalyzeRequestWithUnknownParamThrowsException() throws Exception { ExtendedAnalyzeRequest analyzeRequest = new ExtendedAnalyzeRequest("for test"); BytesReference invalidContent =XContentFactory.jsonBuilder() .startObject() .field("text", "THIS IS A TEST") .field("unknown", "keyword") .endObject().bytes(); try { RestExtendedAnalyzeAction.buildFromContent(invalidContent, analyzeRequest); fail("shouldn't get here"); } catch (Exception e) { assertThat(e, instanceOf(IllegalArgumentException.class)); assertThat(e.getMessage(), startsWith("Unknown parameter [unknown]")); } } @Test public void analyzeWithMultiValues() throws Exception { try { client().admin().indices().prepareDelete("test2").execute().actionGet(); } catch (Exception e) { // ignore } //only analyzer = client().admin().indices().prepareCreate("test2").get(); client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); client().admin().indices().preparePutMapping("test2") .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; ExtendedAnalyzeResponse analyzeResponse = prepareAnalyzeNoText(node.client().admin().indices(), "test2") .setField("simple").setShortAttributeName(true).setText(texts).execute().get(); assertThat(analyzeResponse.analyzer().getName(), equalTo("simple")); assertThat(analyzeResponse.analyzer().getTokens().size(), equalTo(7)); ExtendedAnalyzeResponse.ExtendedAnalyzeToken token = analyzeResponse.analyzer().getTokens().get(3); assertThat(token.getTerm(), equalTo("test")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(14)); token = analyzeResponse.analyzer().getTokens().get(5); assertThat(token.getTerm(), equalTo("second")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(19)); assertThat(token.getEndOffset(), equalTo(25)); } @Test public void analyzeWithMultiValuesWithCustomAnalyzer() throws Exception { try { client().admin().indices().prepareDelete("test").execute().actionGet(); } catch (Exception e) { // ignore } client().admin().indices().prepareCreate("test").execute().actionGet(); client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); //only analyzer = String[] texts = new String[]{"this is a PHISH", "the troubled text"}; ExtendedAnalyzeResponse analyzeResponse = prepareAnalyzeNoText(node.client().admin().indices(), "test") .setAnalyzer("test_analyzer").setShortAttributeName(true).setText(texts).execute().get(); // charfilter assertThat(analyzeResponse.charfilters().size(), equalTo(1)); assertThat(analyzeResponse.charfilters().get(0).getName(), equalTo("my_mapping")); assertThat(analyzeResponse.charfilters().get(0).getTexts().size(), equalTo(2)); assertThat(analyzeResponse.charfilters().get(0).getTexts().get(0), equalTo("this is a FISH")); assertThat(analyzeResponse.charfilters().get(0).getTexts().get(1), equalTo("the troubled text")); // tokenizer assertThat(analyzeResponse.tokenizer().getName(), equalTo("standard")); assertThat(analyzeResponse.tokenizer().getTokens().size(), equalTo(7)); ExtendedAnalyzeResponse.ExtendedAnalyzeToken token = analyzeResponse.tokenizer().getTokens().get(3); assertThat(token.getTerm(), equalTo("FISH")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(15)); token = analyzeResponse.tokenizer().getTokens().get(5); assertThat(token.getTerm(), equalTo("troubled")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(20)); assertThat(token.getEndOffset(), equalTo(28)); // tokenfilter assertThat(analyzeResponse.tokenfilters().size(), equalTo(1)); assertThat(analyzeResponse.tokenfilters().get(0).getName(), equalTo("snowball")); assertThat(analyzeResponse.tokenfilters().get(0).getTokens().size(), equalTo(7)); token = analyzeResponse.tokenfilters().get(0).getTokens().get(3); assertThat(token.getTerm(), equalTo("FISH")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(15)); token = analyzeResponse.tokenfilters().get(0).getTokens().get(5); assertThat(token.getTerm(), equalTo("troubl")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(20)); assertThat(token.getEndOffset(), equalTo(28)); } }