Java Code Examples for org.apache.solr.schema.FieldType#getIndexAnalyzer()

The following examples show how to use org.apache.solr.schema.FieldType#getIndexAnalyzer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: LanguagePrefixedTokenStream.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Resolves the {@link Analyzer} to use for the given language.
 *
 * Field types are looked up in the index schema in the order below, and the first one that
 * is not null is used:
 *
 * <ol>
 *     <li>the highlighting field type for the locale (e.g. highlighted_text_en)</li>
 *     <li>the localised text field type for the locale (e.g. text_en)</li>
 *     <li>the fallback text field type (text general field)</li>
 * </ol>
 *
 * The query analyzer of that field type is returned when the current mode is QUERY; in every
 * other mode the index analyzer is returned.
 *
 * @param language the language code.
 * @return the {@link Analyzer} of the resolved field type for the current mode.
 */
Analyzer analyzer(String language) {
    // Try the most specific field type first and widen step by step.
    FieldType resolved = indexSchema.getFieldTypeByName(highlightingFieldTypeName(language));
    if (resolved == null) {
        resolved = indexSchema.getFieldTypeByName(localisedFieldTypeName(language));
    }
    if (resolved == null) {
        resolved = indexSchema.getFieldTypeByName(FALLBACK_TEXT_FIELD_TYPE_NAME);
    }

    switch (mode)
    {
        case QUERY:
            return resolved.getQueryAnalyzer();
        default:
            // INDEX and any other mode share the index-time analyzer.
            return resolved.getIndexAnalyzer();
    }
}
 
Example 2
Source File: SolrQueryParserBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the {@link ReversedWildcardFilterFactory} found in the index analysis chain of the
 * given field type, or {@code null} when there is none.
 *
 * Results (including {@code null}) are remembered per field type in {@code leadingWildcards},
 * which is created on first use.
 *
 * @param fieldType the field type whose index analyzer is inspected
 * @return the first matching factory in the chain, or {@code null}
 */
protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
  if (leadingWildcards == null) {
    leadingWildcards = new HashMap<>();
  }

  ReversedWildcardFilterFactory cached = leadingWildcards.get(fieldType);
  // A null value may be a cached "not found", hence the extra containsKey check.
  if (cached != null || leadingWildcards.containsKey(fieldType)) {
    return cached;
  }

  ReversedWildcardFilterFactory found = null;
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (indexAnalyzer instanceof TokenizerChain) {
    // Only a TokenizerChain exposes its filter factories for inspection.
    TokenFilterFactory[] filters = ((TokenizerChain) indexAnalyzer).getTokenFilterFactories();
    for (int i = 0; i < filters.length && found == null; i++) {
      if (filters[i] instanceof ReversedWildcardFilterFactory) {
        found = (ReversedWildcardFilterFactory) filters[i];
      }
    }
  }

  leadingWildcards.put(fieldType, found);
  return found;
}
 
Example 3
Source File: PayloadUtils.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Determines the payload encoder name used by the index analysis chain of a field type.
 *
 * The filter factories of the chain are scanned in order and the first payload filter decides:
 * a {@code DelimitedPayloadTokenFilterFactory} yields its configured encoder argument, a
 * {@code NumericPayloadTokenFilterFactory} yields {@code "float"}.
 *
 * @param fieldType the field type whose index analyzer is inspected
 * @return the encoder name, or {@code null} when the analyzer is not a {@code TokenizerChain}
 *         or contains neither payload filter
 */
public static String getPayloadEncoder(FieldType fieldType) {
  // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return null;
  }

  // examine the indexing analysis chain for DelimitedPayloadTokenFilterFactory or NumericPayloadTokenFilterFactory
  for (TokenFilterFactory candidate : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (candidate instanceof DelimitedPayloadTokenFilterFactory) {
      return candidate.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR);
    }
    if (candidate instanceof NumericPayloadTokenFilterFactory) {
      // encodes using `PayloadHelper.encodeFloat(payload)`
      return "float";
    }
  }

  return null;
}
 
Example 4
Source File: FreeTextLookupFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link FreeTextSuggester} from the given configuration.
 *
 * The {@code QUERY_ANALYZER} parameter is mandatory and must name a field type defined in the
 * latest schema of the core; its index and query analyzers are handed to the suggester.
 * {@code NGRAMS} and {@code SEPARATOR} are optional and fall back to the suggester defaults.
 *
 * @param params the lookup configuration
 * @param core the core whose latest schema is used to resolve the field type
 * @return the configured suggester
 * @throws IllegalArgumentException if {@code QUERY_ANALYZER} is missing, names an unknown
 *         field type, or {@code SEPARATOR} is present but empty
 * @throws NumberFormatException if {@code NGRAMS} is present but not a valid integer
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }

  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();

  Object gramsParam = params.get(NGRAMS);
  int grams = (gramsParam != null)
      ? Integer.parseInt(gramsParam.toString())
      : FreeTextSuggester.DEFAULT_GRAMS;

  byte separator = FreeTextSuggester.DEFAULT_SEPARATOR;
  Object separatorParam = params.get(SEPARATOR);
  if (separatorParam != null) {
    byte[] separatorBytes = separatorParam.toString().getBytes(StandardCharsets.UTF_8);
    // An empty value has no first byte; report it as a configuration error instead of
    // letting the array access fail with ArrayIndexOutOfBoundsException.
    if (separatorBytes.length == 0) {
      throw new IllegalArgumentException("Error in configuration: " + SEPARATOR + " parameter must not be empty");
    }
    // Only the first UTF-8 byte of the configured value is used as the separator.
    separator = separatorBytes[0];
  }

  return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator);
}
 
Example 5
Source File: TokenizeTextBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Configures the command from its config block.
 *
 * Reads the "inputField", "outputField", "solrFieldType" and "solrLocator" settings, resolves
 * the named field type in the index schema obtained from the Solr locator, and keeps that
 * type's index analyzer together with a CharTermAttribute registered on its token stream.
 *
 * @throws MorphlineCompilationException if the schema has no field type with the configured name
 */
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.inputFieldName = getConfigs().getString(config, "inputField");
  this.outputFieldName = getConfigs().getString(config, "outputField");
  String fieldTypeName = getConfigs().getString(config, "solrFieldType");
  SolrLocator solrLocator = new SolrLocator(getConfigs().getConfig(config, "solrLocator"), context);
  LOG.debug("solrLocator: {}", solrLocator);

  FieldType solrType = solrLocator.getIndexSchema().getFieldTypeByName(fieldTypeName);
  if (solrType == null) {
    throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + fieldTypeName, config);
  }

  this.analyzer = solrType.getIndexAnalyzer();
  Preconditions.checkNotNull(analyzer);
  // register CharTermAttribute for later (implicit) reuse
  this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
  Preconditions.checkNotNull(token);
  validateArguments();
}
 
Example 6
Source File: MLAnalayser.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Picks the analyzer of the given field type that matches the current mode.
 *
 * @param fieldType the field type to take the analyzer from
 * @return the index analyzer in INDEX mode, the query analyzer in QUERY mode,
 *         and {@code null} for any other mode
 */
private Analyzer selectAnalyzer(FieldType fieldType) {
	if (mode == Mode.INDEX) {
		return fieldType.getIndexAnalyzer();
	}
	if (mode == Mode.QUERY) {
		return fieldType.getQueryAnalyzer();
	}
	// Neither index nor query mode: there is no analyzer to offer.
	return null;
}
 
Example 7
Source File: TaggerRequestHandler.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the index analyzer of the given field contains a stop filter.
 *
 * @param field the name of the field to look up in the request's schema
 * @param req the request providing the schema
 * @return {@code true} if the index analyzer is a {@code TokenizerChain} with at least one
 *         {@code StopFilterFactory}; {@code false} otherwise
 */
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  Analyzer indexAnalyzer = req.getSchema().getFieldType(field).getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    // Filter factories can only be inspected on a TokenizerChain.
    return false;
  }

  for (TokenFilterFactory filterFactory : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (filterFactory instanceof StopFilterFactory) {
      return true;
    }
  }
  return false;
}
 
Example 8
Source File: TestLuceneMatchVersion.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the configured Lucene match version equals DEFAULT_VERSION and that it is
 * reported by the analysis components of the "textDefault" and
 * "textTurkishAnalyzerDefault" field types.
 */
public void testStandardTokenizerVersions() throws Exception {
  assertEquals(DEFAULT_VERSION, solrConfig.luceneMatchVersion);

  final IndexSchema latestSchema = h.getCore().getLatestSchema();

  // "textDefault": check the tokenizer factory and the filter factory at index 2.
  TokenizerChain defaultChain =
      (TokenizerChain) latestSchema.getFieldType("textDefault").getIndexAnalyzer();
  assertEquals(DEFAULT_VERSION, defaultChain.getTokenizerFactory().getLuceneMatchVersion());
  assertEquals(DEFAULT_VERSION, defaultChain.getTokenFilterFactories()[2].getLuceneMatchVersion());

  // "textTurkishAnalyzerDefault": the index analyzer must be a TurkishAnalyzer.
  Analyzer turkishAnalyzer =
      latestSchema.getFieldType("textTurkishAnalyzerDefault").getIndexAnalyzer();
  assertTrue(turkishAnalyzer instanceof TurkishAnalyzer);
  assertEquals(DEFAULT_VERSION, turkishAnalyzer.getVersion());
}
 
Example 9
Source File: MMSegTokenizerFactoryTest.java    From mmseg4j-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the dictionary held by the MMSeg tokenizer factory of the named field type.
 *
 * Asserts that the index analyzer is exactly a {@code TokenizerChain}, that its tokenizer
 * factory is exactly a {@code MMSegTokenizerFactory}, and that the dictionary is not null.
 *
 * @param fieldTypeName the name of the field type in the latest schema of the test core
 * @return the dictionary of the tokenizer factory
 */
private Dictionary getDictionaryByFieldType(String fieldTypeName) {
	Analyzer indexAnalyzer =
			h.getCore().getLatestSchema().getFieldTypeByName(fieldTypeName).getIndexAnalyzer();
	Assert.assertEquals(indexAnalyzer.getClass(), TokenizerChain.class);

	TokenizerFactory tokenizerFactory = ((TokenizerChain) indexAnalyzer).getTokenizerFactory();
	Assert.assertEquals(tokenizerFactory.getClass(), MMSegTokenizerFactory.class);

	MMSegTokenizerFactory mmsegFactory = (MMSegTokenizerFactory) tokenizerFactory;
	Assert.assertNotNull(mmsegFactory.dic);
	return mmsegFactory.dic;
}
 
Example 10
Source File: TaggerRequestHandler.java    From SolrTextTagger with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether a stop filter is part of the index-time analysis of the given field.
 *
 * @param field the field name, resolved against the schema of the request
 * @param req the current request
 * @return {@code true} when the index analyzer is a {@code TokenizerChain} that includes a
 *         {@code StopFilterFactory}, {@code false} in every other case
 */
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  FieldType type = req.getSchema().getFieldType(field);
  Analyzer indexAnalyzer = type.getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return false;
  }

  TokenFilterFactory[] filters = ((TokenizerChain) indexAnalyzer).getTokenFilterFactories();
  for (int i = 0; i < filters.length; i++) {
    if (filters[i] instanceof StopFilterFactory) {
      return true;
    }
  }
  return false;
}
 
Example 11
Source File: FuzzyLookupFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link FuzzySuggester} from the given configuration.
 *
 * The query analyzer field type parameter is mandatory and must exist in the latest schema
 * of the core. All remaining parameters are optional and fall back to the defaults shown
 * below when absent.
 *
 * @param params the lookup configuration
 * @param core the core whose latest schema is used to resolve the field type
 * @return the configured suggester
 * @throws IllegalArgumentException if the mandatory parameter is missing or names a field
 *         type that is not defined in the schema
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {

  // mandatory parameter
  Object fieldTypeName = params.get(AnalyzingLookupFactory.QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + AnalyzingLookupFactory.QUERY_ANALYZER + " parameter is mandatory");
  }

  // retrieve index and query analyzers for the field
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();

  // optional parameters; each one is looked up once and parsed only when present

  // defaults to true
  Object exactFirstParam = params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST);
  boolean exactMatchFirst = exactFirstParam == null || Boolean.parseBoolean(exactFirstParam.toString());

  // defaults to true
  Object preserveSepParam = params.get(AnalyzingLookupFactory.PRESERVE_SEP);
  boolean preserveSep = preserveSepParam == null || Boolean.parseBoolean(preserveSepParam.toString());

  int options = (exactMatchFirst ? FuzzySuggester.EXACT_FIRST : 0)
      | (preserveSep ? FuzzySuggester.PRESERVE_SEP : 0);

  Object maxSurfaceFormsParam = params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS);
  int maxSurfaceFormsPerAnalyzedForm = 256;
  if (maxSurfaceFormsParam != null) {
    maxSurfaceFormsPerAnalyzedForm = Integer.parseInt(maxSurfaceFormsParam.toString());
  }

  Object maxExpansionsParam = params.get(AnalyzingLookupFactory.MAX_EXPANSIONS);
  int maxGraphExpansions = -1;
  if (maxExpansionsParam != null) {
    maxGraphExpansions = Integer.parseInt(maxExpansionsParam.toString());
  }

  // defaults to false
  Object positionIncrementsParam = params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS);
  boolean preservePositionIncrements =
      positionIncrementsParam != null && Boolean.parseBoolean(positionIncrementsParam.toString());

  Object maxEditsParam = params.get(MAX_EDITS);
  int maxEdits = FuzzySuggester.DEFAULT_MAX_EDITS;
  if (maxEditsParam != null) {
    maxEdits = Integer.parseInt(maxEditsParam.toString());
  }

  Object transpositionsParam = params.get(TRANSPOSITIONS);
  boolean transpositions = FuzzySuggester.DEFAULT_TRANSPOSITIONS;
  if (transpositionsParam != null) {
    transpositions = Boolean.parseBoolean(transpositionsParam.toString());
  }

  Object nonFuzzyPrefixParam = params.get(NON_FUZZY_PREFIX);
  int nonFuzzyPrefix = FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;
  if (nonFuzzyPrefixParam != null) {
    nonFuzzyPrefix = Integer.parseInt(nonFuzzyPrefixParam.toString());
  }

  Object minFuzzyLengthParam = params.get(MIN_FUZZY_LENGTH);
  int minFuzzyLength = FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;
  if (minFuzzyLengthParam != null) {
    minFuzzyLength = Integer.parseInt(minFuzzyLengthParam.toString());
  }

  Object unicodeAwareParam = params.get(UNICODE_AWARE);
  boolean unicodeAware = FuzzySuggester.DEFAULT_UNICODE_AWARE;
  if (unicodeAwareParam != null) {
    unicodeAware = Boolean.parseBoolean(unicodeAwareParam.toString());
  }

  return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options,
      maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements,
      maxEdits, transpositions, nonFuzzyPrefix, minFuzzyLength, unicodeAware);
}
 
Example 12
Source File: AnalyzingLookupFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an {@link AnalyzingSuggester} from the given configuration.
 *
 * {@code QUERY_ANALYZER} is mandatory and must name a field type defined in the latest schema
 * of the core. The other parameters are optional and use the defaults shown below when absent.
 *
 * @param params the lookup configuration
 * @param core the core whose latest schema is used to resolve the field type
 * @return the configured suggester
 * @throws IllegalArgumentException if {@code QUERY_ANALYZER} is missing or names a field type
 *         that is not defined in the schema
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }

  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();

  // optional parameters; each one is looked up once and parsed only when present

  // defaults to true
  Object exactFirstParam = params.get(EXACT_MATCH_FIRST);
  boolean exactMatchFirst = exactFirstParam == null || Boolean.parseBoolean(exactFirstParam.toString());

  // defaults to true
  Object preserveSepParam = params.get(PRESERVE_SEP);
  boolean preserveSep = preserveSepParam == null || Boolean.parseBoolean(preserveSepParam.toString());

  int flags = (exactMatchFirst ? AnalyzingSuggester.EXACT_FIRST : 0)
      | (preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0);

  Object maxSurfaceFormsParam = params.get(MAX_SURFACE_FORMS);
  int maxSurfaceFormsPerAnalyzedForm = 256;
  if (maxSurfaceFormsParam != null) {
    maxSurfaceFormsPerAnalyzedForm = Integer.parseInt(maxSurfaceFormsParam.toString());
  }

  Object maxExpansionsParam = params.get(MAX_EXPANSIONS);
  int maxGraphExpansions = -1;
  if (maxExpansionsParam != null) {
    maxGraphExpansions = Integer.parseInt(maxExpansionsParam.toString());
  }

  // defaults to false
  Object positionIncrementsParam = params.get(PRESERVE_POSITION_INCREMENTS);
  boolean preservePositionIncrements =
      positionIncrementsParam != null && Boolean.parseBoolean(positionIncrementsParam.toString());

  return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags,
      maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
}