package mtas.analysis.util; import mtas.analysis.MtasTokenizer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; import java.io.IOException; import java.util.HashMap; import java.util.Map; /** * A factory for creating MtasTokenizer objects. */ public class MtasTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware { /** The Constant log. */ private static final Log log = LogFactory.getLog(MtasTokenizerFactory.class); /** The Constant ARGUMENT_CONFIGFILE. */ public static final String ARGUMENT_CONFIGFILE = "configFile"; /** The Constant ARGUMENT_CONFIG. */ public static final String ARGUMENT_CONFIG = "config"; /** The Constant ARGUMENT_ANALYZER. */ public static final String ARGUMENT_PARSER = "parser"; /** The Constant ARGUMENT_PARSER_ARGS. */ public static final String ARGUMENT_PARSER_ARGS = "parserArgs"; /** The Constant ARGUMENT_DEFAULT. */ public static final String ARGUMENT_DEFAULT = "default"; /** The config argument. */ private String configArgument; /** The default argument. */ private String defaultArgument; /** The config file argument. */ private String configFileArgument; /** The analyzer argument. */ private String analyzerArgument; /** The parser arguments. */ private String analyzerArgumentParserArgs; /** The configs. */ private HashMap<String, MtasConfiguration> configs = null; /** The config. */ private MtasConfiguration config = null; /** * Instantiates a new mtas tokenizer factory. * * @param args the args * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizerFactory(Map<String, String> args) throws IOException { this(args, null); } /** * Instantiates a new mtas tokenizer factory. * * @param args the args * @param resourceLoader the resource loader * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizerFactory(Map<String, String> args, ResourceLoader resourceLoader) throws IOException { super(args); configFileArgument = get(args, ARGUMENT_CONFIGFILE); configArgument = get(args, ARGUMENT_CONFIG); analyzerArgument = get(args, ARGUMENT_PARSER); analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS); defaultArgument = get(args, ARGUMENT_DEFAULT); int numberOfArgs = 0; numberOfArgs = (configFileArgument==null)?numberOfArgs:numberOfArgs+1; numberOfArgs = (configArgument==null)?numberOfArgs:numberOfArgs+1; numberOfArgs = (analyzerArgument==null)?numberOfArgs:numberOfArgs+1; if (numberOfArgs>1) { throw new IOException(this.getClass().getName() + " can't have multiple of " + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG+" AND "+ARGUMENT_PARSER); } else if (configArgument == null && defaultArgument != null) { throw new IOException(this.getClass().getName() + " can't have " + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG); } else if (numberOfArgs==0) { throw new IOException(this.getClass().getName() + " should have " + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG+" or "+ARGUMENT_PARSER); } init(resourceLoader); } /* * (non-Javadoc) * * @see * org.apache.lucene.analysis.util.TokenizerFactory#create(org.apache.lucene. * util.AttributeFactory) */ @Override public MtasTokenizer create(AttributeFactory factory) { MtasTokenizer tokenizer = null; try { tokenizer = create(factory, null); } catch (IOException e) { log.error(e); } return tokenizer; } public MtasTokenizer create(String configuration) throws IOException { return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, configuration); } /** * Creates the. * * @param configuration the configuration * @return the mtas tokenizer * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizer create(String configuration, String defaultConfiguration) throws IOException { return create(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, configuration, defaultConfiguration); } public MtasTokenizer create(AttributeFactory factory, String configuration) throws IOException { return create(factory, configuration, null); } /** * Creates the. * * @param factory the factory * @param configuration the configuration * @return the mtas tokenizer * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizer create(AttributeFactory factory, String configuration, String defaultConfiguration) throws IOException { if(defaultConfiguration==null) { defaultConfiguration = defaultArgument; } if (configs != null && configs.size() > 0) { if (configuration == null && defaultConfiguration == null) { throw new IOException("no (default)configuration"); } else if (configuration == null) { if (configs.get(defaultConfiguration) != null) { return new MtasTokenizer(factory, configs.get(defaultConfiguration)); } else { throw new IOException( "default configuration " + defaultConfiguration + " not available"); } } else { MtasConfiguration config = configs.get(configuration); if (config == null) { if (defaultConfiguration != null) { if (configs.get(defaultConfiguration) != null) { return new MtasTokenizer(factory, configs.get(defaultConfiguration)); } else { throw new IOException("configuration " + configuration + " not found and default configuration " + defaultConfiguration + " not available"); } } else { throw new IOException("configuration " + configuration + " not available and no default configuration"); } } else { return new MtasTokenizer(factory, config); } } } else if (config != null) { return new MtasTokenizer(factory, config); } else { throw new IOException("no configuration"); } } /** * Inits the. * * @param resourceLoader the resource loader * @throws IOException Signals that an I/O exception has occurred. */ private void init(ResourceLoader resourceLoader) throws IOException { if (config == null && configs == null) { if (resourceLoader == null) { return; } else if (configFileArgument == null && configArgument == null && analyzerArgument==null) { throw new IOException("no configuration"); } else { if (configFileArgument != null) { try { config = MtasConfiguration.readConfiguration( resourceLoader.openResource(configFileArgument)); } catch (IOException e) { throw new IOException( "Problem loading configuration from " + configFileArgument, e); } } if (configArgument != null) { try { configs = MtasConfiguration.readMtasTokenizerConfigurations( resourceLoader, configArgument); } catch (IOException e) { throw new IOException( "Problem loading configurations from " + configArgument, e); } } if (analyzerArgument != null) { configs = null; config = new MtasConfiguration(); MtasConfiguration subConfig = new MtasConfiguration(); subConfig.name = "parser"; subConfig.attributes.put("name", analyzerArgument); subConfig.attributes.put(ARGUMENT_PARSER_ARGS, analyzerArgumentParserArgs); config.children.add(subConfig); } } } } /* * (non-Javadoc) * * @see org.apache.lucene.analysis.util.ResourceLoaderAware#inform(org.apache. * lucene.analysis.util.ResourceLoader) */ @Override public void inform(ResourceLoader loader) throws IOException { init(loader); } }