Java Code Examples for org.datavec.api.conf.Configuration#get()

The following examples show how to use org.datavec.api.conf.Configuration#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TextVectorizer.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Configures this vectorizer from {@code conf}: builds the tokenizer factory, reads the minimum
 * word frequency (default 5) and the stop-word collection, then instantiates and initializes
 * the vocab cache.
 *
 * <p>When the configured stop-word collection is {@code null} or empty, the result of
 * {@code StopWords.getStopWords()} is used instead. The vocab cache class is named by the
 * {@code VOCAB_CACHE} key and defaults to {@link DefaultVocabCache}.
 *
 * @param conf configuration to read the settings from
 * @throws RuntimeException if the vocab cache class cannot be loaded, is not a
 *         {@code VocabCache}, cannot be instantiated, or fails to initialize
 */
@Override
public void initialize(Configuration conf) {
    tokenizerFactory = createTokenizerFactory(conf);
    minWordFrequency = conf.getInt(MIN_WORD_FREQUENCY, 5);
    stopWords = conf.getStringCollection(STOP_WORDS);
    if (stopWords == null || stopWords.isEmpty()) {
        stopWords = StopWords.getStopWords();
    }

    String cacheClassName = conf.get(VOCAB_CACHE, DefaultVocabCache.class.getName());
    try {
        // asSubclass checks the type up front, so a misconfigured class is rejected
        // before any instance of it is created (an unchecked cast would not do that).
        Class<? extends VocabCache> cacheClass = Class.forName(cacheClassName).asSubclass(VocabCache.class);
        // Class#newInstance is deprecated; go through the no-arg constructor instead.
        cache = cacheClass.getDeclaredConstructor().newInstance();
        cache.initialize(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create vocab cache " + cacheClassName, e);
    }
}
 
Example 2
Source File: JDBCRecordReader.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Configures this reader from {@code conf} and then calls {@code initializeJdbc()} so the reader
 * is ready for iteration.
 *
 * <p>Configuration keys read here:
 * <ul>
 *     <li>{@code JDBCRecordReader.TRIM_STRINGS}: whether strings read should be trimmed before being returned;
 *     when absent, the reader's current value is kept</li>
 *     <li>{@code JDBCRecordReader.JDBC_URL}: JDBC url used for the datasource configuration (see
 *     JDBCRecordReaderTest for examples)</li>
 *     <li>{@code JDBCRecordReader.JDBC_DRIVER_CLASS_NAME}: driver class used for the datasource configuration</li>
 *     <li>{@code JDBCRecordReader.JDBC_USERNAME} and {@code JDBC_PASSWORD}: username and password used for the
 *     datasource configuration</li>
 *     <li>{@code JDBCRecordReader.JDBC_RESULTSET_TYPE}: ResultSet type to use (int value defined in the JDBC
 *     documentation); when absent, the reader's current value is kept</li>
 * </ul>
 *
 * <p>The url and the driver class name are optional, but they go together: supplying only one of them is
 * rejected. When both are supplied, a new datasource is created and replaces any datasource already set on
 * this reader.
 *
 * @param conf a configuration for initialization
 * @param split not handled yet, will be discarded
 * @throws IllegalArgumentException if exactly one of the url and the driver class name is configured
 */
@Override
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
    this.setConf(conf);
    this.setTrimStrings(conf.getBoolean(TRIM_STRINGS, trimStrings));
    this.setResultSetType(conf.getInt(JDBC_RESULTSET_TYPE, resultSetType));

    final String url = conf.get(JDBC_URL);
    final String driver = conf.get(JDBC_DRIVER_CLASS_NAME);
    final boolean hasUrl = url != null;
    final boolean hasDriver = driver != null;

    // The url and the driver only make sense as a pair
    if (hasUrl != hasDriver) {
        throw new IllegalArgumentException(
            "Both jdbc url and driver class name must be provided in order to configure JDBCRecordReader's datasource");
    }

    if (hasUrl) {
        // FIXME : find a way to read wildcard properties from conf in order to fill these properties
        final Properties driverProperties = new Properties();
        final String username = conf.get(JDBC_USERNAME);
        final String password = conf.get(JDBC_PASSWORD);
        this.dataSource = new DriverDataSource(url, driver, driverProperties, username, password);
    }

    this.initializeJdbc();
}
 
Example 3
Source File: AbstractTfidfVectorizer.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the tokenizer factory named by the {@code TOKENIZER} key (default
 * {@link DefaultTokenizerFactory}). When the {@code PREPROCESSOR} key is set, an instance of
 * that class is created and installed on the factory as its token pre-processor.
 *
 * @param conf configuration to read the class names from
 * @return the newly created tokenizer factory
 * @throws RuntimeException if either class cannot be loaded, has the wrong type, or cannot
 *         be instantiated
 */
@Override
public TokenizerFactory createTokenizerFactory(Configuration conf) {
    String factoryClassName = conf.get(TOKENIZER, DefaultTokenizerFactory.class.getName());
    try {
        // asSubclass rejects a wrongly-typed class before it is instantiated, and
        // getDeclaredConstructor().newInstance() replaces the deprecated Class#newInstance.
        TokenizerFactory factory = Class.forName(factoryClassName)
                        .asSubclass(TokenizerFactory.class)
                        .getDeclaredConstructor()
                        .newInstance();

        String preProcessorClassName = conf.get(PREPROCESSOR, null);
        if (preProcessorClassName != null) {
            TokenPreProcess preProcessor = Class.forName(preProcessorClassName)
                            .asSubclass(TokenPreProcess.class)
                            .getDeclaredConstructor()
                            .newInstance();
            factory.setTokenPreProcessor(preProcessor);
        }
        return factory;
    } catch (Exception e) {
        throw new RuntimeException("Unable to create tokenizer factory " + factoryClassName, e);
    }
}
 
Example 4
Source File: TextVectorizer.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Configures this vectorizer from {@code conf}: builds the tokenizer factory, reads the minimum
 * word frequency (default 5) and the stop words, then instantiates and initializes the vocab
 * cache.
 *
 * <p>Stop words are read from {@code conf} only when the {@code STOP_WORDS} key is present; if
 * {@code stopWords} is still {@code null} afterwards, the result of
 * {@code StopWords.getStopWords()} is used. The vocab cache class is named by the
 * {@code VOCAB_CACHE} key and defaults to {@link DefaultVocabCache}.
 *
 * @param conf configuration to read the settings from
 * @throws RuntimeException if the vocab cache class cannot be loaded, is not a
 *         {@code VocabCache}, cannot be instantiated, or fails to initialize
 */
@Override
public void initialize(Configuration conf) {
    tokenizerFactory = createTokenizerFactory(conf);
    minWordFrequency = conf.getInt(MIN_WORD_FREQUENCY, 5);
    if (conf.get(STOP_WORDS) != null) {
        stopWords = conf.getStringCollection(STOP_WORDS);
    }
    if (stopWords == null) {
        stopWords = StopWords.getStopWords();
    }

    String cacheClassName = conf.get(VOCAB_CACHE, DefaultVocabCache.class.getName());
    try {
        // asSubclass checks the type up front, so a misconfigured class is rejected
        // before any instance of it is created (an unchecked cast would not do that).
        Class<? extends VocabCache> cacheClass = Class.forName(cacheClassName).asSubclass(VocabCache.class);
        // Class#newInstance is deprecated; go through the no-arg constructor instead.
        cache = cacheClass.getDeclaredConstructor().newInstance();
        cache.initialize(conf);
    } catch (Exception e) {
        throw new RuntimeException("Unable to create vocab cache " + cacheClassName, e);
    }
}
 
Example 5
Source File: JDBCRecordReader.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Configures this reader from {@code conf} and then calls {@code initializeJdbc()} so the reader
 * is ready for iteration.
 *
 * <p>Configuration keys read here:
 * <ul>
 *     <li>{@code JDBCRecordReader.TRIM_STRINGS}: whether strings read should be trimmed before being returned;
 *     when absent, the reader's current value is kept</li>
 *     <li>{@code JDBCRecordReader.JDBC_URL}: JDBC url used for the datasource configuration (see
 *     JDBCRecordReaderTest for examples)</li>
 *     <li>{@code JDBCRecordReader.JDBC_DRIVER_CLASS_NAME}: driver class used for the datasource configuration</li>
 *     <li>{@code JDBCRecordReader.JDBC_USERNAME} and {@code JDBC_PASSWORD}: username and password used for the
 *     datasource configuration</li>
 *     <li>{@code JDBCRecordReader.JDBC_RESULTSET_TYPE}: ResultSet type to use (int value defined in the JDBC
 *     documentation); when absent, the reader's current value is kept</li>
 * </ul>
 *
 * <p>The url and the driver class name are optional, but they go together: supplying only one of them is
 * rejected. When both are supplied, a new datasource is created and replaces any datasource already set on
 * this reader.
 *
 * @param conf a configuration for initialization
 * @param split not handled yet, will be discarded
 * @throws IllegalArgumentException if exactly one of the url and the driver class name is configured
 */
@Override
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
    this.setConf(conf);
    this.setTrimStrings(conf.getBoolean(TRIM_STRINGS, trimStrings));
    this.setResultSetType(conf.getInt(JDBC_RESULTSET_TYPE, resultSetType));

    final String url = conf.get(JDBC_URL);
    final String driver = conf.get(JDBC_DRIVER_CLASS_NAME);
    final boolean hasUrl = url != null;
    final boolean hasDriver = driver != null;

    // The url and the driver only make sense as a pair
    if (hasUrl != hasDriver) {
        throw new IllegalArgumentException(
            "Both jdbc url and driver class name must be provided in order to configure JDBCRecordReader's datasource");
    }

    if (hasUrl) {
        // FIXME : find a way to read wildcard properties from conf in order to fill these properties
        final Properties driverProperties = new Properties();
        final String username = conf.get(JDBC_USERNAME);
        final String password = conf.get(JDBC_PASSWORD);
        this.dataSource = new DriverDataSource(url, driver, driverProperties, username, password);
    }

    this.initializeJdbc();
}
 
Example 6
Source File: ExcelRecordWriter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from the given configuration and opens its output stream.
 *
 * <p>The worksheet name and file type are read from {@code configuration}, falling back to
 * {@code DEFAULT_WORKSHEET_NAME} and {@code DEFAULT_FILE_TYPE}. The partitioner is then
 * initialized with {@code split}, its current output stream is wrapped in a
 * {@link DataOutputStream}, and {@code initPoi()} is called.
 *
 * @param configuration configuration holding the worksheet name and file type
 * @param split input split handed to the partitioner
 * @param partitioner partitioner that supplies the output stream
 * @throws Exception if any initialization step fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    // Capture the settings first, each with its default
    this.workBookName = configuration.get(WORKSHEET_NAME, DEFAULT_WORKSHEET_NAME);
    this.fileTypeToUse = configuration.get(FILE_TYPE, DEFAULT_FILE_TYPE);
    this.conf = configuration;

    // The partitioner must be initialized before its stream is requested
    partitioner.init(split);
    this.out = new DataOutputStream(partitioner.currentOutputStream());

    this.initPoi();
}
 
Example 7
Source File: AbstractTfidfVectorizer.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the tokenizer factory named by the {@code TOKENIZER} key, defaulting to
 * {@link DefaultTokenizerFactory}.
 *
 * @param conf configuration to read the class name from
 * @return the newly created tokenizer factory
 * @throws RuntimeException if the class cannot be loaded, is not a {@code TokenizerFactory},
 *         or cannot be instantiated
 */
@Override
public TokenizerFactory createTokenizerFactory(Configuration conf) {
    String factoryClassName = conf.get(TOKENIZER, DefaultTokenizerFactory.class.getName());
    try {
        // asSubclass rejects a wrongly-typed class before it is instantiated, and
        // getDeclaredConstructor().newInstance() replaces the deprecated Class#newInstance.
        return Class.forName(factoryClassName)
                        .asSubclass(TokenizerFactory.class)
                        .getDeclaredConstructor()
                        .newInstance();
    } catch (Exception e) {
        throw new RuntimeException("Unable to create tokenizer factory " + factoryClassName, e);
    }
}
 
Example 8
Source File: SVMLightOutputFormat.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code SVMLightRecordWriter}
 * @throws DataVecException if constructing the writer fails
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    try {
        return new SVMLightRecordWriter();
    } catch (Exception e) {
        throw new DataVecException(e);
    }
}
 
Example 9
Source File: ExcelRecordWriter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from the given configuration and opens its output stream.
 *
 * <p>The worksheet name and file type are read from {@code configuration}, falling back to
 * {@code DEFAULT_WORKSHEET_NAME} and {@code DEFAULT_FILE_TYPE}. The partitioner is then
 * initialized with {@code split}, its current output stream is wrapped in a
 * {@link DataOutputStream}, and {@code initPoi()} is called.
 *
 * @param configuration configuration holding the worksheet name and file type
 * @param split input split handed to the partitioner
 * @param partitioner partitioner that supplies the output stream
 * @throws Exception if any initialization step fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    // Capture the settings first, each with its default
    this.workBookName = configuration.get(WORKSHEET_NAME, DEFAULT_WORKSHEET_NAME);
    this.fileTypeToUse = configuration.get(FILE_TYPE, DEFAULT_FILE_TYPE);
    this.conf = configuration;

    // The partitioner must be initialized before its stream is requested
    partitioner.init(split);
    this.out = new DataOutputStream(partitioner.currentOutputStream());

    this.initPoi();
}
 
Example 10
Source File: SVMLightOutputFormat.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code SVMLightRecordWriter}
 * @throws DataVecException if constructing the writer fails
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    try {
        return new SVMLightRecordWriter();
    } catch (Exception e) {
        throw new DataVecException(e);
    }
}
 
Example 11
Source File: LineOutputFormat.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code LineRecordWriter}
 * @throws DataVecException declared by the overridden method; not thrown by this body
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    return new LineRecordWriter();
}
 
Example 12
Source File: CSVOutputFormat.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code CSVRecordWriter}
 * @throws DataVecException declared by the overridden method; not thrown by this body
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    return new CSVRecordWriter();
}
 
Example 13
Source File: LineOutputFormat.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code LineRecordWriter}
 * @throws DataVecException declared by the overridden method; not thrown by this body
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    return new LineRecordWriter();
}
 
Example 14
Source File: CSVOutputFormat.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the record writer for this output format.
 *
 * <p>The writer is built with its no-argument constructor, so nothing is read from
 * {@code conf}; in particular {@code OutputFormat.OUTPUT_PATH} has no effect here.
 *
 * @param conf configuration supplied by the caller (not consulted by this method)
 * @return a new {@code CSVRecordWriter}
 * @throws DataVecException declared by the overridden method; not thrown by this body
 */
@Override
public RecordWriter createWriter(Configuration conf) throws DataVecException {
    return new CSVRecordWriter();
}