ml.dmlc.xgboost4j.java.XGBoost Java Examples

The following examples show how to use ml.dmlc.xgboost4j.java.XGBoost. Each example is taken from an open-source project; the source file and license are listed above the code.
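
Before the project examples, here is a minimal sketch of the typical xgboost4j workflow: build a DMatrix, set parameters, train, and predict. The file name train.libsvm, the parameter values, and the class name XGBoostQuickStart are illustrative assumptions, not taken from the examples below.

import java.util.HashMap;
import java.util.Map;

import ml.dmlc.xgboost4j.java.Booster;
import ml.dmlc.xgboost4j.java.DMatrix;
import ml.dmlc.xgboost4j.java.XGBoost;
import ml.dmlc.xgboost4j.java.XGBoostError;

public class XGBoostQuickStart {
    public static void main(String[] args) throws XGBoostError {
        // load LibSVM-format training data (path is a placeholder)
        DMatrix train = new DMatrix("train.libsvm");

        // illustrative parameters only; tune for your own task
        Map<String, Object> params = new HashMap<>();
        params.put("objective", "binary:logistic");
        params.put("max_depth", 6);
        params.put("eta", 0.1);

        // evaluation sets whose metrics are printed during training
        Map<String, DMatrix> watches = new HashMap<>();
        watches.put("train", train);

        // train for 100 boosting rounds
        Booster booster = XGBoost.train(train, params, 100, watches, null, null);

        // predict returns one float[] per input row
        float[][] preds = booster.predict(train);
        System.out.println("first prediction: " + preds[0][0]);

        // DMatrix and Booster wrap native memory; release it explicitly
        booster.dispose();
        train.dispose();
    }
}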
Example #1
Source File: TunedXGBoost.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
    //instead of (at a high level) calling buildClassifier on the same data 10 times,
    //with each subsequent call overwriting the training done in the last,
    //we build each classifier in models[] once, storing the trained model for each cv fold.
    //when we move to the next number of iterations, instead of building from scratch,
    //we continue iterating from the stored models, which we can do since the
    //cv folds will be identical.
    //so for a given parameter set, this buildClassifier will essentially be called 10 times,
    //once for each cv fold.

    modelIndex++; //going to use this model for this fold
    TunedXGBoost model = models[modelIndex];

    if (numIterations == 0) {
        //first of the 'numIterations' parameter values, i.e. the first build of each model:
        //just build normally, including the initialisation of all the meta info
        model.buildClassifier(data);
    } else {
        //continuing on from an already built model with fewer iterations.
        //don't call the normal buildClassifier, since that would reinitialise
        //a bunch of state, including the booster itself. instead just
        //continue with a modified call to the trainer function
        model.booster = XGBoost.train(model.trainDMat, model.params, newNumIterations - numIterations, model.watches, null, null, null, 0, model.booster);
    }
}
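
The key detail above is the long XGBoost.train overload: its final argument is an existing Booster, and training continues from it instead of starting fresh. A minimal sketch of that warm-start call, assuming trainMat, params, watches, and prevBooster are already defined:

// continue training prevBooster for 50 more rounds instead of rebuilding
Booster resumed = XGBoost.train(trainMat, params, 50, watches,
        null, null, null, 0, prevBooster);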
 
Example #2
Source File: MLXGBoost.java    From RecSys2018 with Apache License 2.0
public static Async<Booster> asyncModel(final String modelFile,
		final int nthread) {
	// load xgboost model
	final Async<Booster> modelAsync = new Async<Booster>(() -> {
		try {
			Booster bst = XGBoost.loadModel(modelFile);
			if (nthread > 0) {
				bst.setParam("nthread", nthread);
			}
			return bst;
		} catch (XGBoostError e) {
			e.printStackTrace();
			return null;
		}
	}, Booster::dispose);
	return modelAsync;
}
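
Once loaded, a Booster is normally used for prediction. A sketch, assuming a single dense row of numFeatures values and a loaded Booster bst; note that some xgboost4j versions instead expose a dense constructor with an extra missing-value argument:

float[] row = new float[numFeatures];              // fill with feature values
DMatrix input = new DMatrix(row, 1, numFeatures);  // dense 1 x numFeatures matrix
float[][] preds = bst.predict(input);              // one float[] per row
input.dispose();                                   // DMatrix wraps native memory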
 
Example #3
Source File: MaxEdgeScoreDependencyParser.java    From SmoothNLP with GNU General Public License v3.0
public static Booster loadXgbModel(String modelAddr) {
    try {
        InputStream modelIS = SmoothNLP.IOAdaptor.open(modelAddr);
        Booster booster = XGBoost.loadModel(modelIS);
        return booster;
    } catch (Exception e) {
        // add proper warnings later
        System.out.println(e);
        return null;
    }
}
 
Example #4
Source File: DependencyGraghEdgeCostTrain.java    From SmoothNLP with GNU General Public License v3.0
public static void trainXgbModel(String trainFile, String devFile, String modelAddr, int nround, int negSampleRate, int earlyStop, int nthreads) throws IOException {
    final DMatrix trainMatrix = readCoNLL2DMatrix(trainFile,negSampleRate);
    final DMatrix devMatrix = readCoNLL2DMatrix(devFile,negSampleRate);
    try {
        Map<String, Object> params = new HashMap<String, Object>() {
            {
                put("nthread", nthreads);
                put("max_depth", 16);
                put("silent", 0);
                put("objective", "binary:logistic");
                put("colsample_bytree",0.95);
                put("colsample_bylevel",0.95);
                put("eta",0.2);
                put("subsample",0.95);
                put("lambda",0.2);

                put("min_child_weight",5);
                put("scale_pos_weight",negSampleRate);

                // other parameters
                // "objective" -> "multi:softmax", "num_class" -> "6"

                put("eval_metric", "logloss");
                put("tree_method","approx");
            }
        };
        Map<String, DMatrix> watches = new HashMap<String, DMatrix>() {
            {
                put("train", trainMatrix);
                put("dev",devMatrix);
            }
        };
        Booster booster = XGBoost.train(trainMatrix, params, nround, watches, null, null,null,earlyStop);
        OutputStream outstream = SmoothNLP.IOAdaptor.create(modelAddr);
        booster.saveModel(outstream);
    } catch (XGBoostError e) {
        System.out.println(e);
    }
}
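
The eight-argument train overload used here also accepts a metrics array (passed as null above). A sketch that captures per-round evaluation results, assuming the same trainMatrix, params, watches, nround, and earlyStop as in this example:

// metrics[i][j] = metric value of the i-th watch ("train", "dev") at round j
float[][] metrics = new float[watches.size()][nround];
Booster booster = XGBoost.train(trainMatrix, params, nround, watches,
        metrics, null, null, earlyStop);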
 
Example #5
Source File: UtilFns.java    From SmoothNLP with GNU General Public License v3.0
public static Booster loadXgbModel(String modelAddr) {
    try {
        InputStream modelIS = SmoothNLP.IOAdaptor.open(modelAddr);
        Booster booster = XGBoost.loadModel(modelIS);
        return booster;
    } catch (Exception e) {
        // add proper warnings later
        System.out.println(e);
        return null;
    }
}
 
Example #6
Source File: XGBoostModel.java    From zoltar with Apache License 2.0
/**
 * Note: Please use Models from zoltar-models module.
 *
 * <p>Returns an XGBoost model given a URI to the serialized model file.
 */
public static XGBoostModel create(final Model.Id id, final URI modelUri) throws IOException {
  try {
    GompLoader.start();
    final InputStream is = Files.newInputStream(FileSystemExtras.path(modelUri));
    return new AutoValue_XGBoostModel(id, XGBoost.loadModel(is));
  } catch (final XGBoostError xgBoostError) {
    throw new IOException(xgBoostError);
  }
}
 
Example #7
Source File: MLXGBoost.java    From RecSys2018 with Apache License 2.0
public static MLXGBoostFeature[] analyzeFeatures(final String modelFile,
		final String featureFile) throws Exception {

	Booster model = XGBoost.loadModel(modelFile);

	List<String> temp = new LinkedList<String>();
	try (BufferedReader reader = new BufferedReader(
			new FileReader(featureFile))) {
		String line;
		while ((line = reader.readLine()) != null) {
			temp.add(line);
		}
	}

	// get feature importance scores
	String[] featureNames = new String[temp.size()];
	temp.toArray(featureNames);
	int[] importances = MLXGBoost.getFeatureImportance(model, featureNames);

	// sort features by their importance
	MLXGBoostFeature[] sortedFeatures = new MLXGBoostFeature[featureNames.length];
	for (int i = 0; i < featureNames.length; i++) {
		sortedFeatures[i] = new MLXGBoostFeature(featureNames[i],
				importances[i]);
	}
	Arrays.sort(sortedFeatures, new MLXGBoostFeature.ScoreComparator(true));

	return sortedFeatures;
}
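
MLXGBoost.getFeatureImportance is a RecSys2018 helper; the underlying xgboost4j call is Booster.getFeatureScore, which maps feature names to split counts. A sketch against the raw API, assuming a model trained without a feature map (keys then default to "f0", "f1", ...):

// null feature map: xgboost falls back to generated names f0, f1, ...
Map<String, Integer> scores = model.getFeatureScore((String) null);
scores.entrySet().stream()
        .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
        .forEach(e -> System.out.println(e.getKey() + "\t" + e.getValue()));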
 
Example #8
Source File: XGBoostUtils.java    From incubator-hivemall with Apache License 2.0
@Nonnull
public static Booster deserializeBooster(@Nonnull final Text model) throws HiveException {
    try {
        byte[] b = IOUtils.fromCompressedText(model.getBytes(), model.getLength());
        return XGBoost.loadModel(new FastByteArrayInputStream(b));
    } catch (Throwable e) {
        throw new HiveException("Failed to deserialize a booster", e);
    }
}
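
IOUtils.fromCompressedText is Hivemall-specific, but the reverse direction is plain xgboost4j: Booster.toByteArray() yields the serialized model bytes, which loadModel can read back from any InputStream. A sketch, assuming an existing booster:

byte[] raw = booster.toByteArray();                    // serialize the model
Booster restored = XGBoost.loadModel(
        new java.io.ByteArrayInputStream(raw));        // and load it back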
 
Example #9
Source File: XGBoostMethod.java    From samantha with MIT License
public void learn(PredictiveModel model, LearningData learningData, LearningData validData) {
    try {
        DMatrix dtrain = new DMatrix(new XGBoostIterator(learningData), null);
        Map<String, DMatrix> watches = new HashMap<>();
        if (validData != null) {
            watches.put("Validation", new DMatrix(new XGBoostIterator(validData), null));
        }
        Booster booster = XGBoost.train(dtrain, params, round, watches, null, null);
        XGBoostModel boostModel = (XGBoostModel) model;
        boostModel.setXGBooster(booster);
    } catch (XGBoostError e) {
        throw new BadRequestException(e);
    }
}
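
A note on the DMatrix constructor used here: it streams rows from an Iterator of LabeledPoint (XGBoostIterator is a samantha class), and the second argument names an external-memory cache prefix; passing null, as above, keeps the matrix in memory.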
 
Example #10
Source File: DependencyGraphRelationshipTagTrain.java    From SmoothNLP with GNU General Public License v3.0
public static void trainXgbModel(String trainFile, String devFile, String modelAddr, int nround, int earlyStop, int nthreads) throws IOException {
    final DMatrix trainMatrix = readCoNLL2DMatrix(trainFile);
    final DMatrix devMatrix = readCoNLL2DMatrix(devFile);
    try {
        Map<String, Object> params = new HashMap<String, Object>() {
            {
                put("nthread", nthreads);
                put("max_depth", 12);
                put("silent", 0);
                put("objective", "multi:softprob");
                put("colsample_bytree",0.90);
                put("colsample_bylevel",0.90);
                put("eta",0.2);
                put("subsample",0.95);
                put("lambda",1.0);

                // tree methods for regulation
                put("min_child_weight",5);
                put("max_leaves",128);

                // other parameters
                // "objective" -> "multi:softmax", "num_class" -> "6"

                put("eval_metric", "merror");
                put("tree_method","approx");
                put("num_class",tag2float.size());

                put("min_child_weight",5);
            }
        };
        Map<String, DMatrix> watches = new HashMap<String, DMatrix>() {
            {
                put("train", trainMatrix);
                put("dev",devMatrix);
            }
        };
        Booster booster = XGBoost.train(trainMatrix, params, nround, watches, null, null,null,earlyStop);
        OutputStream outstream = SmoothNLP.IOAdaptor.create(modelAddr);
        booster.saveModel(outstream);
    } catch (XGBoostError e) {
        System.out.println(e);
    }
}
 
Example #11
Source File: TunedXGBoost.java    From tsml with GNU General Public License v3.0
    /**
     * Does the 'actual' initialising and building of the model, as opposed to
     * experimental code setup etc.
     * @throws Exception
     */
    public void buildActualClassifer() throws Exception {
        if(tuneParameters)
            tuneHyperparameters();

        String objective = "multi:softprob"; 
//        String objective = numClasses == 2 ? "binary:logistic" : "multi:softprob";

        trainDMat = wekaInstancesToDMatrix(trainInsts);
        params = new HashMap<String, Object>();
        //todo: this is a mega hack to enforce 1 thread only on the cluster (else bad juju).
        //fix somehow at some point.
        if (runSingleThreaded || System.getProperty("os.name").toLowerCase().contains("linux"))
            params.put("nthread", 1);
        // else == num processors by default

        //fixed params
        params.put("silent", 1);
        params.put("objective", objective);
        if(objective.contains("multi"))
            params.put("num_class", numClasses); //required with multiclass problems
        params.put("seed", seed);
        params.put("subsample", rowSubsampling);
        params.put("colsample_bytree", colSubsampling);

        //tunable params (numIterations is passed directly to XGBoost.train(...))
        params.put("learning_rate", learningRate);
        params.put("max_depth", maxTreeDepth);
        params.put("min_child_weight", minChildWeight);

        watches = new HashMap<String, DMatrix>();
//        if (getDebugPrinting() || getDebug())
//        watches.put("train", trainDMat);

//        int earlyStopping = (int) Math.ceil(numIterations / 10.0); 
        //e.g numIts == 25    =>   stop after 3 increases in err 
        //    numIts == 250   =>   stop after 25 increases in err

//        booster = XGBoost.train(trainDMat, params, numIterations, watches, null, null, null, earlyStopping);
        booster = XGBoost.train(trainDMat, params, numIterations, watches, null, null);

    }