weka.classifiers.trees.RandomForest Java Examples

The following examples show how to use weka.classifiers.trees.RandomForest, drawn from several open-source projects. Each example lists the source file and project it was taken from.
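Before the project-specific examples, here is a minimal, self-contained sketch of the core workflow they all build on. The path data/train.arff is a hypothetical placeholder; note also that setNumTrees belongs to the older (3.7-era) RandomForest API used by several of the projects below, while Weka 3.8+ exposes setNumIterations instead.

import weka.classifiers.trees.RandomForest;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class RandomForestQuickStart {
    public static void main(String[] args) throws Exception {
        // Load an ARFF file and mark the last attribute as the class
        Instances data = DataSource.read("data/train.arff"); // hypothetical path
        data.setClassIndex(data.numAttributes() - 1);

        RandomForest forest = new RandomForest();
        forest.setNumTrees(100);   // use setNumIterations(100) on Weka 3.8+
        forest.setSeed(1);
        forest.buildClassifier(data);

        // Class distribution for the first training instance
        double[] dist = forest.distributionForInstance(data.instance(0));
        System.out.println(java.util.Arrays.toString(dist));
    }
}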
Example #1
Source File: MultivariateShapeletTransformClassifier.java    From tsml with GNU General Public License v3.0
public void configureEnsemble(){
    ensemble.setWeightingScheme(new TrainAcc(4));
    ensemble.setVotingScheme(new MajorityConfidence());

    Classifier[] classifiers = new Classifier[3];
    String[] classifierNames = new String[3];

    // Quadratic-kernel SVM
    SMO smo = new SMO();
    smo.turnChecksOff();
    smo.setBuildLogisticModels(true);
    PolyKernel kl = new PolyKernel();
    kl.setExponent(2);
    smo.setKernel(kl);
    if (seedClassifier)
        smo.setRandomSeed((int) seed);
    classifiers[0] = smo;
    classifierNames[0] = "SVMQ";

    // Random forest with 500 trees
    RandomForest r = new RandomForest();
    r.setNumTrees(500);
    if (seedClassifier)
        r.setSeed((int) seed);
    classifiers[1] = r;
    classifierNames[1] = "RandF";

    // Rotation forest with 100 iterations
    RotationForest rf = new RotationForest();
    rf.setNumIterations(100);
    if (seedClassifier)
        rf.setSeed((int) seed);
    classifiers[2] = rf;
    classifierNames[2] = "RotF";

    ensemble.setClassifiers(classifiers, classifierNames, null);
}
 
Example #2
Source File: TunedRandomForest.java    From tsml with GNU General Public License v3.0
public static void cheatOnMNIST(){
    Instances train = DatasetLoading.loadDataNullable("\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\LargeProblems\\MNIST\\MNIST_TRAIN");
    Instances test = DatasetLoading.loadDataNullable("\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\LargeProblems\\MNIST\\MNIST_TEST");
    RandomForest rf = new RandomForest();
    System.out.println("Data loaded ......");
    // First run uses the forest's default size (the printout assumes the default of 10 trees)
    double a = ClassifierTools.singleTrainTestSplitAccuracy(rf, train, test);
    System.out.println("Trees =" + 10 + " acc = " + a);
    // Sweep the forest size from 50 to 1000 trees in steps of 50
    for (int trees = 50; trees <= 1000; trees += 50) {
        rf.setNumTrees(trees);
        a = ClassifierTools.singleTrainTestSplitAccuracy(rf, train, test);
        System.out.println("Trees =" + trees + " acc = " + a);
    }
}
 
Example #3
Source File: CAWPE.java    From tsml with GNU General Public License v3.0
public final void setupAdvancedSettings() {
    this.ensembleName = "CAWPE-A";
    
    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();
    
    CrossValidationEvaluator cv = new CrossValidationEvaluator(seed, false, false, false, false); 
    cv.setNumFolds(10);
    this.trainEstimator = cv; 

    Classifier[] classifiers = new Classifier[3];
    String[] classifierNames = new String[3];

    SMO smo = new SMO();
    smo.turnChecksOff();
    smo.setBuildLogisticModels(true);
    PolyKernel kl = new PolyKernel();
    kl.setExponent(2);
    smo.setKernel(kl);
    smo.setRandomSeed(seed);
    classifiers[0] = smo;
    classifierNames[0] = "SVMQ";
    RandomForest rf = new RandomForest();
    rf.setNumTrees(500);
    classifiers[1] = rf;
    classifierNames[1] = "RandF";

    RotationForest rotf = new RotationForest();
    rotf.setNumIterations(200);
    classifiers[2] = rotf;
    classifierNames[2] = "RotF";

    setClassifiers(classifiers, classifierNames, null);
}
 
Example #4
Source File: RelExTool.java    From Criteria2Query with Apache License 2.0
public void trainClassifier(String trainfile, String modelpath) throws Exception {
	Classifier m_classifier = new RandomForest();
	File inputFile = new File(trainfile);
	ArffLoader atf = new ArffLoader();
	atf.setFile(inputFile);
	Instances instancesTrain = atf.getDataSet();
	instancesTrain.setClassIndex(6); // the seventh attribute is the class
	m_classifier.buildClassifier(instancesTrain);
	saveModel(m_classifier, modelpath);
}
 
Example #5
Source File: WekaFilteredClassifierTest.java    From Java-Data-Science-Cookbook with MIT License
public void buildFilteredClassifier(){
	rf = new RandomForest();
	Remove rm = new Remove();
	rm.setAttributeIndices("1"); // drop the first attribute before training
	FilteredClassifier fc = new FilteredClassifier();
	fc.setFilter(rm);
	fc.setClassifier(rf);
	try {
		fc.buildClassifier(weather);
		for (int i = 0; i < weather.numInstances(); i++) {
			double pred = fc.classifyInstance(weather.instance(i));
			System.out.print("given value: " + weather.classAttribute().value((int) weather.instance(i).classValue()));
			System.out.println("---predicted value: " + weather.classAttribute().value((int) pred));
		}
	} catch (Exception e) {
		e.printStackTrace(); // don't swallow build/classification errors silently
	}
}
 
Example #6
Source File: AutoFEWekaPipelineTest.java    From AILibs with GNU Affero General Public License v3.0
@Test
public void testAutoFEWekaPipelineClone() throws Exception {

	Graph<IFilter> graph = new Graph<>();
	PretrainedNNFilter nnFilter = ImageUtils.getPretrainedNNFilterByName("VGG16", 5,
			DataSetUtilsTest.CIFAR10_INPUT_SHAPE);
	graph.addItem(nnFilter);

	FilterPipeline fp = new FilterPipeline(null, graph);

	AutoFEWekaPipeline pipeline = new AutoFEWekaPipeline(fp, new RandomForest());
	Classifier clonedClassifier = WekaUtil.cloneClassifier(pipeline);
	Assert.assertNotNull(clonedClassifier);
}
 
Example #7
Source File: TimeSeriesBagOfFeaturesLearningAlgorithm.java    From AILibs with GNU Affero General Public License v3.0
/**
 * Measures the out-of-bag (OOB) probabilities using cross-validation with
 * <code>numFolds</code> folds. For each fold, the data given by
 * <code>subSeqValueMatrix</code> is split into a training and a test set.
 * The test set's probabilities are then derived from a Random Forest
 * classifier trained on the training fold.
 *
 * @param subSeqValueMatrix
 *            Input data used to derive the OOB probabilities
 * @param targetMatrix
 *            The target values of the input data
 * @param numProbInstances
 *            Number of instances for which the probabilities should be derived
 * @param numFolds
 *            Number of folds used for the measurement
 * @param numClasses
 *            Number of total classes
 * @param rf
 *            Random Forest classifier which is retrained in each fold
 * @return Returns a matrix storing the probability for each input instance
 *         given by <code>subSeqValueMatrix</code>
 * @throws TrainingException
 *             Thrown when the classifier <code>rf</code> could not be trained
 *             in any fold
 */
public static double[][] measureOOBProbabilitiesUsingCV(final double[][] subSeqValueMatrix, final int[] targetMatrix, final int numProbInstances, final int numFolds, final int numClasses, final RandomForest rf)
		throws TrainingException {

	double[][] probs = new double[numProbInstances][numClasses];
	int numTestInstsPerFold = (int) ((double) probs.length / (double) numFolds);

	for (int i = 0; i < numFolds; i++) {
		// Generate training instances for fold
		Pair<TimeSeriesDataset2, TimeSeriesDataset2> trainingTestDatasets = TimeSeriesUtil.getTrainingAndTestDataForFold(i, numFolds, subSeqValueMatrix, targetMatrix);
		TimeSeriesDataset2 trainingDS = trainingTestDatasets.getX();

		WekaTimeseriesUtil.buildWekaClassifierFromSimplifiedTS(rf, trainingDS);

		// Prepare test instances
		TimeSeriesDataset2 testDataset = trainingTestDatasets.getY();
		Instances testInstances = WekaTimeseriesUtil.simplifiedTimeSeriesDatasetToWekaInstances(testDataset, IntStream.rangeClosed(0, numClasses - 1).boxed().map(String::valueOf).collect(Collectors.toList()));

		double[][] testProbs = null;
		try {
			testProbs = rf.distributionsForInstances(testInstances);
		} catch (Exception e) {
			throw new TrainingException("Could not induce test probabilities in OOB probability estimation due to an internal Weka error.", e);
		}

		// Store induced probabilities
		for (int j = 0; j < testProbs.length; j++) {
			probs[i * numTestInstsPerFold + j] = testProbs[j];
		}
	}

	return probs;
}
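For orientation, a call to this method might look like the sketch below; the data shapes and values are invented for illustration, and only the signature shown above is assumed:

// 100 subsequences with 8 extracted features each, labels in {0, 1} (made-up shapes)
double[][] subSeq = new double[100][8];
int[] targets = new int[100];
RandomForest rf = new RandomForest();
rf.setNumIterations(500);
double[][] probs = TimeSeriesBagOfFeaturesLearningAlgorithm.measureOOBProbabilitiesUsingCV(
        subSeq, targets, subSeq.length, 10, 2, rf);
// probs[i][c] holds the cross-validated probability of class c for subsequence i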
 
Example #8
Source File: MultivariateShapeletTransformClassifier.java    From tsml with GNU General Public License v3.0
/**
 * Classifiers used in the HIVE COTE paper.
 */
public void configureDefaultEnsemble(){
    // HIVE_SHAPELET_SVMQ, HIVE_SHAPELET_RandF, HIVE_SHAPELET_RotF,
    // HIVE_SHAPELET_NN, HIVE_SHAPELET_NB, HIVE_SHAPELET_C45, HIVE_SHAPELET_SVML
    ensemble = new CAWPE();
    ensemble.setWeightingScheme(new TrainAcc(4));
    ensemble.setVotingScheme(new MajorityConfidence());
    Classifier[] classifiers = new Classifier[7];
    String[] classifierNames = new String[7];

    SMO smo = new SMO();
    smo.turnChecksOff();
    smo.setBuildLogisticModels(true);
    PolyKernel kl = new PolyKernel();
    kl.setExponent(2);
    smo.setKernel(kl);
    if (seedClassifier)
        smo.setRandomSeed((int) seed);
    classifiers[0] = smo;
    classifierNames[0] = "SVMQ";

    RandomForest r = new RandomForest();
    r.setNumTrees(500);
    if (seedClassifier)
        r.setSeed((int) seed);
    classifiers[1] = r;
    classifierNames[1] = "RandF";

    RotationForest rf = new RotationForest();
    rf.setNumIterations(100);
    if (seedClassifier)
        rf.setSeed((int) seed);
    classifiers[2] = rf;
    classifierNames[2] = "RotF";

    IBk nn = new IBk();
    classifiers[3] = nn;
    classifierNames[3] = "NN";

    NaiveBayes nb = new NaiveBayes();
    classifiers[4] = nb;
    classifierNames[4] = "NB";

    J48 c45 = new J48();
    classifiers[5] = c45;
    classifierNames[5] = "C45";

    SMO svml = new SMO();
    svml.turnChecksOff();
    svml.setBuildLogisticModels(true);
    PolyKernel k2 = new PolyKernel();
    k2.setExponent(1);
    svml.setKernel(k2); // linear kernel goes on svml, not on the quadratic smo
    classifiers[6] = svml;
    classifierNames[6] = "SVML";

    ensemble.setClassifiers(classifiers, classifierNames, null);
}
 
Example #9
Source File: CAWPE.java    From tsml with GNU General Public License v3.0
/**
 * Comps: NN, SVML, SVMQ, C4.5, NB, RotF, RandF, BN
 * Weight: TrainAcc
 * Vote: MajorityVote
 *
 * As used originally in ST_HESCA, COTE.
 * NOTE: the original also contained Bayes Net (BN). We have removed it because the classifier
 * crashes unpredictably when discretising features (due to lack of variance in the feature,
 * which is not easily detected and dealt with).
 */
    public final void setupOriginalHESCASettings() {
        this.ensembleName = "HESCA";
        
        this.weightingScheme = new TrainAcc();
        this.votingScheme = new MajorityVote();
        
        CrossValidationEvaluator cv = new CrossValidationEvaluator(seed, false, false, false, false); 
        cv.setNumFolds(10);
        this.trainEstimator = cv; 
        int numClassifiers=7;
        Classifier[] classifiers = new Classifier[numClassifiers];
        String[] classifierNames = new String[numClassifiers];

        kNN k = new kNN(100);
        k.setCrossValidate(true);
        k.normalise(false);
        k.setDistanceFunction(new EuclideanDistance());
        classifiers[0] = k;
        classifierNames[0] = "NN";

        classifiers[1] = new NaiveBayes();
        classifierNames[1] = "NB";

        classifiers[2] = new J48();
        classifierNames[2] = "C45";

        SMO svml = new SMO();
        svml.turnChecksOff();
        PolyKernel kl = new PolyKernel();
        kl.setExponent(1);
        svml.setKernel(kl);
        svml.setRandomSeed(seed);
        classifiers[3] = svml;
        classifierNames[3] = "SVML";

        SMO svmq = new SMO();
        // Assumes no missing values, all real-valued attributes and a discrete class variable
        svmq.turnChecksOff();
        PolyKernel kq = new PolyKernel();
        kq.setExponent(2);
        svmq.setKernel(kq);
        svmq.setRandomSeed(seed);
        classifiers[4] = svmq;
        classifierNames[4] = "SVMQ";

        RandomForest r = new RandomForest();
        r.setNumTrees(500);
        r.setSeed(seed);
        classifiers[5] = r;
        classifierNames[5] = "RandF";

        RotationForest rf = new RotationForest();
        rf.setNumIterations(50);
        rf.setSeed(seed);
        classifiers[6] = rf;
        classifierNames[6] = "RotF";

//        classifiers[7] = new BayesNet();
//        classifierNames[7] = "bayesNet";

        setClassifiers(classifiers, classifierNames, null);
    }
 
Example #10
Source File: EnsembleProvider.java    From AILibs with GNU Affero General Public License v3.0
/**
 * Initializes the HIVE COTE ensemble consisting of 7 classifiers using a
 * majority voting strategy as described in J. Lines, S. Taylor and A. Bagnall,
 * "HIVE-COTE: The Hierarchical Vote Collective of Transformation-Based
 * Ensembles for Time Series Classification," 2016 IEEE 16th International
 * Conference on Data Mining (ICDM), Barcelona, 2016, pp. 1041-1046. doi:
 * 10.1109/ICDM.2016.0133.
 *
 * @param seed
 *            Seed used within the classifiers and the majority confidence
 *            voting scheme. The classifier weights for the
 *            {@link MajorityConfidenceVote} are determined internally using a
 *            fixed five-fold scheme.
 * @return Returns the initialized (but untrained) HIVE COTE ensemble model.
 */
public static Classifier provideHIVECOTEEnsembleModel(final long seed) {
	Classifier[] classifier = new Classifier[7];

	Vote voter = new MajorityConfidenceVote(5, seed);

	// SMO poly2
	SMO smop = new SMO();
	smop.turnChecksOff();
	smop.setBuildCalibrationModels(true);
	PolyKernel kernel = new PolyKernel();
	kernel.setExponent(2);
	smop.setKernel(kernel);
	smop.setRandomSeed((int)seed);
	classifier[0] = smop;

	// Random Forest
	RandomForest rf = new RandomForest();
	rf.setSeed((int)seed);
	rf.setNumIterations(500);
	classifier[1] = rf;

	// Rotation forest
	RotationForest rotF = new RotationForest();
	rotF.setSeed((int)seed);
	rotF.setNumIterations(100);
	classifier[2] = rotF;

	// NN
	IBk nn = new IBk();
	classifier[3] = nn;

	// Naive Bayes
	NaiveBayes nb = new NaiveBayes();
	classifier[4] = nb;

	// C45
	J48 c45 = new J48();
	c45.setSeed((int)seed);
	classifier[5] = c45;

	// SMO linear
	SMO smol = new SMO();
	smol.turnChecksOff();
	smol.setBuildCalibrationModels(true);
	PolyKernel linearKernel = new PolyKernel();
	linearKernel.setExponent(1);
	smol.setKernel(linearKernel);
	classifier[6] = smol;

	voter.setClassifiers(classifier);
	return voter;
}
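The returned ensemble plugs into the standard Weka workflow; train and test below are assumed to be pre-loaded weka.core.Instances objects with the class index set:

Classifier hiveCote = EnsembleProvider.provideHIVECOTEEnsembleModel(42L);
hiveCote.buildClassifier(train);                         // builds all 7 member classifiers
double[] dist = hiveCote.distributionForInstance(test.instance(0));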
 
Example #11
Source File: TimeSeriesBagOfFeaturesClassifier.java    From AILibs with GNU Affero General Public License v3.0
/**
 * @return the subseriesClf
 */
public RandomForest getSubseriesClf() {
	return this.subseriesClf;
}
 
Example #12
Source File: TimeSeriesBagOfFeaturesClassifier.java    From AILibs with GNU Affero General Public License v3.0
/**
 * @return the finalClf
 */
public RandomForest getFinalClf() {
	return this.finalClf;
}
 
Example #13
Source File: Ex02_Classifiers.java    From tsml with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    
    // We'll use this data throughout, see Ex01_Datahandling
    int seed = 0;
    Instances[] trainTest = DatasetLoading.sampleItalyPowerDemand(seed);
    Instances train = trainTest[0];
    Instances test = trainTest[1];

    // Here's the super basic workflow, this is pure weka: 
    RandomForest randf = new RandomForest();                       
    randf.setNumTrees(500);
    randf.setSeed(seed);
    
    randf.buildClassifier(train);                                   //aka fit, train
    
    double acc = 0.0;
    for (Instance testInst : test) {
        double pred = randf.classifyInstance(testInst);             //aka predict
        //double [] dist = randf.distributionForInstance(testInst); //aka predict_proba
        
        if (pred == testInst.classValue())
            acc++;
    }
    
    acc /= test.numInstances();
    System.out.println("Random Forest accuracy on ItalyPowerDemand: " + acc);

    // All classifiers implement the Classifier interface. This guarantees
    // the buildClassifier, classifyInstance and distributionForInstance methods,
    // which is mainly what we want.
    // Most, if not all, classifiers should extend AbstractClassifier, which adds
    // a little extra common functionality.

    // There are also a number of classifiers listed in experiments.ClassifierLists.
    // This class is updated over time and may eventually turn into factories etc.
    // on the backend, but for now it is just a way to get a classifier
    // with defined settings (parameters etc). We use this to record the exact
    // parameters used in papers, for example. We also use this to instantiate
    // particular classifiers from a string argument when running on clusters.
    Classifier classifier = ClassifierLists.setClassifierClassic("RandF", seed);
    classifier.buildClassifier(train);
    classifier.distributionForInstance(test.instance(0));
}
 
Example #14
Source File: TimeSeriesBagOfFeaturesClassifier.java    From AILibs with GNU Affero General Public License v3.0
/**
 * @param subseriesClf
 *            the subseriesClf to set
 */
public void setSubseriesClf(final RandomForest subseriesClf) {
	this.subseriesClf = subseriesClf;
}
 
Example #15
Source File: TimeSeriesBagOfFeaturesClassifier.java    From AILibs with GNU Affero General Public License v3.0
/**
 * @param finalClf
 *            the finalClf to set
 */
public void setFinalClf(final RandomForest finalClf) {
	this.finalClf = finalClf;
}