Java Code Examples for weka.core.Instances#trainCV()

The following examples show how to use weka.core.Instances#trainCV(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: AttributeSelection.java    From tsml with GNU General Public License v3.0 7 votes vote down vote up
/**
 * Runs a cross validation of the attribute selection on the training
 * instances. For subset evaluators the report states how many times each
 * attribute was selected over the folds; for attribute evaluators it gives
 * the average merit and the average ranking + std deviation of each
 * attribute.
 *
 * @return the results of cross validation as a String
 * @exception Exception if an error occurs during cross validation
 */
public String CrossValidateAttributes () throws Exception {
  // Shuffle a copy of the training instances with a generator seeded from m_seed.
  final Random random = new Random(m_seed);
  final Instances cvData = new Instances(m_trainInstances);
  cvData.randomize(random);

  // Folds are stratified only for supervised evaluators with a nominal class.
  final boolean unsupervised =
      (m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
      || (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator);
  if (!unsupervised && cvData.classAttribute().isNominal()) {
    cvData.stratify(m_numFolds);
  }

  // Perform attribute selection on the training part of every fold.
  for (int fold = 0; fold < m_numFolds; fold++) {
    Instances foldTrain = cvData.trainCV(m_numFolds, fold, random);
    selectAttributesCVSplit(foldTrain);
  }

  return CVResultsString();
}
 
Example 2
Source File: WekaUtilTester.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks that the first of ten stratified cross-validation folds splits the
 * dataset into a train and a test part whose sizes add up to the size of the
 * full dataset, and that every classifier of the portfolio can be
 * cross-validated, trained on the train part and evaluated on the test part.
 *
 * @throws Exception if the dataset cannot be read or a classifier fails
 */
@Test
public void checkSplit() throws Exception {

	Instances inst;
	// try-with-resources: the ARFF file handle is released as soon as the data is loaded
	try (BufferedReader reader = new BufferedReader(new FileReader(VOWEL_ARFF))) {
		inst = new Instances(reader);
	}
	inst.setClassIndex(inst.numAttributes() - 1);
	for (Classifier c : this.portfolio) {

		/* eval for CV */
		inst.stratify(10);
		Instances train = inst.trainCV(10, 0);
		Instances test = inst.testCV(10, 0);
		// expected value first, actual value second, so a failure message reads correctly
		Assert.assertEquals(inst.size(), train.size() + test.size());
		Evaluation eval = new Evaluation(train);
		eval.crossValidateModel(c, inst, 10, new Random(0));

		c.buildClassifier(train);
		eval.evaluateModel(c, test);
		System.out.println(eval.pctCorrect());
	}
}
 
Example 3
Source File: EvaluationUtils.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Generate a bunch of predictions ready for processing, by performing a
 * cross-validation on the supplied dataset. The dataset is copied, shuffled
 * with a generator seeded from m_Seed and, when the class is nominal and more
 * than one fold is requested, stratified before the folds are drawn.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation.
 * @return a FastVector holding the predictions of all folds, appended in
 * fold order
 * @exception Exception if an error occurs
 */
public FastVector getCVPredictions(Classifier classifier, 
                                   Instances data, 
                                   int numFolds) 
  throws Exception {

  FastVector predictions = new FastVector();
  Instances runInstances = new Instances(data);
  Random random = new Random(m_Seed);
  runInstances.randomize(random);
  if (runInstances.classAttribute().isNominal() && (numFolds > 1)) {
    runInstances.stratify(numFolds);
  }
  for (int fold = 0; fold < numFolds; fold++) {
    Instances train = runInstances.trainCV(numFolds, fold, random);
    Instances test = runInstances.testCV(numFolds, fold);
    FastVector foldPred = getTrainTestPredictions(classifier, train, test);
    predictions.appendElements(foldPred);
  } 
  return predictions;
}
 
Example 4
Source File: LogisticBase.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Runs LogitBoost and uses cross-validation to determine the best number of
 * iterations, then rebuilds the model with that number of iterations.
 *
 * @throws Exception if something goes wrong
 */
protected void performBoostingCV() throws Exception {

  // Smallest number of iterations completed by any fold so far (a fold may
  // stop earlier than the others); the best iteration is picked within it.
  int minCompleted = m_maxIterations;

  Instances stratified = new Instances(m_train);
  stratified.stratify(m_numFoldsBoosting);

  // One slot per iteration count; the same array is handed to every fold.
  double[] foldErrors = new double[m_maxIterations + 1];

  for (int fold = 0; fold < m_numFoldsBoosting; fold++) {
    // training/test split of this fold
    Instances foldTrain = stratified.trainCV(m_numFoldsBoosting, fold);
    Instances foldTest = stratified.testCV(m_numFoldsBoosting, fold);

    // every fold starts from freshly initialised regressions
    m_numRegressions = 0;
    m_regressions = initRegressions();

    // run the LogitBoost iterations and tighten the common limit
    int iterations = performBoosting(foldTrain, foldTest, foldErrors, minCompleted);
    minCompleted = Math.min(minCompleted, iterations);
  }

  // iteration with minimum error over the folds
  int bestIteration = getBestIteration(foldErrors, minCompleted);

  // rebuild the model on all of the training data
  m_numRegressions = 0;
  performBoosting(bestIteration);
}
 
Example 5
Source File: WekaDeeplearning4jExamples.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Trains a Dl4jMlpClassifier that uses the Keras EfficientNet-B1 zoo model on
 * the plant-seedlings sample images and prints summary statistics and the
 * confusion matrix for one held-out fold.
 *
 * Note: despite the method name, the zoo model configured here is
 * EfficientNet-B1, not ResNet50.
 *
 * @throws Exception if loading the images, training or evaluation fails
 */
private static void dl4jResnet50() throws Exception {
    String folderPath = "src/test/resources/nominal/plant-seedlings-small";
    ImageDirectoryLoader loader = new ImageDirectoryLoader();
    loader.setInputDirectory(new File(folderPath));
    Instances inst = loader.getDataSet();
    inst.setClassIndex(1);

    Dl4jMlpClassifier classifier = new Dl4jMlpClassifier();
    classifier.setNumEpochs(3);

    KerasEfficientNet kerasEfficientNet = new KerasEfficientNet();
    kerasEfficientNet.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1);
    classifier.setZooModel(kerasEfficientNet);

    ImageInstanceIterator iterator = new ImageInstanceIterator();
    iterator.setImagesLocation(new File(folderPath));

    classifier.setInstanceIterator(iterator);

    // Stratify and split the data: fold 0 of 5 is the test part
    Random rand = new Random(0);
    inst.randomize(rand);
    inst.stratify(5);
    Instances train = inst.trainCV(5, 0);
    Instances test = inst.testCV(5, 0);

    // Build the classifier on the training data
    classifier.buildClassifier(train);

    // Evaluate the model on test data. The Evaluation is initialised from the
    // training split so that its priors do not come from the test data.
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(classifier, test);

    // Output some summary statistics
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
}
 
Example 6
Source File: Evaluation.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * CVModel - Split D into numFolds train/test folds, train and evaluate h on
 * each one, and combine the per-fold results into a single Result.
 * @param	h		 a multi-output classifier
 * @param	D      	 the data that is split into folds
 * @param	numFolds number of folds of CV
 * @param	top    	 Threshold OPtion (pertains to multi-label data only)
 * @param	vop    	Verbosity OPtion (which measures do we want to calculate/output)
 * @return	Result	raw prediction data with evaluation statistics included.
 */
public static Result cvModel(MultiLabelClassifier h, Instances D, int numFolds, String top, String vop) throws Exception {
	Result[] foldResults = new Result[numFolds];
	for (int fold = 0; fold < numFolds; fold++) {
		Instances D_train = D.trainCV(numFolds, fold);
		Instances D_test = D.testCV(numFolds, fold);
		if (h.getDebug()) {
			System.out.println(":- Fold ["+fold+"/"+numFolds+"] -: "+MLUtils.getDatasetName(D)+"\tL="+D.classIndex()+"\tD(t:T)=("+D_train.numInstances()+":"+D_test.numInstances()+")\tLC(t:T)="+Utils.roundDouble(MLUtils.labelCardinality(D_train,D.classIndex()),2)+":"+Utils.roundDouble(MLUtils.labelCardinality(D_test,D.classIndex()),2)+")");
		}
		foldResults[fold] = evaluateModel(h, D_train, D_test); // <-- should not run stats yet!
	}

	Result combined = MLEvalUtils.combinePredictions(foldResults);
	boolean multiTarget = h instanceof MultiTargetClassifier || isMT(D);
	if (multiTarget) {
		combined.setInfo("Type","MT-CV");
	}
	else if (h instanceof MultiLabelClassifier) {
		combined.setInfo("Type","ML-CV");
		try {
			// top is used as the threshold only when it parses as a number
			double threshold = Double.parseDouble(top);
			combined.setInfo("Threshold",String.valueOf(threshold));
		} catch(Exception e) {
			System.err.println("[WARNING] Automatic threshold calibration not currently enabled for cross-fold validation, setting threshold = 0.5.\n");
			combined.setInfo("Threshold",String.valueOf(0.5));
		}
	}
	combined.setInfo("Verbosity",vop);
	combined.output = Result.getStats(combined, vop);

	// Need to reset these because of CV: both counts are set to the size of D
	int total = D.numInstances();
	combined.setValue("Number of training instances",total);
	combined.setValue("Number of test instances",total);
	return combined;
}
 
Example 7
Source File: WekaDeeplearning4jExamples.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Trains a Dl4jMlpClassifier that uses the Keras EfficientNet-B1 zoo model on
 * the plant-seedlings sample images and prints summary statistics and the
 * confusion matrix for one held-out fold.
 *
 * Note: despite the method name, the zoo model configured here is
 * EfficientNet-B1, not ResNet50.
 *
 * @throws Exception if loading the images, training or evaluation fails
 */
private static void dl4jResnet50() throws Exception {
    String folderPath = "src/test/resources/nominal/plant-seedlings-small";
    ImageDirectoryLoader loader = new ImageDirectoryLoader();
    loader.setInputDirectory(new File(folderPath));
    Instances inst = loader.getDataSet();
    inst.setClassIndex(1);

    Dl4jMlpClassifier classifier = new Dl4jMlpClassifier();
    classifier.setNumEpochs(3);

    KerasEfficientNet kerasEfficientNet = new KerasEfficientNet();
    kerasEfficientNet.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1);
    classifier.setZooModel(kerasEfficientNet);

    ImageInstanceIterator iterator = new ImageInstanceIterator();
    iterator.setImagesLocation(new File(folderPath));

    classifier.setInstanceIterator(iterator);

    // Stratify and split the data: fold 0 of 5 is the test part
    Random rand = new Random(0);
    inst.randomize(rand);
    inst.stratify(5);
    Instances train = inst.trainCV(5, 0);
    Instances test = inst.testCV(5, 0);

    // Build the classifier on the training data
    classifier.buildClassifier(train);

    // Evaluate the model on test data. The Evaluation is initialised from the
    // training split so that its priors do not come from the test data.
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(classifier, test);

    // Output some summary statistics
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
}
 
Example 8
Source File: Ridor.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a single rule learner with REP for a two-class problem. The rule
 * learner always tries to predict the class with label m_Class.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal()) {
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  }
  if (instances.numClasses() != 2) {
    throw new Exception(" Only 2 classes, please.");
  }

  // All further work happens on a copy of the supplied instances.
  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" No training data.");
  }

  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" The class labels of all the training data are missing.");
  }

  if (data.numInstances() < m_Folds) {
    throw new Exception(" Not enough data for REP.");
  }

  m_Antds = new FastVector();

  // Grow/prune split: the last of m_Folds stratified folds is the pruning
  // set, the remaining folds form the growing set.
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  final int pruneFold = m_Folds - 1;
  Instances growSet = data.trainCV(m_Folds, pruneFold, m_Random);
  Instances pruneSet = data.testCV(m_Folds, pruneFold);

  grow(growSet);     // Build this rule

  prune(pruneSet);   // Prune this rule
}
 
Example 9
Source File: Stacking.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Generates the meta data by cross-validation and builds the meta
 * classifier on it.
 * 
 * @param newData the data to work on
 * @param random the random number generator to use for cross-validation
 * @throws Exception if generation fails
 */
protected void generateMetaLevel(Instances newData, Random random) 
  throws Exception {

  Instances metaData = metaFormat(newData);
  m_MetaFormat = new Instances(metaData, 0);

  for (int fold = 0; fold < m_NumFolds; fold++) {
    Instances foldTrain = newData.trainCV(m_NumFolds, fold, random);

    // Start the executor pool (if necessary). This has to be repeated for
    // each set of classifiers, because the pool gets shut down in order to
    // prevent the program from executing as a server (and not returning to
    // the command prompt when run from the command line).
    super.buildClassifier(foldTrain);

    // construct the actual classifiers
    buildClassifiers(foldTrain);

    // Classify the test instances of this fold and add them to the meta data
    Instances foldTest = newData.testCV(m_NumFolds, fold);
    for (int idx = 0; idx < foldTest.numInstances(); idx++) {
      Instance testInstance = foldTest.instance(idx);
      metaData.add(metaInstance(testInstance));
    }
  }

  m_MetaClassifier.buildClassifier(metaData);
}
 
Example 10
Source File: ThresholdSelector.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the classifier predictions using the specified evaluation method.
 *
 * @param instances the set of <code>Instances</code> to generate
 * predictions for.
 * @param mode the evaluation mode.
 * @param numFolds the number of folds to use if not evaluating on the
 * full training set.
 * @return a <code>FastVector</code> containing the predictions.
 * @throws Exception if an error occurs generating the predictions.
 */
protected FastVector getPredictions(Instances instances, int mode, int numFolds) 
  throws Exception {

  EvaluationUtils evalUtils = new EvaluationUtils();
  evalUtils.setSeed(m_Seed);

  switch (mode) {
  case EVAL_TRAINING_SET:
    return evalUtils.getTrainTestPredictions(m_Classifier, instances, instances);

  case EVAL_CROSS_VALIDATION:
    return evalUtils.getCVPredictions(m_Classifier, instances, numFolds);

  case EVAL_TUNED_SPLIT: {
    Random random = new Random(m_Seed);
    Instances shuffled = new Instances(instances);
    shuffled.randomize(random);
    shuffled.stratify(numFolds);

    // Make sure that both subsets contain at least one positive instance:
    // take the first fold for which that holds. If no fold qualifies, the
    // split of the last fold is used.
    Instances trainPart = null;
    Instances evalPart = null;
    for (int fold = 0; fold < numFolds; fold++) {
      trainPart = shuffled.trainCV(numFolds, fold, random);
      evalPart = shuffled.testCV(numFolds, fold);
      boolean bothUsable = checkForInstance(trainPart) && checkForInstance(evalPart);
      if (bothUsable) {
        break;
      }
    }
    return evalUtils.getTrainTestPredictions(m_Classifier, trainPart, evalPart);
  }

  default:
    throw new RuntimeException("Unrecognized evaluation mode");
  }
}
 
Example 11
Source File: Grading.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Generates the meta data: one meta dataset per base classifier is filled
 * by cross-validation, the number of instances per class is counted, and one
 * copy of the meta classifier is built per base classifier.
 * 
 * @param newData the data to work on
 * @param random the random number generator used in the generation
 * @throws Exception if generation fails
 */
protected void generateMetaLevel(Instances newData, Random random) 
  throws Exception {

  final int numBase = m_Classifiers.length;

  m_MetaFormat = metaFormat(newData);
  Instances[] metaSets = new Instances[numBase];
  for (int c = 0; c < numBase; c++) {
    metaSets[c] = metaFormat(newData);
  }

  for (int fold = 0; fold < m_NumFolds; fold++) {
    Instances foldTrain = newData.trainCV(m_NumFolds, fold, random);
    Instances foldTest = newData.testCV(m_NumFolds, fold);

    // Build each base classifier on the fold's training part and record a
    // meta instance for every test instance of the fold.
    for (int c = 0; c < numBase; c++) {
      getClassifier(c).buildClassifier(foldTrain);
      for (int t = 0; t < foldTest.numInstances(); t++) {
        metaSets[c].add(metaInstance(foldTest.instance(t), c));
      }
    }
  }

  // calculate InstPerClass
  final int numClasses = newData.numClasses();
  m_InstPerClass = new double[numClasses];
  for (int cls = 0; cls < newData.numClasses(); cls++) {
    m_InstPerClass[cls] = 0.0;
  }
  for (int n = 0; n < newData.numInstances(); n++) {
    int classIdx = (int) newData.instance(n).classValue();
    m_InstPerClass[classIdx]++;
  }

  m_MetaClassifiers =
      AbstractClassifier.makeCopies(m_MetaClassifier, m_Classifiers.length);

  for (int c = 0; c < m_Classifiers.length; c++) {
    m_MetaClassifiers[c].buildClassifier(metaSets[c]);
  }
}
 
Example 12
Source File: NN_DTW_A.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Runs a leave-one-out pass over the data and collects the distance ratio
 * S = dist_d / (dist_i + 1e-9) for every instance on which exactly one of
 * the two nearest-neighbour searches (using D and using I) predicts the
 * correct class.
 *
 * Instances for which a search throws are reported on standard output and
 * skipped.
 *
 * @param data the instances to run the leave-one-out search on
 * @return a pair of lists: the first holds S for the instances where the
 * D-based prediction is correct and the I-based one is wrong, the second
 * holds S for the opposite case
 */
Pair<List<Double>, List<Double>> findScores(Instances data){
    List<Double> S_dSuccess = new ArrayList<>();
    List<Double> S_iSuccess = new ArrayList<>();
    
    for(int i=0; i<data.numInstances(); i++){
        try {
            //LOOCV search for distances.
            Instances cv_train = data.trainCV(data.numInstances(), i);
            Instances cv_test = data.testCV(data.numInstances(), i);
            //we know we only have one instance.
            Instance test = cv_test.firstInstance();
            
            Pair<Instance, Double> pair_D = findMinDistance(cv_train, test, D);
            Pair<Instance, Double> pair_I = findMinDistance(cv_train, test, I);
            
            double pred_d = pair_D.var1.classValue();
            double pred_i = pair_I.var1.classValue();
            double dist_d = pair_D.var2;
            double dist_i = pair_I.var2;
            //the small constant keeps the ratio finite when dist_i is 0.
            double S = dist_d / (dist_i+0.000000001);
            
            //if d is correct and i is incorrect.
            if(test.classValue() == pred_d && test.classValue() != pred_i)
                S_dSuccess.add(S);
            //if d is incorrect and i is correct.
            if(test.classValue() != pred_d && test.classValue() == pred_i)
                S_iSuccess.add(S);
        } catch (Exception ex) {
            System.out.println(ex);
        }
        
    }
   
    //diamond instead of the raw type, so the result is checked against the return type.
    return new Pair<>(S_dSuccess, S_iSuccess);
}
 
Example 13
Source File: LearnShapelets.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the classifier on the given training data. When paraSearch is set,
 * a 2-fold cross-validation grid search over lambdaWRange,
 * percentageOfSeriesLengthRange and shapeletLengthScaleRange selects the
 * hyper-parameters first; otherwise fixParameters() is used. The final model
 * is then trained on all of trainData and the build time is recorded.
 *
 * Note that during the parameter search trainData itself is randomized and
 * stratified.
 *
 * @param trainData the training data
 * @throws Exception if training fails
 */
public void buildClassifier(Instances trainData) throws Exception {
    final long startTime = System.currentTimeMillis();

    if (!paraSearch) {
        fixParameters();
        print("Fixed parameters: R="+R
                            +", L="+percentageOfSeriesLength + ", lambdaW="+lambdaW);
    } else {
        // Local references to the search ranges as they are at the start of the search.
        double[] lambdaWValues = lambdaWRange;
        double[] seriesLengthValues = percentageOfSeriesLengthRange;
        int[] lengthScaleValues = shapeletLengthScaleRange;

        int noFolds = 2;
        double bestAccuracy = 0;
        int[] bestIdx = {0, 0, 0};
        double accuracy = 0;

        // randomize and stratify the data prior to cross validation
        trainData.randomize(rand);
        trainData.stratify(noFolds);

        int combinationNo = 1;

        for (int w = 0; w < lambdaWValues.length; w++) {
            for (int p = 0; p < seriesLengthValues.length; p++) {
                for (int s = 0; s < lengthScaleValues.length; s++) {

                    percentageOfSeriesLength = seriesLengthValues[p];
                    R = lengthScaleValues[s];
                    lambdaW = lambdaWValues[w];

                    print("HPS Combination #"+combinationNo+": {R="+R +
                            ", L="+percentageOfSeriesLength + ", lambdaW="+lambdaW + "}" );
                    print("--------------------------------------");

                    double accuracyTotal = 0;
                    // build the train and test sets of each cross-validation fold
                    for (int fold = 0; fold < noFolds; fold++) {
                        Instances foldTrain = trainData.trainCV(noFolds, fold);
                        Instances foldTest = trainData.testCV(noFolds, fold);

                        // fixed hyper-parameters
                        eta = 0.1;
                        alpha = -30;
                        maxIter=300;

                        print("Learn model for Fold-"+fold + ":" );

                        train(foldTrain);

                        // test on the remaining fold
                        accuracy = utilities.ClassifierTools.accuracy(foldTest, this);
                        accuracyTotal += accuracy;

                        print("Accuracy-Fold-"+fold + " = " + accuracy );

                        foldTrain = null;
                        foldTest = null;
                    }
                    double meanAccuracy = accuracyTotal / noFolds;

                    print("Accuracy-CV = " + meanAccuracy );
                    print("--------------------------------------");

                    // a strictly better mean accuracy replaces the best-so-far combination
                    if (meanAccuracy > bestAccuracy) {
                        bestIdx = new int[] {w, p, s};
                        bestAccuracy = meanAccuracy;
                    }

                    combinationNo++;
                }
            }
        }

        System.gc();
        maxAcc = bestAccuracy;
        lambdaW = lambdaWValues[bestIdx[0]];
        percentageOfSeriesLength = seriesLengthValues[bestIdx[1]];
        R = lengthScaleValues[bestIdx[2]];

        // the final model uses twice as many iterations as the search folds
        eta = 0.1;
        alpha = -30;
        maxIter=600;
        print("Learn final model with best hyper-parameters: R="+R
                            +", L="+percentageOfSeriesLength + ", lambdaW="+lambdaW);
    }

    train(trainData);

    trainResults.setBuildTime(System.currentTimeMillis() - startTime);
}
 
Example 14
Source File: CVParameterSelection.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Finds the best parameter combination. The method recurses once per
 * parameter being optimised; when all parameters have a value, the resulting
 * option set is cross-validated and remembered if its error rate is the
 * lowest so far.
 * 
 * @param depth the index of the parameter to be optimised at this level
 * @param trainData the data the search is based on
 * @param random a random number generator
 * @throws Exception if an error occurs
 */
protected void findParamsByCrossValidation(int depth, Instances trainData,
                                           Random random)
  throws Exception {

  if (depth < m_CVParams.size()) {
    CVParameter cvParam = (CVParameter) m_CVParams.elementAt(depth);

    // The rounded difference lower - upper selects the upper bound:
    // 1 -> m_NumAttributes, 2 -> m_TrainFoldSize, anything else -> m_Upper.
    int upperSelector = (int) (cvParam.m_Lower - cvParam.m_Upper + 0.5);
    double upper;
    if (upperSelector == 1) {
      upper = m_NumAttributes;
    } else if (upperSelector == 2) {
      upper = m_TrainFoldSize;
    } else {
      upper = cvParam.m_Upper;
    }

    // Step this parameter through its range and recurse into the next one.
    double increment = (upper - cvParam.m_Lower) / (cvParam.m_Steps - 1);
    cvParam.m_ParamValue = cvParam.m_Lower;
    while (cvParam.m_ParamValue <= upper) {
      findParamsByCrossValidation(depth + 1, trainData, random);
      cvParam.m_ParamValue += increment;
    }
    return;
  }

  // All parameters are fixed: cross-validate this combination.
  Evaluation evaluation = new Evaluation(trainData);

  // Set the classifier options
  String[] options = createOptions();
  if (m_Debug) {
    System.err.print("Setting options for "
        + m_Classifier.getClass().getName() + ":");
    for (String option : options) {
      System.err.print(" " + option);
    }
    System.err.println("");
  }
  ((OptionHandler) m_Classifier).setOptions(options);

  for (int fold = 0; fold < m_NumFolds; fold++) {
    // We want to randomize the data the same way for every
    // learning scheme, hence the fixed seed.
    Instances foldTrain = trainData.trainCV(m_NumFolds, fold, new Random(1));
    Instances foldTest = trainData.testCV(m_NumFolds, fold);
    m_Classifier.buildClassifier(foldTrain);
    evaluation.setPriors(foldTrain);
    evaluation.evaluateModel(m_Classifier, foldTest);
  }

  double errorRate = evaluation.errorRate();
  if (m_Debug) {
    System.err.println("Cross-validated error rate: "
        + Utils.doubleToString(errorRate, 6, 4));
  }

  // NOTE(review): -99 looks like the "no result yet" marker of m_BestPerformance - confirm.
  boolean firstResult = (m_BestPerformance == -99);
  if (firstResult || (errorRate < m_BestPerformance)) {
    m_BestPerformance = errorRate;
    m_BestClassifierOptions = createOptions();
  }
}
 
Example 15
Source File: ConjunctiveRule.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a single rule learner with REP that handles nominal or numeric
 * classes. For nominal classes the rule learner predicts a distribution on
 * the classes; for numeric classes it predicts a single value.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // work on a copy without the instances whose class is missing
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  if (data.numInstances() < m_Folds) {
    throw new Exception("Not enough data for REP.");
  }

  m_ClassAttribute = data.classAttribute();
  // a numeric class is handled as a single "class"
  m_NumClasses = m_ClassAttribute.isNominal() ? m_ClassAttribute.numValues() : 1;

  m_Antds = new FastVector();
  m_DefDstr = new double[m_NumClasses];
  m_Cnsqt = new double[m_NumClasses];
  m_Targets = new FastVector();
  m_Random = new Random(m_Seed);

  if (m_NumAntds == -1) {
    data.randomize(m_Random);

    // Split data into Grow and Prune: the last of m_Folds stratified folds
    // is the pruning set, the other folds form the growing set.
    data.stratify(m_Folds);
    final int pruneFold = m_Folds - 1;
    Instances growSet = data.trainCV(m_Folds, pruneFold, m_Random);
    Instances pruneSet = data.testCV(m_Folds, pruneFold);

    grow(growSet);     // Build this rule
    prune(pruneSet);   // Prune this rule
  } else {
    // m_NumAntds is set: grow on all of the data, without pruning
    grow(data);
  }

  if (m_ClassAttribute.isNominal()) {
    Utils.normalize(m_Cnsqt);
    // the default distribution is normalised only when its sum is greater than 0
    if (Utils.gr(Utils.sum(m_DefDstr), 0)) {
      Utils.normalize(m_DefDstr);
    }
  }
}
 
Example 16
Source File: MajorityConfidenceVote.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds the ensemble: the weight of each classifier is determined by a
 * cross validation of that classifier, after which the classifier is
 * trained on the complete data (without instances whose class is missing).
 *
 * @param data
 *            Training instances
 * @throws Exception
 *             if the data cannot be handled, a classifier fails, or the
 *             thread was interrupted
 */
@Override
public void buildClassifier(final Instances data) throws Exception {

	final int numClassifiers = this.m_Classifiers.length;
	this.classifierWeights = new double[numClassifiers];
	final double[] weights = this.classifierWeights;

	// copy of the data without instances that have a missing class
	Instances newData = new Instances(data);
	newData.deleteWithMissingClass();
	this.m_structure = new Instances(newData, 0);

	// can classifier handle the data?
	this.getCapabilities().testWithFail(data);

	for (int c = 0; c < this.m_Classifiers.length; c++) {
		if (Thread.currentThread().isInterrupted()) {
			throw new InterruptedException();
		}

		// Cross validation: add up the fraction of correctly classified test
		// instances over all folds. The folds are drawn from the data as passed in.
		for (int fold = 0; fold < this.numFolds; fold++) {
			Instances foldTrain = data.trainCV(this.numFolds, fold, new Random(this.seed));
			Instances foldTest = data.testCV(this.numFolds, fold);

			this.getClassifier(c).buildClassifier(foldTrain);
			Evaluation foldEval = new Evaluation(foldTrain);
			foldEval.evaluateModel(this.getClassifier(c), foldTest);
			weights[c] += foldEval.pctCorrect() / 100d;
		}

		// weight = (sum of fold accuracies)^2 / number of folds
		weights[c] = Math.pow(weights[c], 2);
		weights[c] /= this.numFolds;

		// final model of this classifier
		this.getClassifier(c).buildClassifier(newData);
	}

	// If no classifier predicted something correctly, assume uniform distribution
	boolean allNegligible = Arrays.stream(weights).allMatch(w -> w < 0.000001d);
	if (allNegligible) {
		Arrays.fill(weights, 1d / weights.length);
	}
}
 
Example 17
Source File: CDTClassifierEvaluation.java    From NLIWOD with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Cross-validates a PSt classifier on the QALD-6 log data: for every test
 * instance the system with the highest confidence is selected, and macro
 * precision, recall and F-measure of that selection are printed per fold and
 * averaged over all folds, next to the averaged F-measure of one fixed
 * baseline system.
 *
 * @param args not used
 * @throws Exception if the data cannot be read or the classifier fails
 */
public static void main(String[] args) throws Exception {
	/*
	 * For multilabel classification:
	 */

	// load the data; try-with-resources closes the ARFF file once it has been parsed
	Path datapath = Paths.get("./src/main/resources/old/Qald6Logs.arff");
	Instances data;
	try (BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()))) {
		ArffReader arff = new ArffReader(reader);
		data = arff.getData();
	}
	data.setClassIndex(6);

	// randomize data
	long seed = System.currentTimeMillis();
	int folds = 100;

	// baseline system whose own scores are reported for comparison
	String qasystem = "KWGAnswer";

	Random rand = new Random(seed);
	Instances randData = new Instances(data);
	randData.randomize(rand);
	ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English");

	// perform cross-validation; primitive accumulators avoid boxing on every fold
	double foldavep = 0.0;
	double foldaver = 0.0;
	double foldavef = 0.0;
	double foldsys = 0.0;

	for (int n = 0; n < folds; n++) {
		Instances train = randData.trainCV(folds, n);
		Instances test = randData.testCV(folds, n);
		// build and evaluate classifier
		PSt pst = new PSt();
		pst.buildClassifier(train);
		float ave_p = 0;
		float ave_r = 0;
		float sysp = 0;
		float sysr = 0;

		for (int j = 0; j < test.size(); j++) {
			Instance ins = test.get(j);
			double[] confidences = pst.distributionForInstance(ins);
			// index of the highest confidence among the first six entries
			int argmax = -1;
			double max = -1;
			for (int i = 0; i < 6; i++) {
				if (confidences[i] > max) {
					max = confidences[i];
					argmax = i;
				}
			}
			// the confidences are mapped to the systems list in reverse order
			String sys2ask = systems.get(systems.size() - argmax - 1);
			// NOTE(review): j is the position inside the test fold, not in the
			// original data; confirm that the lists returned by loadSystemP/R
			// are meant to be indexed this way.
			ave_p += Float.parseFloat(loadSystemP(sys2ask).get(j));
			ave_r += Float.parseFloat(loadSystemR(sys2ask).get(j));
			sysp += Float.parseFloat(loadSystemP(qasystem).get(j));
			// the baseline recall has to come from the baseline system as well
			sysr += Float.parseFloat(loadSystemR(qasystem).get(j));
		}
		double p = ave_p / test.size();
		double r = ave_r / test.size();
		double syspave = sysp / test.size();
		double sysrave = sysr / test.size();
		// the F-measure is 0 when precision or recall is 0; this also keeps a
		// 0/0 division from turning the averages into NaN
		double sysfmeasure = 0;
		if (syspave > 0 && sysrave > 0) {
			sysfmeasure = 2 * sysrave * syspave / (sysrave + syspave);
		}
		System.out.println(" RESULT FOR FOLD " + n);
		System.out.println("macro P : " + p);
		System.out.println("macro R : " + r);
		double fmeasure = 0;
		if (p > 0 && r > 0) {
			fmeasure = 2 * p * r / (p + r);
		}
		System.out.println("macro F : " + fmeasure + '\n');
		foldavep += p / folds;
		foldaver += r / folds;
		foldavef += fmeasure / folds;
		foldsys += sysfmeasure / folds;
	}
	System.out.println(" RESULT FOR CV ");
	System.out.println("macro aveP : " + foldavep);
	System.out.println("macro aveR : " + foldaver);
	System.out.println("macro aveF : " + foldavef);
	System.out.println("macro aveF " + qasystem + " : " + foldsys);
}
 
Example 18
Source File: CrossValidationExperiments.java    From NLIWOD with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Cross-validates a CDN classifier on the QALD-6 log data: for every test
 * instance the system with the highest confidence is selected, and the macro
 * F-measure of that selection is printed per fold and averaged over all
 * folds.
 *
 * @param args not used
 * @throws Exception if the data cannot be read or the classifier fails
 */
public static void main(String[] args) throws Exception {

	// load the data; try-with-resources closes the ARFF file once it has been parsed
	Path datapath = Paths.get("./src/main/resources/old/Qald6Logs.arff");
	Instances data;
	try (BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()))) {
		ArffReader arff = new ArffReader(reader);
		data = arff.getData();
	}
	data.setClassIndex(6);

	ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" );

	int seed = 133;
	// Change to 100 for leave-one-out CV
	int folds = 10;

	Random rand = new Random(seed);
	Instances randData = new Instances(data);
	randData.randomize(rand);

	float cv_ave_f = 0;

	for (int n = 0; n < folds; n++) {
		Instances train = randData.trainCV(folds, n);
		Instances test = randData.testCV(folds, n);

		// Change to the classifier of your choice
		CDN classifier = new CDN();
		classifier.buildClassifier(train);

		float ave_p = 0;
		float ave_r = 0;

		for (int j = 0; j < test.size(); j++) {
			Instance ins = test.get(j);
			// Position of this instance in the unshuffled data, found by
			// comparing string representations; the last match wins and 0 is
			// used when nothing matches.
			String insText = ins.toString();
			int k = 0;
			for (int l = 0; l < data.size(); l++) {
				if (data.get(l).toString().equals(insText)) {
					k = l;
				}
			}
			double[] confidences = classifier.distributionForInstance(ins);
			// index of the highest confidence among the first six entries
			int argmax = -1;
			double max = -1;
			for (int i = 0; i < 6; i++) {
				if (confidences[i] > max) {
					max = confidences[i];
					argmax = i;
				}
			}
			// the confidences are mapped to the systems list in reverse order
			String sys2ask = systems.get(systems.size() - argmax - 1);
			ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k));
			ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k));
		}

		double p = ave_p / test.size();
		double r = ave_r / test.size();
		// the F-measure stays 0 when precision or recall is 0 (avoids 0/0)
		double fmeasure = 0;
		if (p > 0 && r > 0) {
			fmeasure = 2 * p * r / (p + r);
		}
		System.out.println("macro F on fold " + n + ": " + fmeasure);

		cv_ave_f += fmeasure / folds;
	}
	System.out.println("macro F average: " + cv_ave_f);
	System.out.println('\n');
}
 
Example 19
Source File: StackingC.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Method that builds the meta level: the meta data is generated by
 * cross-validation, and one copy of the meta classifier per class of
 * m_BaseFormat is built on a filtered view of that meta data.
 * 
 * @param newData the data to work with
 * @param random the random number generator to use for cross-validation
 * @throws Exception if generation fails
 */
protected void generateMetaLevel(Instances newData, Random random) 
  throws Exception {

  Instances metaData = metaFormat(newData);
  m_MetaFormat = new Instances(metaData, 0);

  for (int fold = 0; fold < m_NumFolds; fold++) {
    Instances foldTrain = newData.trainCV(m_NumFolds, fold, random);

    // Build base classifiers
    for (int c = 0; c < m_Classifiers.length; c++) {
      getClassifier(c).buildClassifier(foldTrain);
    }

    // Classify test instances and add to meta data
    Instances foldTest = newData.testCV(m_NumFolds, fold);
    for (int t = 0; t < foldTest.numInstances(); t++) {
      metaData.add(metaInstance(foldTest.instance(t)));
    }
  }

  m_MetaClassifiers =
      AbstractClassifier.makeCopies(m_MetaClassifier, m_BaseFormat.numClasses());

  // Attribute indices kept for one meta classifier: one attribute per base
  // classifier plus, in the last slot, the last attribute of the meta data.
  final int numBase = m_Classifiers.length;
  int[] keptIndices = new int[numBase + 1];
  keptIndices[numBase] = metaData.numAttributes() - 1;

  for (int cls = 0; cls < m_MetaClassifiers.length; cls++) {
    for (int c = 0; c < m_Classifiers.length; c++) {
      keptIndices[c] = m_BaseFormat.numClasses() * c + cls;
    }

    // Turn the class attribute into a numeric indicator for value cls.
    m_makeIndicatorFilter = new weka.filters.unsupervised.attribute.MakeIndicator();
    m_makeIndicatorFilter.setAttributeIndex(String.valueOf(metaData.classIndex() + 1));
    m_makeIndicatorFilter.setNumeric(true);
    m_makeIndicatorFilter.setValueIndex(cls);
    m_makeIndicatorFilter.setInputFormat(metaData);
    Instances indicatorData = Filter.useFilter(metaData, m_makeIndicatorFilter);

    // Inverted Remove filter: only the attributes in keptIndices survive.
    m_attrFilter = new weka.filters.unsupervised.attribute.Remove();
    m_attrFilter.setInvertSelection(true);
    m_attrFilter.setAttributeIndicesArray(keptIndices);
    m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat());
    Instances reducedData = Filter.useFilter(indicatorData, m_attrFilter);

    reducedData.setClassIndex(reducedData.numAttributes() - 1);

    m_MetaClassifiers[cls].buildClassifier(reducedData);
  }
}