Java Code Examples for weka.classifiers.Evaluation#evaluateModel()

The following examples show how to use weka.classifiers.Evaluation#evaluateModel(). They are drawn from open-source projects; the source file and license are listed above each example.
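As a quick orientation before the project snippets, here is a minimal, self-contained sketch of the usual pattern: build a classifier on a training split, construct the Evaluation object from the training data (Weka takes the class priors from the dataset passed to the constructor), then call evaluateModel() on the held-out test set. The file path, split ratio, and choice of J48 are illustrative assumptions, not taken from any of the projects below.

import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EvaluateModelSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the path is a placeholder.
        Instances data = new DataSource("data/iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Simple holdout split: shuffle, then first two thirds train, last third test.
        data.randomize(new Random(42));
        int trainSize = (int) Math.round(data.numInstances() * 2 / 3.0);
        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, data.numInstances() - trainSize);

        Classifier clf = new J48();
        clf.buildClassifier(train);

        // Construct Evaluation from the *training* data so the class priors
        // come from it, then score the held-out test set.
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(clf, test);
        System.out.println(eval.toSummaryString());
    }
}

evaluateModel() returns the per-instance predictions as a double array, but most callers ignore the return value and query aggregate statistics such as pctCorrect(), errorRate(), or toSummaryString() afterwards, as the examples below do.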
Example 1
Source File: TestWekaBayes.java    From Java-Data-Analysis with MIT License
public static void main(String[] args) throws Exception {
        DataSource source = new DataSource("data/AnonFruit.arff");
        Instances train = source.getDataSet();
        train.setClassIndex(3);  // target attribute: Sweet

        // build the model
        NaiveBayes model = new NaiveBayes();
        model.buildClassifier(train);

        // evaluate on the training data itself (resubstitution)
        Instances test = train;
        Evaluation eval = new Evaluation(test);
        eval.evaluateModel(model, test);
        List<Prediction> predictions = eval.predictions();
        int k = 0;
        for (Instance instance : test) {
            double actual = instance.classValue();
            double prediction = eval.evaluateModelOnce(model, instance);
            System.out.printf("%2d.%4.0f%4.0f", ++k, actual, prediction);
            System.out.println(prediction != actual ? " *" : "");
        }
    }
 
Example 2
Source File: TestUtil.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Perform simple holdout with a given percentage
 *
 * @param clf Classifier
 * @param data Full dataset
 * @param p Split percentage
 */
public static void holdout(Classifier clf, Instances data, double p) throws Exception {
  Instances[] split = splitTrainTest(data, p);

  Instances train = split[0];
  Instances test = split[1];

  logger.info("Classifier: \n{}", clf.toString());
  clf.buildClassifier(train);
  Evaluation trainEval = new Evaluation(train);
  trainEval.evaluateModel(clf, train);
  logger.info("Weka Train Evaluation:");
  logger.info(trainEval.toSummaryString());
  if (!data.classAttribute().isNumeric()) {
    logger.info(trainEval.toMatrixString());
  }

  Evaluation testEval = new Evaluation(train);
  logger.info("Weka Test Evaluation:");
  testEval.evaluateModel(clf, test);
  logger.info(testEval.toSummaryString());
  if (!data.classAttribute().isNumeric()) {
    logger.info(testEval.toMatrixString());
  }
}
 
Example 3
Source File: ReductionOptimizer.java    From AILibs with GNU Affero General Public License v3.0
private int getLossForClassifier(final MCTreeNode tree, final Instances data) {

	this.completeTree(tree);

	synchronized (this) {
		/* now evaluate the tree on two stratified 60/40 splits */
		try {
			DescriptiveStatistics stats = new DescriptiveStatistics();
			for (int i = 0; i < 2; i++) {
				List<IWekaInstances> split = WekaUtil.getStratifiedSplit(new WekaInstances(data), this.seed + i, .6f);
				tree.buildClassifier(split.get(0).getList());

				Evaluation eval = new Evaluation(data);
				eval.evaluateModel(tree, split.get(1).getList());
				stats.addValue(eval.pctIncorrect());
			}
			return (int) Math.round(stats.getMean() * 100);

		} catch (Exception e) {
			this.logger.error(LoggerUtil.getExceptionInfo(e));
			return Integer.MAX_VALUE;
		}
	}
}
 
Example 4
Source File: NBTreeNoSplit.java    From tsml with GNU General Public License v3.0
/**
  * Utility method for fast 5-fold cross validation of a naive bayes
  * model
  *
  * @param fullModel a <code>NaiveBayesUpdateable</code> value
  * @param trainingSet an <code>Instances</code> value
  * @param r a <code>Random</code> value
  * @return a <code>double</code> value
  * @exception Exception if an error occurs
  */
 public static double crossValidate(NaiveBayesUpdateable fullModel,
                                    Instances trainingSet,
                                    Random r) throws Exception {
   // make some copies for fast evaluation of 5-fold xval
   Classifier[] copies = AbstractClassifier.makeCopies(fullModel, 5);
   Evaluation eval = new Evaluation(trainingSet);
   // make some splits
   for (int j = 0; j < 5; j++) {
     Instances test = trainingSet.testCV(5, j);
     // unlearn these test instances
     for (int k = 0; k < test.numInstances(); k++) {
       test.instance(k).setWeight(-test.instance(k).weight());
       ((NaiveBayesUpdateable) copies[j]).updateClassifier(test.instance(k));
       // reset the weight back to its original value
       test.instance(k).setWeight(-test.instance(k).weight());
     }
     eval.evaluateModel(copies[j], test);
   }
   return eval.incorrect();
 }
 
Example 5
Source File: StabilityTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
public static void evaluate(Dl4jMlpClassifier clf, Instances data, double minPerformance)
    throws Exception {
  Instances[] split = TestUtil.splitTrainTest(data);

  Instances train = split[0];
  Instances test = split[1];

  clf.buildClassifier(train);
  Evaluation trainEval = new Evaluation(train);
  trainEval.evaluateModel(clf, train);

  Evaluation testEval = new Evaluation(train);
  testEval.evaluateModel(clf, test);

  final double testPctCorrect = testEval.pctCorrect();
  final double trainPctCorrect = trainEval.pctCorrect();

  log.info("Train: {}, Test: {}", trainPctCorrect, testPctCorrect);
  boolean success = testPctCorrect > minPerformance && trainPctCorrect > minPerformance;
  log.info("Success: " + success);

  log.info(clf.getModel().conf().toYaml());
  Assert.assertTrue("Performance was < " + minPerfomance + ". TestPctCorrect: " + testPctCorrect
      + ", TrainPctCorrect: " + trainPctCorrect, success);
}
 
Example 6
Source File: AllPairsTable.java    From AILibs with GNU Affero General Public License v3.0
public AllPairsTable(final Instances training, final Instances validation, final Classifier c) throws Exception {
	Collection<String> classes = WekaUtil.getClassesActuallyContainedInDataset(training);
	for (Collection<String> set : SetUtil.getAllPossibleSubsetsWithSize(classes, 2)) {
		List<String> pair = set.stream().sorted().collect(Collectors.toList());
		String a = pair.get(0);
		String b = pair.get(1);
		Instances trainingData = WekaUtil.getInstancesOfClass(training, a);
		trainingData.addAll(WekaUtil.getInstancesOfClass(training, b));

		c.buildClassifier(trainingData);

		Instances validationData = WekaUtil.getInstancesOfClass(validation, a);
		validationData.addAll(WekaUtil.getInstancesOfClass(validation, b));
		Evaluation eval = new Evaluation(trainingData);
		eval.evaluateModel(c, validationData);

		if (!this.separabilities.containsKey(a)) {
			this.separabilities.put(a, new HashMap<>());
		}
		this.separabilities.get(a).put(b, eval.pctCorrect() / 100);
	}
	this.classCount = WekaUtil.getNumberOfInstancesPerClass(training);
	this.sum = training.size();
}
 
Example 7
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0
public static double evaluateMLPlan(final int timeout, final Instances training, final Instances test,
		final int seed, final Logger logger, final int numCores)
				throws Exception {

	logger.debug("Starting ML-Plan execution. Training on {} instances with "
			+ "{} attributes.", training.numInstances(), training.numAttributes());

	/* Initialize MLPlan using WEKA components */
	MLPlanWekaBuilder builder = AbstractMLPlanBuilder.forWeka();
	builder.withTimeOut(new Timeout(timeout, TimeUnit.SECONDS));
	builder.withNumCpus(numCores);
	builder.withDataset(training);
	MLPlan mlplan = builder.build();
	mlplan.setRandomSeed(seed);
	Classifier clf = mlplan.call();

	if (mlplan.getSelectedClassifier() == null
			|| ((MLPipeline) mlplan.getSelectedClassifier()).getBaseClassifier() == null) {
		logger.warn("Could not find a model using ML-Plan. Returning -1...");
		return -1;
	}

	String solutionString = ((MLPipeline) mlplan.getSelectedClassifier()).getBaseClassifier().getClass().getName()
			+ " | " + ((MLPipeline) mlplan.getSelectedClassifier()).getPreprocessors();
	logger.debug("Selected classifier: {}", solutionString);

	/* evaluate solution produced by mlplan */
	Evaluation eval = new Evaluation(training);
	eval.evaluateModel(clf, test);

	return eval.pctCorrect();
}
 
Example 8
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0
public static double performEnsemble(Instances instances) throws Exception {
	List<Instances> subsample = WekaUtil.getStratifiedSplit(instances, 42, .05f);
	instances = subsample.get(0);

	/* Relief */
	ReliefFAttributeEval relief = new ReliefFAttributeEval();
	relief.buildEvaluator(instances);
	double attEvalSum = 0;
	for (int i = 0; i < instances.numAttributes() - 1; i++) {
		attEvalSum += relief.evaluateAttribute(i);
	}
	attEvalSum /= instances.numAttributes();

	/* Variance */
	double varianceMean = 0;
	int totalNumericCount = 0;
	for (int i = 0; i < instances.numAttributes() - 1; i++) {
		if (instances.attribute(i).isNumeric()) {
			instances.attributeStats(i).numericStats.calculateDerived();
			varianceMean += Math.pow(instances.attributeStats(i).numericStats.stdDev, 2);
			totalNumericCount++;
		}
	}
	varianceMean /= (totalNumericCount != 0 ? totalNumericCount : 1);

	/* KNN */
	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, .7f);
	IBk knn = new IBk(10);
	knn.buildClassifier(split.get(0));
	Evaluation eval = new Evaluation(split.get(0));
	eval.evaluateModel(knn, split.get(1));
	double knnResult = eval.pctCorrect() / 100d;

	return 1 - (0.33 * attEvalSum + 0.33 * knnResult + 0.33 * varianceMean);
}
 
Example 9
Source File: Util.java    From AILibs with GNU Affero General Public License v3.0
public static List<Map<String, Object>> conductSingleOneStepReductionExperiment(final ReductionExperiment experiment) throws Exception {
	/* load data */
	Instances data = new Instances(new BufferedReader(new FileReader(experiment.getDataset())));
	data.setClassIndex(data.numAttributes() - 1);

	/* prepare basis for experiments */
	int seed = experiment.getSeed();
	Classifier classifierForRPNDSplit = AbstractClassifier.forName(experiment.getNameOfInnerClassifier(), null);
	Classifier leftClassifier = AbstractClassifier.forName(experiment.getNameOfLeftClassifier(), null);
	Classifier innerClassifier = AbstractClassifier.forName(experiment.getNameOfInnerClassifier(), null);
	Classifier rightClassifier = AbstractClassifier.forName(experiment.getNameOfRightClassifier(), null);

	RPNDSplitter splitter = new RPNDSplitter(new Random(seed), classifierForRPNDSplit);

	/* conduct experiments */
	List<Map<String, Object>> results = new ArrayList<>();
	for (int k = 0; k < 10; k++) {
		List<Collection<String>> classSplit;
		try {
			classSplit = new ArrayList<>(splitter.split(data));
		} catch (Exception e) {
			throw new RuntimeException("Could not create RPND split.", e);
		}
		MCTreeNodeReD classifier = new MCTreeNodeReD(innerClassifier, classSplit.get(0), leftClassifier, classSplit.get(1), rightClassifier);
		long start = System.currentTimeMillis();
		Map<String, Object> result = new HashMap<>();
		List<Instances> dataSplit = WekaUtil.getStratifiedSplit(data, (seed + k), .7);
		classifier.buildClassifier(dataSplit.get(0));
		long time = System.currentTimeMillis() - start;
		Evaluation eval = new Evaluation(dataSplit.get(0));
		eval.evaluateModel(classifier, dataSplit.get(1));
		double loss = (100 - eval.pctCorrect()) / 100f;
		logger.info("Conducted experiment {} with split {}/{}. Loss: {}. Time: {}ms.", k, classSplit.get(0), classSplit.get(1), loss, time);
		result.put("errorRate", loss);
		result.put(LABEL_TRAIN_TIME, time);
		results.add(result);
	}
	return results;
}
 
Example 10
Source File: WekaDeeplearning4jExamples.java    From wekaDeeplearning4j with GNU General Public License v3.0
private static void dl4jResnet50() throws Exception {
        String folderPath = "src/test/resources/nominal/plant-seedlings-small";
        ImageDirectoryLoader loader = new ImageDirectoryLoader();
        loader.setInputDirectory(new File(folderPath));
        Instances inst = loader.getDataSet();
        inst.setClassIndex(1);

        Dl4jMlpClassifier classifier = new Dl4jMlpClassifier();
        classifier.setNumEpochs(3);

        KerasEfficientNet kerasEfficientNet = new KerasEfficientNet();
        kerasEfficientNet.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1);
        classifier.setZooModel(kerasEfficientNet);

        ImageInstanceIterator iterator = new ImageInstanceIterator();
        iterator.setImagesLocation(new File(folderPath));

        classifier.setInstanceIterator(iterator);

        // Stratify and split the data
        Random rand = new Random(0);
        inst.randomize(rand);
        inst.stratify(5);
        Instances train = inst.trainCV(5, 0);
        Instances test = inst.testCV(5, 0);

        // Build the classifier on the training data
        classifier.buildClassifier(train);

        // Evaluate the model on the test data
        Evaluation eval = new Evaluation(test);
        eval.evaluateModel(classifier, test);

        // Output some summary statistics
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
    }
 
Example 11
Source File: Main-SVG.java    From Java-for-Data-Science with MIT License
public Main() {
    try {
        BufferedReader datafile;
        datafile = readDataFile("camping.txt");
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        smo.buildClassifier(trainingData);  // train on the training split, not the full dataset

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance 
        Instance instance = new DenseInstance(3);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);            
        instance.setDataset(data);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
 
Example 12
Source File: LDAEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0
@Test
public void evaluateTest() throws Exception {
    logger.info("Starting LDA evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);
    Instances data = new Instances(new BufferedReader(new FileReader(file)));
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> dataSplit = WekaUtil.getStratifiedSplit(data, 42, .05f);

    Instances insts = dataSplit.get(0);
    List<Instances> split = WekaUtil.getStratifiedSplit(insts, 42, .7f);

    long timeStart = System.currentTimeMillis();

    LDA lda = new LDA();
    lda.buildClassifier(split.get(0));

    long timeStartEval = System.currentTimeMillis();

    Evaluation eval = new Evaluation(split.get(0));
    eval.evaluateModel(lda, split.get(1));
    logger.debug("LDA pct correct: " + eval.pctCorrect());
    Assert.assertTrue(eval.pctCorrect() > 0);

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.debug("LDA took " + (timeTaken / 1000) + " s.");
    logger.debug("LDA eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example 13
Source File: DecisionTreeEstimator.java    From jMetal with MIT License
public double doPrediction(int index, S testSolution) {
  double result = 0.0d;

  try {
    int numberOfObjectives = solutionList.get(0).getNumberOfObjectives();

    // Attributes: one numeric, one nominal, one string
    Attribute attr = new Attribute("my-numeric");

    ArrayList<String> myNomVals = new ArrayList<>();
    for (int i = 0; i < numberOfObjectives; i++) {
      myNomVals.add(VALUE_STRING + i);
    }
    Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);

    Attribute attr2 = new Attribute(MY_STRING, (List<String>) null);

    // Create the training dataset
    ArrayList<Attribute> attrs = new ArrayList<>();
    attrs.add(attr);
    attrs.add(attr1);
    attrs.add(attr2);
    Instances dataset = new Instances("my_dataset", attrs, 0);

    // Add one training instance per objective of each solution
    for (S solution : solutionList) {
      for (int i = 0; i < numberOfObjectives; i++) {
        double[] attValues = new double[dataset.numAttributes()];
        attValues[0] = solution.getObjective(i);
        attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING + i);
        attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString() + i);
        dataset.add(new DenseInstance(1.0, attValues));
      }
    }


    // Build the test dataset from the solution to be predicted
    Instances datasetTest = new Instances("my_dataset_test", attrs, 0);
    for (int i = 0; i < numberOfObjectives; i++) {
      Instance test = new DenseInstance(3);
      test.setValue(attr, testSolution.getObjective(i));
      test.setValue(attr1, VALUE_STRING + i);
      test.setValue(attr2, testSolution.toString() + i);
      datasetTest.add(test);
    }


    // Preprocess strings (almost no classifier supports them)
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(dataset);
    dataset = Filter.useFilter(dataset, filter);

    // Build the classifier with the nominal attribute as class
    dataset.setClassIndex(1);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    datasetTest.setClassIndex(1);

    // Evaluate on the test dataset, then classify the requested instance
    Evaluation eval = new Evaluation(datasetTest);
    eval.evaluateModel(classifier, datasetTest);
    result = classifier.classifyInstance(datasetTest.get(index));
  } catch (Exception e) {
    result = testSolution.getObjective(index);
  }
  return result;
}
 
Example 14
Source File: ModelEvaluation.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-challenge.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes()- 1);

        String[] options = new String[4];
        options[0] = "-C";
        options[1] = "0.1";
        options[2] = "-M";
        options[3] = "2";
        J48 mytree = new J48();
        mytree.setOptions(options);
        mytree.buildClassifier(dt);
        
        Evaluation eval = new Evaluation(dt);
        Random rand = new Random(1);
        
        DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-test.arff");
        Instances tdt = src1.getDataSet();
        tdt.setClassIndex(tdt.numAttributes() - 1);
        
        eval.evaluateModel(mytree, tdt);
        
        System.out.println(eval.toSummaryString("Evaluation results:\n", false));
            System.out.println("Correct % = " + eval.pctCorrect());
            System.out.println("Incorrect % = " + eval.pctIncorrect());
            System.out.println("kappa = " + eval.kappa());
            System.out.println("MAE = " + eval.meanAbsoluteError());
            System.out.println("RMSE = " + eval.rootMeanSquaredError());
            System.out.println("RAE = " + eval.relativeAbsoluteError());
            System.out.println("Precision = " + eval.precision(1));
            System.out.println("Recall = " + eval.recall(1));
            System.out.println("fMeasure = " + eval.fMeasure(1));
            System.out.println(eval.toMatrixString("=== Overall Confusion Matrix ==="));
    } catch (Exception e) {
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
 
Example 15
Source File: IsotonicRegression.java    From tsml with GNU General Public License v3.0
/**
 * Does the actual regression.
 */
protected void regress(Attribute attribute, Instances insts, boolean ascending) 
  throws Exception {

  // Sort values according to current attribute
  insts.sort(attribute);
  
  // Initialize arrays
  double[] values = new double[insts.numInstances()];
  double[] weights = new double[insts.numInstances()];
  double[] cuts = new double[insts.numInstances() - 1];
  int size = 0;
  values[0] = insts.instance(0).classValue();
  weights[0] = insts.instance(0).weight();
  for (int i = 1; i < insts.numInstances(); i++) {
    if (insts.instance(i).value(attribute) >
        insts.instance(i - 1).value(attribute)) {
      cuts[size] = (insts.instance(i).value(attribute) +
                    insts.instance(i - 1).value(attribute)) / 2;
      size++;
    }
    values[size] += insts.instance(i).classValue();
    weights[size] += insts.instance(i).weight();
  }
  size++;
  
  // While there is a pair of adjacent violators
  boolean violators;
  do {
    violators = false;
    
    // Initialize arrays
    double[] tempValues = new double[size];
    double[] tempWeights = new double[size];
    double[] tempCuts = new double[size - 1];
    
    // Merge adjacent violators
    int newSize = 0;
    tempValues[0] = values[0];
    tempWeights[0] = weights[0];
    for (int j = 1; j < size; j++) {
      if ((ascending && (values[j] / weights[j] > 
                         tempValues[newSize] / tempWeights[newSize])) ||
          (!ascending && (values[j] / weights[j] < 
                          tempValues[newSize] / tempWeights[newSize]))) {
        tempCuts[newSize] = cuts[j - 1];
        newSize++;
        tempValues[newSize] = values[j];
        tempWeights[newSize] = weights[j];
      } else {
        tempWeights[newSize] += weights[j];
        tempValues[newSize] += values[j];
        violators = true;
      }
    }
    newSize++;
    
    // Copy references
    values = tempValues;
    weights = tempWeights;
    cuts = tempCuts;
    size = newSize;
  } while (violators);
  
  // Compute actual predictions
  for (int i = 0; i < size; i++) {
    values[i] /= weights[i];
  }
  
  // Backup best instance variables
  Attribute attributeBackedup = m_attribute;
  double[] cutsBackedup = m_cuts;
  double[] valuesBackedup = m_values;
  
  // Set instance variables to values computed for this attribute
  m_attribute = attribute;
  m_cuts = cuts;
  m_values = values;
  
  // Compute the root mean squared error on the training data
  Evaluation eval = new Evaluation(insts);
  eval.evaluateModel(this, insts);
  double msq = eval.rootMeanSquaredError();
  
  // Check whether this is the best attribute
  if (msq < m_minMsq) {
    m_minMsq = msq;
  } else {
    m_attribute = attributeBackedup;
    m_cuts = cutsBackedup;
    m_values = valuesBackedup;
  }
}
 
Example 16
Source File: DecisionTreeEstimator.java    From jMetal with MIT License
public double doPredictionVariable(int index, S testSolution) {
  double result = 0.0d;

  try {
    int numberOfVariables = solutionList.get(0).getNumberOfVariables();

    // Attributes: one numeric, one nominal, one string
    Attribute attr = new Attribute("my-numeric");

    ArrayList<String> myNomVals = new ArrayList<>();
    for (int i = 0; i < numberOfVariables; i++) {
      myNomVals.add(VALUE_STRING + i);
    }
    Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);

    Attribute attr2 = new Attribute(MY_STRING, (List<String>) null);

    // Create the training dataset
    ArrayList<Attribute> attrs = new ArrayList<>();
    attrs.add(attr);
    attrs.add(attr1);
    attrs.add(attr2);
    Instances dataset = new Instances("my_dataset", attrs, 0);

    // Add one training instance per variable of each solution
    for (S solution : solutionList) {
      for (int i = 0; i < numberOfVariables; i++) {
        double[] attValues = new double[dataset.numAttributes()];
        attValues[0] = ((DoubleSolution) solution).getVariable(i);
        attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING + i);
        attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString() + i);
        dataset.add(new DenseInstance(1.0, attValues));
      }
    }


    // Build the test dataset from the solution to be predicted
    Instances datasetTest = new Instances("my_dataset_test", attrs, 0);
    for (int i = 0; i < numberOfVariables; i++) {
      Instance test = new DenseInstance(3);
      test.setValue(attr, ((DoubleSolution) testSolution).getVariable(i));
      test.setValue(attr1, VALUE_STRING + i);
      test.setValue(attr2, testSolution.toString() + i);
      datasetTest.add(test);
    }


    // Preprocess strings (almost no classifier supports them)
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(dataset);
    dataset = Filter.useFilter(dataset, filter);

    // Build the classifier with the nominal attribute as class
    dataset.setClassIndex(1);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    datasetTest.setClassIndex(1);

    // Evaluate on the test dataset, then classify the requested instance
    Evaluation eval = new Evaluation(datasetTest);
    eval.evaluateModel(classifier, datasetTest);
    result = classifier.classifyInstance(datasetTest.get(index));
  } catch (Exception e) {
    result = ((DoubleSolution)testSolution).getVariable(index);
  }
  return result;
}
 
Example 17
Source File: CVParameterSelection.java    From tsml with GNU General Public License v3.0
/**
  * Finds the best parameter combination. (recursive for each parameter
  * being optimised).
  * 
  * @param depth the index of the parameter to be optimised at this level
  * @param trainData the data the search is based on
  * @param random a random number generator
  * @throws Exception if an error occurs
  */
 protected void findParamsByCrossValidation(int depth, Instances trainData,
                                            Random random)
   throws Exception {

   if (depth < m_CVParams.size()) {
     CVParameter cvParam = (CVParameter) m_CVParams.elementAt(depth);

     double upper;
     switch ((int) (cvParam.m_Lower - cvParam.m_Upper + 0.5)) {
     case 1:
       upper = m_NumAttributes;
       break;
     case 2:
       upper = m_TrainFoldSize;
       break;
     default:
       upper = cvParam.m_Upper;
       break;
     }
     double increment = (upper - cvParam.m_Lower) / (cvParam.m_Steps - 1);
     for (cvParam.m_ParamValue = cvParam.m_Lower;
          cvParam.m_ParamValue <= upper;
          cvParam.m_ParamValue += increment) {
       findParamsByCrossValidation(depth + 1, trainData, random);
     }
   } else {

     Evaluation evaluation = new Evaluation(trainData);

     // Set the classifier options
     String[] options = createOptions();
     if (m_Debug) {
       System.err.print("Setting options for "
                        + m_Classifier.getClass().getName() + ":");
       for (int i = 0; i < options.length; i++) {
         System.err.print(" " + options[i]);
       }
       System.err.println("");
     }
     ((OptionHandler) m_Classifier).setOptions(options);
     for (int j = 0; j < m_NumFolds; j++) {

       // We want to randomize the data the same way for every
       // learning scheme.
       Instances train = trainData.trainCV(m_NumFolds, j, new Random(1));
       Instances test = trainData.testCV(m_NumFolds, j);
       m_Classifier.buildClassifier(train);
       evaluation.setPriors(train);
       evaluation.evaluateModel(m_Classifier, test);
     }
     double error = evaluation.errorRate();
     if (m_Debug) {
       System.err.println("Cross-validated error rate: "
                          + Utils.doubleToString(error, 6, 4));
     }
     if ((m_BestPerformance == -99) || (error < m_BestPerformance)) {
       m_BestPerformance = error;
       m_BestClassifierOptions = createOptions();
     }
   }
 }
 
Example 18
Source File: WekaEmailIntentClassifier.java    From EmailIntentDataSet with Apache License 2.0
public static void main(String[] args) throws Exception {
	
	if (args.length != 2) {
		System.out.println("Usage: WekaSpeechActClassifier <train_set_input_file> <test_set_input_file>");
		System.exit(0);
	}
	
	String arffFileTrain = args[0];
	String arffFileTest = args[1];

	LibSVM wekaClassifier = new LibSVM();
	wekaClassifier.setOptions(new String[] {"-B", "-H"});

	Instances preparedData = (Instances) SerializationHelper.read(arffFileTrain);
	Instances preparedTest = (Instances) SerializationHelper.read(arffFileTest);
	
	System.out.println("Reading train set and test set done!");

	System.out.print("\nTraining...");
	wekaClassifier.buildClassifier(preparedData);
	
	System.out.println("\nTraining...done!");
	
	Evaluation evalTrain = new Evaluation(preparedData);
	evalTrain.evaluateModel(wekaClassifier, preparedData);

	DecimalFormat formatter = new DecimalFormat("#0.0");
	
	System.out.println("\nEvaluating on trainSet...");
	System.out.println(evalTrain.toSummaryString());
	
	System.out.println("\nResult on trainSet...");
	System.out.println("Precision:" + formatter.format(100*evalTrain.precision(0)) + "%" +
			" - Recal: " + formatter.format(100*evalTrain.recall(0)) + "%" +
			" - F1: " + formatter.format(evalTrain.fMeasure(0)) + "%");
	
	Evaluation eval = new Evaluation(preparedTest);
	eval.evaluateModel(wekaClassifier, preparedTest);

	System.out.println("\nEvaluating on testSet...");
	System.out.println(eval.toSummaryString());
	
	System.out.println("\nResult on testSet...");
	System.out.println("Precision:" + formatter.format(100*eval.precision(0)) + "%" +
			" - Recal: " + formatter.format(100*eval.recall(0)) + "%" +
			" - F1: " + formatter.format(100*eval.fMeasure(0)) + "%");

	System.out.println("True positive rate: " + formatter.format(100*eval.truePositiveRate(0)) + "%" + 
			" - True negative rate: "  + formatter.format(100*eval.trueNegativeRate(0)) + "%");
	System.out.println("Accuracy: " + formatter.format(100*((eval.truePositiveRate(0) + eval.trueNegativeRate(0)) / 2)) + "%");
	
	System.out.println("\nDone!");
}
 
Example 19
Source File: UCR_Trillion.java    From tsml with GNU General Public License v3.0
/**
 * Running the experiment for a single dataset
 * @param datasetName
 * @throws Exception
 */
private static void singleProblem(String datasetName) throws Exception {
	// Setting output directory
	resDir = projectPath + "outputs/Benchmark/" + datasetName + "/";

	// Check if it exists, else create the directory
	File dir = new File(resDir);
	if (!dir.exists())
		dir.mkdirs();

	// Reading the dataset
	System.out.println("Processing: " + datasetName);
	Instances[] data = ExperimentsLauncher.readTrainAndTest(datasetPath, datasetName);

	Instances train = data[0];
	Instances test = data[1];

	// Go through different runs and randomize the dataset
	for (int i = 0; i < nbRuns; i++) {
		// Sampling the dataset
		train = Sampling.random(train);

		// Initialising the classifier
		System.out.println("Run " + i + ", Launching " + method);
		Trillion classifier = new Trillion(datasetName);
		classifier.setResDir(resDir);
		classifier.setType(method);

		// Training the classifier for the best window
		long start = System.nanoTime();
		classifier.buildClassifier(train);
		long stop = System.nanoTime();
		double searchTime = (stop - start) / 1e9;
		System.out.println(searchTime + " s");

		bestWarpingWindow = classifier.getBestWin();
		bestScore = classifier.getBestScore();

		// Evaluate the trained classifier with the test set
		Evaluation eval = new Evaluation(train);
		eval.evaluateModel(classifier, test);
		System.out.println(eval.errorRate());

		// Save result
		saveSearchTime(searchTime, eval.errorRate());
	}
}
 
Example 20
Source File: UCR_UCRSuitePrunedDTW.java    From tsml with GNU General Public License v3.0
/**
 * Running the experiment for a single dataset
 * @param datasetName
 * @throws Exception
 */
private static void singleProblem(String datasetName) throws Exception {
	// Setting output directory
	resDir = projectPath + "outputs/Benchmark/" + datasetName + "/";

	// Check if it exists, else create the directory
	File dir = new File(resDir);
	if (!dir.exists())
		dir.mkdirs();

	// Reading the dataset
	System.out.println("Processing: " + datasetName);
	Instances[] data = ExperimentsLauncher.readTrainAndTest(datasetPath, datasetName);

	Instances train = data[0];
	Instances test = data[1];

	// Go through different runs and randomize the dataset
	for (int i = 0; i < nbRuns; i++) {
		// Sampling the dataset
		train = Sampling.random(train);

		// Initialising the classifier
		System.out.println("Run " + i + ", Launching " + method);
		UCRSuitePrunedDTW classifier = new UCRSuitePrunedDTW(datasetName);
		classifier.setResDir(resDir);
		classifier.setType(method);

		// Training the classifier for the best window
		long start = System.nanoTime();
		classifier.buildClassifier(train);
		long stop = System.nanoTime();
		double searchTime = (stop - start) / 1e9;
		System.out.println(searchTime + " s");

		bestWarpingWindow = classifier.getBestWin();
		bestScore = classifier.getBestScore();

		// Evaluate the trained classifier with the test set
		Evaluation eval = new Evaluation(train);
		eval.evaluateModel(classifier, test);
		System.out.println(eval.errorRate());

		// Save result
		saveSearchTime(searchTime, eval.errorRate());
	}
}