Java Code Examples for weka.core.Instances#deleteWithMissingClass()

The following examples show how to use weka.core.Instances#deleteWithMissingClass(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ConsistencySubsetEval.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Initialises the attribute evaluator from the supplied training data.
 * All fields of the evaluator that are not configured via options are
 * set up here.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator (Instances data) throws Exception {

  // verify the data against the evaluator's declared capabilities
  getCapabilities().testWithFail(data);

  // work on a copy, dropping instances whose class value is missing
  m_trainInstances = new Instances(data);
  m_trainInstances.deleteWithMissingClass();

  // cache basic dataset statistics for later use
  m_numInstances = m_trainInstances.numInstances();
  m_numAttribs = m_trainInstances.numAttributes();
  m_classIndex = m_trainInstances.classIndex();

  // discretize all numeric attributes (with the better encoding scheme)
  m_disTransform = new Discretize();
  m_disTransform.setUseBetterEncoding(true);
  m_disTransform.setInputFormat(m_trainInstances);
  m_trainInstances = Filter.useFilter(m_trainInstances, m_disTransform);
}
 
Example 2
Source File: MIWrapper.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the classifier.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // fail fast before any data is copied or filtered
  if (m_Classifier == null) {
    throw new Exception("A base classifier has not been specified!");
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  if (getDebug())
    System.out.println("Start training ...");
  m_NumClasses = train.numClasses();

  // convert the multi-instance dataset into a single-instance dataset
  m_ConvertToProp.setWeightMethod(getWeightMethod());
  m_ConvertToProp.setInputFormat(train);
  train = Filter.useFilter(train, m_ConvertToProp);
  train.deleteAttributeAt(0); // remove the bag index attribute

  m_Classifier.buildClassifier(train);
}
 
Example 3
Source File: RandomCommittee.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
   * Builds the committee of randomizable classifiers.
   *
   * @param data the training data to be used for generating the
   * bagged classifier.
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    m_data = new Instances(data);
    m_data.deleteWithMissingClass();
    // let the superclass set up its iteration-related state
    super.buildClassifier(m_data);
    
    if (!(m_Classifier instanceof Randomizable)) {
      throw new IllegalArgumentException("Base learner must implement Randomizable!");
    }

    // one independent copy of the base learner per committee member
    m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, m_NumIterations);

    Random random = m_data.getRandomNumberGenerator(m_Seed);

    // Resample data based on weights if base learner can't handle weights
    if (!(m_Classifier instanceof WeightedInstancesHandler)) {
      m_data = m_data.resampleWithWeights(random);
    }

    for (int j = 0; j < m_Classifiers.length; j++) {

      // Set the random number seed for the current classifier.
      ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
      
      // Build the classifier.
      // NOTE: training is intentionally deferred to buildClassifiers()
      // below (inherited helper); this direct call stays commented out.
//      m_Classifiers[j].buildClassifier(m_data);
    }
    
    // train all committee members (implemented in the superclass)
    buildClassifiers();
    
    // save memory
    m_data = null;
  }
 
Example 4
Source File: Vote.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Buildclassifier selects a classifier from the set of classifiers by
 * minimising error on the training data.
 * 
 * @param data the training data to be used for generating the boosted
 *          classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {

  // remove instances with missing class; keep an empty header copy
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();
  m_structure = new Instances(newData, 0);

  m_Random = new Random(getSeed());

  if (m_classifiersToLoad.size() > 0) {
    m_preBuiltClassifiers.clear();
    loadClassifiers(data);

    // a lone default ZeroR is only a placeholder -- drop it so that
    // just the loaded classifiers take part in the vote
    // (removed an unused local variable 'index' that was declared here)
    if (m_Classifiers.length == 1
        && m_Classifiers[0] instanceof weka.classifiers.rules.ZeroR) {
      // remove the single ZeroR
      m_Classifiers = new Classifier[0];
    }
  }

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // build every ensemble member on the cleaned data
  for (int i = 0; i < m_Classifiers.length; i++) {
    getClassifier(i).buildClassifier(newData);
  }
}
 
Example 5
Source File: Stacking.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Buildclassifier selects a classifier from the set of classifiers
 * by minimising error on the training data.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // a meta classifier is mandatory for stacking
  if (m_MetaClassifier == null) {
    throw new IllegalArgumentException("No meta classifier has been set");
  }

  // make sure this ensemble can handle the supplied data
  getCapabilities().testWithFail(data);

  // keep an empty header in the base-level format, then drop
  // missing-class instances from the working copy
  Instances train = new Instances(data);
  m_BaseFormat = new Instances(data, 0);
  train.deleteWithMissingClass();

  // shuffle and, for nominal classes, stratify before the CV folds
  Random rng = new Random(m_Seed);
  train.randomize(rng);
  if (train.classAttribute().isNominal()) {
    train.stratify(m_NumFolds);
  }

  // create the meta level via cross validation
  generateMetaLevel(train, rng);

  // restart the executor pool because at the end of processing
  // a set of classifiers it gets shutdown to prevent the program
  // executing as a server
  super.buildClassifier(train);

  // finally, rebuild all the base classifiers on the full training data
  buildClassifiers(train);
}
 
Example 6
Source File: Ridor.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a single REP-based rule learner for a two-class problem.
 * This learner always tries to predict the class with label m_Class.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal()) {
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  }
  if (instances.numClasses() != 2) {
    throw new Exception(" Only 2 classes, please.");
  }

  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" No training data.");
  }

  // drop instances whose class value is missing
  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" The class labels of all the training data are missing.");
  }

  if (data.numInstances() < m_Folds) {
    throw new Exception(" Not enough data for REP.");
  }

  m_Antds = new FastVector();

  // split the data into a growing set and a pruning set
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
  Instances pruneData = data.testCV(m_Folds, m_Folds - 1);

  grow(growData);    // build this rule
  prune(pruneData);  // prune this rule
}
 
Example 7
Source File: LMT.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception{

  // verify the data against this classifier's capabilities
  getCapabilities().testWithFail(data);

  // copy the data, dropping instances with a missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // impute missing attribute values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(train);
  train = Filter.useFilter(train, m_replaceMissing);

  // optionally convert nominal attributes to binary ones globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_nominalToBinary);
  }

  int minNumInstances = 2;

  // choose the split criterion: residual-based or class-based (C4.5)
  ModelSelection modSelection = m_splitOnResiduals
    ? new ResidualModelSelection(minNumInstances)
    : new C45ModelSelection(minNumInstances, train, true);

  // grow the logistic model tree from the root
  m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression,
     m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC);
  m_tree.buildClassifier(train);

  // release resources held by the C4.5 model selection, if used
  if (modSelection instanceof C45ModelSelection) {
    ((C45ModelSelection) modSelection).cleanup();
  }
}
 
Example 8
Source File: DMNBtext.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Generates the classifier.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances instances = new Instances(data);
  instances.deleteWithMissingClass();

  // one binary (one-vs-rest) sub-classifier per class value
  m_binaryClassifiers = new DNBBinary[instances.numClasses()];
  m_numClasses = instances.numClasses();
  m_headerInfo = new Instances(instances, 0);
  for (int i = 0; i < instances.numClasses(); i++) {
    m_binaryClassifiers[i] = new DNBBinary();
    m_binaryClassifiers[i].setTargetClass(i);
    m_binaryClassifiers[i].initClassifier(instances);
  }

  // nothing to iterate over
  if (instances.numInstances() == 0)
    return;

  // iterative discriminative updates over the training set
  // (removed an unused 'Random' local and stale commented-out code)
  for (int it = 0; it < m_NumIterations; it++) {
    for (int i = 0; i < instances.numInstances(); i++) {
      updateClassifier(instances.instance(i));
    }
  }
}
 
Example 9
Source File: NSR.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts a multi-label dataset D (with L label attributes) into a
 * single-label dataset whose class encodes the label combination.
 * Combinations occurring fewer than m_P times are pruned; if m_N > 0,
 * pruned combinations are decomposed into their top-N subsets, which are
 * added as copies with fractional weight.
 *
 * @param D the multi-label instances
 * @param L the number of label attributes (presumably the first L
 *          attributes -- TODO confirm against MLUtils.gen_indices)
 * @return the transformed single-label instances
 * @throws Exception if the conversion fails
 */
public Instances convertInstances(Instances D, int L) throws Exception {

		//Gather combinations
		HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
		if(getDebug())
			System.out.println("Found "+distinctCombinations.size()+" unique combinations");

		//Prune combinations occurring fewer than m_P times
		MLUtils.pruneCountHashMap(distinctCombinations,m_P);
		if(getDebug())
			System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P);

		// Remove all class attributes
		Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L));
		// Add a new class attribute whose values are the surviving combinations
		D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())),0); // create the class attribute
		D_.setClassIndex(0);

		//Add class values
		for (int i = 0; i < D.numInstances(); i++) {
			String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L));
			// add it
			if(distinctCombinations.containsKey(y)) 	//if its class value exists
				D_.instance(i).setClassValue(y);
			// decomp: distribute the instance over its top-N label subsets
			else if(m_N > 0) { 
				String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
				for (String s : d_subsets) {
					// NOTE(review): 'w' is never used; the weight below is 1/d_subsets.length
					int w = distinctCombinations.get(s);
					Instance copy = (Instance)(D_.instance(i)).copy();
					copy.setClassValue(s);
					copy.setWeight(1.0 / d_subsets.length);
					D_.add(copy);
				}
			}
		}

		// remove with missing class (instances whose combination was pruned
		// and not decomposed never received a class value)
		D_.deleteWithMissingClass();

		// keep the header of new dataset for classification
		m_InstancesTemplate = new Instances(D_, 0);

		if (getDebug())
			System.out.println(""+D_);

		return D_;
	}
 
Example 10
Source File: RaceSearch.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Searches the attribute subset space by racing cross validation
 * errors of competing subsets.
 *
 * @param ASEval the attribute evaluator to guide the search
 * @param data the training instances.
 * @return an array (not necessarily ordered) of selected attribute indexes
 * @throws Exception if the search can't be completed
 */
public int[] search (ASEvaluation ASEval, Instances data)
  throws Exception {

  // ---- validate the supplied evaluator ----
  if (!(ASEval instanceof SubsetEvaluator)) {
    throw new Exception(ASEval.getClass().getName()
                        + " is not a Subset evaluator! (RaceSearch)");
  }
  if (ASEval instanceof UnsupervisedSubsetEvaluator) {
    throw new Exception("Can't use an unsupervised subset evaluator "
                        + "(RaceSearch).");
  }
  if (!(ASEval instanceof HoldOutSubsetEvaluator)) {
    throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
                        + "weka.attributeSelection.ClassifierSubsetEval "
                        + "(RaceSearch)");
  }
  if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {
    throw new Exception("Only error based subset evaluators can be used, "
                        + "eg. weka.attributeSelection.ClassifierSubsetEval "
                        + "(RaceSearch)");
  }

  // ---- prepare the training data ----
  m_Instances = new Instances(data);
  m_Instances.deleteWithMissingClass();
  if (m_Instances.numInstances() == 0) {
    throw new Exception("All train instances have missing class! (RaceSearch)");
  }
  if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {
    throw new Exception("More attributes requested than exist in the data "
                        + "(RaceSearch).");
  }

  m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
  m_classIndex = m_Instances.classIndex();
  m_numAttribs = m_Instances.numAttributes();

  if (m_rankingRequested) {
    m_rankedAtts = new double[m_numAttribs-1][2];
    m_rankedSoFar = 0;
  }

  // leave-one-out races one fold per instance, otherwise use 10 folds
  if (m_xvalType == LEAVE_ONE_OUT) {
    m_numFolds = m_Instances.numInstances();
  } else {
    m_numFolds = 10;
  }

  Random rnd = new Random(1); // fixed seed (arguably should be a parameter)
  m_Instances.randomize(rnd);

  // ---- run the requested type of race ----
  int[] chosen = null;
  if (m_raceType == FORWARD_RACE || m_raceType == BACKWARD_RACE) {
    chosen = hillclimbRace(m_Instances, rnd);
  } else if (m_raceType == SCHEMATA_RACE) {
    chosen = schemataRace(m_Instances, rnd);
  } else if (m_raceType == RANK_RACE) {
    chosen = rankRace(m_Instances, rnd);
  }

  return chosen;
}
 
Example 11
Source File: MajorityConfidenceVote.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds the ensemble by assessing the classifier weights using a cross
 * validation of each classifier of the ensemble and then training the
 * classifiers using the complete <code>data</code>.
 *
 * @param data
 *            Training instances
 * @throws Exception if a base classifier cannot be built or evaluated,
 *            or if the calling thread is interrupted
 */
@Override
public void buildClassifier(final Instances data) throws Exception {

	this.classifierWeights = new double[this.m_Classifiers.length];

	// remove instances with missing class
	Instances newData = new Instances(data);
	newData.deleteWithMissingClass();
	this.m_structure = new Instances(newData, 0);

	// can classifier handle the data?
	this.getCapabilities().testWithFail(data);

	for (int i = 0; i < this.m_Classifiers.length; i++) {
		if (Thread.currentThread().isInterrupted()) {
			throw new InterruptedException();
		}

		// Perform cross validation to determine the classifier weights.
		// BUGFIX: fold on newData (missing-class instances removed) so the
		// weights are estimated on the same data the final models train on;
		// previously the folds were drawn from the unfiltered 'data'.
		for (int n = 0; n < this.numFolds; n++) {
			Instances train = newData.trainCV(this.numFolds, n, new Random(this.seed));
			Instances test = newData.testCV(this.numFolds, n);

			this.getClassifier(i).buildClassifier(train);
			Evaluation eval = new Evaluation(train);
			eval.evaluateModel(this.getClassifier(i), test);
			this.classifierWeights[i] += eval.pctCorrect() / 100d;
		}

		// square the summed accuracy to emphasise stronger classifiers,
		// then normalise by the number of folds
		this.classifierWeights[i] = Math.pow(this.classifierWeights[i], 2);
		this.classifierWeights[i] /= this.numFolds;

		// final model is trained on the full (cleaned) training data
		this.getClassifier(i).buildClassifier(newData);
	}

	// If no classifier predicted something correctly, assume uniform distribution
	if (Arrays.stream(this.classifierWeights).allMatch(d -> d < 0.000001d)) {
		for (int i = 0; i < this.classifierWeights.length; i++) {
			this.classifierWeights[i] = 1d / this.classifierWeights.length;
		}
	}
}
 
Example 12
Source File: OneR.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Generates the classifier.
  *
  * @param instances the instances to be used for building the classifier
  * @throws Exception if the classifier can't be built successfully
  */
 public void buildClassifier(Instances instances) 
   throws Exception {
   
   // true until at least one attribute yields a usable rule
   boolean noRule = true;

   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // remove instances with missing class
   Instances data = new Instances(instances);
   data.deleteWithMissingClass();

   // only class? -> build ZeroR model
   if (data.numAttributes() == 1) {
     System.err.println(
  "Cannot build model (only class attribute present in data!), "
  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(data);
     return;
   }
   else {
     m_ZeroR = null;
   }
   
   // for each attribute ... (the attribute set of 'instances' is identical
   // to that of 'data'; only rows were removed above)
   Enumeration enu = instances.enumerateAttributes();
   while (enu.hasMoreElements()) {
     try {
OneRRule r = newRule((Attribute) enu.nextElement(), data);

// if this attribute is the best so far, replace the rule
if (noRule || r.m_correct > m_rule.m_correct) {
  m_rule = r;
}
noRule = false;
     } catch (Exception ex) {
       // deliberate best-effort: an attribute for which newRule() fails is
       // simply skipped and the remaining attributes are still tried
     }
   }
   
   if (noRule)
     throw new WekaException("No attributes found to work with!");
 }
 
Example 13
Source File: ConjunctiveRule.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Builds a single rule learner with REP dealing with nominal classes or
  * numeric classes.
  * For nominal classes, this rule learner predicts a distribution on
  * the classes.
  * For numeric classes, this learner predicts a single value.
  *
  * @param instances the training data
  * @throws Exception if classifier can't be built successfully
  */
 public void buildClassifier(Instances instances) throws Exception {
   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // remove instances with missing class
   Instances data = new Instances(instances);
   data.deleteWithMissingClass();
   
   // REP needs at least one instance per fold
   if(data.numInstances() < m_Folds)
     throw new Exception("Not enough data for REP.");

   // nominal class -> distribution over values; numeric class -> single value
   m_ClassAttribute = data.classAttribute();
   if(m_ClassAttribute.isNominal())
     m_NumClasses = m_ClassAttribute.numValues();
   else
     m_NumClasses = 1;

   // reset the rule state: antecedents, default distribution, consequent
   m_Antds = new FastVector();
   m_DefDstr = new double[m_NumClasses];
   m_Cnsqt = new double[m_NumClasses];
   m_Targets = new FastVector();	    
   m_Random = new Random(m_Seed);
   
   // a fixed number of antecedents was requested -> grow on all data, no pruning
   if(m_NumAntds != -1){
     grow(data);
   }
   else{

     data.randomize(m_Random);

     // Split data into Grow and Prune	   
     data.stratify(m_Folds);

     Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
     Instances pruneData=data.testCV(m_Folds, m_Folds-1);

     grow(growData);      // Build this rule  
     prune(pruneData);    // Prune this rule		  	  
   }

   // for nominal classes, turn the accumulated counts into distributions
   if(m_ClassAttribute.isNominal()){			   
     Utils.normalize(m_Cnsqt);
     if(Utils.gr(Utils.sum(m_DefDstr), 0))
Utils.normalize(m_DefDstr);
   }	
 }
 
Example 14
Source File: Ridor.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a ripple-down manner rule learner.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();
  
  int numCl = data.numClasses();
  m_Root = new Ridor_node();
  m_Class = instances.classAttribute();     // The original class label
	
  int index = data.classIndex();
  m_Cover = data.sumOfWeights();

  m_Random = new Random(m_Seed);
	
  /* Create a binary attribute ("otherClasses" vs "defClass") that replaces
     the original class; the original class attribute shifts to index+1 */
  FastVector binary_values = new FastVector(2);
  binary_values.addElement("otherClasses");
  binary_values.addElement("defClass");
  Attribute attr = new Attribute ("newClass", binary_values);
  data.insertAttributeAt(attr, index);	
  data.setClassIndex(index);                 // The new class label

  /* Partition the data into bags according to their original class values
     (read from index+1, where the original class now lives) */
  Instances[] dataByClass = new Instances[numCl];
  for(int i=0; i < numCl; i++)
    dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
  for(int i=0; i < data.numInstances(); i++){ // Partitioning
    Instance inst = data.instance(i);
    inst.setClassValue(0);           // Set new class value to be 0
    dataByClass[(int)inst.value(index+1)].add(inst); 
  }	
	
  for(int i=0; i < numCl; i++)    
    dataByClass[i].deleteAttributeAt(index+1);   // Delete original class
	
  // grow the ripple-down rule tree from the root
  m_Root.findRules(dataByClass, 0);
  
}
 
Example 15
Source File: FT.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception{

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances filteredData = new Instances(data);
  filteredData.deleteWithMissingClass();

  // replace missing attribute values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(filteredData);
  filteredData = Filter.useFilter(filteredData, m_replaceMissing);

  // possibly convert nominal attributes globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(filteredData);
    filteredData = Filter.useFilter(filteredData, m_nominalToBinary);
  }

  // create the tree root according to the model type
  // (removed an unused local 'minNumInstances'; the nodes receive the
  // configured m_minNumInstances field instead)
  switch (m_modelType) {
  case 0: // FT: functional tree
    m_tree = new FTNode(m_errorOnProbabilities, m_numBoostingIterations,
                        m_minNumInstances, m_weightTrimBeta, m_useAIC);
    break;
  case 1: // FTLeaves: functional models at the leaves only
    m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations,
                              m_minNumInstances, m_weightTrimBeta, m_useAIC);
    break;
  case 2: // FTInner: functional models at inner nodes only
    m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations,
                             m_minNumInstances, m_weightTrimBeta, m_useAIC);
    break;
  }

  // build, prune and tidy up the tree
  m_tree.buildClassifier(filteredData);
  m_tree.prune();
  m_tree.assignIDs(0);
  m_tree.cleanup();
}
 
Example 16
Source File: AttributeSelectedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Build the classifier on the dimensionally reduced data.
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
  if (m_Classifier == null) {
    throw new Exception("No base classifier has been set!");
  }

  if (m_Evaluator == null) {
    throw new Exception("No attribute evaluator has been set!");
  }

  if (m_Search == null) {
    throw new Exception("No search method has been set!");
  }
 
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();
  
  // degenerate case: no usable instances left
  if (newData.numInstances() == 0) {
    m_Classifier.buildClassifier(newData);
    return;
  }
  if (newData.classAttribute().isNominal()) {
    m_numClasses = newData.classAttribute().numValues();
  } else {
    m_numClasses = 1;
  }

  Instances resampledData = null;
  // check to see if training data has all equal weights
  double weight = newData.instance(0).weight();
  // 'ok' becomes true as soon as two instances have different weights
  boolean ok = false;
  for (int i = 1; i < newData.numInstances(); i++) {
    if (newData.instance(i).weight() != weight) {
      ok = true;
      break;
    }
  }
  
  if (ok) {
    // weights differ: if either component cannot use instance weights,
    // simulate them by resampling proportionally to the weights
    if (!(m_Evaluator instanceof WeightedInstancesHandler) || 
        !(m_Classifier instanceof WeightedInstancesHandler)) {
      Random r = new Random(1);
      // burn a few values off the generator before resampling
      for (int i = 0; i < 10; i++) {
        r.nextDouble();
      }
      resampledData = newData.resampleWithWeights(r);
    }
  } else {
    // all equal weights in the training data so just use as is
    resampledData = newData;
  }

  // run attribute selection (timed), feeding weighted or resampled data
  // depending on what the evaluator can handle
  m_AttributeSelection = new AttributeSelection();
  m_AttributeSelection.setEvaluator(m_Evaluator);
  m_AttributeSelection.setSearch(m_Search);
  long start = System.currentTimeMillis();
  m_AttributeSelection.
    SelectAttributes((m_Evaluator instanceof WeightedInstancesHandler) 
                     ? newData
                     : resampledData);
  long end = System.currentTimeMillis();
  // train the base classifier on the reduced data, again choosing the
  // weighted or resampled variant to match its capabilities
  if (m_Classifier instanceof WeightedInstancesHandler) {
    newData = m_AttributeSelection.reduceDimensionality(newData);
    m_Classifier.buildClassifier(newData);
  } else {
    resampledData = m_AttributeSelection.reduceDimensionality(resampledData);
    m_Classifier.buildClassifier(resampledData);
  }

  long end2 = System.currentTimeMillis();
  // record selection statistics and timing
  m_numAttributesSelected = m_AttributeSelection.numberAttributesSelected();
  m_ReducedHeader = 
    new Instances((m_Classifier instanceof WeightedInstancesHandler) ?
                  newData
                  : resampledData, 0);
  m_selectionTime = (double)(end - start);
  m_totalTime = (double)(end2 - start);
}
 
Example 17
Source File: CVParameterSelection.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data 
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  // check the data against this classifier's capabilities
  getCapabilities().testWithFail(instances);

  // copy the data and drop instances whose class is missing
  Instances train = new Instances(instances);
  train.deleteWithMissingClass();

  // options can only be tuned on an OptionHandler
  if (!(m_Classifier instanceof OptionHandler)) {
    throw new IllegalArgumentException("Base classifier should be OptionHandler.");
  }
  m_InitOptions = ((OptionHandler)m_Classifier).getOptions();
  m_BestPerformance = -99;
  m_NumAttributes = train.numAttributes();

  Random rng = new Random(m_Seed);
  train.randomize(rng);
  m_TrainFoldSize = train.trainCV(m_NumFolds, 0).numInstances();

  // no parameters to optimise? -> train with the initial options and stop
  if (m_CVParams.size() == 0) {
    m_Classifier.buildClassifier(train);
    m_BestClassifierOptions = m_InitOptions;
    return;
  }

  if (train.classAttribute().isNominal()) {
    train.stratify(m_NumFolds);
  }
  m_BestClassifierOptions = null;

  // start from getOptions() with the parameters being optimised stripped out
  m_ClassifierOptions = ((OptionHandler)m_Classifier).getOptions();
  for (int i = 0; i < m_CVParams.size(); i++) {
    Utils.getOption(((CVParameter)m_CVParams.elementAt(i)).m_ParamChar,
      m_ClassifierOptions);
  }
  findParamsByCrossValidation(0, train, rng);

  // retrain on the full data using the best options found
  String[] best = (String[]) m_BestClassifierOptions.clone();
  ((OptionHandler)m_Classifier).setOptions(best);
  m_Classifier.buildClassifier(train);
}
 
Example 18
Source File: RacedIncrementalLogitBoost.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/** 
    * performs a boosting iteration, returning a new model for the committee
    * 
    * @param data the data to boost on
    * @return the new model
    * @throws Exception if anything goes wrong
    */
   protected Classifier[] boost(Instances data) throws Exception {
     
     // one regressor per class for this boosting round
     Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses);
     
     // Create a copy of the data with the class transformed into numeric
     Instances boostData = new Instances(data);
     boostData.deleteWithMissingClass();
     int numInstances = boostData.numInstances();
     
     // Temporarily unset the class index
     int classIndex = data.classIndex();
     boostData.setClassIndex(-1);
     boostData.deleteAttributeAt(classIndex);
     boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
     boostData.setClassIndex(classIndex);
     // trainFs: accumulated ensemble scores; trainYs: 0/1 class indicators.
     // The k counter skips over instances of 'data' with a missing class so
     // that i stays aligned with 'boostData' (which had them removed).
     double [][] trainFs = new double [numInstances][m_NumClasses];
     double [][] trainYs = new double [numInstances][m_NumClasses];
     for (int j = 0; j < m_NumClasses; j++) {
for (int i = 0, k = 0; i < numInstances; i++, k++) {
  while (data.instance(k).classIsMissing()) k++;
  trainYs[i][j] = (data.instance(k).classValue() == j) ? 1 : 0;
}
     }
     
     // Evaluate / increment trainFs from the classifiers
     for (int x = 0; x < m_models.size(); x++) {
for (int i = 0; i < numInstances; i++) {
  double [] pred = new double [m_NumClasses];
  double predSum = 0;
  Classifier[] model = (Classifier[]) m_models.elementAt(x);
  for (int j = 0; j < m_NumClasses; j++) {
    pred[j] = model[j].classifyInstance(boostData.instance(i));
    predSum += pred[j];
  }
  // centre the per-class predictions (standard LogitBoost update)
  predSum /= m_NumClasses;
  for (int j = 0; j < m_NumClasses; j++) {
    trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) 
      / m_NumClasses;
  }
}
     }

     for (int j = 0; j < m_NumClasses; j++) {

// Set instance pseudoclass and weights: z is the LogitBoost working
// response (clipped to +/- Z_MAX), w the corresponding instance weight
for (int i = 0; i < numInstances; i++) {
  double p = RtoP(trainFs[i], j);
  Instance current = boostData.instance(i);
  double z, actual = trainYs[i][j];
  if (actual == 1) {
    z = 1.0 / p;
    if (z > Z_MAX) { // threshold
      z = Z_MAX;
    }
  } else if (actual == 0) {
    z = -1.0 / (1.0 - p);
    if (z < -Z_MAX) { // threshold
      z = -Z_MAX;
    }
  } else {
    z = (actual - p) / (p * (1 - p));
  }

  double w = (actual - p) / z;
  current.setValue(classIndex, z);
  current.setWeight(numInstances * w);
}

// if requested, simulate the weights by resampling instead
Instances trainData = boostData;
if (m_UseResampling) {
  double[] weights = new double[boostData.numInstances()];
  for (int kk = 0; kk < weights.length; kk++) {
    weights[kk] = boostData.instance(kk).weight();
  }
  trainData = boostData.resampleWithWeights(m_RandomInstance, 
					    weights);
}

// Build the classifier
newModel[j].buildClassifier(trainData);
     }      
     
     return newModel;
   }
 
Example 19
Source File: RandomSubSpace.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * builds the classifier.
  *
  * @param data 	the training data to be used for generating the
  * 			classifier.
  * @throws Exception 	if the classifier could not be built successfully
  */
 public void buildClassifier(Instances data) throws Exception {

   // can classifier handle the data?
   getCapabilities().testWithFail(data);

   // remove instances with missing class
   m_data = new Instances(data);
   m_data.deleteWithMissingClass();
   
   // only class? -> build ZeroR model
   if (m_data.numAttributes() == 1) {
     System.err.println(
  "Cannot build model (only class attribute present in data!), "
  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(m_data);
     return;
   }
   else {
     m_ZeroR = null;
   }
   
   // let the superclass set up its iteration-related state
   super.buildClassifier(data);

   // collect 1-based indices of all non-class attributes
   // (1-based because the Remove filter's -R option expects them that way)
   Integer[] indices = new Integer[data.numAttributes()-1];
   int classIndex = data.classIndex();
   int offset = 0;
   for(int i = 0; i < indices.length+1; i++) {
     if (i != classIndex) {
indices[offset++] = i+1;
     }
   }
   int subSpaceSize = numberOfAttributes(indices.length, getSubSpaceSize());
   Random random = data.getRandomNumberGenerator(m_Seed);
   
   // wrap each member in a FilteredClassifier restricted to a random subspace
   for (int j = 0; j < m_Classifiers.length; j++) {
     if (m_Classifier instanceof Randomizable) {
((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
     }
     FilteredClassifier fc = new FilteredClassifier();
     fc.setClassifier(m_Classifiers[j]);
     m_Classifiers[j] = fc;
     Remove rm = new Remove();
     rm.setOptions(new String[]{"-V", "-R", randomSubSpace(indices,subSpaceSize,classIndex+1,random)});
     fc.setFilter(rm);

     // build the classifier
     // NOTE: training is intentionally deferred to buildClassifiers() below
     //m_Classifiers[j].buildClassifier(m_data);
   }
   
   // train all ensemble members (implemented in the superclass)
   buildClassifiers();
   
   // save memory
   m_data = null;
 }
 
Example 20
Source File: AdditiveRegression.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Build the classifier on the supplied data
  *
  * @param data the training data
  * @throws Exception if the classifier could not be built successfully
  */
 public void buildClassifier(Instances data) throws Exception {

   super.buildClassifier(data);

   // can classifier handle the data?
   getCapabilities().testWithFail(data);

   // remove instances with missing class
   Instances newData = new Instances(data);
   newData.deleteWithMissingClass();

   double sum = 0;
   double temp_sum = 0;
   // Add the model for the mean first
   m_zeroR = new ZeroR();
   m_zeroR.buildClassifier(newData);
   
   // only class? -> use only ZeroR model
   if (newData.numAttributes() == 1) {
     System.err.println(
  "Cannot build model (only class attribute present in data!), "
  + "using ZeroR model instead!");
     m_SuitableData = false;
     return;
   }
   else {
     m_SuitableData = true;
   }
   
   newData = residualReplace(newData, m_zeroR, false);
   for (int i = 0; i < newData.numInstances(); i++) {
     sum += newData.instance(i).weight() *
newData.instance(i).classValue() * newData.instance(i).classValue();
   }
   if (m_Debug) {
     System.err.println("Sum of squared residuals "
		 +"(predicting the mean) : " + sum);
   }

   m_NumIterationsPerformed = 0;
   do {
     temp_sum = sum;

     // Build the classifier
     m_Classifiers[m_NumIterationsPerformed].buildClassifier(newData);

     newData = residualReplace(newData, m_Classifiers[m_NumIterationsPerformed], true);
     sum = 0;
     for (int i = 0; i < newData.numInstances(); i++) {
sum += newData.instance(i).weight() *
  newData.instance(i).classValue() * newData.instance(i).classValue();
     }
     if (m_Debug) {
System.err.println("Sum of squared residuals : "+sum);
     }
     m_NumIterationsPerformed++;
   } while (((temp_sum - sum) > Utils.SMALL) && 
     (m_NumIterationsPerformed < m_Classifiers.length));
 }