Java Code Examples for weka.core.Utils#eq()

The following examples show how to use weka.core.Utils#eq(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: FastRandomTree.java    From android-speaker-audioanalysis with MIT License 6 votes vote down vote up
/**
 * Converts per-branch counts into branch frequencies.
 *
 * Every row of "dist" is summed to obtain the total of that branch; the
 * totals are then normalized inside a freshly allocated array. When the
 * totals add up to zero (as judged by Utils.eq) each branch receives the
 * same share, 1 / dist.length.
 *
 * @param dist the counts, indexed by branch first
 * @return a new array holding one frequency per branch
 */
protected static double[] countsToFreqs( double[][] dist ) {

  final int numBranches = dist.length;
  double[] freqs = new double[numBranches];

  for (int branch = 0; branch < numBranches; branch++) {
    freqs[branch] = Utils.sum(dist[branch]);
  }

  // Usual case: at least one branch carries weight.
  if (!Utils.eq(Utils.sum(freqs), 0)) {
    FastRfUtils.normalize(freqs);
    return freqs;
  }

  // Nothing to normalize: hand out equal shares instead.
  for (int branch = 0; branch < numBranches; branch++) {
    freqs[branch] = 1.0 / numBranches;
  }
  return freqs;
}
 
Example 2
Source File: END.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Calculates the class membership probabilities for the given test
  * instance by combining the outputs of all m_NumIterations classifiers.
  *
  * For a numeric class the individual predictions are averaged into
  * element 0. Otherwise the individual distributions are added up and
  * normalized, unless their sum equals zero (per Utils.eq), in which case
  * the sums are returned as they are.
  *
  * @param instance the instance to be classified
  * @return predicted class probability distribution
  * @throws Exception if distribution can't be computed successfully 
  */
 public double[] distributionForInstance(Instance instance) throws Exception {

   final double[] sums = new double[instance.numClasses()];
   final boolean numericClass = instance.classAttribute().isNumeric();

   for (int member = 0; member < m_NumIterations; member++) {
     if (numericClass) {
       sums[0] += m_Classifiers[member].classifyInstance(instance);
       continue;
     }
     double[] memberProbs = m_Classifiers[member].distributionForInstance(instance);
     for (int cls = 0; cls < memberProbs.length; cls++) {
       sums[cls] += memberProbs[cls];
     }
   }

   if (numericClass) {
     sums[0] /= (double) m_NumIterations;
   } else if (!Utils.eq(Utils.sum(sums), 0)) {
     Utils.normalize(sums);
   }
   return sums;
 }
 
Example 3
Source File: Bagging.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Calculates the class membership probabilities for the given test
  * instance from the m_NumIterations base classifiers.
  *
  * Numeric class: element 0 holds the mean of the base predictions.
  * Any other class: the base distributions are accumulated and then
  * normalized; if the accumulated values sum to zero (per Utils.eq) they
  * are returned without normalization.
  *
  * @param instance the instance to be classified
  * @return predicted class probability distribution
  * @throws Exception if distribution can't be computed successfully 
  */
 public double[] distributionForInstance(Instance instance) throws Exception {

   double[] totals = new double[instance.numClasses()];
   boolean regression = instance.classAttribute().isNumeric();

   for (int bag = 0; bag < m_NumIterations; bag++) {
     if (!regression) {
       double[] votes = m_Classifiers[bag].distributionForInstance(instance);
       for (int c = 0; c < votes.length; c++) {
         totals[c] += votes[c];
       }
     } else {
       totals[0] += m_Classifiers[bag].classifyInstance(instance);
     }
   }

   if (regression) {
     totals[0] /= (double) m_NumIterations;
     return totals;
   }

   // Only normalize when there is something to normalize.
   if (!Utils.eq(Utils.sum(totals), 0)) {
     Utils.normalize(totals);
   }
   return totals;
 }
 
Example 4
Source File: InfoGainSplitCrit.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * This method computes the information gain in the same way 
 * C4.5 does: the difference oldEnt(bags) - newEnt(bags) is scaled by the
 * share of the total weight that is present in the bags.
 *
 * @param bags the distribution
 * @param totalNoInst weight of ALL instances (including the
 * ones with missing values).
 * @return 0 if the scaled difference equals zero according to Utils.eq,
 * otherwise the scaled difference divided by bags.total()
 */
public final double splitCritValue(Distribution bags, double totalNoInst) {

  // Share of the total weight that is not covered by the bags.
  final double missingShare = (totalNoInst - bags.total()) / totalNoInst;

  final double gain = (1 - missingShare) * (oldEnt(bags) - newEnt(bags));

  // Splits with no gain are useless.
  return Utils.eq(gain, 0) ? 0 : gain / bags.total();
}
 
Example 5
Source File: SMO.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
    * Quick and dirty check whether the quadratic programming problem is solved.
    * Prints the sum of y(i) * alpha(i) over all positive alphas and then one
    * line on standard error for every KKT condition found to be violated.
    * 
    * @throws Exception if checking fails
    */
   protected void checkClassifier() throws Exception {

     double weightedAlphaSum = 0;
     for (int idx = 0; idx < m_alpha.length; idx++) {
       if (m_alpha[idx] > 0) {
         weightedAlphaSum += m_class[idx] * m_alpha[idx];
       }
     }
     System.err.println("Sum of y(i) * alpha(i): " + weightedAlphaSum);

     for (int idx = 0; idx < m_alpha.length; idx++) {
       final double output = SVMOutput(idx, m_data.instance(idx));
       final double margin = m_class[idx] * output;
       final double upperBound = m_C * m_data.instance(idx).weight();

       // alpha == 0: the margin must not be smaller than 1
       if (Utils.eq(m_alpha[idx], 0) && Utils.sm(margin, 1)) {
         System.err.println("KKT condition 1 violated: " + margin);
       }

       // 0 < alpha < upper bound: the margin must equal 1
       if (Utils.gr(m_alpha[idx], 0)
           && Utils.sm(m_alpha[idx], upperBound)
           && !Utils.eq(margin, 1)) {
         System.err.println("KKT condition 2 violated: " + margin);
       }

       // alpha == upper bound: the margin must not be greater than 1
       if (Utils.eq(m_alpha[idx], upperBound) && Utils.gr(margin, 1)) {
         System.err.println("KKT condition 3 violated: " + margin);
       }
     }
   }
 
Example 6
Source File: Ridor.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
    * Builds a single rule learner with REP dealing with 2 classes.
    * This rule learner always tries to predict the class with label 
    * m_Class.
    *
    * The input is validated (nominal class, exactly two classes, non-zero
    * weight before and after dropping instances with a missing class, at
    * least m_Folds instances), then shuffled, stratified and divided into a
    * growing set and a pruning set that are passed to grow() and prune().
    *
    * @param instances the training data
    * @throws Exception if classifier can't be built successfully
    */
   public void buildClassifier(Instances instances) throws Exception {
     m_ClassAttribute = instances.classAttribute();
     if (!m_ClassAttribute.isNominal()) {
       throw new UnsupportedClassTypeException(" Only nominal class, please.");
     }
     if (instances.numClasses() != 2) {
       throw new Exception(" Only 2 classes, please.");
     }

     // All further steps operate on a separate Instances object.
     Instances working = new Instances(instances);
     if (Utils.eq(working.sumOfWeights(), 0)) {
       throw new Exception(" No training data.");
     }

     working.deleteWithMissingClass();
     if (Utils.eq(working.sumOfWeights(), 0)) {
       throw new Exception(" The class labels of all the training data are missing.");
     }

     if (working.numInstances() < m_Folds) {
       throw new Exception(" Not enough data for REP.");
     }

     m_Antds = new FastVector();

     /* Split data into Grow and Prune: fold m_Folds-1 is the pruning set */
     m_Random = new Random(m_Seed);
     working.randomize(m_Random);
     working.stratify(m_Folds);
     Instances growSet = working.trainCV(m_Folds, m_Folds - 1, m_Random);
     Instances pruneSet = working.testCV(m_Folds, m_Folds - 1);

     // Build this rule, then prune it
     grow(growSet);
     prune(pruneSet);
   }
 
Example 7
Source File: RandomSubSpace.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Calculates the class membership probabilities for the given test
  * instance.
  *
  * If a default model (m_ZeroR) is set, its distribution is returned
  * directly. Otherwise the outputs of the m_NumIterations classifiers are
  * combined: averaged into element 0 for a numeric class, or summed and
  * normalized for any other class (left unnormalized when the sum equals
  * zero per Utils.eq).
  *
  * @param instance 	the instance to be classified
  * @return 		predicted class probability distribution
  * @throws Exception 	if distribution can't be computed successfully 
  */
 public double[] distributionForInstance(Instance instance) throws Exception {

   // default model?
   if (m_ZeroR != null) {
     return m_ZeroR.distributionForInstance(instance);
   }

   final double[] combined = new double[instance.numClasses()];
   final boolean numericTarget = instance.classAttribute().isNumeric();

   for (int sub = 0; sub < m_NumIterations; sub++) {
     if (numericTarget) {
       combined[0] += m_Classifiers[sub].classifyInstance(instance);
     } else {
       final double[] partial = m_Classifiers[sub].distributionForInstance(instance);
       for (int cls = 0; cls < partial.length; cls++) {
         combined[cls] += partial[cls];
       }
     }
   }

   if (numericTarget) {
     combined[0] /= (double) m_NumIterations;
     return combined;
   }

   final boolean nothingToNormalize = Utils.eq(Utils.sum(combined), 0);
   if (!nothingToNormalize) {
     Utils.normalize(combined);
   }
   return combined;
 }
 
Example 8
Source File: Ridor.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
    * Private function that adds up the weights of all instances in the
    * specified data whose class value equals m_Class (per Utils.eq).
    * 
    * @param data the data in question
    * @return the default accuracy number, i.e. the summed weight
    */
   private double computeDefAccu(Instances data){
     double matchingWeight = 0;
     final int total = data.numInstances();
     for (int idx = 0; idx < total; idx++) {
       Instance current = data.instance(idx);
       if (!Utils.eq(current.classValue(), m_Class)) {
         continue;
       }
       matchingWeight += current.weight();
     }
     return matchingWeight;
   }
 
Example 9
Source File: CollectiveInstances.java    From collective-classification-weka-package with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Flips the labels in a part of the given set, using the current flipping
 * algorithm. m_FlippedLabels is reset first and then increased by
 * 1 / count for every instance whose label differs (per Utils.eq) from
 * the one it had before.
 *
 * @param c             the current collective classifier
 * @param instances     the instances to work on
 * @param from          the first instance to flip
 * @param count         the number of instances to flip
 * @param history       the flipping history
 * @return              the flipped instances (the object passed in)
 * @throws Exception    if something goes wrong
 * @see                 #setFlipper(Flipper)
 */
public Instances flipLabels( Classifier c, Instances instances,
                             int from, int count, FlipHistory history ) 
  throws Exception {

  // reset flip count
  m_FlippedLabels = 0;

  m_Flipper.setRandom(m_Random);

  final int end = from + count;
  for (int pos = from; pos < end; pos++) {
    final double before = instances.instance(pos).classValue();
    final double after  = m_Flipper.flipLabel(c, instances, from, count, pos, history);

    instances.instance(pos).setClassValue(after);

    // only labels that really changed are counted
    if (Utils.eq(before, after)) {
      continue;
    }
    m_FlippedLabels += 1.0 / count;
  }

  return instances;
}
 
Example 10
Source File: ClassifierTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Builds the tree structure with hold out set.
  *
  * A local model is selected for the node. If it produces more than one
  * subset, both data sets are split and one subtree is created per subset;
  * otherwise the node becomes a leaf, flagged as empty when the training
  * data has a total weight of zero (per Utils.eq).
  *
  * @param train the data for which the tree structure is to be
  * generated.
  * @param test the test data for potential pruning
  * @param keepData is training Data to be kept?
  * @throws Exception if something goes wrong
  */
 public void buildTree(Instances train, Instances test, boolean keepData)
      throws Exception {

   if (keepData) {
     m_train = train;
   }
   m_isLeaf = false;
   m_isEmpty = false;
   m_sons = null;
   m_localModel = m_toSelectModel.selectModel(train, test);
   m_test = new Distribution(test, m_localModel);

   // Leaf: the selected model does not split the data any further.
   if (m_localModel.numSubsets() <= 1) {
     m_isLeaf = true;
     if (Utils.eq(train.sumOfWeights(), 0)) {
       m_isEmpty = true;
     }
     train = test = null;
     return;
   }

   Instances[] trainParts = m_localModel.split(train);
   Instances[] testParts = m_localModel.split(test);
   // Drop the references to the unsplit data before descending.
   train = test = null;

   m_sons = new ClassifierTree[m_localModel.numSubsets()];
   for (int child = 0; child < m_sons.length; child++) {
     m_sons[child] = getNewTree(trainParts[child], testParts[child]);
     trainParts[child] = null;
     testParts[child] = null;
   }
 }
 
Example 11
Source File: C45PruneableClassifierTreeG.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Computes estimated errors for leaf: the number of incorrect instances
  * plus the value Stats.addErrs yields for them with m_CF.
  *
  * @param theDistribution the distribution to use
  * @return the estimated errors, or 0 if the distribution's total equals
  * zero (per Utils.eq)
  */
 private double getEstimatedErrorsForDistribution(Distribution 
					   theDistribution){

   final double total = theDistribution.total();
   if (Utils.eq(total, 0)) {
     return 0;
   }

   final double incorrect = theDistribution.numIncorrect();
   return incorrect + Stats.addErrs(total, incorrect, m_CF);
 }
 
Example 12
Source File: FTtree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes estimated errors for leaf, expressed relative to the total of
 * the distribution: (Stats.addErrs(total, incorrect, m_CF) + incorrect)
 * divided by total.
 *
 * @param theDistribution the distribution to use
 * @return the estimated error rate, or 0 if the distribution's total
 * equals zero (per Utils.eq)
 */
protected double getEstimatedErrorsForDistribution(Distribution
                                                   theDistribution){
  final double total = theDistribution.total();
  if (Utils.eq(total, 0)) {
    return 0;
  }

  final double incorrect = theDistribution.numIncorrect();
  // Stats.addErrs supplies the extra errors that go on top of the observed ones.
  final double estimated = Stats.addErrs(total, incorrect, m_CF) + incorrect;
  return estimated / total;
}
 
Example 13
Source File: Ridor.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
    * Build one rule using the growing data.
    *
    * Antecedents are added greedily: in every round the not-yet-used
    * attribute whose antecedent has the largest positive information gain is
    * chosen, appended to m_Antds, and the growing data is reduced to the
    * instances that antecedent covers. Growing stops when no antecedent
    * with positive gain exists, when the covered data has zero weight, when
    * the accuracy rate reaches 1, or when numUnused drops to 0.
    *
    * @param data the growing data used to build the rule
    */    
   private void grow(Instances data){
     Instances growData = new Instances(data);

     m_AccuG = computeDefAccu(growData);
     m_CoverG = growData.sumOfWeights();
     /* Compute the default accurate rate of the growing data */
     double defAcRt = m_AccuG / m_CoverG;

     /* Keep the record of which attributes have already been used.
        The array is indexed by Attribute.index(); a new boolean[] is
        already all false, so no explicit initialisation is needed. */
     boolean[] used = new boolean[growData.numAttributes()];
     // NOTE(review): this count presumably includes the class attribute, and
     // numeric attributes are never marked as used, so the numUnused == 0
     // stop below may rarely (or never) fire - confirm.
     int numUnused = used.length;

     while (true) {
       double maxInfoGain = 0;       // We require that infoGain be positive

       /* Best antecedent of this round and the data it covers */
       Antd oneAntd = null;
       Instances coverData = null;
       Enumeration enumAttr = growData.enumerateAttributes();

       /* Build one condition based on all attributes not used yet */
       while (enumAttr.hasMoreElements()) {
         Attribute att = (Attribute) enumAttr.nextElement();

         // Look the attribute up by its own index. The enumeration skips the
         // class attribute (see the Weka Instances Javadoc), so a running
         // counter only agrees with Attribute.index() - which is what is
         // used to mark attributes below - when the class attribute is last.
         if (used[att.index()]) {
           continue;
         }

         Antd antd;
         if (att.isNumeric()) {
           antd = new NumericAntd(att);
         } else {
           antd = new NominalAntd(att);
         }

         /* Compute the best information gain for each attribute,
            it's stored in the antecedent formed by this attribute.
            This procedure returns the data covered by the antecedent */
         Instances coveredData = computeInfoGain(growData, defAcRt, antd);
         if (coveredData == null) {
           continue;
         }

         double infoGain = antd.getMaxInfoGain();
         if (Utils.gr(infoGain, maxInfoGain)) {
           oneAntd = antd;
           coverData = coveredData;
           maxInfoGain = infoGain;
         }
       }

       // No antecedent with positive gain: the rule cannot grow further
       if (oneAntd == null) {
         return;
       }

       // Numeric attributes can be used more than once
       if (!oneAntd.getAttr().isNumeric()) {
         used[oneAntd.getAttr().index()] = true;
         numUnused--;
       }

       m_Antds.addElement(oneAntd);
       growData = coverData; // Grow data size is shrinking

       defAcRt = oneAntd.getAccuRate();

       /* Stop if no more data, rule perfect, no more attributes */
       if (Utils.eq(growData.sumOfWeights(), 0.0)
           || Utils.eq(defAcRt, 1.0)
           || (numUnused == 0)) {
         return;
       }
     }
   }
 
Example 14
Source File: MIWrapper.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Computes the distribution for a given exemplar.
  *
  * The exemplar is converted with m_ConvertToProp, the first attribute of
  * the result is removed, and every resulting instance is classified by
  * m_Classifier. Depending on m_Method the per-instance distributions are
  * combined by their arithmetic mean, by their geometric mean (with each
  * probability first limited to the range 0.001 - 0.999), or by taking the
  * largest probability seen for class 1 and its complement for class 0.
  * The result is normalized; if it sums to zero (per Utils.eq) a uniform
  * distribution is returned instead.
  *
  * @param exmp the exemplar for which distribution is computed
  * @return the distribution
  * @throws Exception if the distribution can't be computed successfully
  */
 public double[] distributionForInstance(Instance exmp) 
   throws Exception {

   Instances bag = new Instances(exmp.dataset(), 0);
   bag.add(exmp);

   // convert the exemplar into a single-instance dataset
   m_ConvertToProp.setWeightMethod(
       new SelectedTag(
         MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, 
         MultiInstanceToPropositional.TAGS_WEIGHTMETHOD));
   bag = Filter.useFilter(bag, m_ConvertToProp);
   bag.deleteAttributeAt(0); //remove the bag index attribute

   // Accumulators for the bag-level result
   final double[] combined = new double[m_NumClasses];
   final int numInst = bag.numInstances();
   final double nI = (double) numInst;
   final double[] maxPr = new double[m_NumClasses];

   for (int i = 0; i < numInst; i++) {
     final double[] dist = m_Classifier.distributionForInstance(bag.instance(i));
     for (int j = 0; j < m_NumClasses; j++) {

       switch (m_Method) {
         case TESTMETHOD_ARITHMETIC:
           combined[j] += dist[j] / nI;
           break;
         case TESTMETHOD_GEOMETRIC:
           // Avoid 0/1 probability
           dist[j] = Math.max(0.001, Math.min(0.999, dist[j]));
           combined[j] += Math.log(dist[j]) / nI;
           break;
         case TESTMETHOD_MAXPROB:
           if (dist[j] > maxPr[j]) {
             maxPr[j] = dist[j];
           }
           break;
       }
     }
   }

   // The geometric mean was accumulated in log space
   if (m_Method == TESTMETHOD_GEOMETRIC) {
     for (int j = 0; j < m_NumClasses; j++) {
       combined[j] = Math.exp(combined[j]);
     }
   }

   if (m_Method == TESTMETHOD_MAXPROB) {   // for positive bag
     combined[1] = maxPr[1];
     combined[0] = 1 - combined[1];
   }

   if (!Utils.eq(Utils.sum(combined), 0)) {
     Utils.normalize(combined);
     return combined;
   }

   final int numEntries = combined.length;
   for (int i = 0; i < numEntries; i++) {
     combined[i] = 1.0 / (double) numEntries;
   }
   return combined;
 }
 
Example 15
Source File: DecisionTreeNode.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Computes class distribution of an instance using the decision tree.
  *
  * A leaf (attribute index of -1) answers with its own class
  * probabilities. An inner node passes the instance on: to every child,
  * weighted by m_Prop, when the split attribute is missing; to the branch
  * matching the nominal value; or to child 0 / child 1 depending on
  * whether the numeric value is smaller than the split point. If the
  * chosen child yields null, this node's class probabilities are used.
  * 
  * @param instance	the instance to compute the distribution for
  * @return		the class distribution
  * @throws Exception	if something goes wrong
  */
 public double[] distributionForInstance(Instance instance) throws Exception {

   final int attr = getAttribute();

   // Node is a leaf
   if (attr <= -1) {
     return getClassProbabilities();
   }

   double[] dist;

   if (instance.isMissing(attr)) {
     if (getDebugLevel() > 0) {
       System.out.println(toStringNode());
     }

     // Value is missing: split instance up over all children
     dist = new double[getInformation().numClasses()];
     for (int child = 0; child < getChildCount(); child++) {
       double[] childDist = getNodeAt(child).distributionForInstance(instance);
       if (getDebugLevel() > 1) {
         System.out.println("help: " + Utils.arrayToString(childDist));
       }
       if (childDist == null) {
         continue;
       }
       for (int cls = 0; cls < childDist.length; cls++) {
         dist[cls] += m_Prop[child] * childDist[cls];
       }
     }
     if (getDebugLevel() > 1) {
       System.out.println("--> returnedDist: " + Utils.arrayToString(dist));
     }
   }
   else if (getInformation().attribute(attr).isNominal()) {
     // For nominal attributes
     final double value = instance.value(attr);
     int branch = 0;

     if (getNominalSplit() == null) {
       // one branch for each nominal value
       branch = (int) value;
     }
     else {
       // determine the branch we have to go down; when several groups
       // contain the value, the last such group is the one kept
       for (int group = 0; group < getNominalSplit().length; group++) {
         for (int member = 0; member < getNominalSplit()[group].length; member++) {
           if (Utils.eq(value, getNominalSplit()[group][member])) {
             branch = group;
             break;
           }
         }
       }
     }

     dist = getNodeAt(branch).distributionForInstance(instance);
   }
   else {
     // For numeric attributes
     final int side = Utils.sm(instance.value(attr), getSplitPoint()) ? 0 : 1;
     dist = getNodeAt(side).distributionForInstance(instance);
   }

   // Successor is empty: fall back to this node's probabilities
   return (dist == null) ? getClassProbabilities() : dist;
 }
 
Example 16
Source File: PairedStats.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Calculates the derived statistics (significance etc).
  *
  * After refreshing the x, y and difference statistics this sets
  * correlation (NaN unless both standard deviations are numbers and the x
  * deviation is non-zero), differencesProbability and
  * differencesSignificance (1 or -1 when the probability does not exceed
  * sigLevel, depending on which mean is larger; 0 otherwise).
  */
 public void calculateDerived() {

   xStats.calculateDerived();
   yStats.calculateDerived();
   differencesStats.calculateDerived();

   correlation = Double.NaN;
   final boolean deviationsKnown =
     !(Double.isNaN(xStats.stdDev) || Double.isNaN(yStats.stdDev));
   if (deviationsKnown && !Utils.eq(xStats.stdDev, 0)) {
     if (Utils.eq(yStats.stdDev, 0)) {
       correlation = 1.0;
     } else {
       final double slope = (xySum - xStats.sum * yStats.sum / count)
         / (xStats.sumSq - xStats.sum * xStats.mean);
       correlation = slope * xStats.stdDev / yStats.stdDev;
     }
   }

   if (!Utils.gr(differencesStats.stdDev, 0)) {
     // No spread in the differences
     differencesProbability = (differencesStats.sumSq == 0) ? 1.0 : 0.0;
   } else {
     final double tval = differencesStats.mean
       * Math.sqrt(count)
       / differencesStats.stdDev;
     final double tSquared = tval * tval;

     if (m_degreesOfFreedom >= 1) {
       differencesProbability = Statistics.FProbability(tSquared, 1,
                                                        m_degreesOfFreedom);
     } else if (count > 1) {
       differencesProbability = Statistics.FProbability(tSquared, 1,
                                                        (int) count - 1);
     } else {
       differencesProbability = 1;
     }
   }

   differencesSignificance = 0;
   if (differencesProbability <= sigLevel) {
     differencesSignificance = (xStats.mean > yStats.mean) ? 1 : -1;
   }
 }
 
Example 17
Source File: CorrelationSplitInfo.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Finds the best splitting point for an attribute in the instances.
  *
  * Candidate split positions are scanned from left to right while two
  * running PairedStats objects (left and right of the candidate) are
  * updated incrementally. For every position where the attribute value
  * changes, a score is computed from the spread of the class values in
  * both parts; the best score is stored in m_maxImpurity, with the
  * matching m_splitValue and m_position. Nothing is updated beyond
  * initialize() when m_number is smaller than 4.
  *
  * NOTE(review): split values are midpoints of neighbouring instances, so
  * inst is presumably expected to be sorted by attr - confirm with callers.
  *
  * @param attr the splitting attribute
  * @param inst the instances
  * @exception Exception if something goes wrong
  */
 public final void attrSplit(int attr, Instances inst) throws Exception {
   int		i;
   int		len;
   int		part;
   int		low = 0;
   int		high = inst.numInstances() - 1;
   PairedStats full = new PairedStats(0.01);
   PairedStats leftSubset = new PairedStats(0.01);
   PairedStats rightSubset = new PairedStats(0.01);
   int		classIndex = inst.classIndex();
   double      leftCorr, rightCorr;
   double      leftVar, rightVar, allVar;
   // With order = 2 the Math.pow(x, 1.0 / order) calls below are square roots.
   double      order = 2.0;

   initialize(low, high, attr);

   // m_number is not assigned in this method (presumably set by initialize - confirm)
   if (m_number < 4) {
     return;
   } 

   // One fifth of the instances (at least one) is kept on either side of
   // every candidate split.
   len = ((high - low + 1) < 5) ? 1 : (high - low + 1) / 5;
   m_position = low;
   // part is assigned here but not read again in this method.
   part = low + len - 1;

   // prime the subsets: the first len instances start in the left subset ...
   for (i = low; i < len; i++) {
     full.add(inst.instance(i).value(attr), 
       inst.instance(i).value(classIndex));
     leftSubset.add(inst.instance(i).value(attr), 
	     inst.instance(i).value(classIndex));
   } 

   // ... and all remaining instances start in the right subset
   for (i = len; i < inst.numInstances(); i++) {
     full.add(inst.instance(i).value(attr), 
       inst.instance(i).value(classIndex));
     rightSubset.add(inst.instance(i).value(attr), 
	      inst.instance(i).value(classIndex));
   } 

   full.calculateDerived();

   // stdDev squared, made non-negative, then the order-th root is taken
   allVar = (full.yStats.stdDev * full.yStats.stdDev);
   allVar = Math.abs(allVar);
   allVar = Math.pow(allVar, (1.0 / order));

   for (i = low + len; i < high - len - 1; i++) {
     // move instance i from the right subset to the left subset
     rightSubset.subtract(inst.instance(i).value(attr), 
		   inst.instance(i).value(classIndex));
     leftSubset.add(inst.instance(i).value(attr), 
	     inst.instance(i).value(classIndex));

     // a split is only evaluated between two different attribute values
     if (!Utils.eq(inst.instance(i + 1).value(attr), 
	    inst.instance(i).value(attr))) {
leftSubset.calculateDerived();
rightSubset.calculateDerived();

leftCorr = Math.abs(leftSubset.correlation);
rightCorr = Math.abs(rightSubset.correlation);
leftVar = (leftSubset.yStats.stdDev * leftSubset.yStats.stdDev);
leftVar = Math.abs(leftVar);
leftVar = Math.pow(leftVar, (1.0 / order));
rightVar = (rightSubset.yStats.stdDev * rightSubset.yStats.stdDev);
rightVar = Math.abs(rightVar);
rightVar = Math.pow(rightVar, (1.0 / order));

// overall spread minus the count-weighted spread of both parts
double score = allVar - ((leftSubset.count / full.count) * leftVar) 
	       - ((rightSubset.count / full.count) * rightVar);

// score /= allVar;
leftCorr = (leftSubset.count / full.count) * leftCorr;
rightCorr = (rightSubset.count / full.count) * rightCorr;

// c_score is computed but not read afterwards; only score decides the split
double c_score = (leftCorr + rightCorr) - Math.abs(full.correlation);

// c_score += score;
if (!Utils.eq(score, 0.0)) {
  // keep the candidate only if it beats the best score so far
  if (score > m_maxImpurity) {
    m_maxImpurity = score;
    // the split value lies halfway between the two neighbouring values
    m_splitValue = 
      (inst.instance(i).value(attr) + inst.instance(i + 1)
      .value(attr)) * 0.5;
    m_position = i;
  } 
} 
     } 
   } 
 }
 
Example 18
Source File: ClassifierDecList.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Builds the partial tree without hold out set.
  *
  * If the local model does not split the data the node becomes a leaf
  * (flagged as empty when the data has zero weight per Utils.eq).
  * Otherwise children are expanded one at a time, in the order given by
  * chooseIndex(), for as long as the child just built is a leaf. When
  * chooseIndex() returns -1 all missing children are built as leaves; if
  * that happens in the very first round the node itself collapses into a
  * leaf using the no-split model.
  *
  * @param data the data to build the list from
  * @param leaf if true, the no-split model is used for this node
  * @exception Exception if something goes wrong
  */
 public void buildDecList(Instances data, boolean leaf) throws Exception {

   m_train = null;
   m_test = null;
   m_isLeaf = false;
   m_isEmpty = false;
   m_sons = null;
   indeX = 0;

   final double totalWeight = data.sumOfWeights();
   final NoSplit noSplit = new NoSplit(new Distribution(data));

   if (leaf) {
     m_localModel = noSplit;
   } else {
     m_localModel = m_toSelectModel.selectModel(data);
   }

   // No split possible: this node is a leaf
   if (m_localModel.numSubsets() <= 1) {
     m_isLeaf = true;
     if (Utils.eq(totalWeight, 0)) {
       m_isEmpty = true;
     }
     return;
   }

   Instances[] subsets = m_localModel.split(data);
   data = null;
   m_sons = new ClassifierDecList[m_localModel.numSubsets()];

   int rounds = 0;
   int chosen;
   do {
     rounds++;
     chosen = chooseIndex();

     if (chosen == -1) {
       // Nothing left to choose: turn every missing child into a leaf
       for (int child = 0; child < m_sons.length; child++) {
         if (m_sons[child] == null) {
           m_sons[child] = getNewDecList(subsets[child], true);
         }
       }
       if (rounds < 2) {
         // Happened in the first round: collapse this node into a leaf
         m_localModel = noSplit;
         m_isLeaf = true;
         m_sons = null;
         if (Utils.eq(totalWeight, 0)) {
           m_isEmpty = true;
         }
         return;
       }
       break;
     }

     m_sons[chosen] = getNewDecList(subsets[chosen], false);
   } while ((rounds < m_sons.length) && (m_sons[chosen].m_isLeaf));

   // Choose rule
   indeX = chooseLastIndex();
 }
 
Example 19
Source File: YATSI.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
 * internal function for determining the class distribution for an instance, 
 * will be overridden by derived classes. <br/>
 *
 * The instance is looked up in m_Data, its m_KNNDetermined nearest
 * neighbours are retrieved, and the weights of the neighbours with a class
 * value are added up per class. The summed weights are normalized if they
 * are greater than zero; otherwise a message is printed and the
 * unnormalized values are returned. m_ClearCutDistribution and
 * m_WeightFlips are updated along the way.
 * 
 * @param instance	the instance to get the distribution for
 * @return		the distribution for the given instance
 * @throws Exception	if the instance cannot be found in m_Data
 */
@Override
protected double[] getDistribution(Instance instance) throws Exception {

  // find instance
  final int index = m_Data.indexOf(instance);
  if (index <= -1) {
    throw new Exception("Cannot find instance: " + instance + "\n" 
        + " -> pos=" + index 
        + " = " + m_Data.get(StrictMath.abs(index)));
  }

  // get neighbors
  final Instances neighbors = m_NNSearch.kNearestNeighbours(
                                m_Data.get(index), m_KNNDetermined);

  // per class: summed weight and plain number of labeled neighbors
  final double[] weightPerClass = new double[neighbors.numClasses()];
  final double[] numberPerClass = new double[neighbors.numClasses()];
  final int numNeighbors = neighbors.numInstances();
  for (int n = 0; n < numNeighbors; n++) {
    final Instance neighbor = neighbors.instance(n);
    if (neighbor.classIsMissing()) {
      continue;
    }
    final int cls = (int) neighbor.classValue();
    weightPerClass[cls] += neighbor.weight();
    numberPerClass[cls]++;
  }

  // build result
  final double[] result = new double[instance.numClasses()];
  for (int c = 0; c < result.length; c++) {
    result[c] = weightPerClass[c];
  }
  if (Utils.gr(Utils.sum(result), 0)) {
    Utils.normalize(result);
  } else {
    System.out.println(
        "No summed up weights: " + instance 
        + ", counts=" + Utils.arrayToString(numberPerClass));
  }

  final int labelIndex = Utils.maxIndex(weightPerClass);

  // counted whenever classes other than the heaviest one carry weight too
  if (!Utils.eq(Utils.sum(weightPerClass) - weightPerClass[labelIndex], 0)) {
    m_ClearCutDistribution++;
  }

  // did the label change due to weights?
  if (Utils.maxIndex(numberPerClass) != labelIndex) {
    m_WeightFlips++;
  }

  return result;
}
 
Example 20
Source File: Ridor.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Prints this antecedent as the attribute name, followed by " <= " when
 * value equals 0.0 (per Utils.eq) or " > " otherwise, followed by the split
 * point as formatted by Utils.doubleToString(splitPoint, 6).
 *
 * @return a textual description of this antecedent
 */
public String toString() {
  final String operator;
  if (Utils.eq(value, 0.0)) {
    operator = " <= ";
  } else {
    operator = " > ";
  }
  return att.name() + operator + Utils.doubleToString(splitPoint, 6);
}