weka.core.ContingencyTables Java Examples

The following examples show how to use weka.core.ContingencyTables. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DecisionStump.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Searches for the best one-value-versus-rest split on a nominal attribute
  * when the class is nominal too. Every attribute value is tried in turn and
  * scored with {@code ContingencyTables.entropyConditionedOnRows}; the value
  * with the lowest score wins. As a side effect, {@code m_SplitPoint} is set
  * to the index of the winning value and {@code m_Distribution} is replaced
  * by the class distributions belonging to that split.
  *
  * @param index attribute index
  * @return value of criterion for the best split
  * @throws Exception if something goes wrong
  */
 protected double findSplitNominalNominal(int index) throws Exception {

   final int numValues = m_Instances.attribute(index).numValues();
   final int numClasses = m_Instances.numClasses();

   // One row per attribute value plus a trailing row for missing values.
   double[][] table = new double[numValues + 1][numClasses];
   double[] classTotals = new double[numClasses];
   double[][] winner = new double[3][numClasses];
   double lowest = Double.MAX_VALUE;
   int missingCount = 0;

   // Accumulate the instance weights per (attribute value, class) cell.
   for (int n = 0; n < m_Instances.numInstances(); n++) {
     Instance current = m_Instances.instance(n);
     int row;
     if (current.isMissing(index)) {
       missingCount++;
       row = numValues;
     } else {
       row = (int) current.value(index);
     }
     table[row][(int) current.classValue()] += current.weight();
   }

   // Per-class totals over the non-missing rows only.
   for (int v = 0; v < numValues; v++) {
     for (int c = 0; c < numClasses; c++) {
       classTotals[c] += table[v][c];
     }
   }

   // The third branch always carries the counts of the missing-value row.
   System.arraycopy(table[numValues], 0, m_Distribution[2], 0, numClasses);

   // Score "value v" against "all other values" for every v.
   for (int v = 0; v < numValues; v++) {
     for (int c = 0; c < numClasses; c++) {
       m_Distribution[0][c] = table[v][c];
       m_Distribution[1][c] = classTotals[c] - table[v][c];
     }
     double candidate =
       ContingencyTables.entropyConditionedOnRows(m_Distribution);
     if (!(candidate < lowest)) {
       continue;
     }
     lowest = candidate;
     m_SplitPoint = (double) v;
     for (int branch = 0; branch < 3; branch++) {
       System.arraycopy(m_Distribution[branch], 0, winner[branch], 0,
                        numClasses);
     }
   }

   // No missing values in training data: use the overall class totals
   // for the third branch instead.
   if (missingCount == 0) {
     System.arraycopy(classTotals, 0, winner[2], 0, numClasses);
   }

   m_Distribution = winner;
   return lowest;
 }
 
Example #2
Source File: Discretize.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Test using Fayyad and Irani's MDL criterion: the split is accepted when
  * its information gain exceeds
  * {@code (log2(numCutPoints) + delta) / numInstances}.
  *
  * @param priorCounts class counts before the split
  * @param bestCounts class counts after the split; row 0 is treated as the
  *                   left subset and row 1 as the right subset
  * @param numInstances divisor applied to the MDL threshold
  * @param numCutPoints value whose base-2 logarithm enters the threshold
  * @return true if the split is acceptable
  */
 private boolean FayyadAndIranisMDL(double[] priorCounts,
                                    double[][] bestCounts,
                                    double numInstances,
                                    int numCutPoints) {

   // Entropy before and after the split, and the resulting gain.
   final double priorEntropy = ContingencyTables.entropy(priorCounts);
   final double entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);
   final double gain = priorEntropy - entropy;

   // Number of classes occurring in the whole set.
   int numClassesTotal = 0;
   for (double count : priorCounts) {
     if (count > 0) {
       numClassesTotal++;
     }
   }

   // Number of classes occurring in the left subset.
   int numClassesLeft = 0;
   for (double count : bestCounts[0]) {
     if (count > 0) {
       numClassesLeft++;
     }
   }

   // Number of classes occurring in the right subset.
   int numClassesRight = 0;
   for (double count : bestCounts[1]) {
     if (count > 0) {
       numClassesRight++;
     }
   }

   // Entropy of the left and the right subsets.
   final double entropyLeft = ContingencyTables.entropy(bestCounts[0]);
   final double entropyRight = ContingencyTables.entropy(bestCounts[1]);

   // Terms of the MDL formula.
   final double logTerm = Utils.log2(Math.pow(3, numClassesTotal) - 2);
   final double entropyTerm =
     ((double) numClassesTotal * priorEntropy) -
     (numClassesRight * entropyRight) -
     (numClassesLeft * entropyLeft);
   final double delta = logTerm - entropyTerm;

   // Accept the split only if the gain beats the MDL threshold.
   final double threshold = (Utils.log2(numCutPoints) + delta) / numInstances;
   return (gain > threshold);
 }
 
Example #3
Source File: LPS.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Evaluates the splitting criterion before a split is made, by delegating
 * to {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double columnEntropy = ContingencyTables.entropyOverColumns(dist);
  return columnEntropy;
}
 
Example #4
Source File: LPS.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Evaluates the splitting criterion after a split: the prior value minus
 * the result of {@code ContingencyTables.entropyConditionedOnRows}.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
 
Example #5
Source File: RandomTree.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Returns the value of the splitting criterion prior to splitting, as
 * computed by {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double criterionBeforeSplit =
    ContingencyTables.entropyOverColumns(dist);
  return criterionBeforeSplit;
}
 
Example #6
Source File: RandomTree.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Returns the gain of a split: {@code priorVal} reduced by the value of
 * {@code ContingencyTables.entropyConditionedOnRows} for the distributions.
 *
 * @param dist the distributions
 * @param priorVal the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double conditionalEntropy =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - conditionalEntropy;
}
 
Example #7
Source File: REPTree.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Computes the splitting criterion for the state before a split, using
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist the distributions handed to the entropy computation
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double prior = ContingencyTables.entropyOverColumns(dist);
  return prior;
}
 
Example #8
Source File: REPTree.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Computes the splitting criterion for the state after a split, i.e. the
 * prior value less {@code ContingencyTables.entropyConditionedOnRows}.
 *
 * @param dist the distributions handed to the entropy computation
 * @param priorVal the splitting criterion
 * @return the gain after splitting
 */
protected double gain(double[][] dist, double priorVal) {
  final double posterior = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - posterior;
}
 
Example #9
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Gives the splitting criterion value that applies before any split,
 * obtained from {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *            the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double entropyOverColumns = ContingencyTables.entropyOverColumns(dist);
  return entropyOverColumns;
}
 
Example #10
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Gives the gain achieved by a split: the prior criterion value minus
 * {@code ContingencyTables.entropyConditionedOnRows} of the distributions.
 *
 * @param dist
 *            the distributions
 * @param priorVal
 *            the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyGivenRows =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyGivenRows;
}
 
Example #11
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Determines the pre-split value of the splitting criterion through
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *            the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double preSplitValue = ContingencyTables.entropyOverColumns(dist);
  return preSplitValue;
}
 
Example #12
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Determines the post-split gain by subtracting
 * {@code ContingencyTables.entropyConditionedOnRows} from the prior value.
 *
 * @param dist
 *            the distributions
 * @param priorVal
 *            the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - postSplitEntropy;
}
 
Example #13
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Calculates the splitting criterion before the split takes place; the
 * work is done by {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist
 *            the distributions
 * @return the splitting criterion
 */
protected double priorVal(double[][] dist) {
  final double result = ContingencyTables.entropyOverColumns(dist);
  return result;
}
 
Example #14
Source File: RandomTree.java    From KEEL with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Calculates the splitting criterion after the split takes place: the
 * prior value minus {@code ContingencyTables.entropyConditionedOnRows}.
 *
 * @param dist
 *            the distributions
 * @param priorVal
 *            the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double rowConditionedEntropy =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - rowConditionedEntropy;
}
 
Example #15
Source File: CollectiveTree.java    From collective-classification-weka-package with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Produces the value of the splitting criterion before a split by calling
 * {@code ContingencyTables.entropyOverColumns}.
 *
 * @param dist	the distribution
 * @return		prior val
 */
protected double priorVal(double[][] dist) {
  final double priorValue = ContingencyTables.entropyOverColumns(dist);
  return priorValue;
}
 
Example #16
Source File: CollectiveTree.java    From collective-classification-weka-package with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Produces the value of the splitting criterion after a split: the prior
 * val minus {@code ContingencyTables.entropyConditionedOnRows}.
 *
 * @param dist	the distribution
 * @param priorVal	the prior val
 * @return		the gain
 */
protected double gain(double[][] dist, double priorVal) {
  final double splitEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - splitEntropy;
}