Java Code Examples for weka.core.Utils#sum()

The following examples show how to use weka.core.Utils#sum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: FastRandomTree.java    From android-speaker-audioanalysis with MIT License 6 votes vote down vote up
/**
 * Converts per-branch counts (the rows of "dist") into per-branch
 * frequencies.
 * <p>
 * Every entry of the result starts out as the total of the matching row of
 * {@code dist}. When those totals add up to zero (as judged by
 * {@code Utils.eq}) each branch receives the same share
 * {@code 1 / dist.length}; otherwise the totals are passed to
 * {@code FastRfUtils.normalize}.
 *
 * @param dist the counts, one row per branch
 * @return a newly allocated array with one entry per row of {@code dist}
 */
protected static double[] countsToFreqs( double[][] dist ) {

  final int numBranches = dist.length;
  final double[] freqs = new double[numBranches];

  int branch = 0;
  for (double[] branchCounts : dist) {
    freqs[branch++] = Utils.sum(branchCounts);
  }

  if (!Utils.eq(Utils.sum(freqs), 0)) {
    FastRfUtils.normalize(freqs);
    return freqs;
  }

  // no weight in any branch: fall back to a uniform split
  for (int k = 0; k < numBranches; k++) {
    freqs[k] = 1.0 / (double) numBranches;
  }
  return freqs;
}
 
Example 2
Source File: RandomRBF.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Picks an index at random, each index being weighted by its entry in the
 * proportion array.
 * <p>
 * A threshold is obtained by scaling {@code random.nextDouble()} with the
 * sum of all proportions. The proportions are then accumulated from the
 * front until the running sum exceeds the threshold or the array is
 * exhausted.
 *
 * @param proportionArray     the proportions
 * @param random              the random number generator to use
 * @return the index of the last proportion that was added to the running
 *         sum (-1 if the array is empty)
 */
protected int chooseRandomIndexBasedOnProportions(
    double[] proportionArray, Random random) {

  final double total = Utils.sum(proportionArray);
  final double threshold = random.nextDouble() * total;

  double running = 0.0;
  int next = 0;
  for (; (next < proportionArray.length) && (running <= threshold); next++) {
    running += proportionArray[next];
  }

  return next - 1;
}
 
Example 3
Source File: sIB.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Computes the JS divergence between an instance and a cluster; used for
  * test data.
  * <p>
  * If either weight is not positive a warning is printed and 0 is returned.
  * The instance values are divided by their total without checking that the
  * total is non-zero.
  *
  * @param inst instance to be clustered
  * @param t index of the cluster
  * @param pi1 weight of the instance-side term in the returned sum
  * @param pi2 weight of the cluster-side term in the returned sum
  * @return the JS divergence
  */
 private double JS(Instance inst, int t, double pi1, double pi2) {
   if (Math.min(pi1, pi2) <= 0) {
     System.out.format("Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
     return 0;
   }

   final double total = Utils.sum(inst.toDoubleArray());

   // term accumulated over the values stored in the instance
   double instanceTerm = 0.0;
   for (int pos = 0; pos < inst.numValues(); pos++) {
     final double p = inst.valueSparse(pos) / total;
     if (p == 0) {
       continue;
     }
     final double mixture = p * pi1 + pi2 * bestT.Py_t.get(inst.index(pos), t);
     instanceTerm += p * Math.log(p / mixture);
   }

   // term accumulated over all attributes of the cluster column
   double clusterTerm = 0.0;
   for (int att = 0; att < m_numAttributes; att++) {
     final double q = bestT.Py_t.get(att, t);
     if (q == 0) {
       continue;
     }
     final double mixture = inst.value(att) * pi1 / total + pi2 * q;
     clusterTerm += q * Math.log(q / mixture);
   }

   return pi1 * instanceTerm + pi2 * clusterTerm;
 }
 
Example 4
Source File: HyperPipes.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Computes the class distribution for the given test instance.
  * <p>
  * If the ZeroR fallback model is set, its distribution is returned.
  * Otherwise every HyperPipe contributes its partialContains() score; the
  * scores are normalized by their sum, or replaced by a uniform
  * distribution when that sum is not positive.
  *
  * @param instance the instance to be classified
  * @return one score per HyperPipe (or the ZeroR model's distribution)
  * @throws Exception if the instance can't be classified
  */
 public double [] distributionForInstance(Instance instance) throws Exception {

   // default model?
   if (m_ZeroR != null) {
     return m_ZeroR.distributionForInstance(instance);
   }

   final int numPipes = m_HyperPipes.length;
   final double[] scores = new double[numPipes];
   for (int pipe = 0; pipe < numPipes; pipe++) {
     scores[pipe] = m_HyperPipes[pipe].partialContains(instance);
   }

   final double total = Utils.sum(scores);
   if (total <= 0) {
     // nothing matched: every class is equally likely
     for (int pipe = 0; pipe < scores.length; pipe++) {
       scores[pipe] = 1.0 / (double) scores.length;
     }
   } else {
     Utils.normalize(scores, total);
   }
   return scores;
 }
 
Example 5
Source File: NominalToBinary.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes, for every nominal attribute, the weighted average class value
 * of each attribute value and stores the sort order of those averages in
 * m_Indices.
 * <p>
 * Instances with a missing class or a missing value for the attribute are
 * skipped. Attribute values that received no weight get the overall
 * weighted average for that attribute instead.
 */
 private void computeAverageClassValues() {

   double [][] classAverages = new double[getInputFormat().numAttributes()][0];
   m_Indices = new int[getInputFormat().numAttributes()][0];

   for (int attIdx = 0; attIdx < getInputFormat().numAttributes(); attIdx++) {
     final Attribute att = getInputFormat().attribute(attIdx);
     if (!att.isNominal()) {
       continue;
     }

     // per attribute value: weighted sum of class values, and total weight
     final double[] weightedClassSums = new double [att.numValues()];
     final double[] weightPerValue = new double [att.numValues()];
     classAverages[attIdx] = weightedClassSums;

     for (int row = 0; row < getInputFormat().numInstances(); row++) {
       final Instance current = getInputFormat().instance(row);
       if (current.classIsMissing() || current.isMissing(attIdx)) {
         continue;
       }
       final int valueIdx = (int) current.value(attIdx);
       weightPerValue[valueIdx] += current.weight();
       weightedClassSums[valueIdx] += current.weight() * current.classValue();
     }

     // totals must be taken before the sums are turned into averages
     final double classSum = Utils.sum(weightedClassSums);
     final double weightSum = Utils.sum(weightPerValue);
     if (Utils.gr(weightSum, 0)) {
       for (int v = 0; v < att.numValues(); v++) {
         if (Utils.gr(weightPerValue[v], 0)) {
           weightedClassSums[v] /= (double) weightPerValue[v];
         } else {
           weightedClassSums[v] = classSum / (double) weightSum;
         }
       }
     }
     m_Indices[attIdx] = Utils.sort(weightedClassSums);
   }
 }
 
Example 6
Source File: SimpleCart.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Renders the subtree rooted at this node as text.
  * <p>
  * A leaf is printed as ": class(correct/wrong)" with both numbers
  * truncated to two decimals, or ": null" when the class value is missing.
  * An inner node prints one line per branch, indented by "|  " once per
  * level, followed by the text of the corresponding successor.
  *
  * @param level 	the level at which the tree is to be printed
  * @return 		a tree at a certain level
  */
 protected String toString(int level) {

   final StringBuilder out = new StringBuilder();

   if (m_Attribute == null) {
     // leaf node
     if (Utils.isMissingValue(m_ClassValue)) {
       out.append(": null");
     } else {
       final double majority = m_Distribution[Utils.maxIndex(m_Distribution)];
       // (int) cast truncates to two decimal places
       final double correctNum = (int) (majority * 100) / 100.0;
       final double wrongNum =
           (int) ((Utils.sum(m_Distribution) - majority) * 100) / 100.0;
       final String counts = "(" + correctNum + "/" + wrongNum + ")";
       out.append(": ")
          .append(m_ClassAttribute.value((int) m_ClassValue))
          .append(counts);
     }
     return out.toString();
   }

   // inner node: branch 0 is the "<" / "=" side, branch 1 the other side
   for (int branch = 0; branch < 2; branch++) {
     out.append("\n");
     for (int depth = 0; depth < level; depth++) {
       out.append("|  ");
     }
     final boolean first = (branch == 0);
     if (m_Attribute.isNumeric()) {
       out.append(m_Attribute.name() + (first ? " < " : " >= ") + m_SplitValue);
     } else {
       out.append(m_Attribute.name() + (first ? "=" : "!=") + m_SplitString);
     }
     out.append(m_Successors[branch].toString(level + 1));
   }
   return out.toString();
 }
 
Example 7
Source File: SimpleCart.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the Gini gain of a binary split: the parent's Gini value minus
 * the Gini values of the two successors, each weighted by its share of the
 * parent's total weight.
 * 
 * @param parentDist 	class distributions of parent node
 * @param childDist 	class distributions of successor nodes
 *                   	(index 0 and 1 are used)
 * @return 		Gini gain computed, or 0 if the parent has no weight
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double parentWeight = Utils.sum(parentDist);
  if (parentWeight == 0) {
    return 0;
  }

  final double leftWeight = Utils.sum(childDist[0]);
  final double rightWeight = Utils.sum(childDist[1]);

  final double parentGini = computeGini(parentDist, parentWeight);
  final double leftGini = computeGini(childDist[0], leftWeight);
  final double rightGini = computeGini(childDist[1], rightWeight);

  final double leftPart = leftWeight / parentWeight * leftGini;
  final double rightPart = rightWeight / parentWeight * rightGini;
  return parentGini - leftPart - rightPart;
}
 
Example 8
Source File: MLUtils.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/** 
 * EmptyVectors - fraction of empty vectors (sum(y[i]) &lt;= 0) in Y.
 * <p>
 * The result is a proportion in [0,1], not a percentage.
 *
 * @param Y the label matrix, one row per example
 * @return the number of rows whose sum is not positive divided by the
 *         number of rows; 0.0 when Y has no rows
 */
 public static final double emptyVectors(int Y[][]) {
	int N = Y.length;
	if (N == 0) {
		// no rows: nothing is empty, and this avoids computing 0/0
		return 0.0;
	}
	int empty = 0;
	for (int[] row : Y) {
		if (Utils.sum(row) <= 0.0) {
			empty++;
		}
	}
	return (double)empty/(double)N;
}
 
Example 9
Source File: Distribution.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the given per-class counts to one bag.
 * <p>
 * Updates the per-class-per-bag table, the bag total, the per-class totals
 * and the overall total, in that order.
 *
 * @param bagIndex index of the bag receiving the counts
 * @param counts the counts to add, one entry per class
 */
public final void add(int bagIndex, double[] counts) {

  final double countTotal = Utils.sum(counts);
  final int numCounts = counts.length;

  for (int c = 0; c < numCounts; c++) {
    m_perClassPerBag[bagIndex][c] += counts[c];
  }
  m_perBag[bagIndex] += countTotal;

  for (int c = 0; c < numCounts; c++) {
    m_perClass[c] += counts[c];
  }
  totaL += countTotal;
}
 
Example 10
Source File: RandomTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Renders a leaf as " : class (total/total-minus-majority)".
 * <p>
 * Without a class distribution the first class value is printed with both
 * numbers equal to zero.
 * 
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {

  double total = 0;
  double majorityCount = 0;
  int majorityClass = 0;

  if (m_ClassDistribution != null) {
    total = Utils.sum(m_ClassDistribution);
    majorityClass = Utils.maxIndex(m_ClassDistribution);
    majorityCount = m_ClassDistribution[majorityClass];
  }

  final StringBuilder leaf = new StringBuilder(" : ");
  leaf.append(m_Info.classAttribute().value(majorityClass));
  leaf.append(" (").append(Utils.doubleToString(total, 2));
  leaf.append("/").append(Utils.doubleToString(total - majorityCount, 2));
  leaf.append(")");
  return leaf.toString();
}
 
Example 11
Source File: BFTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the Gini gain obtained by splitting a node into two successors.
 * The gain is the parent's Gini value reduced by each successor's Gini
 * value scaled with that successor's fraction of the parent weight.
 * 
 * @param parentDist 	class distributions of parent node
 * @param childDist 	class distributions of successor nodes
 *                   	(index 0 and 1 are used)
 * @return 		Gini gain computed, or 0 if the parent has no weight
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double parentTotal = Utils.sum(parentDist);
  if (parentTotal == 0) {
    return 0;
  }

  final double[] childTotal = {
    Utils.sum(childDist[0]),
    Utils.sum(childDist[1])
  };

  final double parentGini = computeGini(parentDist, parentTotal);
  final double[] childGini = {
    computeGini(childDist[0], childTotal[0]),
    computeGini(childDist[1], childTotal[1])
  };

  double gain = parentGini;
  gain -= childTotal[0] / parentTotal * childGini[0];
  gain -= childTotal[1] / parentTotal * childGini[1];
  return gain;
}
 
Example 12
Source File: LPS.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Renders a leaf as text.
 * <p>
 * For a numeric class the result is
 * " : value (m_Distribution[1]/average error)", where the value is the first
 * entry of the class distribution; otherwise it is
 * " : class (total/total-minus-majority)".
 * 
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {

  double total = 0;
  double majorityCount = 0;
  int majorityClass = 0;
  double mean = 0;
  double meanError = 0;

  if (m_ClassDistribution != null) {
    if (!m_Info.classAttribute().isNominal()) {
      mean = m_ClassDistribution[0];
      if (m_Distribution[1] > 0) {
        meanError = m_Distribution[0] / m_Distribution[1];
      }
    } else {
      total = Utils.sum(m_ClassDistribution);
      majorityClass = Utils.maxIndex(m_ClassDistribution);
      majorityCount = m_ClassDistribution[majorityClass];
    }
  }

  final StringBuilder leaf = new StringBuilder(" : ");
  if (m_Info.classAttribute().isNumeric()) {
    leaf.append(Utils.doubleToString(mean, 2));
    leaf.append(" (").append(Utils.doubleToString(m_Distribution[1], 2));
    leaf.append("/").append(Utils.doubleToString(meanError, 2));
  } else {
    leaf.append(m_Info.classAttribute().value(majorityClass));
    leaf.append(" (").append(Utils.doubleToString(total, 2));
    leaf.append("/").append(Utils.doubleToString(total - majorityCount, 2));
  }
  return leaf.append(")").toString();
}
 
Example 13
Source File: LBR.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * Each class starts from (prior + 1) / (sum of priors + number of classes)
 * and is multiplied, for every attribute listed in the sequential index
 * whose value is not missing, by
 * (count + 1) / (sum of counts + number of attribute values). The result is
 * written to posteriorsArray and normalized.
 *
 * @param instance the instance to be classified
 * @param instanceIndex supplies the attribute indexes to use; it is switched
 *                      to sequential mode by this call
 *
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {

  final int numClasses = instance.numClasses();

  instanceIndex.setSequentialDataset(true);

  // Calculate all of conditional probabilities.
  final double priorDenominator = Utils.sum(m_Priors) + numClasses;
  for (int cls = 0; cls < numClasses; cls++) {
    // local reference to the counts of this class, used in the inner loop
    final int[][] classCounts = m_Counts[cls];
    posteriorsArray[cls] = (m_Priors[cls] + 1) / (priorDenominator);

    for (int pos = 0; pos < instanceIndex.m_NumSeqAttsSet; pos++) {
      final int att = instanceIndex.m_SequentialAttIndexes[pos];
      final double countTotal = Utils.sum(classCounts[att]);
      if (instance.isMissing(att)) {
        continue;
      }
      posteriorsArray[cls] *= ((classCounts[att][(int) instance.value(att)] + 1)
          / (countTotal + (double) instance.attribute(att).numValues()));
    }
  }

  // Normalize probabilities
  Utils.normalize(posteriorsArray);

  return posteriorsArray;
}
 
Example 14
Source File: Vote.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a given instance using the selected combination rule.
 * <p>
 * For the median rule the value of classifyInstance() is stored in the
 * first slot of an array sized by the number of classes; all other rules
 * delegate to the matching distributionForInstance* method. Unless the
 * class attribute is numeric, a result with a positive sum is normalized.
 * 
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 * @throws IllegalStateException if the combination rule is not recognised
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] combined = new double[instance.numClasses()];

  switch (m_CombinationRule) {
  case AVERAGE_RULE:
    combined = distributionForInstanceAverage(instance);
    break;
  case PRODUCT_RULE:
    combined = distributionForInstanceProduct(instance);
    break;
  case MAJORITY_VOTING_RULE:
    combined = distributionForInstanceMajorityVoting(instance);
    break;
  case MIN_RULE:
    combined = distributionForInstanceMin(instance);
    break;
  case MAX_RULE:
    combined = distributionForInstanceMax(instance);
    break;
  case MEDIAN_RULE:
    combined[0] = classifyInstance(instance);
    break;
  default:
    throw new IllegalStateException("Unknown combination rule '"
        + m_CombinationRule + "'!");
  }

  final boolean numericClass = instance.classAttribute().isNumeric();
  if (!numericClass) {
    if (Utils.sum(combined) > 0) {
      Utils.normalize(combined);
    }
  }

  return combined;
}
 
Example 15
Source File: BFTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the information gain of a binary split: the parent's entropy
 * minus the entropies of the two successors, each weighted by its share of
 * the parent's total weight.
 * 
 * @param parentDist 	class distributions of parent node
 * @param childDist 	class distributions of successor nodes
 *                   	(index 0 and 1 are used)
 * @return 		information gain computed, or 0 if the parent has no
 *         		weight
 */
protected double computeInfoGain(double[] parentDist, double[][] childDist) {
  final double parentWeight = Utils.sum(parentDist);
  if (parentWeight == 0) {
    return 0;
  }

  final double leftWeight = Utils.sum(childDist[0]);
  final double rightWeight = Utils.sum(childDist[1]);

  final double parentEntropy = computeEntropy(parentDist, parentWeight);
  final double leftEntropy = computeEntropy(childDist[0], leftWeight);
  final double rightEntropy = computeEntropy(childDist[1], rightWeight);

  final double leftPart = leftWeight / parentWeight * leftEntropy;
  final double rightPart = rightWeight / parentWeight * rightEntropy;
  return parentEntropy - leftPart - rightPart;
}
 
Example 16
Source File: sIB.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Normalizes an instance by dividing every value by the sum of all values.
 * <p>
 * The sum is not checked; a zero sum yields NaN or infinite values.
 *
 * @param inst instance to be normalized
 * @return a new DenseInstance with the weight of {@code inst} and the
 *         scaled values
 */
private Instance normalizeInstance(Instance inst) {
  final double[] scaled = inst.toDoubleArray();
  final double total = Utils.sum(scaled);

  int pos = 0;
  while (pos < scaled.length) {
    scaled[pos] /= total;
    pos++;
  }

  return new DenseInstance(inst.weight(), scaled);
}
 
Example 17
Source File: AbstractClusterer.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Assigns the given instance to the cluster with the largest entry in
 * distributionForInstance(). Either this method or
 * distributionForInstance() needs to be implemented by subclasses.
 *
 * @param instance the instance to be assigned to a cluster
 * @return the number of the assigned cluster as an integer
 * @exception Exception if the distribution is null or its sum is not
 * positive
 */
public int clusterInstance(Instance instance) throws Exception {

  final double[] membership = distributionForInstance(instance);

  if (membership != null) {
    if (Utils.sum(membership) <= 0) {
      throw new Exception("Unable to cluster instance");
    }
    return Utils.maxIndex(membership);
  }

  throw new Exception("Null distribution predicted");
}
 
Example 18
Source File: BFTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Generate successor nodes for a node and put them into BestFirstElements 
  * according to gini gain or information gain in a descending order.
  * <p>
  * Two successors are created. For each one the class distribution is
  * copied from dists, its best split is computed via computeSplitInfo(), and
  * the resulting split description is inserted into BestFirstElements. Each
  * entry of BestFirstElements is read as a FastVector whose element 1 is the
  * split Attribute and whose element 3 is the gain as a Double.
  *
  * @param BestFirstElements 	list to store BestFirst nodes
  * @param data 		training instance
  * @param subsetSortedIndices	sorted indices of instances of successor nodes
  * @param subsetWeights 	weights of instances of successor nodes
  * @param dists 		class distributions of successor nodes
  * @param att 		attribute used to split the node
  * @param useHeuristic 	if use heuristic search for nominal attributes in multi-class problem
  * @param useGini 		if use Gini index as splitting criterion
  * @throws Exception 		if something goes wrong 
  */
 protected void makeSuccessors(FastVector BestFirstElements,Instances data,
     int[][][] subsetSortedIndices, double[][][] subsetWeights,
     double[][][] dists,
     Attribute att, boolean useHeuristic, boolean useGini) throws Exception {

   m_Successors = new BFTree[2];

   for (int i=0; i<2; i++) {
     // every successor starts out as a leaf
     m_Successors[i] = new BFTree();
     m_Successors[i].m_isLeaf = true;

     // class probability and distribution for this successor node
     // (both start as copies of the same row; only m_ClassProbs is normalized)
     m_Successors[i].m_ClassProbs = new double[data.numClasses()];
     m_Successors[i].m_Distribution = new double[data.numClasses()];
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs,
  0,m_Successors[i].m_ClassProbs.length);
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_Distribution,
  0,m_Successors[i].m_Distribution.length);
     // skip normalization when the distribution sums to zero
     if (Utils.sum(m_Successors[i].m_ClassProbs)!=0)
Utils.normalize(m_Successors[i].m_ClassProbs);

     // split information for this successor node
     // (props, subDists and totalSubsetWeights are passed in freshly allocated
     // and read back below, so computeSplitInfo presumably fills them)
     double[][] props = new double[data.numAttributes()][2];
     double[][][] subDists = new double[data.numAttributes()][2][data.numClasses()];
     double[][] totalSubsetWeights = new double[data.numAttributes()][2];
     FastVector splitInfo = m_Successors[i].computeSplitInfo(m_Successors[i], data,
  subsetSortedIndices[i], subsetWeights[i], subDists, props,
  totalSubsetWeights, useHeuristic, useGini);

     // branch proportion for this successor node
     // (element 1 of splitInfo is the attribute chosen for the split)
     int splitIndex = ((Attribute)splitInfo.elementAt(1)).index();
     m_Successors[i].m_Props = new double[2];
     System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0,
  m_Successors[i].m_Props.length);

     // sorted indices and weights of each attribute for this successor node
     // (the inner arrays are shared with the arguments, not copied)
     m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0];
     m_Successors[i].m_Weights = new double[data.numAttributes()][0];
     for (int j=0; j<m_Successors[i].m_SortedIndices.length; j++) {
m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j];
m_Successors[i].m_Weights[j] = subsetWeights[i][j];
     }

     // distribution of each attribute for this successor node
     // (the freshly allocated rows are replaced by references into subDists)
     m_Successors[i].m_Dists = new double[data.numAttributes()][2][data.numClasses()];
     for (int j=0; j<subDists.length; j++) {
m_Successors[i].m_Dists[j] = subDists[j];
     }

     // total weights for this successor node. 
     m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]);

     // insert this successor node into BestFirstElements according to gini gain or information gain
     //  descendingly
     if (BestFirstElements.size()==0) {
BestFirstElements.addElement(splitInfo);
     } else {
// element 3 of a split description holds its gain
double gGain = ((Double)(splitInfo.elementAt(3))).doubleValue();
int vectorSize = BestFirstElements.size();
FastVector lastNode = (FastVector)BestFirstElements.elementAt(vectorSize-1);

// If gini gain is less than that of last node in FastVector
// the new entry goes to the very end of the list
if (gGain<((Double)(lastNode.elementAt(3))).doubleValue()) {
  BestFirstElements.insertElementAt(splitInfo, vectorSize);
} else {
  // otherwise insert in front of the first entry whose gain is not larger
  // NOTE(review): for a NaN gain neither comparison is true and splitInfo
  // is never inserted - confirm that the gain cannot be NaN here
  for (int j=0; j<vectorSize; j++) {
    FastVector node = (FastVector)BestFirstElements.elementAt(j);
    double nodeGain = ((Double)(node.elementAt(3))).doubleValue();
    if (gGain>=nodeGain) {
      BestFirstElements.insertElementAt(splitInfo, j);
      break;
    }
  }
}
     }
   }
 }
 
Example 19
Source File: Discretize.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Test using Kononenko's MDL criterion.
  * <p>
  * Compares the coding cost before the split (distribution plus instances)
  * with the cost after the split (log2 of the number of cut points plus the
  * distribution and instance costs summed over all subsets).
  *
  * @param priorCounts class counts before the split
  * @param bestCounts class counts of each subset after the split
  * @param numInstances number of instances before the split
  * @param numCutPoints number of possible cut points
  * @return true if the split is acceptable
  */
 private boolean KononenkosMDL(double[] priorCounts,
     double[][] bestCounts,
     double numInstances,
     int numCutPoints) {

   // Number of classes occurring in the set
   int classesPresent = 0;
   for (double count : priorCounts) {
     if (count > 0) {
       classesPresent++;
     }
   }

   // Encode distribution prior to split
   final double distBefore = SpecialFunctions.log2Binomial(
       numInstances + classesPresent - 1, classesPresent - 1);

   // Encode instances prior to split.
   final double instBefore = SpecialFunctions.log2Multinomial(
       numInstances, priorCounts);

   final double costBefore = instBefore + distBefore;

   // Encode distributions and instances after split.
   double distAfter = 0;
   double instAfter = 0;
   for (double[] subsetCounts : bestCounts) {
     final double subsetTotal = Utils.sum(subsetCounts);
     distAfter += SpecialFunctions.log2Binomial(
         subsetTotal + classesPresent - 1, classesPresent - 1);
     instAfter += SpecialFunctions.log2Multinomial(subsetTotal, subsetCounts);
   }

   // Coding cost after split
   final double costAfter = Utils.log2(numCutPoints) + distAfter + instAfter;

   // Check if split is to be accepted
   return (costBefore > costAfter);
 }
 
Example 20
Source File: REPTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
    * Backfits data from holdout set.
    * <p>
    * Nominal class: m_ClassProbs becomes the element-wise sum of
    * m_Distribution and m_HoldOutDist, then smoothed and normalized, or null
    * when that sum is not positive. Numeric class: m_ClassProbs[0] is
    * recomputed from m_Distribution[1], m_HoldOutDist[0] and
    * m_HoldOutDist[1]; if the combined weight is not positive the method
    * returns without visiting the successors. Otherwise the successors are
    * processed recursively.
    * 
    * @throws Exception if insertion fails
    */
    protected void backfitHoldOutSet() throws Exception {

      // Insert instance into hold-out class distribution
      if (m_Info.classAttribute().isNominal()) {

        // Nominal case
        if (m_ClassProbs == null) {
          m_ClassProbs = new double[m_Info.numClasses()];
        }
        System.arraycopy(m_Distribution, 0, m_ClassProbs, 0, m_Info.numClasses());
        for (int cls = 0; cls < m_HoldOutDist.length; cls++) {
          m_ClassProbs[cls] += m_HoldOutDist[cls];
        }
        if (Utils.sum(m_ClassProbs) > 0) {
          doSmoothing();
          Utils.normalize(m_ClassProbs);
        } else {
          m_ClassProbs = null;
        }
      } else {

        // Numeric case
        final double combinedWeight = m_Distribution[1] + m_HoldOutDist[0];
        if (combinedWeight <= 0) {
          return;
        }
        if (m_ClassProbs != null) {
          m_ClassProbs[0] *= m_Distribution[1];
        } else {
          m_ClassProbs = new double[1];
        }
        m_ClassProbs[0] += m_HoldOutDist[1];
        m_ClassProbs[0] /= combinedWeight;
      }

      // The process is recursive
      if (m_Attribute == -1) {
        return;
      }
      for (int child = 0; child < m_Successors.length; child++) {
        m_Successors[child].backfitHoldOutSet();
      }
    }