weka.core.Utils#sum

Source File: FastRandomTree.java From android-speaker-audioanalysis with MIT License

6 votes

/**
 * Turns per-branch class counts into per-branch frequencies.
 * <p>
 * Every row of {@code dist} is summed to obtain the total for that branch.
 * If the totals add up to zero (as judged by {@code Utils.eq}) each branch
 * receives the same share {@code 1 / dist.length}; otherwise the totals are
 * handed to {@code FastRfUtils.normalize} (presumably scaling them in place
 * so they sum to one -- confirm against that helper).
 *
 * @param dist the counts, one row per branch
 * @return a freshly allocated array holding one frequency per branch
 */
protected static double[] countsToFreqs( double[][] dist ) {

  final int branches = dist.length;
  final double[] freqs = new double[branches];

  // total count of each branch
  for (int b = 0; b < branches; b++) {
    freqs[b] = Utils.sum(dist[b]);
  }

  if (!Utils.eq(Utils.sum(freqs), 0)) {
    FastRfUtils.normalize(freqs);
    return freqs;
  }

  // no mass at all: fall back to a uniform split
  final double uniform = 1.0 / (double) branches;
  for (int b = 0; b < branches; b++) {
    freqs[b] = uniform;
  }
  return freqs;
}

Source File: RandomRBF.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Draws an index at random, using the entries of the given array as
 * (unnormalized) selection weights.
 * <p>
 * A threshold is drawn uniformly from {@code [0, sum)} and the array is
 * scanned from the front, accumulating entries until the running total
 * exceeds the threshold or the array is exhausted. The index of the last
 * accumulated entry is returned; for an empty array this is {@code -1}.
 *
 * @param proportionArray     the proportions
 * @param random              the random number generator to use
 * @return the random index
 */
protected int chooseRandomIndexBasedOnProportions(
    double[] proportionArray, Random random) {

  final double total = Utils.sum(proportionArray);
  final double threshold = random.nextDouble() * total;

  double cumulative = 0.0;
  int next = 0;
  for (; (cumulative <= threshold) && (next < proportionArray.length); next++) {
    cumulative += proportionArray[next];
  }

  // 'next' points one past the entry that was accumulated last
  return next - 1;
}

Source File: sIB.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Computes the weighted Jensen-Shannon divergence between an instance and
  * a cluster of the best partition found ({@code bestT}); used for test data.
  * <p>
  * The instance values are divided by their total so they can be compared
  * with the cluster's column of {@code bestT.Py_t}. If either weight is zero
  * or negative a warning is printed to standard output and 0 is returned.
  *
  * @param inst instance to be clustered
  * @param t index of the cluster
  * @param pi1 weight applied to the instance side of the divergence
  * @param pi2 weight applied to the cluster side of the divergence
  * @return the JS divergence
  */
 private double JS(Instance inst, int t, double pi1, double pi2) {
   if (Math.min(pi1, pi2) <= 0) {
     System.out.format("Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
     return 0;
   }

   final double total = Utils.sum(inst.toDoubleArray());

   // KL term of the instance against the mixture, over the stored values only
   double klInstance = 0.0;
   for (int s = 0; s < inst.numValues(); s++) {
     final double p = inst.valueSparse(s) / total;
     if (p != 0) {
       klInstance += p * Math.log(p / (p * pi1 + pi2 * bestT.Py_t.get(inst.index(s), t)));
     }
   }

   // KL term of the cluster against the mixture, over all attributes
   double klCluster = 0.0;
   for (int a = 0; a < m_numAttributes; a++) {
     final double q = bestT.Py_t.get(a, t);
     if (q != 0) {
       klCluster += q * Math.log(q / (inst.value(a) * pi1  / total + pi2 * q));
     }
   }

   return pi1 * klInstance + pi2 * klCluster;
 }

Source File: HyperPipes.java From tsml with GNU General Public License v3.0

6 votes

/**
  * Computes the class distribution for the given test instance.
  * <p>
  * If a ZeroR default model is in place its distribution is returned.
  * Otherwise each hyper pipe contributes the score returned by
  * {@code partialContains}; the scores are normalized by their sum, or
  * replaced by a uniform distribution when that sum is zero or negative.
  *
  * @param instance the instance to be classified
  * @return the class distribution, one entry per hyper pipe
  * @throws Exception if the instance can't be classified
  */
 public double [] distributionForInstance(Instance instance) throws Exception {

   // default model?
   if (m_ZeroR != null) {
     return m_ZeroR.distributionForInstance(instance);
   }

   final int numPipes = m_HyperPipes.length;
   final double [] scores = new double[numPipes];
   for (int p = 0; p < numPipes; p++) {
     scores[p] = m_HyperPipes[p].partialContains(instance);
   }

   final double total = Utils.sum(scores);
   if (total <= 0) {
     // nothing matched: every class is equally likely
     final double uniform = 1.0 / (double)scores.length;
     for (int p = 0; p < scores.length; p++) {
       scores[p] = uniform;
     }
   } else {
     Utils.normalize(scores, total);
   }
   return scores;
 }

Source File: NominalToBinary.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Computes, for every nominal attribute of the input format, the
 * weighted average class value of each attribute value and stores the
 * resulting sort order of the values in {@code m_Indices}.
 * <p>
 * Instances with a missing class or a missing value for the attribute are
 * ignored. Attribute values that carry no weight are given the overall
 * weighted class average for that attribute. Non-nominal attributes keep
 * an empty index array.
 */
 private void computeAverageClassValues() {

   double [][] classMeans = new double[getInputFormat().numAttributes()][0];
   m_Indices = new int[getInputFormat().numAttributes()][0];

   for (int a = 0; a < getInputFormat().numAttributes(); a++) {
     Attribute current = getInputFormat().attribute(a);
     if (!current.isNominal()) {
       continue;
     }

     classMeans[a] = new double [current.numValues()];
     double [] weightPerValue = new double [current.numValues()];

     // accumulate weights and weighted class values per attribute value
     for (int r = 0; r < getInputFormat().numInstances(); r++) {
       Instance row = getInputFormat().instance(r);
       if (row.classIsMissing() || row.isMissing(a)) {
         continue;
       }
       weightPerValue[(int)row.value(a)] += row.weight();
       classMeans[a][(int)row.value(a)] += row.weight() * row.classValue();
     }

     double weightedClassSum = Utils.sum(classMeans[a]);
     double totalWeight = Utils.sum(weightPerValue);

     // turn the sums into averages; unseen values get the overall average
     if (Utils.gr(totalWeight, 0)) {
       for (int v = 0; v < current.numValues(); v++) {
         if (Utils.gr(weightPerValue[v], 0)) {
           classMeans[a][v] /= weightPerValue[v];
         } else {
           classMeans[a][v] = weightedClassSum / totalWeight;
         }
       }
     }
     m_Indices[a] = Utils.sort(classMeans[a]);
   }
 }

Source File: SimpleCart.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Renders the subtree rooted at this node as text.
  * <p>
  * A leaf prints its class label followed by "(a/b)", where a is the
  * largest entry of the node's distribution and b is the remaining mass,
  * both truncated to two decimals; a leaf with a missing class value prints
  * ": null". An inner node prints one line per branch, indented with one
  * "|  " per level, followed by the text of the corresponding successor.
  *
  * @param level the level at which the tree is to be printed
  * @return the textual form of the subtree at that level
  */
 protected String toString(int level) {

   StringBuffer out = new StringBuffer();

   // leaf node
   if (m_Attribute == null) {
     if (Utils.isMissingValue(m_ClassValue)) {
       out.append(": null");
     } else {
       int majority = Utils.maxIndex(m_Distribution);
       double correctNum = (int)(m_Distribution[majority]*100)/100.0;
       double wrongNum = (int)((Utils.sum(m_Distribution) -
           m_Distribution[majority])*100)/100.0;
       out.append(": ");
       out.append(m_ClassAttribute.value((int) m_ClassValue));
       out.append("(").append(correctNum).append("/").append(wrongNum).append(")");
     }
     return out.toString();
   }

   // inner node: branch 0 is the "<" / "=" side, branch 1 the ">=" / "!=" side
   for (int branch = 0; branch < 2; branch++) {
     out.append("\n");
     for (int depth = 0; depth < level; depth++) {
       out.append("|  ");
     }
     String condition;
     if (m_Attribute.isNumeric()) {
       condition = (branch == 0 ? " < " : " >= ") + m_SplitValue;
     } else {
       condition = (branch == 0 ? "=" : "!=") + m_SplitString;
     }
     out.append(m_Attribute.name() + condition);
     out.append(m_Successors[branch].toString(level + 1));
   }
   return out.toString();
 }

Source File: SimpleCart.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Computes the Gini gain of a binary split: the Gini index of the parent
 * minus the Gini indices of the two children, each weighted by the share of
 * the parent's weight that falls into that child.
 *
 * @param parentDist 	class distribution of the parent node
 * @param childDist 	class distributions of the two successor nodes
 *                   	(index 0 = left, index 1 = right)
 * @return 		the Gini gain, or 0 if the parent carries no weight
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double parentTotal = Utils.sum(parentDist);
  if (parentTotal == 0) {
    return 0;
  }

  final double[] left = childDist[0];
  final double[] right = childDist[1];
  final double leftTotal = Utils.sum(left);
  final double rightTotal = Utils.sum(right);

  // start from the parent's impurity and subtract the weighted child impurities
  double gain = computeGini(parentDist, parentTotal);
  gain -= leftTotal / parentTotal * computeGini(left, leftTotal);
  gain -= rightTotal / parentTotal * computeGini(right, rightTotal);
  return gain;
}

Source File: MLUtils.java From meka with GNU General Public License v3.0

5 votes

/** 
 * EmptyVectors - fraction of empty label vectors in Y, i.e. rows whose
 * entries sum to zero or less.
 *
 * @param Y the label matrix, one row per example
 * @return the number of empty rows divided by the number of rows (a value
 *         in [0, 1], not scaled to 100); 0.0 if Y has no rows
 */
 public static final double emptyVectors(int Y[][]) {
	int N = Y.length;
	// Without rows there is nothing to count; this also avoids 0/0 = NaN.
	if (N == 0) {
		return 0.0;
	}
	int empty = 0;
	for (int i = 0; i < N; i++) {
		if (Utils.sum(Y[i]) <= 0.0)
			empty++;
	}
	return (double)empty/(double)N;
}

Source File: Distribution.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Adds the given per-class counts to a bag and keeps all aggregate totals
 * (per bag, per class and overall) in step.
 *
 * @param bagIndex index of the bag that receives the counts
 * @param counts the counts to add, one entry per class
 */
public final void add(int bagIndex, double[] counts) {

  final double added = Utils.sum(counts);
  final int numCounts = counts.length;

  // per-class counts of the bag itself
  for (int c = 0; c < numCounts; c++) {
    m_perClassPerBag[bagIndex][c] += counts[c];
  }
  m_perBag[bagIndex] += added;

  // per-class totals across all bags
  for (int c = 0; c < numCounts; c++) {
    m_perClass[c] += counts[c];
  }
  totaL += added;
}

Source File: RandomTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Renders a leaf as " : label (total/rest)", where total is the sum of the
 * leaf's class distribution and rest is that sum minus the largest entry,
 * both printed with two decimals. When no class distribution is available
 * the first class label and zeros are printed.
 *
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {

  double total = 0;
  double majorityCount = 0;
  int majority = 0;

  if (m_ClassDistribution != null) {
    total = Utils.sum(m_ClassDistribution);
    majority = Utils.maxIndex(m_ClassDistribution);
    majorityCount = m_ClassDistribution[majority];
  }

  StringBuilder leaf = new StringBuilder();
  leaf.append(" : ").append(m_Info.classAttribute().value(majority));
  leaf.append(" (").append(Utils.doubleToString(total, 2));
  leaf.append("/").append(Utils.doubleToString(total - majorityCount, 2));
  leaf.append(")");
  return leaf.toString();
}

Source File: BFTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Returns the Gini gain obtained by splitting a node into two successors:
 * parent Gini minus the weight-proportional Gini of each successor.
 *
 * @param parentDist 	class distribution of the parent node
 * @param childDist 	class distributions of the successor nodes
 *                   	(only entries 0 and 1 are read)
 * @return 		Gini gain computed; 0 when the parent has zero total weight
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double parentWeight = Utils.sum(parentDist);
  if (parentWeight == 0) {
    return 0;
  }

  final double weightLeft = Utils.sum(childDist[0]);
  final double weightRight = Utils.sum(childDist[1]);

  final double giniParent = computeGini(parentDist, parentWeight);
  final double giniLeft = computeGini(childDist[0], weightLeft);
  final double giniRight = computeGini(childDist[1], weightRight);

  final double afterLeft = giniParent - weightLeft / parentWeight * giniLeft;
  return afterLeft - weightRight / parentWeight * giniRight;
}

Source File: LPS.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Renders a leaf as text.
 * <p>
 * For a numeric class the output is " : mean (w/err)", built from
 * {@code m_ClassDistribution[0]}, {@code m_Distribution[1]} and the ratio
 * {@code m_Distribution[0] / m_Distribution[1]} (presumably the leaf's
 * weight and its average error -- confirm against the tree builder).
 * Otherwise the output is " : label (total/rest)", where total is the sum
 * of the class distribution and rest is that sum minus its largest entry.
 *
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {

  double total = 0;
  double majorityCount = 0;
  int majority = 0;
  double mean = 0;
  double meanError = 0;

  if (m_ClassDistribution != null) {
    if (!m_Info.classAttribute().isNominal()) {
      mean = m_ClassDistribution[0];
      if (m_Distribution[1] > 0) {
        meanError = m_Distribution[0] / m_Distribution[1];
      }
    } else {
      total = Utils.sum(m_ClassDistribution);
      majority = Utils.maxIndex(m_ClassDistribution);
      majorityCount = m_ClassDistribution[majority];
    }
  }

  if (!m_Info.classAttribute().isNumeric()) {
    return " : " + m_Info.classAttribute().value(majority) + " ("
      + Utils.doubleToString(total, 2) + "/"
      + Utils.doubleToString(total - majorityCount, 2) + ")";
  }

  return " : " + Utils.doubleToString(mean, 2) + " ("
    + Utils.doubleToString(m_Distribution[1], 2) + "/"
    + Utils.doubleToString(meanError, 2) + ")";
}

Source File: LBR.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Calculates the class membership probabilities for the given test
 * instance from the stored priors and counts.
 * <p>
 * Each class starts from its add-one smoothed prior and is multiplied by
 * the add-one smoothed conditional probability of every sequentially
 * selected attribute whose value is not missing in the instance. The
 * result is written into the shared {@code posteriorsArray}, normalized
 * and returned.
 *
 * @param instance the instance to be classified
 * @param instanceIndex supplies the attribute indexes to use; it is
 *        switched to sequential mode by this call
 *
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {

  final int numClasses = instance.numClasses();

  instanceIndex.setSequentialDataset(true);

  // denominator of the smoothed prior: all prior counts plus one per class
  final double priorDenominator = Utils.sum(m_Priors) + numClasses;

  for (int c = 0; c < numClasses; c++) {
    // local reference to this class's counts to keep the inner loop cheap
    final int [][] classCounts = m_Counts[c];
    posteriorsArray[c] = (m_Priors[c] + 1) / (priorDenominator);

    for (int s = 0; s < instanceIndex.m_NumSeqAttsSet; s++) {
      final int att = instanceIndex.m_SequentialAttIndexes[s];
      final double attTotal = Utils.sum(classCounts[att]);
      if (instance.isMissing(att)) {
        continue;
      }
      posteriorsArray[c] *= ((classCounts[att][(int)instance.value(att)] + 1) / (attTotal + (double)instance.attribute(att).numValues()));
    }
  }

  // Normalize probabilities
  Utils.normalize(posteriorsArray);

  return posteriorsArray;
}

Source File: Vote.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Classifies a given instance using the selected combination rule.
 * <p>
 * The rule-specific helper produces the raw result; for the median rule
 * only the first entry is filled, with the value of
 * {@code classifyInstance}. Unless the class attribute is numeric, a
 * result with a positive sum is normalized before it is returned.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 * @throws IllegalStateException if the combination rule is not one of the
 *         known rules
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] combined = new double[instance.numClasses()];

  switch (m_CombinationRule) {
  case AVERAGE_RULE:
    combined = distributionForInstanceAverage(instance);
    break;
  case PRODUCT_RULE:
    combined = distributionForInstanceProduct(instance);
    break;
  case MAJORITY_VOTING_RULE:
    combined = distributionForInstanceMajorityVoting(instance);
    break;
  case MIN_RULE:
    combined = distributionForInstanceMin(instance);
    break;
  case MAX_RULE:
    combined = distributionForInstanceMax(instance);
    break;
  case MEDIAN_RULE:
    combined[0] = classifyInstance(instance);
    break;
  default:
    throw new IllegalStateException("Unknown combination rule '"
        + m_CombinationRule + "'!");
  }

  // only non-numeric class results are rescaled, and only if they carry mass
  final boolean numericClass = instance.classAttribute().isNumeric();
  if (!numericClass) {
    if (Utils.sum(combined) > 0) {
      Utils.normalize(combined);
    }
  }

  return combined;
}

Source File: BFTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Computes the information gain of a binary split: the entropy of the
 * parent minus the entropies of the two successors, each weighted by the
 * fraction of the parent's weight it receives.
 *
 * @param parentDist 	class distribution of the parent node
 * @param childDist 	class distributions of the successor nodes
 *                   	(only entries 0 and 1 are read)
 * @return 		information gain computed; 0 when the parent has zero weight
 */
protected double computeInfoGain(double[] parentDist, double[][] childDist) {
  final double parentTotal = Utils.sum(parentDist);
  if (parentTotal == 0) {
    return 0;
  }

  final double[] left = childDist[0];
  final double[] right = childDist[1];
  final double leftTotal = Utils.sum(left);
  final double rightTotal = Utils.sum(right);

  // parent entropy, reduced by the weighted entropy of each side in turn
  double gain = computeEntropy(parentDist, parentTotal);
  gain -= leftTotal / parentTotal * computeEntropy(left, leftTotal);
  gain -= rightTotal / parentTotal * computeEntropy(right, rightTotal);
  return gain;
}

Source File: sIB.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Normalize the instance so that its values sum to one.
 * <p>
 * The values obtained from {@code inst.toDoubleArray()} are divided by
 * their sum and wrapped in a new {@code DenseInstance} carrying the weight
 * of the original. If the values sum to exactly zero the division is
 * skipped and the values are copied over unchanged, because dividing by
 * zero would fill the result with NaN or infinite entries.
 * NOTE(review): assumes toDoubleArray() returns a copy, so {@code inst}
 * itself is not modified -- confirm.
 *
 * @param inst instance to be normalized
 * @return a new Instance with normalized values
 */
private Instance normalizeInstance(Instance inst) {
  double[] vals = inst.toDoubleArray();
  double sum = Utils.sum(vals);
  // Guard against 0/0 (NaN) and x/0 (Infinity) for zero-sum instances.
  if (sum != 0) {
    for(int i = 0; i < vals.length; i++) {
      vals[i] /= sum;
    }
  }
  return new DenseInstance(inst.weight(), vals);
}

Source File: AbstractClusterer.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Assigns the given instance to a cluster by taking the index of the
 * largest entry of its cluster membership distribution. Either this or
 * distributionForInstance() needs to be implemented by subclasses.
 *
 * @param instance the instance to be assigned to a cluster
 * @return the number of the assigned cluster as an integer
 * @exception Exception if the distribution is null or its sum is zero or
 * negative, i.e. the instance could not be clustered successfully
 */
public int clusterInstance(Instance instance) throws Exception {

  final double [] membership = distributionForInstance(instance);

  if (membership == null) {
    throw new Exception("Null distribution predicted");
  }

  final double total = Utils.sum(membership);
  if (total <= 0) {
    throw new Exception("Unable to cluster instance");
  }

  return Utils.maxIndex(membership);
}

Source File: BFTree.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Creates the two successor nodes of this node and inserts the split
  * information of each successor into BestFirstElements, which is kept
  * ordered by gain (gini gain or information gain) in descending order.
  * <p>
  * Each successor is created as a leaf. Its class distribution is copied
  * from dists for the split attribute, its best split is computed with
  * computeSplitInfo, and the branch proportions, sorted indices, weights,
  * per-attribute distributions and total weight are stored on it.
  *
  * @param BestFirstElements   list to store BestFirst nodes; modified in place
  * @param data                training instances
  * @param subsetSortedIndices sorted indices of instances of successor nodes,
  *                            indexed by successor first, then attribute
  * @param subsetWeights       weights of instances of successor nodes,
  *                            indexed by successor first, then attribute
  * @param dists               class distributions of successor nodes,
  *                            indexed by attribute first, then successor
  * @param att                 attribute used to split the node
  * @param useHeuristic        if use heuristic search for nominal attributes
  *                            in multi-class problem
  * @param useGini             if use Gini index as splitting criterion
  * @throws Exception          if something goes wrong
  */
 protected void makeSuccessors(FastVector BestFirstElements,Instances data,
     int[][][] subsetSortedIndices, double[][][] subsetWeights,
     double[][][] dists,
     Attribute att, boolean useHeuristic, boolean useGini) throws Exception {

   // the split is binary: exactly two successors
   m_Successors = new BFTree[2];

   for (int i=0; i<2; i++) {
     // every successor starts out as a leaf
     m_Successors[i] = new BFTree();
     m_Successors[i].m_isLeaf = true;

     // class probability and distribution for this successor node:
     // both start as copies of the counts for the split attribute;
     // only m_ClassProbs is normalized, and only if it carries any mass
     m_Successors[i].m_ClassProbs = new double[data.numClasses()];
     m_Successors[i].m_Distribution = new double[data.numClasses()];
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs,
  0,m_Successors[i].m_ClassProbs.length);
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_Distribution,
  0,m_Successors[i].m_Distribution.length);
     if (Utils.sum(m_Successors[i].m_ClassProbs)!=0)
Utils.normalize(m_Successors[i].m_ClassProbs);

     // split information for this successor node; props, subDists and
     // totalSubsetWeights are freshly allocated and passed to
     // computeSplitInfo, and are read back below (presumably filled in by
     // that call -- confirm against computeSplitInfo)
     double[][] props = new double[data.numAttributes()][2];
     double[][][] subDists = new double[data.numAttributes()][2][data.numClasses()];
     double[][] totalSubsetWeights = new double[data.numAttributes()][2];
     FastVector splitInfo = m_Successors[i].computeSplitInfo(m_Successors[i], data,
  subsetSortedIndices[i], subsetWeights[i], subDists, props,
  totalSubsetWeights, useHeuristic, useGini);

     // branch proportion for this successor node
     // (element 1 of splitInfo is read as the successor's split attribute)
     int splitIndex = ((Attribute)splitInfo.elementAt(1)).index();
     m_Successors[i].m_Props = new double[2];
     System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0,
  m_Successors[i].m_Props.length);

     // sorted indices and weights of each attribute for this successor node
     // (the per-attribute arrays are shared with the caller, not copied)
     m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0];
     m_Successors[i].m_Weights = new double[data.numAttributes()][0];
     for (int j=0; j<m_Successors[i].m_SortedIndices.length; j++) {
m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j];
m_Successors[i].m_Weights[j] = subsetWeights[i][j];
     }

     // distribution of each attribute for this successor node
     m_Successors[i].m_Dists = new double[data.numAttributes()][2][data.numClasses()];
     for (int j=0; j<subDists.length; j++) {
m_Successors[i].m_Dists[j] = subDists[j];
     }

     // total weights for this successor node: sum over both branches of
     // the successor's own split attribute
     m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]);

     // insert this successor node into BestFirstElements according to gini gain or information gain
     //  descendingly (element 3 of each entry is read as its gain)
     if (BestFirstElements.size()==0) {
BestFirstElements.addElement(splitInfo);
     } else {
double gGain = ((Double)(splitInfo.elementAt(3))).doubleValue();
int vectorSize = BestFirstElements.size();
FastVector lastNode = (FastVector)BestFirstElements.elementAt(vectorSize-1);

// If the gain is less than that of last node in FastVector, append at the end
if (gGain<((Double)(lastNode.elementAt(3))).doubleValue()) {
  BestFirstElements.insertElementAt(splitInfo, vectorSize);
} else {
  // otherwise insert in front of the first entry whose gain does not exceed the new gain
  for (int j=0; j<vectorSize; j++) {
    FastVector node = (FastVector)BestFirstElements.elementAt(j);
    double nodeGain = ((Double)(node.elementAt(3))).doubleValue();
    if (gGain>=nodeGain) {
      BestFirstElements.insertElementAt(splitInfo, j);
      break;
    }
  }
}
     }
   }
 }

Source File: Discretize.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Test using Kononenko's MDL criterion: a split is accepted when the
  * coding cost of the class distribution and the instances before the
  * split is strictly larger than the cost after the split, the latter
  * including {@code log2(numCutPoints)} for the cut point itself.
  *
  * @param priorCounts class counts before the split
  * @param bestCounts class counts of each subset produced by the split
  * @param numInstances number of instances before the split
  * @param numCutPoints number of candidate cut points
  * @return true if the split is acceptable
  */
 private boolean KononenkosMDL(double[] priorCounts,
			double[][] bestCounts,
			double numInstances,
			int numCutPoints) {

   // Number of classes occurring in the set
   int observedClasses = 0;
   for (double classCount : priorCounts) {
     if (classCount > 0) {
       observedClasses++;
     }
   }

   // Cost before the split: class distribution plus instances
   final double distBefore = SpecialFunctions.log2Binomial(numInstances
				      + observedClasses - 1,
				      observedClasses - 1);
   final double instBefore = SpecialFunctions.log2Multinomial(numInstances,
					 priorCounts);
   final double costBefore = instBefore + distBefore;

   // Cost after the split: the same two terms, summed over all subsets
   double distAfter = 0;
   double instAfter = 0;
   for (double[] subsetCounts : bestCounts) {
     final double subsetTotal = Utils.sum(subsetCounts);
     distAfter += SpecialFunctions.log2Binomial(subsetTotal + observedClasses - 1,
					 observedClasses - 1);
     instAfter += SpecialFunctions.log2Multinomial(subsetTotal,
					    subsetCounts);
   }
   final double costAfter = Utils.log2(numCutPoints) + distAfter + instAfter;

   // Accept only if the split makes the encoding cheaper
   return (costBefore > costAfter);
 }

Source File: REPTree.java From tsml with GNU General Public License v3.0

4 votes

/**
    * Backfits data from the hold-out set into this node's class
    * probabilities and then into all successors.
    * <p>
    * Nominal class: the training distribution plus the hold-out
    * distribution is smoothed and normalized; if the combined counts do not
    * sum to a positive value the class probabilities are set to null.
    * Other class types: the existing estimate is rescaled by
    * {@code m_Distribution[1]}, {@code m_HoldOutDist[1]} is added and the
    * result is divided by {@code m_Distribution[1] + m_HoldOutDist[0]}
    * (presumably training weight and hold-out weight -- confirm). If that
    * combined value is not positive the method returns immediately, without
    * visiting the successors.
    * 
    * @throws Exception if insertion fails
    */
   protected void backfitHoldOutSet() throws Exception {

     if (!m_Info.classAttribute().isNominal()) {

       // Numeric case
       final double combinedWeight = m_Distribution[1] + m_HoldOutDist[0];
       if (combinedWeight <= 0) {
         return;
       }
       if (m_ClassProbs != null) {
         m_ClassProbs[0] *= m_Distribution[1];
       } else {
         m_ClassProbs = new double[1];
       }
       m_ClassProbs[0] += m_HoldOutDist[1];
       m_ClassProbs[0] /= combinedWeight;
     } else {

       // Nominal case: start from the training counts, add the hold-out counts
       if (m_ClassProbs == null) {
         m_ClassProbs = new double[m_Info.numClasses()];
       }
       System.arraycopy(m_Distribution, 0, m_ClassProbs, 0, m_Info.numClasses());
       for (int c = 0; c < m_HoldOutDist.length; c++) {
         m_ClassProbs[c] += m_HoldOutDist[c];
       }
       if (Utils.sum(m_ClassProbs) > 0) {
         doSmoothing();
         Utils.normalize(m_ClassProbs);
       } else {
         m_ClassProbs = null;
       }
     }

     // The process is recursive
     if (m_Attribute != -1) {
       for (int s = 0; s < m_Successors.length; s++) {
         m_Successors[s].backfitHoldOutSet();
       }
     }
   }

Java Code Examples for weka.core.Utils#sum()