Java Code Examples for weka.core.Utils#sum()
The following examples show how to use
weka.core.Utils#sum().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FastRandomTree.java From android-speaker-audioanalysis with MIT License | 6 votes |
/**
 * Turns per-branch class counts into per-branch frequencies.
 *
 * Each entry of the returned array starts out as the sum of the matching row
 * of {@code dist}. If those sums add up to zero (as judged by
 * {@code Utils.eq}), every branch is given the same share
 * {@code 1 / dist.length}; otherwise the array is handed to
 * {@code FastRfUtils.normalize} (presumably rescaling it in place so that the
 * entries sum to one - confirm against that helper).
 *
 * @param dist class counts, one row per branch
 * @return a newly allocated array with one frequency per branch
 */
protected static double[] countsToFreqs(double[][] dist) {
  final int branchCount = dist.length;
  final double[] freqs = new double[branchCount];

  int branch = 0;
  for (double[] branchCounts : dist) {
    freqs[branch++] = Utils.sum(branchCounts);
  }

  if (!Utils.eq(Utils.sum(freqs), 0)) {
    FastRfUtils.normalize(freqs);
    return freqs;
  }

  // No weight anywhere: fall back to a uniform split across the branches.
  for (int k = 0; k < branchCount; k++) {
    freqs[k] = 1.0 / (double) branchCount;
  }
  return freqs;
}
Example 2
Source File: RandomRBF.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Draws an index at random, weighted by the given proportions.
 *
 * A threshold is drawn as {@code random.nextDouble()} times the sum of the
 * proportions; the entries are then accumulated from the front for as long as
 * the running total does not exceed that threshold and entries remain. The
 * index of the last entry that was added is returned, so an empty array
 * yields {@code -1}.
 *
 * @param proportionArray the proportions
 * @param random the random number generator to use
 * @return the random index
 */
protected int chooseRandomIndexBasedOnProportions(
    double[] proportionArray, Random random) {

  final double total = Utils.sum(proportionArray);
  final double threshold = random.nextDouble() * total;

  double cumulative = 0.0;
  int next = 0;
  for (; (next < proportionArray.length) && (cumulative <= threshold); next++) {
    cumulative += proportionArray[next];
  }

  // 'next' points one past the last entry that was accumulated.
  return next - 1;
}
Example 3
Source File: sIB.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Computes the JS divergence between an instance and a cluster; used for
 * test data.
 *
 * If the smaller of the two weights is not positive, a warning is printed to
 * standard output and 0 is returned. Otherwise the instance values are
 * divided by their sum and two KL-style sums are accumulated against column
 * {@code t} of {@code bestT.Py_t}; the result is
 * {@code pi1 * kl1 + pi2 * kl2}.
 *
 * @param inst instance to be clustered
 * @param t index of the cluster
 * @param pi1 weight applied to the instance term
 * @param pi2 weight applied to the cluster term
 * @return the JS divergence
 */
private double JS(Instance inst, int t, double pi1, double pi2) {
  if (Math.min(pi1, pi2) <= 0) {
    System.out.format("Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
    return 0;
  }

  final double total = Utils.sum(inst.toDoubleArray());

  // First term: walk only over the values stored in the instance.
  double instanceTerm = 0.0;
  for (int pos = 0; pos < inst.numValues(); pos++) {
    final double p = inst.valueSparse(pos) / total;
    if (p == 0) {
      continue;
    }
    final double mixture = p * pi1 + pi2 * bestT.Py_t.get(inst.index(pos), t);
    instanceTerm += p * Math.log(p / mixture);
  }

  // Second term: walk over every attribute of the cluster column.
  double clusterTerm = 0.0;
  for (int attr = 0; attr < m_numAttributes; attr++) {
    final double q = bestT.Py_t.get(attr, t);
    if (q == 0) {
      continue;
    }
    final double mixture = inst.value(attr) * pi1 / total + pi2 * q;
    clusterTerm += q * Math.log(q / mixture);
  }

  return pi1 * instanceTerm + pi2 * clusterTerm;
}
Example 4
Source File: HyperPipes.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Classifies the given test instance. * * @param instance the instance to be classified * @return the predicted class for the instance * @throws Exception if the instance can't be classified */ public double [] distributionForInstance(Instance instance) throws Exception { // default model? if (m_ZeroR != null) { return m_ZeroR.distributionForInstance(instance); } double [] dist = new double[m_HyperPipes.length]; for (int j = 0; j < m_HyperPipes.length; j++) { dist[j] = m_HyperPipes[j].partialContains(instance); } double sum = Utils.sum(dist); if (sum <= 0) { for (int j = 0; j < dist.length; j++) { dist[j] = 1.0 / (double)dist.length; } return dist; } else { Utils.normalize(dist, sum); return dist; } }
Example 5
Source File: NominalToBinary.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the average class value for every value of every nominal
 * attribute and stores the resulting sort order in {@code m_Indices}.
 *
 * For each nominal attribute the instance weights and the weighted class
 * values are accumulated per attribute value, skipping instances whose class
 * or attribute value is missing. If the total weight is greater than zero,
 * values that were seen get their weighted mean, and values that were never
 * seen get the overall weighted mean. {@code m_Indices[j]} is then set to
 * {@code Utils.sort} of those averages. Non-nominal attributes keep an empty
 * index array.
 */
private void computeAverageClassValues() {

  double[][] classValueSums = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];

  for (int attIndex = 0; attIndex < getInputFormat().numAttributes(); attIndex++) {
    Attribute att = getInputFormat().attribute(attIndex);
    if (!att.isNominal()) {
      continue;
    }

    double[] perValue = new double[att.numValues()];
    classValueSums[attIndex] = perValue;
    double[] weights = new double[att.numValues()];

    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      Instance current = getInputFormat().instance(row);
      if (current.classIsMissing() || current.isMissing(attIndex)) {
        continue;
      }
      int slot = (int) current.value(attIndex);
      double weight = current.weight();
      weights[slot] += weight;
      perValue[slot] += weight * current.classValue();
    }

    double weightedClassTotal = Utils.sum(perValue);
    double weightTotal = Utils.sum(weights);
    if (Utils.gr(weightTotal, 0)) {
      for (int v = 0; v < att.numValues(); v++) {
        if (Utils.gr(weights[v], 0)) {
          perValue[v] = perValue[v] / weights[v];
        } else {
          // Value never observed: use the overall weighted mean instead.
          perValue[v] = weightedClassTotal / weightTotal;
        }
      }
    }
    m_Indices[attIndex] = Utils.sort(perValue);
  }
}
Example 6
Source File: SimpleCart.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Outputs a tree at a certain level. * * @param level the level at which the tree is to be printed * @return a tree at a certain level */ protected String toString(int level) { StringBuffer text = new StringBuffer(); // if leaf nodes if (m_Attribute == null) { if (Utils.isMissingValue(m_ClassValue)) { text.append(": null"); } else { double correctNum = (int)(m_Distribution[Utils.maxIndex(m_Distribution)]*100)/ 100.0; double wrongNum = (int)((Utils.sum(m_Distribution) - m_Distribution[Utils.maxIndex(m_Distribution)])*100)/100.0; String str = "(" + correctNum + "/" + wrongNum + ")"; text.append(": " + m_ClassAttribute.value((int) m_ClassValue)+ str); } } else { for (int j = 0; j < 2; j++) { text.append("\n"); for (int i = 0; i < level; i++) { text.append("| "); } if (j==0) { if (m_Attribute.isNumeric()) text.append(m_Attribute.name() + " < " + m_SplitValue); else text.append(m_Attribute.name() + "=" + m_SplitString); } else { if (m_Attribute.isNumeric()) text.append(m_Attribute.name() + " >= " + m_SplitValue); else text.append(m_Attribute.name() + "!=" + m_SplitString); } text.append(m_Successors[j].toString(level + 1)); } } return text.toString(); }
Example 7
Source File: SimpleCart.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the Gini gain of a binary split: the parent's Gini value minus the
 * weight-proportional Gini values of the two successor nodes.
 *
 * Returns 0 when the parent distribution sums to exactly zero.
 *
 * @param parentDist class distributions of parent node
 * @param childDist class distributions of successor nodes (index 0 = left,
 *        index 1 = right)
 * @return Gini gain computed
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double parentWeight = Utils.sum(parentDist);
  if (parentWeight == 0) {
    return 0;
  }

  final double[] left = childDist[0];
  final double[] right = childDist[1];
  final double leftWeight = Utils.sum(left);
  final double rightWeight = Utils.sum(right);

  final double parentGini = computeGini(parentDist, parentWeight);
  final double leftGini = computeGini(left, leftWeight);
  final double rightGini = computeGini(right, rightWeight);

  final double leftShare = leftWeight / parentWeight;
  final double rightShare = rightWeight / parentWeight;
  return parentGini - leftShare * leftGini - rightShare * rightGini;
}
Example 8
Source File: MLUtils.java From meka with GNU General Public License v3.0 | 5 votes |
/**
 * EmptyVectors - fraction (between 0 and 1) of rows of {@code Y} whose
 * elements sum to zero or less, i.e. rows with {@code sum(y[i]) <= 0}.
 *
 * An empty matrix has no rows to inspect, so 0.0 is returned for it instead of
 * failing on {@code Y[0]} or producing NaN from 0/0.
 *
 * @param Y label matrix, one row per example
 * @return the number of empty rows divided by the number of rows, or 0.0 if
 *         {@code Y} has no rows
 */
public static final double emptyVectors(int Y[][]) {
  final int N = Y.length;
  if (N == 0) {
    return 0.0;
  }

  int empty = 0;
  for (int[] row : Y) {
    if (Utils.sum(row) <= 0) {
      empty++;
    }
  }
  return (double) empty / (double) N;
}
Example 9
Source File: Distribution.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Adds the given per-class counts to one bag.
 *
 * Updates, in this order: the per-class counts of the bag, the bag total, the
 * per-class totals, and the overall total. The bag total and the overall
 * total both grow by the sum of {@code counts}.
 *
 * @param bagIndex index of the bag to add to
 * @param counts counts to add, one entry per class
 */
public final void add(int bagIndex, double[] counts) {
  final double added = Utils.sum(counts);
  final int classes = counts.length;

  for (int cls = 0; cls < classes; cls++) {
    m_perClassPerBag[bagIndex][cls] += counts[cls];
  }
  m_perBag[bagIndex] += added;

  for (int cls = 0; cls < classes; cls++) {
    m_perClass[cls] += counts[cls];
  }
  totaL += added;
}
Example 10
Source File: RandomTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Renders a leaf as {@code " : <class> (<total>/<others>)"}.
 *
 * {@code <class>} is the label at the index of the largest entry of the class
 * distribution, {@code <total>} is the sum of the distribution and
 * {@code <others>} is that sum minus the largest entry, both formatted with
 * {@code Utils.doubleToString(value, 2)}. Without a class distribution, index
 * 0 and zeros are used.
 *
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {
  double total = 0;
  double majorityCount = 0;
  int majorityIndex = 0;

  if (m_ClassDistribution != null) {
    total = Utils.sum(m_ClassDistribution);
    majorityIndex = Utils.maxIndex(m_ClassDistribution);
    majorityCount = m_ClassDistribution[majorityIndex];
  }

  final String label = m_Info.classAttribute().value(majorityIndex);
  final String totalText = Utils.doubleToString(total, 2);
  final String othersText = Utils.doubleToString(total - majorityCount, 2);
  return " : " + label + " (" + totalText + "/" + othersText + ")";
}
Example 11
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Returns the Gini gain obtained by splitting a node into two successors.
 *
 * The gain is the parent's Gini value minus each successor's Gini value
 * scaled by that successor's share of the parent weight. A parent
 * distribution that sums to exactly zero yields a gain of 0.
 *
 * @param parentDist class distributions of parent node
 * @param childDist class distributions of successor nodes; row 0 is the left
 *        successor and row 1 the right one
 * @return Gini gain computed
 */
protected double computeGiniGain(double[] parentDist, double[][] childDist) {
  final double total = Utils.sum(parentDist);
  if (total == 0) {
    return 0;
  }

  final double weightLeft = Utils.sum(childDist[0]);
  final double weightRight = Utils.sum(childDist[1]);

  final double giniParent = computeGini(parentDist, total);
  final double giniLeft = computeGini(childDist[0], weightLeft);
  final double giniRight = computeGini(childDist[1], weightRight);

  double gain = giniParent;
  gain = gain - weightLeft / total * giniLeft;
  gain = gain - weightRight / total * giniRight;
  return gain;
}
Example 12
Source File: LPS.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Renders a leaf as text.
 *
 * For a numeric class attribute the result is
 * {@code " : <mean> (<m_Distribution[1]>/<avgError>)"}, where the mean is the
 * first entry of the class distribution and the average error is
 * {@code m_Distribution[0] / m_Distribution[1]} when the divisor is positive.
 * Otherwise the result is {@code " : <class> (<total>/<others>)"}, built from
 * the largest entry and the sum of the class distribution. All numbers are
 * formatted with {@code Utils.doubleToString(value, 2)}.
 *
 * @return the leaf as string
 * @throws Exception if generation fails
 */
protected String leafString() throws Exception {
  double total = 0;
  double majorityCount = 0;
  int majorityIndex = 0;
  double mean = 0;
  double meanError = 0;

  if (m_ClassDistribution != null) {
    if (m_Info.classAttribute().isNominal()) {
      total = Utils.sum(m_ClassDistribution);
      majorityIndex = Utils.maxIndex(m_ClassDistribution);
      majorityCount = m_ClassDistribution[majorityIndex];
    } else {
      mean = m_ClassDistribution[0];
      if (m_Distribution[1] > 0) {
        meanError = m_Distribution[0] / m_Distribution[1];
      }
    }
  }

  if (m_Info.classAttribute().isNumeric()) {
    final String meanText = Utils.doubleToString(mean, 2);
    final String weightText = Utils.doubleToString(m_Distribution[1], 2);
    final String errorText = Utils.doubleToString(meanError, 2);
    return " : " + meanText + " (" + weightText + "/" + errorText + ")";
  }

  final String label = m_Info.classAttribute().value(majorityIndex);
  final String totalText = Utils.doubleToString(total, 2);
  final String othersText = Utils.doubleToString(total - majorityCount, 2);
  return " : " + label + " (" + totalText + "/" + othersText + ")";
}
Example 13
Source File: LBR.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Calculates the class membership probabilities. * for the given test instance. * * @param instance the instance to be classified * @param instanceIndex * * @return predicted class probability distribution * @throws Exception if distribution can't be computed */ public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception { double sumForPriors = 0; double sumForCounts = 0; int attIndex, AIndex; int numClassesOfInstance = instance.numClasses(); sumForPriors = 0; sumForCounts = 0; instanceIndex.setSequentialDataset(true); // Calculate all of conditional probabilities. sumForPriors = Utils.sum(m_Priors) + numClassesOfInstance; for (int j = 0; j < numClassesOfInstance; j++) { // pointer to counts to make access more efficient in loop int [][] countsPointer = m_Counts[j]; posteriorsArray[j] = (m_Priors[j] + 1) / (sumForPriors); for(attIndex = 0; attIndex < instanceIndex.m_NumSeqAttsSet; attIndex++) { AIndex = instanceIndex.m_SequentialAttIndexes[attIndex]; sumForCounts = Utils.sum(countsPointer[AIndex]); if (!instance.isMissing(AIndex)) { posteriorsArray[j] *= ((countsPointer[AIndex][(int)instance.value(AIndex)] + 1) / (sumForCounts + (double)instance.attribute(AIndex).numValues())); } } } // Normalize probabilities Utils.normalize(posteriorsArray); return posteriorsArray; }
Example 14
Source File: Vote.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Classifies a given instance using the selected combination rule.
 *
 * The rule stored in {@code m_CombinationRule} picks the helper that produces
 * the distribution; for the median rule the value of
 * {@code classifyInstance} is stored in the first slot of a fresh array sized
 * by the number of classes. If the class attribute is not numeric and the
 * distribution has a positive sum, it is normalized before being returned.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] combined = new double[instance.numClasses()];

  switch (m_CombinationRule) {
    case AVERAGE_RULE:
      combined = distributionForInstanceAverage(instance);
      break;
    case PRODUCT_RULE:
      combined = distributionForInstanceProduct(instance);
      break;
    case MAJORITY_VOTING_RULE:
      combined = distributionForInstanceMajorityVoting(instance);
      break;
    case MIN_RULE:
      combined = distributionForInstanceMin(instance);
      break;
    case MAX_RULE:
      combined = distributionForInstanceMax(instance);
      break;
    case MEDIAN_RULE:
      combined[0] = classifyInstance(instance);
      break;
    default:
      throw new IllegalStateException("Unknown combination rule '" + m_CombinationRule + "'!");
  }

  // Only non-numeric classes are normalized, and only when there is weight.
  final boolean nonNumericClass = !instance.classAttribute().isNumeric();
  if (nonNumericClass && (Utils.sum(combined) > 0)) {
    Utils.normalize(combined);
  }

  return combined;
}
Example 15
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the information gain of a binary split: the parent's entropy minus
 * the weight-proportional entropies of the two successor nodes.
 *
 * Returns 0 when the parent distribution sums to exactly zero.
 *
 * @param parentDist class distributions of parent node
 * @param childDist class distributions of successor nodes (index 0 = left,
 *        index 1 = right)
 * @return information gain computed
 */
protected double computeInfoGain(double[] parentDist, double[][] childDist) {
  final double parentWeight = Utils.sum(parentDist);
  if (parentWeight == 0) {
    return 0;
  }

  final double[] left = childDist[0];
  final double[] right = childDist[1];
  final double leftWeight = Utils.sum(left);
  final double rightWeight = Utils.sum(right);

  final double parentEntropy = computeEntropy(parentDist, parentWeight);
  final double leftEntropy = computeEntropy(left, leftWeight);
  final double rightEntropy = computeEntropy(right, rightWeight);

  final double leftShare = leftWeight / parentWeight;
  final double rightShare = rightWeight / parentWeight;
  return parentEntropy - leftShare * leftEntropy - rightShare * rightEntropy;
}
Example 16
Source File: sIB.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Builds a copy of the instance whose values are each divided by the sum of
 * all its values.
 *
 * No check is made for a zero sum; the division is carried out as is.
 *
 * @param inst instance to be normalized
 * @return a new {@code DenseInstance} with the original weight and the
 *         scaled values
 */
private Instance normalizeInstance(Instance inst) {
  final double[] scaled = inst.toDoubleArray();
  final double total = Utils.sum(scaled);

  int pos = 0;
  while (pos < scaled.length) {
    scaled[pos] = scaled[pos] / total;
    pos++;
  }

  return new DenseInstance(inst.weight(), scaled);
}
Example 17
Source File: AbstractClusterer.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Assigns the given instance to a cluster. Either this method or
 * distributionForInstance() needs to be implemented by subclasses.
 *
 * This implementation asks {@code distributionForInstance} for the membership
 * distribution and returns the index of its largest entry.
 *
 * @param instance the instance to be assigned to a cluster
 * @return the number of the assigned cluster as an integer
 * @exception Exception if the distribution is null or sums to zero or less
 */
public int clusterInstance(Instance instance) throws Exception {

  final double[] membership = distributionForInstance(instance);
  if (membership == null) {
    throw new Exception("Null distribution predicted");
  }

  final double totalMass = Utils.sum(membership);
  if (totalMass <= 0) {
    throw new Exception("Unable to cluster instance");
  }

  return Utils.maxIndex(membership);
}
Example 18
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Generate successor nodes for a node and put them into BestFirstElements * according to gini gain or information gain in a descending order. * * @param BestFirstElements list to store BestFirst nodes * @param data training instance * @param subsetSortedIndices sorted indices of instances of successor nodes * @param subsetWeights weights of instances of successor nodes * @param dists class distributions of successor nodes * @param att attribute used to split the node * @param useHeuristic if use heuristic search for nominal attributes in multi-class problem * @param useGini if use Gini index as splitting criterion * @throws Exception if something goes wrong */ protected void makeSuccessors(FastVector BestFirstElements,Instances data, int[][][] subsetSortedIndices, double[][][] subsetWeights, double[][][] dists, Attribute att, boolean useHeuristic, boolean useGini) throws Exception { m_Successors = new BFTree[2]; for (int i=0; i<2; i++) { m_Successors[i] = new BFTree(); m_Successors[i].m_isLeaf = true; // class probability and distribution for this successor node m_Successors[i].m_ClassProbs = new double[data.numClasses()]; m_Successors[i].m_Distribution = new double[data.numClasses()]; System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs, 0,m_Successors[i].m_ClassProbs.length); System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_Distribution, 0,m_Successors[i].m_Distribution.length); if (Utils.sum(m_Successors[i].m_ClassProbs)!=0) Utils.normalize(m_Successors[i].m_ClassProbs); // split information for this successor node double[][] props = new double[data.numAttributes()][2]; double[][][] subDists = new double[data.numAttributes()][2][data.numClasses()]; double[][] totalSubsetWeights = new double[data.numAttributes()][2]; FastVector splitInfo = m_Successors[i].computeSplitInfo(m_Successors[i], data, subsetSortedIndices[i], subsetWeights[i], subDists, props, totalSubsetWeights, useHeuristic, useGini); // branch proportion for this 
successor node int splitIndex = ((Attribute)splitInfo.elementAt(1)).index(); m_Successors[i].m_Props = new double[2]; System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0, m_Successors[i].m_Props.length); // sorted indices and weights of each attribute for this successor node m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0]; m_Successors[i].m_Weights = new double[data.numAttributes()][0]; for (int j=0; j<m_Successors[i].m_SortedIndices.length; j++) { m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j]; m_Successors[i].m_Weights[j] = subsetWeights[i][j]; } // distribution of each attribute for this successor node m_Successors[i].m_Dists = new double[data.numAttributes()][2][data.numClasses()]; for (int j=0; j<subDists.length; j++) { m_Successors[i].m_Dists[j] = subDists[j]; } // total weights for this successor node. m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]); // insert this successor node into BestFirstElements according to gini gain or information gain // descendingly if (BestFirstElements.size()==0) { BestFirstElements.addElement(splitInfo); } else { double gGain = ((Double)(splitInfo.elementAt(3))).doubleValue(); int vectorSize = BestFirstElements.size(); FastVector lastNode = (FastVector)BestFirstElements.elementAt(vectorSize-1); // If gini gain is less than that of last node in FastVector if (gGain<((Double)(lastNode.elementAt(3))).doubleValue()) { BestFirstElements.insertElementAt(splitInfo, vectorSize); } else { for (int j=0; j<vectorSize; j++) { FastVector node = (FastVector)BestFirstElements.elementAt(j); double nodeGain = ((Double)(node.elementAt(3))).doubleValue(); if (gGain>=nodeGain) { BestFirstElements.insertElementAt(splitInfo, j); break; } } } } } }
Example 19
Source File: Discretize.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Test using Kononenko's MDL criterion. * * @param priorCounts * @param bestCounts * @param numInstances * @param numCutPoints * @return true if the split is acceptable */ private boolean KononenkosMDL(double[] priorCounts, double[][] bestCounts, double numInstances, int numCutPoints) { double distPrior, instPrior, distAfter = 0, sum, instAfter = 0; double before, after; int numClassesTotal; // Number of classes occuring in the set numClassesTotal = 0; for (int i = 0; i < priorCounts.length; i++) { if (priorCounts[i] > 0) { numClassesTotal++; } } // Encode distribution prior to split distPrior = SpecialFunctions.log2Binomial(numInstances + numClassesTotal - 1, numClassesTotal - 1); // Encode instances prior to split. instPrior = SpecialFunctions.log2Multinomial(numInstances, priorCounts); before = instPrior + distPrior; // Encode distributions and instances after split. for (int i = 0; i < bestCounts.length; i++) { sum = Utils.sum(bestCounts[i]); distAfter += SpecialFunctions.log2Binomial(sum + numClassesTotal - 1, numClassesTotal - 1); instAfter += SpecialFunctions.log2Multinomial(sum, bestCounts[i]); } // Coding cost after split after = Utils.log2(numCutPoints) + distAfter + instAfter; // Check if split is to be accepted return (before > after); }
Example 20
Source File: REPTree.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Backfits data from holdout set. * * @throws Exception if insertion fails */ protected void backfitHoldOutSet() throws Exception { // Insert instance into hold-out class distribution if (m_Info.classAttribute().isNominal()) { // Nominal case if (m_ClassProbs == null) { m_ClassProbs = new double[m_Info.numClasses()]; } System.arraycopy(m_Distribution, 0, m_ClassProbs, 0, m_Info.numClasses()); for (int i = 0; i < m_HoldOutDist.length; i++) { m_ClassProbs[i] += m_HoldOutDist[i]; } if (Utils.sum(m_ClassProbs) > 0) { doSmoothing(); Utils.normalize(m_ClassProbs); } else { m_ClassProbs = null; } } else { // Numeric case double sumOfWeightsTrainAndHoldout = m_Distribution[1] + m_HoldOutDist[0]; if (sumOfWeightsTrainAndHoldout <= 0) { return; } if (m_ClassProbs == null) { m_ClassProbs = new double[1]; } else { m_ClassProbs[0] *= m_Distribution[1]; } m_ClassProbs[0] += m_HoldOutDist[1]; m_ClassProbs[0] /= sumOfWeightsTrainAndHoldout; } // The process is recursive if (m_Attribute != -1) { for (int i = 0; i < m_Successors.length; i++) { m_Successors[i].backfitHoldOutSet(); } } }