Java Code Examples for weka.core.Utils#gr()

The following examples show how to use weka.core.Utils#gr(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: MINND.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the target function that gradient descent minimizes for one
 * exemplar.
 * The formula is:<br/>
 * 1/2*sum[i=1..p, i != rowpos](f(X, Xi)-var(Y, Yi))^2 <p/>
 * where p is the number of exemplars and Y holds the class labels.
 * f() is the weighted Euclidean distance between exemplar
 * <code>rowpos</code> and exemplar i, taken only over the dimensions whose
 * variance for <code>rowpos</code> is greater than zero. var() is 0 when
 * both exemplars carry the same label and sqrt(m_Dimension - 1) otherwise.
 * <p/>
 * NOTE(review): the previous documentation gave var() as
 * sqrt(numDimension)*(Y-Yi), whereas the code uses sqrt(m_Dimension - 1);
 * confirm which one is intended.
 *
 * @param x the weights of the exemplar in question, one per dimension
 * @param X the exemplar matrix, one row per exemplar; null rows are skipped
 * @param rowpos row index of the exemplar in X
 * @param Y the observed class labels, indexed like the rows of X
 * @return the result of the target function
 * @throws IllegalStateException if a distance evaluates to infinity
 */
public double target(double[] x, double[][] X, int rowpos, double[] Y){
  final double y = Y[rowpos];
  // Target distance for a pair with different labels; it does not depend on i.
  final double mismatchTarget = Math.sqrt((double)m_Dimension - 1);
  double result = 0;

  for(int i=0; i < X.length; i++){
    if((i == rowpos) || (X[i] == null))
      continue;

    double var = (y==Y[i]) ? 0.0 : mismatchTarget;

    double f = 0;
    for(int j=0; j < m_Dimension; j++){
      if(Utils.gr(m_Variance[rowpos][j], 0.0)){
        double diff = X[rowpos][j]-X[i][j];
        f += x[j]*diff*diff;
      }
    }
    f = Math.sqrt(f);

    // Fail with a diagnosable exception rather than terminating the whole JVM.
    if(Double.isInfinite(f))
      throw new IllegalStateException("Infinite distance between exemplars "
                                      + rowpos + " and " + i);

    result += 0.5 * (f - var) * (f - var);
  }
  return result;
}
 
Example 2
Source File: LMT.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Predicts the class of an instance by choosing the class with the largest
 * estimated probability. A later class replaces the current choice only
 * when Utils.gr reports its probability as greater.
 *
 * @param instance the instance to classify
 * @return the index of the chosen class, as a double
 * @throws Exception if instance can't be classified successfully
 */
public double classifyInstance(Instance instance) throws Exception {
  double[] probs = distributionForInstance(instance);

  int best = 0;
  double bestProb = -1;
  for (int c = 0; c < instance.numClasses(); c++) {
    if (!Utils.gr(probs[c], bestProb)) {
      continue;
    }
    best = c;
    bestProb = probs[c];
  }
  return (double) best;
}
 
Example 3
Source File: FT.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Classifies an instance as the class that receives the highest
 * probability in the distribution computed for it.
 *
 * @param instance the instance to classify
 * @return the classification (class index as a double)
 * @throws Exception if instance can't be classified successfully
 */
public double classifyInstance(Instance instance) throws Exception {
  final double[] distribution = distributionForInstance(instance);

  double topProb = -1;
  int topClass = 0;

  // scan all classes, remembering the most probable one seen so far
  int cls = 0;
  while (cls < instance.numClasses()) {
    double p = distribution[cls];
    if (Utils.gr(p, topProb)) {
      topClass = cls;
      topProb = p;
    }
    cls++;
  }
  return (double) topClass;
}
 
Example 4
Source File: EntropySplitCrit.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes entropy of test distribution with respect to training
 * distribution. Training counts are offset by 1 per class and by the
 * number of relevant classes per bag before the logarithm is taken; the
 * result is divided by log2.
 *
 * @param train the training distribution
 * @param test the test distribution
 * @return the entropy of the test data with respect to the training data
 */
public final double splitCritValue(Distribution train, Distribution test) {

  // A class is relevant if it has weight in either distribution
  int relevantClasses = 0;
  for (int c = 0; c < test.numClasses(); c++) {
    if (Utils.gr(train.perClass(c), 0) || Utils.gr(test.perClass(c), 0)) {
      relevantClasses++;
    }
  }

  // Accumulate the entropy bag by bag, skipping empty test bags
  double entropy = 0;
  for (int bag = 0; bag < test.numBags(); bag++) {
    if (!Utils.gr(test.perBag(bag), 0)) {
      continue;
    }
    for (int c = 0; c < test.numClasses(); c++) {
      if (Utils.gr(test.perClassPerBag(bag, c), 0)) {
        entropy -= test.perClassPerBag(bag, c) *
          Math.log(train.perClassPerBag(bag, c) + 1);
      }
    }
    entropy += test.perBag(bag) * Math.log(train.perBag(bag) + relevantClasses);
  }

  return entropy / log2;
}
 
Example 5
Source File: ClassifierDecList.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Classifies an instance by taking the class with the largest probability
 * as reported by getProbs.
 *
 * @param instance the instance to classify
 * @return the index of the most probable class, or -1.0 if the largest
 *         probability equals zero according to Utils.eq
 * @exception Exception if something goes wrong
 */
public double classifyInstance(Instance instance)
     throws Exception {

  int bestClass = 0;
  double bestProb = -1;

  for (int cls = 0; cls < instance.numClasses(); cls++) {
    double prob = getProbs(cls, instance, 1);
    if (Utils.gr(prob, bestProb)) {
      bestClass = cls;
      bestProb = prob;
    }
  }

  // a best probability of zero means that no class could be predicted
  return Utils.eq(bestProb, 0) ? -1.0 : (double) bestClass;
}
 
Example 6
Source File: NominalToBinary.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes, for every nominal attribute, the weighted average class value
 * of each attribute value and stores the sort order of those averages in
 * m_Indices. Instances with a missing class or a missing attribute value
 * are left out. An attribute value that carries no weight is given the
 * attribute's overall average instead.
 */
private void computeAverageClassValues() {

  double[][] classValueAvgs = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];

  for (int a = 0; a < getInputFormat().numAttributes(); a++) {
    Attribute attribute = getInputFormat().attribute(a);
    if (!attribute.isNominal()) {
      continue;
    }

    classValueAvgs[a] = new double[attribute.numValues()];
    double[] weightPerValue = new double[attribute.numValues()];

    // accumulate weights and weighted class values per attribute value
    for (int n = 0; n < getInputFormat().numInstances(); n++) {
      Instance current = getInputFormat().instance(n);
      if (current.classIsMissing() || current.isMissing(a)) {
        continue;
      }
      weightPerValue[(int) current.value(a)] += current.weight();
      classValueAvgs[a][(int) current.value(a)] +=
        current.weight() * current.classValue();
    }

    double weightedClassSum = Utils.sum(classValueAvgs[a]);
    double totalWeight = Utils.sum(weightPerValue);

    // turn the sums into averages (only possible if any weight was seen)
    if (Utils.gr(totalWeight, 0)) {
      for (int v = 0; v < attribute.numValues(); v++) {
        if (Utils.gr(weightPerValue[v], 0)) {
          classValueAvgs[a][v] /= (double) weightPerValue[v];
        } else {
          classValueAvgs[a][v] = weightedClassSum / (double) totalWeight;
        }
      }
    }
    m_Indices[a] = Utils.sort(classValueAvgs[a]);
  }
}
 
Example 7
Source File: Distribution.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns class with highest frequency over all bags.
 *
 * @return the index of the class with the largest entry in m_perClass, or
 *         0 if no entry is greater than zero according to Utils.gr
 */
public final int maxClass() {

  int winner = 0;
  double winnerCount = 0;

  int cls = 0;
  for (double count : m_perClass) {
    if (Utils.gr(count, winnerCount)) {
      winnerCount = count;
      winner = cls;
    }
    cls++;
  }

  return winner;
}
 
Example 8
Source File: SMO.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Quick and dirty check whether the quadratic programming problem is
 * solved. Prints the sum of y(i) * alpha(i) over all positive alphas and
 * then reports every violated KKT condition to System.err.
 *
 * @throws Exception if checking fails
 */
protected void checkClassifier() throws Exception {

  double weightedLabelSum = 0;
  for (int i = 0; i < m_alpha.length; i++) {
    if (m_alpha[i] > 0) {
      weightedLabelSum += m_class[i] * m_alpha[i];
    }
  }
  System.err.println("Sum of y(i) * alpha(i): " + weightedLabelSum);

  for (int i = 0; i < m_alpha.length; i++) {
    double output = SVMOutput(i, m_data.instance(i));
    double margin = m_class[i] * output;

    // alpha at zero: the margin must not be smaller than 1
    if (Utils.eq(m_alpha[i], 0) && Utils.sm(margin, 1)) {
      System.err.println("KKT condition 1 violated: " + margin);
    }

    // alpha strictly between its bounds: the margin must equal 1
    if (Utils.gr(m_alpha[i], 0)
        && Utils.sm(m_alpha[i], m_C * m_data.instance(i).weight())
        && !Utils.eq(margin, 1)) {
      System.err.println("KKT condition 2 violated: " + margin);
    }

    // alpha at its upper bound: the margin must not be greater than 1
    if (Utils.eq(m_alpha[i], m_C * m_data.instance(i).weight())
        && Utils.gr(margin, 1)) {
      System.err.println("KKT condition 3 violated: " + margin);
    }
  }
}
 
Example 9
Source File: MISMO.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Quick and dirty check whether the quadratic programming problem is
 * solved. Writes the sum of y(i) * alpha(i) and a message for each
 * violated KKT condition to System.err.
 *
 * @throws Exception if something goes wrong
 */
protected void checkClassifier() throws Exception {

  double total = 0;
  for (int k = 0; k < m_alpha.length; k++) {
    if (m_alpha[k] > 0) {
      total += m_class[k] * m_alpha[k];
    }
  }
  System.err.println("Sum of y(i) * alpha(i): " + total);

  for (int k = 0; k < m_alpha.length; k++) {
    double output = SVMOutput(k, m_data.instance(k));
    double alpha = m_alpha[k];

    if (Utils.eq(alpha, 0)) {
      // lower bound: y * f(x) has to be at least 1
      if (Utils.sm(m_class[k] * output, 1)) {
        System.err.println("KKT condition 1 violated: " + m_class[k] * output);
      }
    }

    boolean aboveZero = Utils.gr(alpha, 0);
    if (aboveZero && Utils.sm(alpha, m_C * m_data.instance(k).weight())) {
      // strictly inside the box: y * f(x) has to be exactly 1
      if (!Utils.eq(m_class[k] * output, 1)) {
        System.err.println("KKT condition 2 violated: " + m_class[k] * output);
      }
    }

    if (Utils.eq(alpha, m_C * m_data.instance(k).weight())) {
      // upper bound: y * f(x) has to be at most 1
      if (Utils.gr(m_class[k] * output, 1)) {
        System.err.println("KKT condition 3 violated: " + m_class[k] * output);
      }
    }
  }
}
 
Example 10
Source File: MakeDecList.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the class distribution for an instance.
 * The rules in theRules are visited in order for as long as some weight of
 * the instance remains. Each rule that covers the instance with a positive
 * weight adds its own distribution, scaled by the weight that is still
 * remaining, and the remaining weight is then reduced by the covered share.
 *
 * @param instance the instance to compute the distribution for
 * @return the summed class distribution
 * @exception Exception if distribution can't be computed
 */
public double[] distributionForInstance(Instance instance)
     throws Exception {

  double[] mixture = new double[instance.numClasses()];
  double remaining = 1;

  for (int r = 0; Utils.gr(remaining, 0); r++) {
    ClassifierDecList rule = (ClassifierDecList) theRules.elementAt(r);
    double covered = rule.weight(instance);
    if (!Utils.gr(covered, 0)) {
      continue;
    }

    double[] ruleProbs = rule.distributionForInstance(instance);
    for (int c = 0; c < mixture.length; c++) {
      mixture[c] += remaining * ruleProbs[c];
    }
    remaining = remaining * (1 - covered);
  }

  return mixture;
}
 
Example 11
Source File: ReplaceMissingValues.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Signify that this batch of input to the filter is finished. 
  * If the filter requires all instances prior to filtering,
  * output() may now be called to retrieve the filtered instances.
  * <p>
  * While m_ModesAndMeans is still null, this call first computes one
  * replacement value per attribute from the buffered input: the index of
  * the highest-weighted value for nominal attributes and the weighted mean
  * of the non-missing values for numeric attributes. It then converts all
  * buffered instances. In every case the input buffer is flushed and a new
  * batch is flagged.
  *
  * @return true if there are instances pending output
  * @throws IllegalStateException if no input structure has been defined
  */
 public boolean batchFinished() {

   if (getInputFormat() == null) {
     throw new IllegalStateException("No input instance format defined");
   }

   if (m_ModesAndMeans == null) {
     // Compute modes and means
     double sumOfWeights =  getInputFormat().sumOfWeights();
     // Per-value weight counts; rows are only allocated for nominal attributes.
     double[][] counts = new double[getInputFormat().numAttributes()][];
     for (int i = 0; i < getInputFormat().numAttributes(); i++) {
if (getInputFormat().attribute(i).isNominal()) {
  counts[i] = new double[getInputFormat().attribute(i).numValues()];
         // Value 0 starts with the full weight; every stored entry seen below
         // subtracts its weight again. Presumably this credits value 0 for
         // entries that sparse instances do not store -- TODO confirm.
         if (counts[i].length > 0)
           counts[i][0] = sumOfWeights;
}
     }
     // sums[i] ends up as the total weight minus the weight of the stored
     // missing entries of numeric attribute i.
     double[] sums = new double[getInputFormat().numAttributes()];
     for (int i = 0; i < sums.length; i++) {
sums[i] = sumOfWeights;
     }
     // results[i] accumulates weight * value for numeric attribute i.
     double[] results = new double[getInputFormat().numAttributes()];
     for (int j = 0; j < getInputFormat().numInstances(); j++) {
Instance inst = getInputFormat().instance(j);
// Walk only the entries the instance stores; inst.index(i) maps the
// i-th stored entry to its attribute index.
for (int i = 0; i < inst.numValues(); i++) {
  if (!inst.isMissingSparse(i)) {
    double value = inst.valueSparse(i);
    if (inst.attributeSparse(i).isNominal()) {
             if (counts[inst.index(i)].length > 0) {
               // Move this instance's weight from value 0 to the stored value.
               counts[inst.index(i)][(int)value] += inst.weight();
               counts[inst.index(i)][0] -= inst.weight();
             }
    } else if (inst.attributeSparse(i).isNumeric()) {
      results[inst.index(i)] += inst.weight() * inst.valueSparse(i);
    }
  } else {
    // Stored but missing: take the weight out without crediting any value.
    if (inst.attributeSparse(i).isNominal()) {
             if (counts[inst.index(i)].length > 0) {
        counts[inst.index(i)][0] -= inst.weight();
             }
    } else if (inst.attributeSparse(i).isNumeric()) {
      sums[inst.index(i)] -= inst.weight();
    }
  }
}
     }
     m_ModesAndMeans = new double[getInputFormat().numAttributes()];
     for (int i = 0; i < getInputFormat().numAttributes(); i++) {
if (getInputFormat().attribute(i).isNominal()) {
         // A nominal attribute without any values gets the missing value.
         if (counts[i].length == 0)
           m_ModesAndMeans[i] = Utils.missingValue();
         else
    m_ModesAndMeans[i] = (double)Utils.maxIndex(counts[i]);
} else if (getInputFormat().attribute(i).isNumeric()) {
  // If no weight remains, the entry keeps the 0.0 it was created with.
  if (Utils.gr(sums[i], 0)) {
    m_ModesAndMeans[i] = results[i] / sums[i];
  }
}
     }

     // Convert pending input instances
     for(int i = 0; i < getInputFormat().numInstances(); i++) {
convertInstance(getInputFormat().instance(i));
     }
   } 
   // Free memory
   flushInput();

   m_NewBatch = true;
   return (numPendingOutput() != 0);
 }
 
Example 12
Source File: YATSI.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Internal function for determining the class distribution for an instance,
 * will be overridden by derived classes. <br/>
 * The instance is looked up in m_Data, its nearest neighbours are fetched
 * and their weights are summed per class. The summed weights are
 * normalized if they add up to more than zero. As a side effect the
 * counters m_ClearCutDistribution and m_WeightFlips are updated.
 *
 * @param instance the instance to get the distribution for
 * @return the distribution for the given instance
 * @throws Exception if the instance cannot be found in m_Data
 */
@Override
protected double[] getDistribution(Instance instance) throws Exception {
  // find instance
  int index = m_Data.indexOf(instance);
  if (index <= -1) {
    throw new Exception("Cannot find instance: " + instance + "\n" 
        + " -> pos=" + index 
        + " = " + m_Data.get(StrictMath.abs(index)));
  }

  // get neighbors
  Instances neighbors = m_NNSearch.kNearestNeighbours(
                          m_Data.get(index), m_KNNDetermined);

  // sum up the weights and the plain number of neighbours per class label
  double[] weightPerClass = new double[neighbors.numClasses()];
  double[] votesPerClass = new double[neighbors.numClasses()];
  for (int n = 0; n < neighbors.numInstances(); n++) {
    Instance neighbor = neighbors.instance(n);
    if (neighbor.classIsMissing())
      continue;
    int label = (int) neighbor.classValue();
    weightPerClass[label] += neighbor.weight();
    votesPerClass[label]++;
  }

  // build result
  double[] result = new double[instance.numClasses()];
  for (int c = 0; c < result.length; c++)
    result[c] = weightPerClass[c];

  if (Utils.gr(Utils.sum(result), 0)) {
    Utils.normalize(result);
  }
  else {
    System.out.println(
        "No summed up weights: " + instance 
        + ", counts=" + Utils.arrayToString(votesPerClass));
  }

  int labelIndex = Utils.maxIndex(weightPerClass);

  // counted whenever classes other than the winning one received weight
  if (!Utils.eq(Utils.sum(weightPerClass) - weightPerClass[labelIndex], 0))
    m_ClearCutDistribution++;

  // did the label change due to weights?
  if (Utils.maxIndex(votesPerClass) != labelIndex)
    m_WeightFlips++;

  return result;
}
 
Example 13
Source File: PairedStats.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the derived statistics (significance etc).
 * Refreshes the derived values of the x, y and differences statistics and
 * then sets correlation, differencesProbability and
 * differencesSignificance.
 */
public void calculateDerived() {

  xStats.calculateDerived();
  yStats.calculateDerived();
  differencesStats.calculateDerived();

  // correlation stays NaN unless both deviations are known and x varies
  correlation = Double.NaN;
  boolean deviationsKnown =
    !Double.isNaN(xStats.stdDev) && !Double.isNaN(yStats.stdDev);
  if (deviationsKnown && !Utils.eq(xStats.stdDev, 0)) {
    double slope = (xySum - xStats.sum * yStats.sum / count)
      / (xStats.sumSq - xStats.sum * xStats.mean);
    correlation = Utils.eq(yStats.stdDev, 0)
      ? 1.0
      : slope * xStats.stdDev / yStats.stdDev;
  }

  // probability of the observed differences
  if (Utils.gr(differencesStats.stdDev, 0)) {
    double tval = differencesStats.mean
      * Math.sqrt(count)
      / differencesStats.stdDev;

    if (m_degreesOfFreedom >= 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
                                                       m_degreesOfFreedom);
    } else if (count > 1) {
      differencesProbability = Statistics.FProbability(tval * tval, 1,
                                                       (int) count - 1);
    } else {
      differencesProbability = 1;
    }
  } else {
    differencesProbability = (differencesStats.sumSq == 0) ? 1.0 : 0.0;
  }

  // significance: 0 if not significant, else the sign of the mean difference
  if (differencesProbability <= sigLevel) {
    differencesSignificance = (xStats.mean > yStats.mean) ? 1 : -1;
  } else {
    differencesSignificance = 0;
  }
}
 
Example 14
Source File: ConjunctiveRule.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a single rule learner with REP dealing with nominal classes or
 * numeric classes.
 * For nominal classes, this rule learner predicts a distribution on
 * the classes.
 * For numeric classes, this learner predicts a single value.
 * If a number of antecedents has been set (m_NumAntds is not -1) the rule
 * is only grown on all the data; otherwise the data is randomized,
 * stratified and split into a growing and a pruning set.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // work on a copy that holds no instances with a missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  if (data.numInstances() < m_Folds) {
    throw new Exception("Not enough data for REP.");
  }

  m_ClassAttribute = data.classAttribute();
  m_NumClasses = m_ClassAttribute.isNominal()
    ? m_ClassAttribute.numValues()
    : 1;

  m_Antds = new FastVector();
  m_DefDstr = new double[m_NumClasses];
  m_Cnsqt = new double[m_NumClasses];
  m_Targets = new FastVector();
  m_Random = new Random(m_Seed);

  if (m_NumAntds == -1) {
    data.randomize(m_Random);

    // Split data into Grow and Prune
    data.stratify(m_Folds);
    Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
    Instances pruneData = data.testCV(m_Folds, m_Folds - 1);

    grow(growData);      // Build this rule
    prune(pruneData);    // Prune this rule
  } else {
    grow(data);
  }

  if (m_ClassAttribute.isNominal()) {
    Utils.normalize(m_Cnsqt);
    if (Utils.gr(Utils.sum(m_DefDstr), 0)) {
      Utils.normalize(m_DefDstr);
    }
  }
}
 
Example 15
Source File: MINND.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the Kullback Leibler distance between two normal
 * distributions whose dimensions are treated as independent, so that the
 * distance is a sum of one term per dimension.
 * Kullback Leibler distance = integral{f(X)ln(f(X)/g(X))}
 * The natural logarithm (ln) is used instead of log2 simply for
 * computational convenience.
 *
 * Suppose there are P dimensions, f(X) is the first distribution and g(X)
 * is the second. The documented result is:
 * Kullback = sum[1..P](ln(SIGMA2/SIGMA1)) +
 *            sum[1..P](SIGMA1^2 / (2*(SIGMA2^2))) +
 *            sum[1..P]((MU1-MU2)^2 / (2*(SIGMA2^2))) -
 *            P/2
 * In the code the squared mean difference is additionally scaled by
 * m_Change[pos][y], and only dimensions in which both variances are
 * greater than zero contribute.
 *
 * @param mu1 mu of the first normal distribution
 * @param mu2 mu of the second normal distribution
 * @param var1 variance(SIGMA^2) of the first normal distribution
 * @param var2 variance(SIGMA^2) of the second normal distribution
 * @param pos row of m_Change that supplies the per-dimension scaling of
 *            the squared mean difference
 * @return the Kullback distance of two distributions
 */
public double kullback(double[] mu1, double[] mu2,
    double[] var1, double[] var2, int pos){
  double total = 0;

  for(int d=0; d < mu1.length; d++){
    boolean usable = Utils.gr(var1[d], 0) && Utils.gr(var2[d], 0);
    if(!usable)
      continue;

    double meanGap = mu1[d]-mu2[d];
    double logTerm = Math.log(Math.sqrt(var2[d]/var1[d]));
    double varTerm = var1[d] / (2.0*var2[d]);
    double meanTerm = m_Change[pos][d] * meanGap*meanGap / (2.0*var2[d]);

    total += (logTerm + varTerm + meanTerm - 0.5);
  }

  return total;
}
 
Example 16
Source File: MINND.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Pre-process the given exemplar according to the other exemplars
 * in the given exemplars.  It also updates noise data statistics.
 * An exemplar of class 0 is returned unchanged. Otherwise each instance in
 * the exemplar's relational attribute (index 1) is labelled by a vote among
 * the m_Select closest other exemplars. Instances whose vote disagrees with
 * the exemplar's class are collected as noise, the others are kept. The
 * mean and variance of the noise are stored in m_NoiseM[pos] and
 * m_NoiseV[pos], or both are set to null if there is no noise.
 *
 * @param data the whole exemplars
 * @param pos the position of given exemplar in data
 * @return the processed exemplar
 * @throws Exception if the returned exemplar is wrong
 */
public Instance preprocess(Instances data, int pos)
  throws Exception{
  Instance before = data.instance(pos);
  int ownClass = (int)before.classValue();
  if(ownClass == 0){
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
    return before;
  }

  Instances keptInsts = before.attribute(1).relation().stringFreeStructure();
  Instances noiseInsts = before.attribute(1).relation().stringFreeStructure();

  Instance after = new DenseInstance(before.numAttributes());
  Instance noises = new DenseInstance(before.numAttributes());
  after.setDataset(m_Attributes);
  noises.setDataset(m_Attributes);

  for(int g=0; g < before.relationalValue(1).numInstances(); g++){
    Instance datum = before.relationalValue(1).instance(g);

    // distance to every other exemplar; the exemplar itself never qualifies
    double[] dists = new double[data.numInstances()];
    for(int i=0; i < data.numInstances(); i++){
      dists[i] = (i == pos)
        ? Double.POSITIVE_INFINITY
        : distance(datum, m_Mean[i], m_Variance[i], i);
    }

    // vote among the m_Select nearest exemplars (counts start at zero)
    int[] votes = new int[m_NumClasses];
    for(int picked=0; picked < m_Select; picked++){
      int nearest = Utils.minIndex(dists);
      votes[(int)m_Class[nearest]]++;
      dists[nearest] = Double.POSITIVE_INFINITY;   // do not pick it again
    }

    if(Utils.maxIndex(votes) == ownClass)
      keptInsts.add(datum);
    else
      noiseInsts.add(datum);
  }

  // the noise relation is registered before the kept relation
  int noiseRelation = noises.attribute(1).addRelation(noiseInsts);
  noises.setValue(0, before.value(0));
  noises.setValue(1, noiseRelation);
  noises.setValue(2, before.classValue());

  int keptRelation = after.attribute(1).addRelation(keptInsts);
  after.setValue(0, before.value(0));
  after.setValue(1, keptRelation);
  after.setValue(2, before.classValue());

  if(!Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)){
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
    return after;
  }

  for(int i=0; i < m_Dimension; i++){
    m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i);
    m_NoiseV[pos][i] = noises.relationalValue(1).variance(i);
    // a zero variance is replaced by m_ZERO
    if(Utils.eq(m_NoiseV[pos][i], 0.0))
      m_NoiseV[pos][i] = m_ZERO;
  }

  return after;
}
 
Example 17
Source File: CollectiveTree.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Determines the distribution of the instances with a non-missing value
 * at the given attribute position. Scanning of the indices stops at the
 * first instance whose value is missing.
 * For a nominal attribute the result has one weight sum per attribute
 * value. For a numeric attribute it has two entries: the weight below the
 * median and the weight at or above it. The result is normalized if its
 * sum is greater than zero.
 *
 * @param data        the instances to work on
 * @param indices     the sorted indices
 * @param att         the attribute to determine the distribution for
 * @return            the distribution
 */
protected double[] determineAttributeDistribution( Instances data,
                                                   int[] indices,
                                                   int att) {
  double[] dist;

  if (data.attribute(att).isNominal()) {
    // one bucket per nominal value
    dist = new double[data.attribute(att).numValues()];
    for (int idx : indices) {
      Instance inst = data.instance(idx);
      if (inst.isMissing(att))
        break;
      dist[(int) inst.value(att)] += inst.weight();
    }
  }
  else {
    // two buckets: less or greater/equal than median
    dist = new double[2];

    // number of leading instances w/o missing attribute
    int known = 0;
    while (known < indices.length
           && !data.instance(indices[known]).isMissing(att))
      known++;

    double[] values = new double[known];
    for (int i = 0; i < known; i++)
      values[i] = data.instance(indices[i]).value(att);

    double median;
    if (values.length == 0)
      median = 0;
    else if (values.length == 1)
      median = values[0];
    else
      median = Utils.kthSmallestValue(values, values.length / 2);

    // distribute the weights over the two buckets
    for (int i = 0; i < known; i++) {
      Instance inst = data.instance(indices[i]);
      int bucket = Utils.sm(inst.value(att), median) ? 0 : 1;
      dist[bucket] += inst.weight();
    }
  }

  if (Utils.gr(Utils.sum(dist), 0))
    Utils.normalize(dist);

  return dist;
}
 
Example 18
Source File: MINND.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Uses gradient descent to adjust the weights stored in m_Change[row].
 * Starting from the current weights, steps of size rate times delta() are
 * taken for as long as a step lowers target() by more than m_STOP. The
 * rate is m_Rate, or 0.05 if m_Rate is -1. When a step makes the target
 * worse, the rate is decayed by m_Decay and the step is retried (if
 * m_Rate is -1); with a fixed rate the previous weights are restored and
 * the search ends.
 *
 * @param row the given row index
 * @param mean the matrix handed to target() and delta() as the exemplar
 *             matrix
 */
public void findWeights(int row, double[][] mean){

  double[] neww = new double[m_Dimension];
  System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension);

  double newresult = target(neww, mean, row, m_Class);
  double result = Double.POSITIVE_INFINITY;
  double rate = (m_Rate != -1) ? m_Rate : 0.05;

search:
  while(Utils.gr((result-newresult), m_STOP)){ // Full step
    double[] oldw = neww;
    neww = new double[m_Dimension];

    double[] step = delta(oldw, mean, row, m_Class);

    for(int d=0; d < m_Dimension; d++){
      if(Utils.gr(m_Variance[row][d], 0.0))
        neww[d] = oldw[d] + rate * step[d];
    }

    result = newresult;
    newresult = target(neww, mean, row, m_Class);

    while(Utils.gr(newresult, result)){ // Search back
      if(m_Rate != -1){
        // fixed rate: give up and keep the previous weights
        System.arraycopy(oldw, 0, neww, 0, m_Dimension);
        break search;
      }

      rate *= m_Decay; // Decay
      for(int d=0; d < m_Dimension; d++){
        if(Utils.gr(m_Variance[row][d], 0.0))
          neww[d] = oldw[d] + rate * step[d];
      }
      newresult = target(neww, mean, row, m_Class);
    }
  }

  m_Change[row] = neww;
}
 
Example 19
Source File: MultiClassClassifierUpdateable.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the distribution for an instance.
 * A single base classifier is asked directly. For the 1-against-1 method
 * every usable pairwise classifier either casts a vote for one class of
 * its pair or, when pairwise coupling is on and there are more than two
 * classes, contributes to the matrices given to pairwiseCoupling. All
 * other methods are delegated to the superclass.
 * 
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  double[] probs = new double[inst.numClasses()];
  if (m_Method != METHOD_1_AGAINST_1) {
    probs = super.distributionForInstance(inst);
    return probs;
  }

  double[][] r = new double[inst.numClasses()][inst.numClasses()];
  double[][] n = new double[inst.numClasses()][inst.numClasses()];

  for (int i = 0; i < m_ClassFilters.length; i++) {
    // skip pairs without a classifier or without any training weight
    if (m_Classifiers[i] == null || !(m_SumOfWeights[i] > 0)) {
      continue;
    }

    Instance tempInst = (Instance) inst.copy();
    tempInst.setDataset(m_TwoClassDataset);
    double[] current = m_Classifiers[i].distributionForInstance(tempInst);

    // recover the two class indices this classifier separates
    Range range = new Range(
        ((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
    range.setUpper(m_ClassAttribute.numValues());
    int[] pair = range.getSelection();

    if (m_pairwiseCoupling && inst.numClasses() > 2) {
      r[pair[0]][pair[1]] = current[0];
      n[pair[0]][pair[1]] = m_SumOfWeights[i];
    } else if (current[0] > current[1]) {
      probs[pair[0]] += 1.0;
    } else {
      probs[pair[1]] += 1.0;
    }
  }

  if (m_pairwiseCoupling && inst.numClasses() > 2) {
    try {
      return pairwiseCoupling(n, r);
    } catch (IllegalArgumentException ignored) {
      // Deliberate fallback: return probs as it stands. No votes are
      // tallied in coupling mode, so probs is still all zeros here.
    }
  }

  if (Utils.gr(Utils.sum(probs), 0)) {
    Utils.normalize(probs);
  }
  return probs;
}
 
Example 20
Source File: RuleStats.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Subset description length: <br>
 * S(t,k,p) = -k*log2(p)-(t-k)log2(1-p)
 * <p>
 * The first term is dropped when p is not greater than zero. The second
 * term is dropped when t equals k, so that an empty remainder contributes
 * nothing instead of evaluating 0 * log2(0).
 *
 * Details see Quinlan: "MDL and categorical theories (Continued)",ML95
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of subset known by recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p){
  double rt = Utils.gr(p, 0.0) ? (- k*Utils.log2(p)) : 0.0;

  // With t == k and p == 1 the term below is zero times a non-finite
  // logarithm, which is NaN in floating point (assumes Utils.log2(0) is not
  // finite -- TODO confirm). Skipping an exactly empty remainder changes
  // no result that was finite before.
  double remainder = t-k;
  if(remainder != 0.0)
    rt -= remainder*Utils.log2(1-p);

  return rt;
}