Java Code Examples for weka.core.Utils#minIndex()

The following examples show how to use weka.core.Utils#minIndex(). Each example notes its source file, originating project, and license.
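Before the examples, a note on the method itself: Utils.minIndex(double[]) scans the array once and returns the index of the smallest value, keeping the first occurrence on ties. A minimal sketch of the call (the array values are made up for illustration):

import weka.core.Utils;

public class MinIndexDemo {
  public static void main(String[] args) {
    double[] distances = {0.42, 0.17, 0.93, 0.17};
    // 0.17 appears twice; minIndex keeps the first occurrence, index 1
    int best = Utils.minIndex(distances);
    System.out.println("index " + best + ", value " + distances[best]);
  }
}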
Example 1
Source File: sIB.java    From tsml with GNU General Public License v3.0
/**
 * Cluster a given instance. This is the method defined in the Clusterer
 * interface; it does nothing but return the cluster assigned to the instance.
 */
public int clusterInstance(Instance instance) throws Exception {
  double prior = (double) 1 / input.sumVals;
  double[] distances = new double[m_numCluster]; 
  for(int i = 0; i < m_numCluster; i++){
    double Pnew = bestT.Pt[i] + prior;
    double pi1 = prior / Pnew;
    double pi2 = bestT.Pt[i] / Pnew;
    distances[i] = Pnew * JS(instance, i, pi1, pi2);
  }
  return Utils.minIndex(distances);
}
 
Example 2
Source File: sIB.java    From tsml with GNU General Public License v3.0
/**
 * Cluster an instance into the nearest cluster. 
 * @param instIdx Index of the instance to be clustered
 * @param input Object which describes the statistics of the training dataset
 * @param T the partition
 * @return index of the cluster that has the minimum distance to the instance
 */
private int clusterInstance(int instIdx, Input input, Partition T) {
  double[] distances = new double[m_numCluster];
  for (int i = 0; i < m_numCluster; i++) {
    double Pnew = input.Px[instIdx] + T.Pt[i];
    double pi1 = input.Px[instIdx] / Pnew;
    double pi2 = T.Pt[i] / Pnew;
    distances[i] = Pnew * JS(instIdx, input, T, i, pi1, pi2);
  }
  return Utils.minIndex(distances);    
}
 
Example 3
Source File: CostSensitiveClassifier.java    From tsml with GNU General Public License v3.0
/**
  * Returns class probabilities. When the minimum expected cost approach is
  * chosen, returns a probability of one for the class with the minimum
  * expected misclassification cost. Otherwise it returns the probability
  * distribution produced by the base classifier.
  *
  * @param instance the instance to be classified
  * @return the computed distribution for the given instance
  * @throws Exception if instance could not be classified
  * successfully */
 public double[] distributionForInstance(Instance instance) throws Exception {

   if (!m_MinimizeExpectedCost) {
     return m_Classifier.distributionForInstance(instance);
   }
   double [] pred = m_Classifier.distributionForInstance(instance);
   double [] costs = m_CostMatrix.expectedCosts(pred, instance);

   // This is probably not ideal
   int classIndex = Utils.minIndex(costs);
   for (int i = 0; i < pred.length; i++) {
     if (i == classIndex) {
       pred[i] = 1.0;
     } else {
       pred[i] = 0.0;
     }
   }
   return pred; 
 }
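The pattern above — compute the expected cost of predicting each class, then pick the cheapest class with Utils.minIndex — also works outside of Weka's CostMatrix. A minimal sketch, assuming a plain double[][] where costs[predicted][actual] is the misclassification cost; the class name and the numbers are illustrative, not Weka API:

import weka.core.Utils;

public class MinCostDecision {

  // Expected cost of predicting each class under the given class distribution
  static double[] expectedCosts(double[] probs, double[][] costs) {
    double[] expected = new double[costs.length];
    for (int predicted = 0; predicted < costs.length; predicted++) {
      for (int actual = 0; actual < probs.length; actual++) {
        expected[predicted] += probs[actual] * costs[predicted][actual];
      }
    }
    return expected;
  }

  public static void main(String[] args) {
    double[] probs = {0.7, 0.3};                 // base classifier's distribution
    double[][] costs = {{0.0, 5.0}, {1.0, 0.0}}; // missing class 1 costs 5x
    int cheapest = Utils.minIndex(expectedCosts(probs, costs));
    System.out.println("Minimum expected cost class: " + cheapest); // prints 1
  }
}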
 
Example 4
Source File: SVMAttributeEval.java    From tsml with GNU General Public License v3.0
/**
 * Get SVM-ranked attribute indexes (best to worst) selected for
 * the class attribute indexed by classInd (one-vs-all).
 */
private int[] rankBySVM(int classInd, Instances data) {
  // Holds a mapping into the original array of attribute indices
  int[] origIndices = new int[data.numAttributes()];
  for (int i = 0; i < origIndices.length; i++)
    origIndices[i] = i;
  
  // Count down of number of attributes remaining
  int numAttrLeft = data.numAttributes()-1;
  // Ranked attribute indices for this class, one-vs-all (highest to lowest)
  int[] attRanks = new int[numAttrLeft];

  try {
    MakeIndicator filter = new MakeIndicator();
    filter.setAttributeIndex("" + (data.classIndex() + 1));
    filter.setNumeric(false);
    filter.setValueIndex(classInd);
    filter.setInputFormat(data);
    Instances trainCopy = Filter.useFilter(data, filter);
    double pctToElim = ((double) m_percentToEliminate) / 100.0;
    while (numAttrLeft > 0) {
      int numToElim;
      if (pctToElim > 0) {
        numToElim = (int) (trainCopy.numAttributes() * pctToElim);
        numToElim = (numToElim > 1) ? numToElim : 1;
        if (numAttrLeft - numToElim <= m_percentThreshold) {
          pctToElim = 0;
          numToElim = numAttrLeft - m_percentThreshold;
        }
      } else {
        numToElim = (numAttrLeft >= m_numToEliminate) ? m_numToEliminate : numAttrLeft;
      }
      
      // Build the linear SVM with default parameters
      SMO smo = new SMO();
                              
      // SMO seems to get stuck if data not normalised when few attributes remain
      // smo.setNormalizeData(numAttrLeft < 40);
      smo.setFilterType(new SelectedTag(m_smoFilterType, SMO.TAGS_FILTER));
      smo.setEpsilon(m_smoPParameter);
      smo.setToleranceParameter(m_smoTParameter);
      smo.setC(m_smoCParameter);
      smo.buildClassifier(trainCopy);
                              
      // Find the attribute with maximum weight^2
      double[] weightsSparse = smo.sparseWeights()[0][1];
      int[] indicesSparse = smo.sparseIndices()[0][1];
      double[] weights = new double[trainCopy.numAttributes()];
      for (int j = 0; j < weightsSparse.length; j++) {
        weights[indicesSparse[j]] = weightsSparse[j] * weightsSparse[j];
      }
      weights[trainCopy.classIndex()] = Double.MAX_VALUE;
      int minWeightIndex;
      int[] featArray = new int[numToElim];
      boolean[] eliminated = new boolean[origIndices.length];
      for (int j = 0; j < numToElim; j++) {
        minWeightIndex = Utils.minIndex(weights);
        attRanks[--numAttrLeft] = origIndices[minWeightIndex];
        featArray[j] = minWeightIndex;
        eliminated[minWeightIndex] = true;
        weights[minWeightIndex] = Double.MAX_VALUE;
      }
                              
      // Delete the worst attributes. 
      weka.filters.unsupervised.attribute.Remove delTransform =
        new weka.filters.unsupervised.attribute.Remove();
      delTransform.setInvertSelection(false);
      delTransform.setAttributeIndicesArray(featArray);
      delTransform.setInputFormat(trainCopy);
      trainCopy = Filter.useFilter(trainCopy, delTransform);
                              
      // Update the array of remaining attribute indices
      int[] temp = new int[origIndices.length - numToElim];
      int k = 0;
      for (int j = 0; j < origIndices.length; j++) {
        if (!eliminated[j]) {
          temp[k++] = origIndices[j];
        }
      }
      origIndices = temp;
    }                 
    // Any exception is only logged; the ranking computed so far is returned
  } catch (Exception e) {
    e.printStackTrace();
  }
  return attRanks;
}
 
Example 5
Source File: MINND.java    From tsml with GNU General Public License v3.0
/**
 * Pre-process the given exemplar according to the other exemplars
 * in the given dataset. It also updates the noise data statistics.
 *
 * @param data the whole set of exemplars
 * @param pos the position of the given exemplar in data
 * @return the processed exemplar
 * @throws Exception if the returned exemplar is wrong 
 */
public Instance preprocess(Instances data, int pos)
  throws Exception{
  Instance before = data.instance(pos);
  if((int)before.classValue() == 0){
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
    return before;
  }

  Instances after_relationInsts = before.attribute(1).relation().stringFreeStructure();
  Instances noises_relationInsts = before.attribute(1).relation().stringFreeStructure();

  Instances newData = m_Attributes;
  Instance after = new DenseInstance(before.numAttributes());
  Instance noises = new DenseInstance(before.numAttributes());
  after.setDataset(newData);
  noises.setDataset(newData);

  for(int g=0; g < before.relationalValue(1).numInstances(); g++){
    Instance datum = before.relationalValue(1).instance(g);
    double[] dists = new double[data.numInstances()];

    for(int i=0; i < data.numInstances(); i++){
      if(i != pos)
        dists[i] = distance(datum, m_Mean[i], m_Variance[i], i);
      else
        dists[i] = Double.POSITIVE_INFINITY;
    }		   

    int[] pred = new int[m_NumClasses];
    for(int n=0; n < pred.length; n++)
      pred[n] = 0;

    for(int o=0; o<m_Select; o++){
      int index = Utils.minIndex(dists);
      pred[(int)m_Class[index]]++;
      dists[index] = Double.POSITIVE_INFINITY;
    }

    int clas = Utils.maxIndex(pred);
    if((int)before.classValue() != clas)
      noises_relationInsts.add(datum);
    else
      after_relationInsts.add(datum);		
  }

  int relationValue;
  relationValue = noises.attribute(1).addRelation(noises_relationInsts);
  noises.setValue(0, before.value(0));
  noises.setValue(1, relationValue);
  noises.setValue(2, before.classValue());

  relationValue = after.attribute(1).addRelation(after_relationInsts);
  after.setValue(0, before.value(0));
  after.setValue(1, relationValue);
  after.setValue(2, before.classValue());

  if (Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)) {
    for (int i = 0; i < m_Dimension; i++) {
      m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i);
      m_NoiseV[pos][i] = noises.relationalValue(1).variance(i);
      if (Utils.eq(m_NoiseV[pos][i], 0.0))
        m_NoiseV[pos][i] = m_ZERO;
    }
  } else {
    m_NoiseM[pos] = null;
    m_NoiseV[pos] = null;
  }

  return after;
}
 
Example 6
Source File: MINND.java    From tsml with GNU General Public License v3.0
/**
 * Use the Kullback-Leibler distance to find the nearest neighbours of
 * the given exemplar, and the K-Nearest Neighbour algorithm to
 * classify the test exemplar.
 *
 * @param ex the given test exemplar
 * @return the classification 
 * @throws Exception if the exemplar could not be classified
 * successfully
 */
public double classifyInstance(Instance ex) throws Exception {

  ex = scale(ex);

  double[] var = new double[m_Dimension];
  for (int i = 0; i < m_Dimension; i++)
    var[i] = ex.relationalValue(1).variance(i);

  // The Kullback distance to all exemplars
  double[] kullback = new double[m_Class.length];

  // The first K nearest neighbours' predictions
  double[] predict = new double[m_NumClasses];
  for (int h = 0; h < predict.length; h++)
    predict[h] = 0;
  ex = cleanse(ex);

  if (ex.relationalValue(1).numInstances() == 0) {
    if (getDebug())
      System.out.println("???Whole exemplar falls into ambiguous area!");
    return 1.0;                          // Bias towards positive class
  }

  double[] mean = new double[m_Dimension];
  for (int i = 0; i < m_Dimension; i++)
    mean[i] = ex.relationalValue(1).meanOrMode(i);

  // Avoid zero sigma
  for (int h = 0; h < var.length; h++) {
    if (Utils.eq(var[h], 0.0))
      var[h] = m_ZERO;
  }

  for (int i = 0; i < m_Class.length; i++) {
    if (m_ValidM[i] != null)
      kullback[i] = kullback(mean, m_ValidM[i], var, m_Variance[i], i);
    else
      kullback[i] = Double.POSITIVE_INFINITY;
  }

  for (int j = 0; j < m_Neighbour; j++) {
    int pos = Utils.minIndex(kullback);
    predict[(int) m_Class[pos]] += m_Weights[pos];
    kullback[pos] = Double.POSITIVE_INFINITY;
  }

  if (getDebug())
    System.out.println("???There are still some unambiguous instances in this exemplar! Predicted as: " + Utils.maxIndex(predict));
  return (double) Utils.maxIndex(predict);
}
 
Example 7
Source File: MINND.java    From tsml with GNU General Public License v3.0
/**
 * Cleanse the given exemplar according to the valid and noise data
 * statistics
 *
 * @param before the given exemplar
 * @return the processed exemplar
 * @throws Exception if the returned exemplar is wrong 
 */
public Instance cleanse(Instance before) throws Exception{

  Instances insts = before.relationalValue(1).stringFreeStructure();
  Instance after = new DenseInstance(before.numAttributes());
  after.setDataset(m_Attributes);

  for(int g=0; g < before.relationalValue(1).numInstances(); g++){
    Instance datum = before.relationalValue(1).instance(g);
    double[] minNoiDists = new double[m_Choose];
    double[] minValDists = new double[m_Choose];
    int noiseCount = 0, validCount = 0;
    double[] nDist = new double[m_Mean.length]; 
    double[] vDist = new double[m_Mean.length]; 

    for(int h=0; h < m_Mean.length; h++){
      if(m_ValidM[h] == null)
        vDist[h] = Double.POSITIVE_INFINITY;
      else
        vDist[h] = distance(datum, m_ValidM[h], m_ValidV[h], h);

      if(m_NoiseM[h] == null)
        nDist[h] = Double.POSITIVE_INFINITY;
      else
        nDist[h] = distance(datum, m_NoiseM[h], m_NoiseV[h], h);
    }

    for(int k=0; k < m_Choose; k++){
      int pos = Utils.minIndex(vDist);
      minValDists[k] = vDist[pos];
      vDist[pos] = Double.POSITIVE_INFINITY;
      pos = Utils.minIndex(nDist);
      minNoiDists[k] = nDist[pos];
      nDist[pos] = Double.POSITIVE_INFINITY;
    }

    int x = 0, y = 0;
    while((x+y) < m_Choose){
      if(minValDists[x] <= minNoiDists[y]){
        validCount++;
        x++;
      }
      else{
        noiseCount++;
        y++;
      }
    }
    if (x >= y)
      insts.add(datum);
  }

  after.setValue(0, before.value(0));
  after.setValue(1, after.attribute(1).addRelation(insts));
  after.setValue(2, before.value(2));

  return after;
}
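Examples 5, 6 and 7 above all rely on the same idiom for taking the k smallest entries of an array: call Utils.minIndex, consume the chosen entry, overwrite it with Double.POSITIVE_INFINITY so it cannot win again, and repeat (Example 4 does the same with Double.MAX_VALUE). A standalone sketch of that idiom with illustrative names; it runs in O(k·n), which is fine for the small arrays used here:

import java.util.Arrays;
import weka.core.Utils;

public class KSmallestDemo {

  // Indices of the k smallest values, found by repeated minIndex calls
  static int[] kSmallest(double[] values, int k) {
    double[] work = Arrays.copyOf(values, values.length); // keep the caller's array intact
    int[] picked = new int[k];
    for (int j = 0; j < k; j++) {
      picked[j] = Utils.minIndex(work);
      work[picked[j]] = Double.POSITIVE_INFINITY; // this entry can no longer be the minimum
    }
    return picked;
  }

  public static void main(String[] args) {
    double[] dists = {3.2, 0.4, 1.7, 0.9};
    System.out.println(Arrays.toString(kSmallest(dists, 2))); // [1, 3]
  }
}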