Java Code Examples for weka.core.Utils#sort()

The following examples show how to use weka.core.Utils#sort(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Finds the point of a threshold curve whose threshold value lies nearest
 * to a requested target. Threshold values are read from the last attribute
 * of the supplied instances.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance whose threshold is closest to the
 * target, or -1 if no answer exists (relation name does not match, there
 * is no data, or the target lies outside [0, 1])
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {

  // Reject anything that is not a threshold curve produced by this class.
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }
  if (tcurve.numInstances() == 0) {
    return -1;
  }
  if ((threshold < 0) || (threshold > 1.0)) {
    return -1;
  }

  // A single point is trivially the closest one.
  if (tcurve.numInstances() == 1) {
    return 0;
  }

  int thresholdAttribute = tcurve.numAttributes() - 1;
  double[] thresholds = tcurve.attributeToDoubleArray(thresholdAttribute);
  int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}
 
Example 2
Source File: MarginCurve.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the cumulative margin distribution for a set of predictions and
 * returns it as a set of Instances laid out as follows:<p> <ul>
 * <li> <b>Margin</b> contains the margin value (which should be plotted
 * as an x-coordinate)
 * <li> <b>Current</b> contains the count of instances with the current
 * margin (plot as y axis)
 * <li> <b>Cumulative</b> contains the count of instances with margin
 * less than or equal to the current margin (plot as y axis)
 * </ul> <p>
 *
 * The first data point is always (-1, 0, 0); after that one point is
 * emitted per prediction, in ascending order of margin.
 *
 * @param predictions the predictions to process; every element is cast to
 * NominalPrediction
 * @return datapoints as a set of instances, null if no predictions
 * have been made.
 */
public Instances getCurve(FastVector predictions) {

  if (predictions.size() == 0) {
    return null;
  }

  Instances curve = makeHeader();
  double[] margins = getMargins(predictions);
  int[] order = Utils.sort(margins);

  int totalMargin = 0;
  curve.add(makeInstance(-1, 0, totalMargin));

  for (int index : order) {
    double margin = margins[index];
    NominalPrediction prediction =
      (NominalPrediction) predictions.elementAt(index);
    double weight = prediction.weight();

    // Counts are kept as ints, so each weight is truncated when it is added.
    totalMargin += weight;
    int binMargin = (int) weight;

    curve.add(makeInstance(margin, binMargin, totalMargin));
  }
  return curve;
}
 
Example 3
Source File: EvaluationStatisticsUtils.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Rank Matrix - converts the value matrix obtained from
 * {@code valueMatrix(stats, measurement)} into per-row ranks.
 *
 * For each row {@code i}, the entry {@code R[i][c]} is the 1-based position
 * of {@code V[i][c]} in the order returned by {@code Utils.sort(V[i])}
 * (so the first index returned by the sort receives rank 1).
 *
 * @param stats        the evaluation statistics to build the value matrix from
 * @param measurement  the name of the measurement to rank on
 * @return the rank matrix, with the same dimensions as the value matrix;
 *         an empty matrix if the value matrix has no rows
 */
public static int[][] rankMatrix(List<EvaluationStatistics> stats, String measurement) {

	double[][] values = valueMatrix(stats, measurement);

	int numRows = values.length;
	if (numRows == 0) {
		// Nothing to rank; reading values[0] below would throw.
		return new int[0][0];
	}
	int numCols = values[0].length;

	int[][] ranks = new int[numRows][numCols];
	for (int i = 0; i < numRows; i++) {
		int[] order = Utils.sort(values[i]);
		// positions are 0-based, ranks are 1-based
		for (int j = 0; j < numCols; j++) {
			ranks[i][order[j]] = j + 1;
		}
	}

	return ranks;
}
 
Example 4
Source File: LFSMethods.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Ranks the attributes of a dataset by evaluating each one on its own.
 * Every non-class attribute is scored as a single-attribute subset and its
 * negated score is used as the sort key; the class attribute gets
 * Double.MAX_VALUE as its key. Each evaluation increments m_evalsTotal.
 *
 * @param data the data whose attributes are ranked
 * @param evaluator the subset evaluator used to score each attribute
 * @param verbose if true, progress and the final ranking go to System.out
 * @return ranking (integer array) of attributes in data with evaluator (sorting is NOT stable!)
 * @throws Exception if the evaluator fails
 */
public int[] rankAttributes(Instances data, SubsetEvaluator evaluator,
                            boolean verbose) throws Exception {
  if (verbose) {
    System.out.println("Ranking attributes with " +
                       evaluator.getClass().getName());
  }

  double[] merit = new double[data.numAttributes()];
  BitSet single = new BitSet(data.numAttributes());

  for (int att = 0; att < data.numAttributes(); att++) {
    if (att == data.classIndex()) {
      merit[att] = Double.MAX_VALUE;
    } else {
      // evaluate the subset containing only this attribute
      single.set(att);
      merit[att] -= evaluator.evaluateSubset(single);
      m_evalsTotal++;
      single.clear(att);
    }

    if (verbose) {
      System.out.println(att + ": " + merit[att]);
    }
  }

  int[] ranking = Utils.sort(merit);

  if (verbose) {
    System.out.print("Ranking [ ");
    for (int rankedAttribute : ranking) {
      System.out.print(rankedAttribute + " ");
    }
    System.out.println("]\n");
  }

  return ranking;
}
 
Example 5
Source File: NominalToBinary.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes, for every nominal attribute of the input format, the weighted
 * average class value of each attribute value, and stores the sort order of
 * those averages in m_Indices. Instances with a missing class or a missing
 * value for the attribute are skipped. Attribute values that received no
 * weight get the overall weighted average instead. Entries of m_Indices for
 * non-nominal attributes stay empty.
 */
private void computeAverageClassValues() {

  double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];

  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (!att.isNominal()) {
      continue;
    }

    double[] averages = new double[att.numValues()];
    double[] counts = new double[att.numValues()];
    avgClassValues[j] = averages;

    // accumulate weighted class values and weights per attribute value
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      Instance current = getInputFormat().instance(i);
      if (current.classIsMissing() || current.isMissing(j)) {
        continue;
      }
      counts[(int) current.value(j)] += current.weight();
      averages[(int) current.value(j)] +=
        current.weight() * current.classValue();
    }

    // both sums must be taken before the totals are turned into averages
    double sum = Utils.sum(averages);
    double totalCounts = Utils.sum(counts);
    if (Utils.gr(totalCounts, 0)) {
      for (int k = 0; k < att.numValues(); k++) {
        if (Utils.gr(counts[k], 0)) {
          averages[k] /= counts[k];
        } else {
          averages[k] = sum / totalCounts;
        }
      }
    }

    m_Indices[j] = Utils.sort(averages);
  }
}
 
Example 6
Source File: Metrics.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Rank loss for real-valued predictions. The labels and predictions are
 * first passed through align(..) (per the original note, this makes the
 * measure work with missing values - TODO confirm what align does), the
 * predictions are turned into an index ordering with Utils.sort, and the
 * result is delegated to the (int[], int[]) overload of L_RankLoss.
 *
 * @param y      the label values
 * @param rpred  the real-valued predictions
 * @return the rank loss computed by the delegate
 */
public static double L_RankLoss(int y[], double rpred[]) {
    // works with missing
    final double[][] aligned = align(y, rpred);

    final int[] labels = toIntArray(aligned[0]);
    final int[] ranking = Utils.sort(aligned[1]);

    return L_RankLoss(labels, ranking);
}
 
Example 7
Source File: Ranker.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Produces the evaluated attributes ordered from highest to lowest merit.
 * As a side effect, when no explicit number of attributes was requested
 * (m_numToSelect <= 0), the number to select is derived: all attributes if
 * no threshold is set, otherwise via determineNumToSelectFromThreshold.
 *
 * @return one row per attribute, best first; column 0 holds the attribute
 * index and column 1 its merit
 * @throws Exception if no search has been performed yet, or if more
 * attributes were requested than exist in the data
 */
public double[][] rankedAttributes ()
  throws Exception {

  if (m_attributeList == null || m_attributeMerit == null) {
    throw  new Exception("Search must be performed before a ranked " 
                         + "attribute list can be obtained");
  }

  int[] ascending = Utils.sort(m_attributeMerit);
  int count = ascending.length;
  double[][] bestToWorst = new double[count][2];

  // walk the ascending order backwards so the best attribute comes first
  for (int row = 0; row < count; row++) {
    int source = ascending[count - 1 - row];
    bestToWorst[row][0] = m_attributeList[source];
    bestToWorst[row][1] = m_attributeMerit[source];
  }

  if (m_numToSelect > count) {
    throw new Exception("More attributes requested than exist in the data");
  }

  if (m_numToSelect <= 0) {
    if (m_threshold == -Double.MAX_VALUE) {
      m_calculatedNumToSelect = count;
    } else {
      determineNumToSelectFromThreshold(bestToWorst);
    }
  }

  return bestToWorst;
}
 
Example 8
Source File: PrincipalComponents.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Set the format for the transformed data. One attribute is created per
 * retained principal component, walking m_sortedEigens from its last entry
 * to its first, until the cumulative share of m_sumOfEigenValues reaches
 * m_coverVariance. Each attribute is named after the linear combination of
 * original attributes it represents.
 *
 * If m_maxAttrsInName is positive, only that many coefficients appear in a
 * name, chosen by decreasing magnitude, and "..." is appended when some
 * were left out. Otherwise all coefficients are listed in original order.
 *
 * @return a set of empty Instances (header only) in the new format, or
 * null if no eigenvalues have been computed yet
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
  if (m_eigenvalues == null) {
    return null;
  }

  double cumulative = 0.0;
  FastVector attributes = new FastVector();
  for (int i = m_numAttribs - 1; i >= 0; i--) {
    int eigenIndex = m_sortedEigens[i];
    StringBuilder attName = new StringBuilder();

    // build array of coefficients; magnitudes are negated so that an
    // ascending sort yields decreasing magnitude
    double[] coeff_mags = new double[m_numAttribs];
    for (int j = 0; j < m_numAttribs; j++) {
      coeff_mags[j] = -Math.abs(m_eigenvectors[j][eigenIndex]);
    }
    int num_attrs = (m_maxAttrsInName > 0)
      ? Math.min(m_numAttribs, m_maxAttrsInName)
      : m_numAttribs;

    // this array contains the sorted indices of the coefficients
    int[] coeff_inds;
    // The choice depends on the name-length limit, not on the attribute
    // count (which is always positive inside this loop).
    if (m_maxAttrsInName > 0) {
      // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
      coeff_inds = Utils.sort(coeff_mags);
    } else {
      // if m_maxAttrsInName <= 0, use all coeffs in original order
      coeff_inds = new int[m_numAttribs];
      for (int j = 0; j < m_numAttribs; j++) {
        coeff_inds[j] = j;
      }
    }

    // build final attName string
    for (int j = 0; j < num_attrs; j++) {
      double coeff_value = m_eigenvectors[coeff_inds[j]][eigenIndex];
      if (j > 0 && coeff_value >= 0) {
        attName.append("+");
      }
      attName.append(Utils.doubleToString(coeff_value,5,3)
                     +m_trainInstances.attribute(coeff_inds[j]).name());
    }
    if (num_attrs < m_numAttribs) {
      attName.append("...");
    }

    attributes.addElement(new Attribute(attName.toString()));
    cumulative += m_eigenvalues[eigenIndex];

    // stop once enough variance is covered
    if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
      break;
    }
  }

  if (m_hasClass) {
    attributes.addElement(m_trainHeader.classAttribute().copy());
  }

  Instances outputFormat =
    new Instances(m_trainInstances.relationName()+"_principal components",
                  attributes, 0);

  // set the class to be the last attribute if necessary
  if (m_hasClass) {
    outputFormat.setClassIndex(outputFormat.numAttributes()-1);
  }

  m_outputNumAtts = outputFormat.numAttributes();
  return outputFormat;
}
 
Example 9
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the n point precision result, which is the precision averaged
 * over n evenly spaced (w.r.t recall) samples of the curve. The sample
 * recalls are i / (n - 1) for i = 0 .. n - 1. For each sample the nearest
 * curve point is located with binarySearch; unless that point is the first
 * one in recall order, the precision is linearly interpolated towards the
 * next point with a different recall value, if there is one.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @param n the number of points to average over.
 * @return the n-point precision, or NaN if tcurve is not a threshold curve
 * or contains no instances.
 */
public static double getNPointPrecision(Instances tcurve, int n) {

  if (!RELATION_NAME.equals(tcurve.relationName()) 
      || (tcurve.numInstances() == 0)) {
    return Double.NaN;
  }

  int recallInd = tcurve.attribute(RECALL_NAME).index();
  int precisInd = tcurve.attribute(PRECISION_NAME).index();
  double[] recallVals = tcurve.attributeToDoubleArray(recallInd);
  int[] sorted = Utils.sort(recallVals);

  double step = 1.0 / (n - 1);
  double total = 0;
  for (int i = 0; i < n; i++) {
    double target = i * step;
    int pos = binarySearch(sorted, recallVals, target);
    double recall = recallVals[sorted[pos]];
    double precis = tcurve.instance(sorted[pos]).value(precisInd);

    // interpolate figures for non-endpoints
    if (pos != 0) {
      for (int next = pos + 1; next < sorted.length; next++) {
        double nextRecall = recallVals[sorted[next]];
        if (nextRecall != recall) {
          double nextPrecis = tcurve.instance(sorted[next]).value(precisInd);
          double slope = (nextPrecis - precis) / (nextRecall - recall);
          double offset = precis - recall * slope;
          precis = target * slope + offset;
          break;
        }
      }
    }
    total += precis;
  }
  return total / n;
}
 
Example 10
Source File: InterquartileRange.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes the thresholds for outliers and extreme values. For every
 * selected attribute the median and the quartiles Q1/Q3 are computed and
 * stored, together with the inter-quartile range and the thresholds
 * Q1 - factor * IQR and Q3 + factor * IQR for the outlier and
 * extreme-value factors. Attributes marked NON_NUMERIC are skipped.
 *
 * The quartiles are the medians of the lower and upper halves of the
 * sorted values (the middle element is left out when the count is odd).
 * With an even number of elements a median is the mean of the two central
 * elements. A single value is its own median and quartiles.
 *
 * NOTE: an attribute with no values at all still fails with an
 * ArrayIndexOutOfBoundsException, as it did before.
 *
 * @param instances	the data to work on
 */
protected void computeThresholds(Instances instances) {
  int		i;
  double[]	values;
  int[]	sortedIndices;
  int		half;
  int		quarter;
  double	q1;
  double	q2;
  double	q3;

  m_UpperExtremeValue = new double[m_AttributeIndices.length];
  m_UpperOutlier      = new double[m_AttributeIndices.length];
  m_LowerOutlier      = new double[m_AttributeIndices.length];
  m_LowerExtremeValue = new double[m_AttributeIndices.length];
  m_Median            = new double[m_AttributeIndices.length];
  m_IQR               = new double[m_AttributeIndices.length];

  for (i = 0; i < m_AttributeIndices.length; i++) {
    // non-numeric attribute?
    if (m_AttributeIndices[i] == NON_NUMERIC) {
      continue;
    }

    // sort attribute data
    values        = instances.attributeToDoubleArray(m_AttributeIndices[i]);
    sortedIndices = Utils.sort(values);

    // determine indices
    half    = sortedIndices.length / 2;
    quarter = half / 2;

    if (sortedIndices.length % 2 == 1) {
      q2 = values[sortedIndices[half]];
    }
    else {
      // even count: the two central elements are half - 1 and half
      // (half + 1 is off by one and out of bounds for two values)
      q2 = (values[sortedIndices[half - 1]] + values[sortedIndices[half]]) / 2;
    }

    if (half == 0) {
      // single value: both halves are empty, so the quartiles collapse
      q1 = q2;
      q3 = q2;
    }
    else if (half % 2 == 1) {
      q1 = values[sortedIndices[quarter]];
      q3 = values[sortedIndices[sortedIndices.length - quarter - 1]];
    }
    else {
      // even-sized halves: mean of the two central elements of each half;
      // the lower pair mirrors the upper pair
      q1 = (values[sortedIndices[quarter - 1]] + values[sortedIndices[quarter]]) / 2;
      q3 = (values[sortedIndices[sortedIndices.length - quarter - 1]]
            + values[sortedIndices[sortedIndices.length - quarter]]) / 2;
    }

    // determine thresholds and other values
    m_Median[i]            = q2;
    m_IQR[i]               = q3 - q1;
    m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
    m_UpperOutlier[i]      = q3 + getOutlierFactor()       * m_IQR[i];
    m_LowerOutlier[i]      = q1 - getOutlierFactor()       * m_IQR[i];
    m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
  }
}
 
Example 11
Source File: UnivariateEqualFrequencyHistogramEstimator.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the interval(s) for the given confidence value. The range spanned
 * by the keys of m_TM, widened on both sides by a multiple of m_Width, is
 * cut into m_NumIntervals equal bins. Each bin's probability is estimated
 * with the trapezoid rule on the density. Bins are then selected from the
 * most to the least probable until their total reaches conf, and adjacent
 * selected bins are merged.
 *
 * @param conf the confidence value in the interval [0, 1]
 * @return the intervals, each as a {lower, upper} pair, in increasing order
 */
public double[][] predictIntervals(double conf) {

  // Bring the estimator up to date before querying the density
  updateBoundariesAndOrWeights();

  // Compute minimum and maximum value, and the bin width
  double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2);
  double min = m_TM.firstKey() - val * m_Width;
  double max = m_TM.lastKey() + val * m_Width;
  double delta = (max - min) / m_NumIntervals;

  // Estimate the probability of each bin (trapezoid rule)
  double[] binMass = new double[m_NumIntervals];
  double leftVal = Math.exp(logDensity(min));
  for (int i = 0; i < m_NumIntervals; i++) {
    double rightVal = Math.exp(logDensity(min + (i + 1) * delta));
    binMass[i] = 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }

  // Order the bins by estimated probability
  int[] byMass = Utils.sort(binMass);

  // Select bins from the end of that order until conf is covered
  boolean[] selected = new boolean[binMass.length];
  double covered = 0;
  for (int rank = selected.length - 1; (covered < conf) && (rank >= 0); rank--) {
    int bin = byMass[rank];
    selected[bin] = true;
    covered += binMass[bin];
  }

  // Merge runs of adjacent selected bins into intervals
  ArrayList<double[]> intervals = new ArrayList<double[]>();
  double[] open = null; // interval currently being extended, if any
  for (int i = 0; i < m_NumIntervals; i++) {
    if (selected[i]) {
      if (open == null) {
        open = new double[2];
        open[0] = min + i * delta;
      }
      // always push the upper boundary to the end of this bin
      open[1] = min + (i + 1) * delta;
    } else if (open != null) {
      // an unselected bin closes the running interval
      intervals.add(open);
      open = null;
    }
  }

  // Add last interval if there is one
  if (open != null) {
    intervals.add(open);
  }

  return intervals.toArray(new double[0][0]);
}
 
Example 12
Source File: UnivariateKernelEstimator.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the interval(s) for the given confidence value. After refreshing
 * the kernel width, the key range of m_TM (extended on each side by a
 * multiple of m_Width) is divided into m_NumIntervals bins of equal size.
 * The probability of each bin is approximated from the density at its two
 * edges. The most probable bins are marked until their summed probability
 * reaches conf, and neighbouring marked bins are joined into intervals.
 *
 * @param conf the confidence value in the interval [0, 1]
 * @return the intervals as {lower, upper} pairs, ordered by position
 */
public double[][] predictIntervals(double conf) {

  // Update the bandwidth
  updateWidth();

  // Compute minimum and maximum value, and delta
  final double val = Statistics.normalInverse(1.0 - (1.0 - conf) / 2);
  final double min = m_TM.firstKey() - val * m_Width;
  final double max = m_TM.lastKey() + val * m_Width;
  final double delta = (max - min) / m_NumIntervals;

  // Create array with estimated probabilities
  final double[] mass = new double[m_NumIntervals];
  double leftVal = Math.exp(logDensity(min));
  for (int bin = 0; bin < m_NumIntervals; bin++) {
    double rightVal = Math.exp(logDensity(min + (bin + 1) * delta));
    mass[bin] = 0.5 * (leftVal + rightVal) * delta;
    leftVal = rightVal;
  }

  // Sort array based on area of bin estimates
  final int[] order = Utils.sort(mass);

  // Mark the intervals to use, taking bins from the end of the sort order
  final boolean[] marked = new boolean[mass.length];
  double sum = 0;
  int taken = 0;
  while ((sum < conf) && (taken < marked.length)) {
    int bin = order[marked.length - 1 - taken];
    marked[bin] = true;
    sum += mass[bin];
    taken++;
  }

  // Create final list of intervals
  final ArrayList<double[]> intervals = new ArrayList<double[]>();
  double[] running = null; // non-null while inside a run of marked bins
  for (int bin = 0; bin < m_NumIntervals; bin++) {
    if (!marked[bin]) {
      // the run (if any) ends here: store it
      if (running != null) {
        intervals.add(running);
        running = null;
      }
      continue;
    }
    if (running == null) {
      running = new double[2];
      running[0] = min + bin * delta;
    }
    // Regardless, we should update the upper boundary
    running[1] = min + (bin + 1) * delta;
  }

  // Add last interval if there is one
  if (running != null) {
    intervals.add(running);
  }

  return intervals.toArray(new double[0][0]);
}
 
Example 13
Source File: PMCC.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a population of m_M classifier chains by evaluating m_Is proposed
 * chain orders. An initial random order is built and scored with payoff(..).
 * Each iteration proposes a new order from the current one (via pi(..) when
 * m_O > 0, otherwise by swapping two elements), builds a chain for it and
 * scores it on the training data. If it beats the weakest member of the
 * population it replaces that member and becomes the current order. The
 * population weights are normalised at the end.
 *
 * @param D the training data
 * @throws Exception if m_M is 0 or m_Is is smaller than m_M, or if building
 *         or scoring a chain fails
 */
@Override
public void buildClassifier(Instances D) throws Exception {

	m_R = new Random(m_S);

	// Variables

	int L = D.classIndex();

	h = new CC[m_M];
	w = new double[m_M];

	// The population needs at least one slot (h[0] is filled below) and
	// at least as many evaluations as members, as the message states.
	if (m_M <= 0 || m_Is < m_M) {
		throw new Exception("[Error] Number of chains evaluated (Is) should be at least as great as the population selected (M), and always greater than 0.");
	}

	// Make CC
	int s[] = MLUtils.gen_indices(L);
	MLUtils.randomize(s,m_R);
	h[0] = buildCC(Arrays.copyOf(s,s.length),D); // @todo move into setChain(..)
	w[0] = payoff(h[0],D);
	if (getDebug()) System.out.println("s[0] = "+Arrays.toString(s));

	for (int t = 0; t < m_Is; t++) {

		// propose a chain s' ~ pi(s'|s)
		int s_[] = (m_O > 0) ?
			  pi(Arrays.copyOf(s,s.length),m_R,t,m_Beta)	  :	// default cond. option - with temperature
			  A.swap(Arrays.copyOf(s,s.length),m_R) ;	        // special simple option - swap two elements

		// build h' with sequence s'
		CC h_ = buildCC(Arrays.copyOf(s_,s_.length),D);

		// rate h' (by its performance on the training data)
		double w_ = payoff(h_,D);

		// accept h' weighted more than the weakest h in the population
		int min = Utils.sort(w)[0]; // (min index)
		if (w_ > w[min]) {
			// report before overwriting, so the replaced score is shown
			if (getDebug()) System.out.println(" accepted h_ with score "+w_+" > "+w[min]);
			w[min] = w_;
			h[min] = h_;
			s = s_;
		}
		else if (getDebug()) {
			System.out.println(" DENIED h_ with score "+w_+" !> score "+w[min]);
		}
	}
	if (getDebug()) System.out.println("---");

	// normalise weights
	Utils.normalize(w);
}
 
Example 14
Source File: A.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns a sorted copy of the given array; the argument is not modified.
 *
 * @param a the array to sort
 * @return a new array holding the elements of {@code a} in ascending order
 */
public static final int[] sort(int a[]) {
	int[] c = Arrays.copyOf(a, a.length);
	// weka's Utils.sort(int[]) returns an index ordering and leaves its
	// argument untouched, so calling it here and discarding the result left
	// the copy unsorted. Arrays.sort sorts the copy in place.
	Arrays.sort(c);
	return c;
}
 
Example 15
Source File: MetricsTest.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Checks that Utils.sort returns, for an array of predictions, the indices
 * of its elements in ascending order of value.
 */
public void testUtilSort(){
    double[] pred = {0.4, 0.1, 0.5, 0.2, 0.6, 0.3};
    // ascending values 0.1, 0.2, 0.3, 0.4, 0.5, 0.6 sit at these positions
    int[] expected = {1, 3, 5, 0, 2, 4};

    int[] sorted = Utils.sort(pred);

    // on failure, the actual ordering is shown as the message
    assertTrue(Arrays.toString(sorted), Arrays.equals(expected, sorted));
}
 
Example 16
Source File: C45PruneableClassifierTreeG.java    From tsml with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Orders the instances of a dataset by the values of one attribute.
 *
 * @param data the data whose instances are to be ordered
 * @param a the index of the attribute to sort by
 * @return the index array produced by Utils.sort for the values of
 * attribute a
 */
private int [] sortByAttribute(Instances data, int a) {

  return Utils.sort(data.attributeToDoubleArray(a));
}