Java Code Examples for weka.core.Instances#attributeToDoubleArray()

The following examples show how to use weka.core.Instances#attributeToDoubleArray() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ClusteringUtilities.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the Rand index between a predicted clustering and the class values of a dataset.
 *
 * <p>Every unordered pair of distinct instances is examined exactly once. A pair counts as an
 * agreement when both labellings place the two instances together, or both keep them apart.
 * The result is the fraction of agreeing pairs. An instance is never paired with itself:
 * such a pair always "agrees" and would inflate the score.
 *
 * @param predicted cluster assignment for each instance, in dataset order
 * @param inst dataset whose class attribute supplies the reference labels
 * @return the fraction of agreeing pairs; 1.0 when there is a single instance (no pair can
 *         disagree) and NaN when there are no instances
 * @throws IllegalArgumentException if predicted does not hold one entry per instance
 */
public static double randIndex(int[] predicted, Instances inst){
    double[] actual = inst.attributeToDoubleArray(inst.classIndex());

    if (predicted.length != actual.length){
        throw new IllegalArgumentException("Expected " + actual.length
                + " predictions but received " + predicted.length);
    }

    long agreements = 0;
    long pairs = 0;

    for (int i = 0; i < predicted.length; i++){
        // start at i + 1: skips the self-pair and avoids visiting (i, n) and (n, i) twice
        for (int n = i + 1; n < predicted.length; n++){
            boolean samePredicted = predicted[i] == predicted[n];
            boolean sameActual = actual[i] == actual[n];

            // together in both labellings, or apart in both
            if (samePredicted == sameActual){
                agreements++;
            }
            pairs++;
        }
    }

    if (pairs == 0){
        return predicted.length == 0 ? Double.NaN : 1.0;
    }

    return (double) agreements / pairs;
}
 
Example 2
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Looks up the row of a threshold curve whose threshold value is nearest to
 * a requested target.
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold; values below 0 or above 1 are rejected
 * @return the index of the instance that has threshold closest to
 * the target, or -1 if this could not be found (wrong relation name, no data,
 * or a rejected target)
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {

  final boolean usableCurve = RELATION_NAME.equals(tcurve.relationName())
      && (tcurve.numInstances() != 0);
  if (!usableCurve || (threshold < 0) || (threshold > 1.0)) {
    return -1;
  }

  // a single row is trivially the closest one
  if (tcurve.numInstances() == 1) {
    return 0;
  }

  // threshold values are read from the last attribute
  final int thresholdIndex = tcurve.numAttributes() - 1;
  final double[] thresholds = tcurve.attributeToDoubleArray(thresholdIndex);
  final int[] order = Utils.sort(thresholds);
  return binarySearch(order, thresholds, threshold);
}
 
Example 3
Source File: MultiLinearRegression.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Trains one LinearRegression per class value. Each regressor is fitted to a
 * numeric target that is 1 for instances of its class and 0 for all others.
 *
 * @param data the training data; its last attribute is replaced by the numeric target
 *             in an internal copy, the original is left untouched
 * @throws Exception if building any of the regressors fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // copy the data and swap its last attribute for a numeric stand-in class
    numericClassInsts = new Instances(data);
    numericClassInsts.setClassIndex(0); //temporary
    final int lastIndex = numericClassInsts.numAttributes() - 1;
    numericClassInsts.deleteAttributeAt(lastIndex);
    numericClassInsts.insertAttributeAt(new Attribute("newClassVal"), lastIndex); //numeric class
    numericClassInsts.setClassIndex(lastIndex);

    // one regressor per original class value
    final int numClasses = data.numClasses();
    regressors = new LinearRegression[numClasses];
    final double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());
    final int numInsts = numericClassInsts.numInstances();

    for (int c = 0; c < numClasses; c++) {
        // relabel: 1 for members of class c, 0 for everything else
        for (int i = 0; i < numInsts; i++) {
            if (trueClassVals[i] == c) {
                numericClassInsts.instance(i).setClassValue(1.0);
            } else {
                numericClassInsts.instance(i).setClassValue(0.0);
            }
        }

        regressors[c] = new LinearRegression();
        regressors[c].buildClassifier(numericClassInsts);
    }
}
 
Example 4
Source File: MultiResponseModelTrees.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Trains one M5P model tree per class value. Each tree is fitted to a
 * numeric target that is 1 for instances of its class and 0 for all others.
 *
 * @param data the training data; its last attribute is replaced by the numeric target
 *             in an internal copy, the original is left untouched
 * @throws Exception if building any of the model trees fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // copy the data and swap its last attribute for a numeric stand-in class
    numericClassInsts = new Instances(data);
    numericClassInsts.setClassIndex(0); //temporary
    final int lastIndex = numericClassInsts.numAttributes() - 1;
    numericClassInsts.deleteAttributeAt(lastIndex);
    numericClassInsts.insertAttributeAt(new Attribute("newClassVal"), lastIndex); //numeric class
    numericClassInsts.setClassIndex(lastIndex);

    // one model tree per original class value
    final int numClasses = data.numClasses();
    regressors = new M5P[numClasses];
    final double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());
    final int numInsts = numericClassInsts.numInstances();

    for (int c = 0; c < numClasses; c++) {
        // relabel: 1 for members of class c, 0 for everything else
        for (int i = 0; i < numInsts; i++) {
            if (trueClassVals[i] == c) {
                numericClassInsts.instance(i).setClassValue(1.0);
            } else {
                numericClassInsts.instance(i).setClassValue(0.0);
            }
        }

        regressors[c] = new M5P();
        regressors[c].buildClassifier(numericClassInsts);
    }
}
 
Example 5
Source File: Reciprocal.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the last attribute of every instance with the reciprocal of its value.
 *
 * <p>When the smallest value is at or below zeroOffset, all values are first shifted by
 * an offset (stored in offSet) so that the smallest shifted value equals zeroOffset.
 * The minimum, the offset and each before/after value are printed to System.out.
 *
 * @param data the data to transform; modified in place
 * @return the same Instances object that was passed in
 */
public Instances transform(Instances data){
        // the response is taken to be the last attribute
        final int responsePos=data.numAttributes()-1;
        final double[] response=data.attributeToDoubleArray(responsePos);

        // smallest response value
        double min=response[0];
        for(double value : response)
        {
                if(value<min)
                {
                        min=value;
                }
        }

        // shift the data when the minimum is at or below zeroOffset
        if(min<=zeroOffset)
        {
                offSet=-min+zeroOffset;
        }
        else
        {
                offSet=0;
        }
        System.out.println(" Min value = "+min+" offset = "+offSet);

        final int count=data.numInstances();
        for(int i=0;i<count;i++)
        {
            final Instance current = data.instance(i);
            final double before=current.value(responsePos);
            System.out.print(i+" "+before);
            final double after=1/(before+offSet);
            System.out.println(" "+after);
            current.setValue(responsePos,after);
        }
        return data;
    }
 
Example 6
Source File: MatrixUtils.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Helper method that transforms an Instances object to a Matrix object.
 *
 * <p>Row j, column i of the result holds the value of attribute i for instance j.
 * Each attribute column is extracted exactly once, rather than once per cell.
 *
 * @param inst The Instances to transform.
 * @return  The resulting Matrix object.
 */
public static Matrix instancesToMatrix(Instances inst){
	final int numAtts = inst.numAttributes();
	double[][] darr = new double[inst.numInstances()][numAtts];
	for (int i = 0; i < numAtts; i++) {
		// one extraction per attribute; the original re-extracted the column for every element
		final double[] column = inst.attributeToDoubleArray(i);
		for (int j = 0; j < column.length; j++) {
			darr[j][i] = column[j];
		}
	}
	return new Matrix(darr);
}
 
Example 7
Source File: LinearModel.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Predicts a response for every instance of a test set from the fitted parameters.
 *
 * <p>The design matrix has a leading column of ones followed by attributes
 * 0 .. numAttributes-2 of the test data. If the attribute count differs from the
 * stored count m, an error is printed and the JVM is terminated via System.exit(0).
 *
 * @param testData the instances to predict
 * @return the predictions, also stored in the predicted field
 */
public  double[] formTestPredictions(Instances testData)
	{
		final int rows=testData.numInstances();
		final int cols=testData.numAttributes();	//includes the constant term
		predicted=new double[rows];
		if(cols!=m)
		{
			System.out.println("Error: Mismatch in attribute lengths in form test Train ="+m+" Test ="+cols);
			System.exit(0);
		}

		// build X transposed: row 0 is the constant term, row k is attribute k-1
		final double[][] xt = new double[cols][rows];
		for(int r=0;r<rows;r++)
		{
			xt[0][r]=1;
		}
		for(int k=1;k<cols;k++)
		{
			xt[k]=testData.attributeToDoubleArray(k-1);
		}
		final Matrix testX=new Matrix(xt).transpose();

		// prediction = intercept + sum of parameter * regressor
		for(int r=0;r<rows;r++)
		{
			predicted[r]=paras[0];
			for(int k=1;k<paras.length;k++)
			{
				predicted[r]+=paras[k]*testX.get(r,k);
			}
		}
		return predicted;

	}
 
Example 8
Source File: LinearModel.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the response vector and design matrix from a dataset.
 *
 * <p>The response is read from the class attribute. The transposed design matrix has a
 * first row of ones followed by one row per attribute 0 .. numAttributes-2.
 *
 * @param data the training instances
 */
public LinearModel(Instances data)
	{
		n=data.numInstances();
		m=data.numAttributes();	//includes the constant term

		// response vector
		y = data.attributeToDoubleArray(data.classIndex());
		Y=new Matrix(y,y.length);

		// X transposed: row 0 is the constant term, row k is attribute k-1
		final double[][] rowsByAttribute = new double[m][n];
		for(int col=0;col<n;col++)
		{
			rowsByAttribute[0][col]=1;
		}
		for(int row=1;row<m;row++)
		{
			rowsByAttribute[row]=data.attributeToDoubleArray(row-1);
		}
		Xt=new Matrix(rowsByAttribute);
		X=Xt.transpose();
	}
 
Example 9
Source File: YeoJohnson.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Placeholder transform: the data is returned unchanged.
 *
 * <p>A notice is printed to System.out on every call so the missing implementation
 * is visible at run time.
 *
 * @param data the data that would be transformed
 * @return the same Instances object, unmodified
 */
@Override 
public Instances transform(Instances data)
{
	System.out.println(" Doesnt do anything! ");
	return data;
}
 
Example 10
Source File: YeoJohnson.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Searches a grid of lambda values for the one whose transformed response gives the
 * smallest residual test statistic.
 *
 * <p>For each lambda from MIN to MAX in steps of INTERVAL the response is transformed via
 * transformResponse, a LinearModel is fitted to the data, and the statistic returned by
 * ResidualTests.kolmogorovSmirnoff on the standardised residuals is recorded. The lambda
 * with the lowest statistic wins; ties keep the earliest lambda.
 *
 * @param data the data whose class attribute is the response
 * @param pos index in power at which the best lambda is written
 * @param power output array receiving the best lambda
 * @return the smallest statistic found, or Double.MAX_VALUE if no lambda improved on it
 */
static public double findBestTransform(Instances data, int pos, double[] power)
	{
		int responsePos=data.classIndex();
		// private copy of the original response, handed to transformResponse on every step
		double[] temp=data.attributeToDoubleArray(responsePos);
		double[] response=new double[temp.length];
		System.arraycopy(temp, 0, response, 0, temp.length);

		double bestLambda=MIN;
		double minError=Double.MAX_VALUE;
		// NOTE(review): lambda accumulates floating-point steps, so whether MAX itself is
		// visited depends on rounding - confirm this is acceptable
		for(double lambda=MIN;lambda<=MAX;lambda+=INTERVAL)
		{
			//Transform response
			transformResponse(data,lambda,response);
			LinearModel lm=new LinearModel(data);
			lm.fitModel();
			lm.formTrainPredictions();
			lm.findTrainStatistics();

			//Use the K-S stat for this
			double error=ResidualTests.kolmogorovSmirnoff(lm.stdResidual);
			// NOTE(review): the result is not used; the call is retained in case the helper
			// has side effects - confirm and drop if it is pure
			ResidualTests.testHeteroscadisity(lm.y,lm.predicted);
			if(error<minError)
			{
				bestLambda=lambda;
				minError=error;
			}
		}
		power[pos]=bestLambda;
		return minError;
	}
 
Example 11
Source File: YeoJohnson.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites the class attribute of every instance with a piecewise power/log formula
 * controlled by lambda.
 *
 * <p>Negative values use the exponent 2-lambda (or a log when lambda is 2); all other
 * values use the exponent lambda (or a log when lambda is 0).
 *
 * <p>NOTE(review): these formulas look like the forward Yeo-Johnson transform rather than
 * its inverse, despite the method name - confirm against transformResponse.
 *
 * @param data the data to rewrite; modified in place
 * @param lambda the power parameter
 * @return the same Instances object that was passed in
 */
static public Instances invertResponse(Instances data, double lambda){
	final int responsePos=data.classIndex();
	final double[] response=data.attributeToDoubleArray(responsePos);
	for(int i=0;i<response.length;i++)
	{
		final double y=response[i];
		final double v;
		if(y<0)
		{
			v=(lambda==2)
				? -Math.log(1-y)
				: -(Math.pow((1-y),2-lambda)-1)/(2-lambda);
		}
		else
		{
			v=(lambda!=0)
				? (Math.pow(y+1,lambda)-1)/lambda
				: Math.log(1+y);
		}
		data.instance(i).setValue(responsePos,v);
	}

	return data;
}
 
Example 12
Source File: YeoJohnson.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the last attribute of every instance with the value produced by
 * invert(bestLambda, values) for that attribute's column.
 *
 * @param data the data to rewrite; modified in place
 * @return the same Instances object that was passed in
 */
@Override
public Instances invert(Instances data){
	final int responsePos=data.numAttributes()-1;
	final double[] newVals=invert(bestLambda,data.attributeToDoubleArray(responsePos));

	final int count=data.numInstances();
	for(int i=0;i<count;i++)
	{
		data.instance(i).setValue(responsePos,newVals[i]);
	}
	return data;
}
 
Example 13
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Computes the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
  *
  * @param tcurve a previously extracted threshold curve Instances.
  * @return the ROC area, or Double.NaN if tcurve does not carry the
  * ThresholdCurve relation name or holds no instances.
  */
 public static double getROCArea(Instances tcurve) {

   final int n = tcurve.numInstances();
   if ((n == 0) || !RELATION_NAME.equals(tcurve.relationName())) {
     return Double.NaN;
   }
   final int tpInd = tcurve.attribute(TRUE_POS_NAME).index();
   final int fpInd = tcurve.attribute(FALSE_POS_NAME).index();
   final double [] tpVals = tcurve.attributeToDoubleArray(tpInd);
   final double [] fpVals = tcurve.attributeToDoubleArray(fpInd);

   double area = 0.0;
   double negSoFar = 0.0;
   for (int i = 0; i < n; i++) {
     // per-row change in the counts; the last row contributes its own counts
     final boolean lastRow = (i == n - 1);
     final double posStep = lastRow ? tpVals[i] : tpVals[i] - tpVals[i + 1];
     final double negStep = lastRow ? fpVals[i] : fpVals[i] - fpVals[i + 1];

     area += posStep * (negSoFar + (0.5 * negStep));
     negSoFar += negStep;
   }

   // normalise by the counts stored in the first row
   return area / (fpVals[0] * tpVals[0]);
 }
 
Example 14
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the area under the precision-recall curve (AUPRC).
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the PRC area, Double.NaN if tcurve does not carry the
 * ThresholdCurve relation name or holds no instances, or the missing
 * value marker when the accumulated area is exactly zero.
 */
public static double getPRCArea(Instances tcurve) {
  final int n = tcurve.numInstances();
  if ((n == 0) || !RELATION_NAME.equals(tcurve.relationName())) {
    return Double.NaN;
  }

  final int pInd = tcurve.attribute(PRECISION_NAME).index();
  final int rInd = tcurve.attribute(RECALL_NAME).index();
  final double [] pVals = tcurve.attributeToDoubleArray(pInd);
  final double [] rVals = tcurve.attributeToDoubleArray(rInd);

  double area = 0;
  double previousRecall = rVals[n - 1];

  // walk backwards from the first real p/r pair (not the artificial zero point)
  int i = n - 2;
  while (i >= 0) {
    area += (pVals[i] * (rVals[i] - previousRecall));
    previousRecall = rVals[i];
    i--;
  }

  return (area == 0) ? Utils.missingValue() : area;
}
 
Example 15
Source File: InterquartileRange.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * computes the thresholds for outliers and extreme values
  * <p>
  * For every numeric attribute the values are sorted and three quartiles are
  * taken: the median of all values, and the medians of the lower and the
  * upper half (each half holding length/2 values, so the overall median is
  * left out when the length is odd). The thresholds are the quartiles
  * shifted by the IQR multiplied by the outlier / extreme-value factor.
  * <p>
  * Attributes marked NON_NUMERIC, and attributes without any values, keep
  * the default 0 in all result arrays.
  * 
  * @param instances	the data to work on
  */
 protected void computeThresholds(Instances instances) {
   int		i;
   double[]	values;
   int[]	sortedIndices;
   int		count;
   int		half;
   int		quarter;
   double	q1;
   double	q2;
   double	q3;
   
   m_UpperExtremeValue = new double[m_AttributeIndices.length];
   m_UpperOutlier      = new double[m_AttributeIndices.length];
   m_LowerOutlier      = new double[m_AttributeIndices.length];
   m_LowerExtremeValue = new double[m_AttributeIndices.length];
   m_Median            = new double[m_AttributeIndices.length];
   m_IQR               = new double[m_AttributeIndices.length];
   
   for (i = 0; i < m_AttributeIndices.length; i++) {
     // non-numeric attribute?
     if (m_AttributeIndices[i] == NON_NUMERIC)
       continue;
     
     // sort attribute data
     values        = instances.attributeToDoubleArray(m_AttributeIndices[i]);
     sortedIndices = Utils.sort(values);
     count         = sortedIndices.length;
     
     // nothing to compute on an empty dataset
     if (count == 0)
       continue;
     
     // determine indices
     half    = count / 2;
     quarter = half / 2;
     
     // median: the middle value, or the mean of the two middle values
     // (0-based positions half-1 and half) when the count is even
     if (count % 2 == 1) {
       q2 = values[sortedIndices[half]];
     }
     else {
       q2 = (values[sortedIndices[half - 1]] + values[sortedIndices[half]]) / 2;
     }
     
     // quartiles: medians of the lowest and the highest "half" values
     if (half == 0) {
       // single value: both halves are empty
       q1 = q2;
       q3 = q2;
     }
     else if (half % 2 == 1) {
       q1 = values[sortedIndices[quarter]];
       q3 = values[sortedIndices[count - quarter - 1]];
     }
     else {
       // mirror images of each other: quarter-1/quarter from the bottom,
       // count-quarter-1/count-quarter from the top
       q1 = (values[sortedIndices[quarter - 1]] + values[sortedIndices[quarter]]) / 2;
       q3 = (values[sortedIndices[count - quarter - 1]] + values[sortedIndices[count - quarter]]) / 2;
     }
     
     // determine thresholds and other values
     m_Median[i]            = q2;
     m_IQR[i]               = q3 - q1;
     m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
     m_UpperOutlier[i]      = q3 + getOutlierFactor()       * m_IQR[i];
     m_LowerOutlier[i]      = q1 - getOutlierFactor()       * m_IQR[i];
     m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
   }
 }
 
Example 16
Source File: BoxTidwell.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Iteratively estimates a power for one regressor and applies it to the data in place.
 *
 * <p>Starting from alpha = 1, each iteration fits two linear models on row-per-attribute
 * matrices: one containing x^alpha for the chosen attribute (coefficient b1), and one that
 * additionally contains x^alpha * log(x) (coefficient b2). Alpha is then increased by b2/b1.
 * Iteration stops when |b2/b1| drops below 0.1 or after 10 iterations; if |alpha| exceeds 10
 * it is reset to 1 and iteration stops.
 *
 * <p>Row 0 of both matrices is a row of ones and row k holds attribute k-1, so the last
 * attribute of the data is not used as a regressor row (presumably it is the class -
 * confirm against callers).
 *
 * @param data the data to transform; attribute pos is overwritten in place
 * @param pos index of the attribute to transform
 * @param resultPos index in powers at which the final alpha is stored
 * @param powers output array receiving the final alpha
 * @return the same Instances object that was passed in
 */
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers)
	{

//1. Get values of the attribute of interest. 
		
//Confusingly, am working with attributes in rows not columns		
		double[] temp=data.attributeToDoubleArray(pos);
		double[] originalData= new double[temp.length];
		double[] logData= new double[temp.length];
		
//Keep the untransformed values and their natural logs; both are reused on every iteration
		for(int i=0;i<temp.length;i++)
		{
			originalData[i]=temp[i];
			logData[i]=Math.log(temp[i]);	
		}
//Response values, read from the class attribute
		double[] y =data.attributeToDoubleArray(data.classIndex()); 
//		I'm not sure if this is a memory copy or a reference copy, so be safe
//		transposeSecond has one extra row, used for the x^alpha*log(x) term
		double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()];
		double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()];
//		Row 0 is the constant term
		for(int j=0;j<data.numInstances();j++)
		{
			transposeFirst[0][j]=transposeSecond[0][j]=1;
		}
//		Row i holds attribute i-1. Note that both matrices are given the SAME array
//		object for each of these rows, so a write through one is visible through the other
		for(int i=1;i<data.numAttributes();i++)
		{
			transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1);
		}
//		Add one to pos cos of the ones
		pos=pos+1;
//		Second has an attribute at the end of data for transform
		int workingPos=data.numAttributes();
		LinearModel l1,l2;
//		alpha: current power estimate; min: convergence tolerance on |b2/b1|
		double alpha=1, b1,b2;
		double min=0.1;
		boolean finished=false;
		int count=0;
		final int MaxIterations=10;
		//		Initialise alpha to 1
//Find Base SSE		
		//While not termination condition
		while(!finished)
		{
//			System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha);
			//Create new attributes
			//1. Calculate x^alpha
			for(int j=0;j<originalData.length;j++)
			{
				transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha);
			}

			//2. Fit y=b1+ .. b_pos	x^alpha (+ other terms)-> get b_pos
			l1=new LinearModel(transposeFirst,y);	
			l1.fitModel();
			
//Not necessary: 
//			l1.formTrainPredictions();
//			l1.findTrainStatistics();
//			System.out.println(l1+"\nVariance for L1 = "+l1.variance);
			
			b1=l1.paras[pos];
			//3. Fit y=b*1+ .. b*_pos	x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2
			//2. Calculate x^alpha*log(x)
			for(int j=0;j<originalData.length;j++)
				transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j];
			l2=new LinearModel(transposeSecond,y);	
			l2.fitModel();
			
//			Not necessary: 
//			l2.formTrainPredictions();
//			l2.findTrainStatistics();
//			System.out.println(l2+"\nVariance for L2 = "+l2.variance);
			
			b2=l2.paras[workingPos];
			
			//NOTE(review): b1 is not checked for zero before this division
			alpha+=b2/b1;
			//Work out change term alpha = b*2/b1+alpha0
//			System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2);
			//Update termination criteria: stop if small change: check notes
			count++;
			if(Math.abs(b2/b1)<min || count>=MaxIterations)
				finished=true;
			//Estimate has run away: fall back to the identity power and give up
			else if(Math.abs(alpha)>10)
			{
				alpha=1;
				finished=true;
			}
		}
//Fix original 
//Record the final power, undo the +1 shift on pos, and overwrite the attribute in the data
		powers[resultPos]=alpha;
		pos=pos-1;
		Instance inst;
		for(int i=0;i<data.numInstances();i++)
		{
			inst=data.instance(i);
			inst.setValue(pos,Math.pow(originalData[i],alpha));
		}
		return data;
	}
 
Example 17
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes the n point precision: precision averaged over n samples of the
 * curve that are evenly spaced with respect to recall.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @param n the number of points to average over.
 * @return the n-point precision, or Double.NaN if tcurve does not carry the
 * ThresholdCurve relation name or holds no instances.
 */
public static double getNPointPrecision(Instances tcurve, int n) {

  final boolean usableCurve = RELATION_NAME.equals(tcurve.relationName())
      && (tcurve.numInstances() != 0);
  if (!usableCurve) {
    return Double.NaN;
  }

  final int recallInd = tcurve.attribute(RECALL_NAME).index();
  final int precisInd = tcurve.attribute(PRECISION_NAME).index();
  final double [] recallVals = tcurve.attributeToDoubleArray(recallInd);
  final int [] order = Utils.sort(recallVals);
  final double step = 1.0 / (n - 1);

  double total = 0;
  for (int i = 0; i < n; i++) {
    // recall level this sample is taken at
    final double target = i * step;

    int pos = binarySearch(order, recallVals, target);
    final double recall = recallVals[order[pos]];
    double precis = tcurve.instance(order[pos]).value(precisInd);

    // interpolate figures for non-endpoints: advance to the next row with a
    // different recall and evaluate the connecting line at the target recall
    while ((pos != 0) && (pos < order.length - 1)) {
      pos++;
      final double recall2 = recallVals[order[pos]];
      if (recall2 == recall) {
        continue;
      }
      final double precis2 = tcurve.instance(order[pos]).value(precisInd);
      final double slope = (precis2 - precis) / (recall2 - recall);
      final double offset = precis - recall * slope;
      precis = target * slope + offset;
      break;
    }
    total += precis;
  }
  return total / n;
}
 
Example 18
Source File: C45PruneableClassifierTreeG.java    From tsml with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Produces the instance indices of a dataset ordered by ascending value of
 * one attribute.
 *
 * @param data the data the indices represent
 * @param a the index of the attribute to sort by
 * @return array of sorted indices
 */
private int [] sortByAttribute(Instances data, int a) {

  return Utils.sort(data.attributeToDoubleArray(a));
}
 
Example 19
Source File: Ex01_Datahandling.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Walk-through of three ways to load and resample a dataset, followed by basic
 * inspection and conversion to and from primitive arrays.
 *
 * @param args unused
 * @throws Exception if loading or sampling the data fails
 */
public static void main(String[] args) throws Exception {

    // The ItalyPowerDemand dataset is distributed with this codebase
    final String basePath = "src/main/java/experiments/data/tsc/";
    final String dataset = "ItalyPowerDemand";
    final int seed = 1;
    final String filePrefix = basePath + dataset + "/" + dataset;


    ///////////// Loading method 1: loading individual files
    // DatasetLoading.loadData...(...)
    // Reads a single arff without any sampling. The class value is assumed to be
    // the last attribute

    Instances train = DatasetLoading.loadDataThrowable(filePrefix + "_TRAIN.arff");
    Instances test = DatasetLoading.loadDataThrowable(filePrefix + "_TEST.arff");

    // The pair can then be resampled while maintaining train/test distributions

    Instances[] trainTest = InstanceTools.resampleTrainAndTestInstances(train, test, seed);
    train = trainTest[0];
    test = trainTest[1];


    ///////////// Loading method 2: sampling directly
    // DatasetLoading.sampleDataset(...)
    // Wraps the loading and sampling performed above. Reads a dataset either from a
    // single complete file (e.g. uci data) or a predefined split (e.g. ucr/tsc data)
    // and resamples it according to the seed given. If the resampled fold can already
    // be found in the read location ({dsetname}{foldid}_TRAIN and _TEST) then those
    // are loaded instead. See the sampleDataset(...) javadoc

    trainTest = DatasetLoading.sampleDataset(basePath, dataset, seed);
    train = trainTest[0];
    test = trainTest[1];


    ///////////// Loading method 3: sampling the built in dataset
    // DatasetLoading.sampleDataset(...)
    // ItalyPowerDemand is distributed with the codebase, so there is a wrapper
    // to sample it directly for quick testing

    trainTest = DatasetLoading.sampleItalyPowerDemand(seed);
    train = trainTest[0];
    test = trainTest[1];


    //////////// Data inspection and handling:
    // Basic meta info

    System.out.println("train.relationName() = " + train.relationName());
    System.out.println("train.numInstances() = " + train.numInstances());
    System.out.println("train.numAttributes() = " + train.numAttributes());
    System.out.println("train.numClasses() = " + train.numClasses());

    // The class value of each individual instance

    for (Instance inst : train) {
        System.out.print(inst.classValue() + ", ");
    }
    System.out.println("");


    // Often for speed we just want the data in a primitive array;
    // this is one way to go to and from them

    // The class labels are kept separate in this example
    double[] classLabels = train.attributeToDoubleArray(train.classIndex()); // aka y_train

    // true: leave the last value of each instance out of the array
    double[][] data = InstanceTools.fromWekaInstancesArray(train, true); // aka X_train

    // After any fast array-optimised work, the arrays can be turned back into an Instances object
    Instances reformedTrain = InstanceTools.toWekaInstances(data, classLabels);
}