Java Code Examples for weka.core.Instances#deleteAttributeAt()

The following examples show how to use weka.core.Instances#deleteAttributeAt(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
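Before the project-specific examples, here is a minimal, self-contained sketch of the calling pattern (the file name and attribute indices are hypothetical). deleteAttributeAt() takes a 0-based index, shifts every later attribute (and the class index) down by one, and refuses to delete the attribute currently set as the class, which is why several of the examples below call setClassIndex(-1) before deleting and re-set the class afterwards.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteAttributeAtSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical ARFF file; any dataset with several attributes will do
        Instances data = DataSource.read("some_dataset.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Deleting ordinary attributes: indices are 0-based, and each deletion
        // shifts later attributes (and the class index) down by one, so a batch
        // of deletions is simplest in descending index order.
        data.deleteAttributeAt(3);
        data.deleteAttributeAt(1);

        // Deleting the current class attribute is not allowed; unset the class
        // index first, delete, then point the class at another attribute.
        data.setClassIndex(-1);
        data.deleteAttributeAt(data.numAttributes() - 1);
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println(data.numAttributes() + " attributes remain");
    }
}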
Example 1
Source File: MultiResponseModelTrees.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
    //creating the 2class version of the insts
    numericClassInsts = new Instances(data);
    numericClassInsts.setClassIndex(0); //temporary
    numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes()-1);
    Attribute newClassAtt = new Attribute("newClassVal"); //numeric class
    numericClassInsts.insertAttributeAt(newClassAtt, numericClassInsts.numAttributes());
    numericClassInsts.setClassIndex(numericClassInsts.numAttributes()-1); //temporary

    //and building the regressors
    regressors = new M5P[data.numClasses()];
    double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());
    for (int c = 0; c < data.numClasses(); c++) {

        for (int i = 0; i < numericClassInsts.numInstances(); i++) {
            //if this inst is of the class we're currently handling (c), set new class val to 1 else 0
            double cval = trueClassVals[i] == c ? 1 : 0; 
            numericClassInsts.instance(i).setClassValue(cval);
        }    

        regressors[c] = new M5P();
        regressors[c].buildClassifier(numericClassInsts);
    }
}
 
Example 2
Source File: ShapeletTransformClassifierLegacy.java    From tsml with GNU General Public License v3.0
@Override
    public double[] distributionForInstance(Instance ins) throws Exception{
        shapeletData.add(ins);

        Instances temp  = transform.transform(shapeletData);
//Delete redundant
        for(int del:redundantFeatures)
            temp.deleteAttributeAt(del);
/*         if(performPCA){
             temp=pca.transform(temp);
         }
*/
        Instance test  = temp.get(0);
        shapeletData.remove(0);

        return classifier.distributionForInstance(test);
    }
 
Example 3
Source File: RISE.java    From tsml with GNU General Public License v3.0
private Instances filterData(Instances result) throws Exception{
            int maxLag=(result.numAttributes()-1)/4;
            if(maxLag>ACF.DEFAULT_MAXLAG)
                maxLag=ACF.DEFAULT_MAXLAG;
            Instances[] t=new Instances[filters.length];
            for(int j=0;j<filters.length;j++){
// I'm not sure this is a sensible or robust way of doing this
//What if L meant something else to the SimpleFilter?
//Can you use a whole string, e.g. MAXLAG?
                filters[j].setOptions(new String[]{"L",maxLag+""});
                filters[j].setInputFormat(result);
                t[j]=Filter.useFilter(result, filters[j]);
            }
            //4. Merge them all together
            Instances combo=new Instances(t[0]);
            for(int j=1;j<filters.length;j++){
                if( j < filters.length){
                    combo.setClassIndex(-1);
                    combo.deleteAttributeAt(combo.numAttributes()-1);
                }
                combo=Instances.mergeInstances(combo, t[j]);
            }
            combo.setClassIndex(combo.numAttributes()-1);
            return combo;
    }
 
Example 4
Source File: MIWrapper.java    From tsml with GNU General Public License v3.0
/**
 * Builds the classifier
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances train = new Instances(data);
  train.deleteWithMissingClass();
  
  if (m_Classifier == null) {
    throw new Exception("A base classifier has not been specified!");
  }

  if (getDebug())
    System.out.println("Start training ...");
  m_NumClasses = train.numClasses();

  //convert the training dataset into single-instance dataset
  m_ConvertToProp.setWeightMethod(getWeightMethod());
  m_ConvertToProp.setInputFormat(train);
  train = Filter.useFilter(train, m_ConvertToProp);
  train.deleteAttributeAt(0); // remove the bag index attribute

  m_Classifier.buildClassifier(train);
}
 
Example 5
Source File: Tools.java    From gsn with GNU General Public License v3.0
/**
* pre-process the data by normalizing and removing unused attributes
* @param i
* @return
*/
public static Instances prepareInstances(Instances i){
	
	//select features to use; delete in descending index order so earlier
	//deletions do not shift the indices of the attributes still to be removed
	i.setClassIndex(9);
	i.deleteAttributeAt(8);
	i.deleteAttributeAt(7);
	i.deleteAttributeAt(6);
	i.deleteAttributeAt(2);
	i.deleteAttributeAt(1);
	
	//scale the values
	for(int k=0;k<i.numInstances();k++){
		Instance j = i.instance(k);
		j.setValue(0, j.value(0)/1400.0);
		j.setValue(2, j.value(2)/50);
		j.setValue(3, j.value(3)/100.0);
		j.setValue(4, j.value(4)/100.0 - 4);			
	}
	
	return i;
}
 
Example 6
Source File: MultiLinearRegression.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
    //creating the 2class version of the insts
    numericClassInsts = new Instances(data);
    numericClassInsts.setClassIndex(0); //temporary
    numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes()-1);
    Attribute newClassAtt = new Attribute("newClassVal"); //numeric class
    numericClassInsts.insertAttributeAt(newClassAtt, numericClassInsts.numAttributes());
    numericClassInsts.setClassIndex(numericClassInsts.numAttributes()-1); //temporary

    //and building the regressors
    regressors = new LinearRegression[data.numClasses()];
    double[] trueClassVals = data.attributeToDoubleArray(data.classIndex());
    for (int c = 0; c < data.numClasses(); c++) {

        for (int i = 0; i < numericClassInsts.numInstances(); i++) {
            //if this inst is of the class we're currently handling (c), set new class val to 1 else 0
            double cval = trueClassVals[i] == c ? 1 : 0; 
            numericClassInsts.instance(i).setClassValue(cval);
        }    

        regressors[c] = new LinearRegression();
        regressors[c].buildClassifier(numericClassInsts);
    }
}
 
Example 7
Source File: MISVM.java    From tsml with GNU General Public License v3.0
/**
 * Computes the distribution for a given exemplar
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp)
  throws Exception {

  double sum=0;
  double classValue;
  double[] distribution = new double[2];

  Instances testData = new Instances(exmp.dataset(), 0);
  testData.add(exmp);

  // convert the training dataset into single-instance dataset
  testData = Filter.useFilter(testData, m_ConvertToProp);	
  testData.deleteAttributeAt(0); //remove the bagIndex attribute	

  if (m_Filter != null)	
    testData = Filter.useFilter(testData, m_Filter); 

  for(int j = 0; j < testData.numInstances(); j++){
    Instance inst = testData.instance(j);
    double output = m_SVM.output(-1, inst); 
    if (output <= 0)
      classValue = 0.0;
    else
      classValue = 1.0;
    sum += classValue;
  }
  if (sum == 0)
    distribution[0] = 1.0;
  else 
    distribution[0] = 0.0;
  distribution [1] = 1.0 - distribution[0];

  return distribution;
}
 
Example 8
Source File: PCA.java    From tsml with GNU General Public License v3.0
@Override
public Instances transform(Instances data) {

    Instances newData= null;
    try {
        newData = pca.transformedData(data);
        while(newData.numAttributes()-1>numAttributesToKeep)
            newData.deleteAttributeAt(newData.numAttributes()-2);
    } catch (Exception e) {
        throw new RuntimeException(" Error in Transformers/PCA when performing the PCA transform: "+e);
    }
    return newData;
}
 
Example 9
Source File: CCUtils.java    From meka with GNU General Public License v3.0
/**
 * LinkTransform - prepare 'D' for training at a node 'j' of the chain, by excluding 'exl'.
 * @param	D		dataset
 * @param	j		index of the label of this node
 * @param	exl		indices of labels which are NOT parents of j
 * @return	the transformed dataset (which can be used as a template)
 */
public static Instances linkTransform(Instances D, int j, int exl[]) {
	Instances D_j = new Instances(D);
	D_j.setClassIndex(-1); 
	// delete all the attributes (and track where our index ends up)
	int ndx = j;
	for(int i = exl.length-1; i >= 0; i--) {
		D_j.deleteAttributeAt(exl[i]);
		if (exl[i] < ndx)
			ndx--; 
	}
	D_j.setClassIndex(ndx); 
	return D_j;
}
 
Example 10
Source File: BestConf.java    From bestconf with Apache License 2.0
public static ArrayList<String> preprocessInstances(Instances retval){
	double[][] cMatrix;
	ArrayList<String> result = new ArrayList<String>();
	ArrayList<String> deleteAttNames = new ArrayList<String>();
	PrincipalComponents pc = new PrincipalComponents();
	HashMap<Integer, ArrayList<Integer>> filter = new HashMap<Integer, ArrayList<Integer>>();
	try {
		pc.buildEvaluator(retval);
		cMatrix = pc.getCorrelationMatrix();		
		for(int i = 0; i < cMatrix.length; i++){
			ArrayList<Integer> record = new ArrayList<Integer>();
			for(int j = i + 1; j < cMatrix.length; j++)
				if(cMatrix[i][j] >= correlationFactorThreshold || cMatrix[i][j] <= -correlationFactorThreshold){
					record.add(j);
				}
			if(record.size() != 0){
				filter.put(i, record);
			}
		}
		Iterator<Map.Entry<Integer, ArrayList<Integer>>> iter = filter.entrySet().iterator();
		while (iter.hasNext()) {
			Map.Entry<Integer, ArrayList<Integer>> entry = iter.next();
			ArrayList<Integer> arr = entry.getValue();
			for(int i = 0; i < arr.size(); i++)
				if(arr.get(i) != cMatrix.length - 1 && !deleteAttNames.contains(retval.attribute(arr.get(i)).name())){
					deleteAttNames.add(retval.attribute(arr.get(i)).name());
				}
			if(arr.contains(cMatrix.length-1)){
				result.add(retval.attribute(Integer.parseInt(entry.getKey().toString())).name());
			}
		}
		for(int i = 0; i < deleteAttNames.size(); i++){
			retval.deleteAttributeAt(retval.attribute(deleteAttNames.get(i)).index());
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
	return result;
}
 
Example 11
Source File: FTtree.java    From tsml with GNU General Public License v3.0
/** Removes extended attributes in current dataset or instance 
 *
 * @exception Exception if something goes wrong
 */
protected Instances removeExtAttributes(Instances  data) throws Exception{
  
  for (int i=0; i< data.classAttribute().numValues(); i++)
    {
      data.deleteAttributeAt(0);
    }
  return data;
}
 
Example 12
Source File: DatasetLoading.java    From tsml with GNU General Public License v3.0
/**
 * If the dataset loaded has a first attribute whose name _contains_ the string "experimentsSplitAttribute".toLowerCase()
 * then it will be assumed that we want to perform a leave-one-X-out cross validation. Instances are sampled such that fold N is comprised of
 * a test set with all instances with first-attribute equal to the Nth unique value in a sorted list of first-attributes. The train
 * set would be all other instances. The first attribute would then be removed from all instances, so that they are not given
 * to the classifier to potentially learn from. It is up to the user to ensure that the foldID requested is within the range of possible
 * values 1 to numUniqueFirstAttValues
 *
 * @return new Instances[] { trainSet, testSet };
 */
public static Instances[] splitDatasetByFirstAttribute(Instances all, int foldId) {
    TreeMap<Double, Integer> splitVariables = new TreeMap<>();
    for (int i = 0; i < all.numInstances(); i++) {
        //even if it's a string attribute, this val corresponds to the index into the array of possible strings for this att
        double key= all.instance(i).value(0);
        Integer val = splitVariables.get(key);
        if (val == null)
            val = 0;
        splitVariables.put(key, ++val);
    }

    //find the split attribute value to keep for testing this fold
    double idToReserveForTestSet = -1;
    int testSize = -1;
    int c = 0;
    for (Map.Entry<Double, Integer> splitVariable : splitVariables.entrySet()) {
        if (c++ == foldId) {
            idToReserveForTestSet = splitVariable.getKey();
            testSize = splitVariable.getValue();
        }
    }

    //make the split
    Instances train = new Instances(all, all.size() - testSize);
    Instances test  = new Instances(all, testSize);
    for (int i = 0; i < all.numInstances(); i++)
        if (all.instance(i).value(0) == idToReserveForTestSet)
            test.add(all.instance(i));
        else
            train.add(all.instance(i));

    //delete the split attribute
    train.deleteAttributeAt(0);
    test.deleteAttributeAt(0);

    return new Instances[] { train, test };
}
 
Example 13
Source File: MLUtils.java    From meka with GNU General Public License v3.0
/**
 * ReplaceZasAttributes - data Z[][] will be the new attributes in D.
 * @param	D 	dataset (of N instances)
 * @param	Z	attribute space (of N rows, H columns)
 * @param	L	number of classes / labels.
 */
public static Instances replaceZasAttributes(Instances D, double Z[][], int L) {
	D.setClassIndex(0);
	int m = D.numAttributes()-L;
	for(int j = 0; j < m; j++) {
		D.deleteAttributeAt(L);
	}
	return addZtoD(D, Z, L);
}
 
Example 14
Source File: ACF.java    From tsml with GNU General Public License v3.0
/**
* Truncates all cases to n attributes, i.e. removes attributes n to numAttributes()-1, skipping the class attribute
* @param d
* @param n 
*/    
   public void truncate(Instances d, int n){
       int att=n;
       while(att<d.numAttributes()){
           if(att==d.classIndex())
               att++;
           else
               d.deleteAttributeAt(att);
       }
   }
 
Example 15
Source File: CCp.java    From meka with GNU General Public License v3.0
public Link(int chain[], int j, Instances train) throws Exception {
	this.j = j;

	this.index = chain[j];

	// sort out excludes [4|5,1,0,2,3]
	this.excld = Arrays.copyOfRange(chain,j+1,chain.length); 
	// sort out excludes [0,1,2,3,5]
	Arrays.sort(this.excld); 

	this.classifier = (AbstractClassifier)AbstractClassifier.forName(getClassifier().getClass().getName(),((AbstractClassifier)getClassifier()).getOptions());

	Instances new_train = new Instances(train);

	// delete all except one (leaving a binary problem)
	if(getDebug()) System.out.print(" "+this.index);
	new_train.setClassIndex(-1); 
	// delete all the attributes (and track where our index ends up)
	int c_index = chain[j]; 
	for(int i = excld.length-1; i >= 0; i--) {
		new_train.deleteAttributeAt(excld[i]);
		if (excld[i] < this.index)
			c_index--; 
	}
	new_train.setClassIndex(c_index); 

	_template = new Instances(new_train,0);

	this.classifier.buildClassifier(new_train);
	new_train = null;

	if(j+1 < chain.length) 
		next = new meka.classifiers.multitarget.CCp.Link(chain, ++j, train);
}
 
Example 16
Source File: MultivariateShapeletTransformClassifier.java    From tsml with GNU General Public License v3.0
@Override
    public double[] distributionForInstance(Instance ins) throws Exception{
        format.add(ins);
        
        Instances temp  = doTransform ? transform.process(format) : format;
//Delete redundant
        for(int del:redundantFeatures)
            temp.deleteAttributeAt(del);
        
        Instance test  = temp.get(0);
        format.remove(0);
        return ensemble.distributionForInstance(test);
    }
 
Example 17
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input 
 * instance structure (any instances contained in the object are 
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set 
 * successfully
 */
public boolean setInputFormat(Instances instanceInfo) 
  throws Exception {

  if (instanceInfo.attribute(0).type()!= Attribute.NOMINAL) {
    throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
  }
  super.setInputFormat(instanceInfo);

  /* create a new output format (multi-instance format) */
  Instances newData = instanceInfo.stringFreeStructure();
  Attribute attBagIndex = (Attribute) newData.attribute(0).copy();
  Attribute attClass = (Attribute) newData.classAttribute().copy();
  // remove the bagIndex attribute
  newData.deleteAttributeAt(0);
  // remove the class attribute
  newData.setClassIndex(-1);
  newData.deleteAttributeAt(newData.numAttributes() - 1);

  FastVector attInfo = new FastVector(3); 
  attInfo.addElement(attBagIndex);
  attInfo.addElement(new Attribute("bag", newData)); // relation-valued attribute
  attInfo.addElement(attClass);
  Instances data = new Instances("Multi-Instance-Dataset", attInfo, 0); 
  data.setClassIndex(data.numAttributes() - 1);

  super.setOutputFormat(data.stringFreeStructure());

  m_BagStringAtts = new StringLocator(data.attribute(1).relation());
  m_BagRelAtts    = new RelationalLocator(data.attribute(1).relation());
  
  return true;
}
 
Example 18
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0
/**
     * Removes attributes deemed redundant. These are either
     * 1. All one value (i.e. constant)
     * 2. Some odd test to count the number of values different from the one before.
     * I think this is meant to count the number of different values?
     * It would be good to delete attributes that are identical to other attributes.
     * @param train instances from which to remove redundant attributes
     * @return array of indexes of the attributes removed
     */
     //Returns the *shifted* indexes, so just deleting them should work
//Removes all constant attributes or attributes with just a single value
    public static int[] removeRedundantTrainAttributes(Instances train){
        int i=0;
        int minNumDifferent=2;
        boolean remove=false;
        LinkedList<Integer> list= new LinkedList<>();
        int count=0;
        while(i<train.numAttributes()-1){ //Dont test class
            remove=false;
// Test if constant
            int j=1;
            if(train.instance(j-1).value(i)==train.instance(j).value(i))
            while(j<train.numInstances() && train.instance(j-1).value(i)==train.instance(j).value(i))
                j++;
            if(j==train.numInstances())
                remove=true;
            else{
//Test pairwise similarity?
//I think this is meant to test how many different values there are. If so, it should be
//done with a HashSet of doubles. This counts how many values are identical to their predecessor
                count =0;
                for(j=1;j<train.numInstances();j++){
                    if(train.instance(j-1).value(i)==train.instance(j).value(i))
                        count++;
                }
                if(train.numInstances()-count<minNumDifferent+1)
                    remove=true;
            }
            if(remove){
    // Remove from data
                train.deleteAttributeAt(i);
                list.add(i);
            }else{
                i++;
            }
  //          count++;
        }
        int[] del=new int[list.size()];
        count=0;
        for(Integer in:list){
            del[count++]=in;
        }
        return del;
        
    }
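As a brief, hypothetical usage sketch of the array returned above: because the stored indexes are already shifted, applying them to another dataset with the same original attribute layout just means deleting them in the order they were returned, which is exactly what Examples 2 and 16 do with their redundantFeatures arrays.

// 'train' and 'test' are assumed to share the same original attribute structure
int[] redundant = InstanceTools.removeRedundantTrainAttributes(train); // modifies train in place
for (int del : redundant)
    test.deleteAttributeAt(del); // same order as returned, no extra index bookkeeping needed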
 
Example 19
Source File: RT.java    From meka with GNU General Public License v3.0
@Override
public void buildClassifier(Instances D) throws Exception {
  	testCapabilities(D);
  	
	int L = D.classIndex();

	//Create header
	Instances D_ = new Instances(D,0,0);

	//Delete the old class attributes
	for (int j = 0; j < L; j++)
		D_.deleteAttributeAt(0); 

	//Make the new class attribute
	FastVector classes = new FastVector(L);
	for (int j = 0; j < L; j++)
		classes.addElement("C"+j);

	//Add the new class attribute
	D_.insertAttributeAt(new Attribute("ClassY",classes),0);
	D_.setClassIndex(0);

	//Loop through D again
	for (int i = 0; i < D.numInstances(); i++) {
		for (int j = 0; j < L; j++) {
			if((int)D.instance(i).value(j) > 0) {
				// make a copy here ...
				Instance x_ = (Instance)D.instance(i).copy();
				x_.setDataset(null);
				// make it multi-class, and set the appropriate class value ...
				for (int k = 1; k < L; k++)
					x_.deleteAttributeAt(1); 
				x_.setDataset(D_);
				x_.setClassValue(j); // (*) this just points to the right index
				D_.add(x_);
			}
		}
	}

	//Save the template
	m_InstancesTemplate = new Instances(D_,0);

	//Build
	if(getDebug())  System.out.println("Building classifier "+m_Classifier.getClass().getName()+" on "+D_.numInstances()+" instances (originally "+D.numInstances()+")");
	m_Classifier.buildClassifier(D_);

}
 
Example 20
Source File: SAX_1NN.java    From tsml with GNU General Public License v3.0
public static void main(String[] args) throws Exception{
        
//        System.out.println(ClassifierTools.testUtils_getIPDAcc(new SAX_1NN(10, 4)));
//        System.out.println(ClassifierTools.testUtils_confirmIPDReproduction(new SAX_1NN(10, 4), 0.9154518950437318, "2019_09_26"));
        
        System.out.println("BagofPatternsTest\n\n");
        
        try {
            Instances all = DatasetLoading.loadDataNullable("C:\\Temp\\TESTDATA\\FiveClassV1.arff");
            all.deleteAttributeAt(0); //just name of bottle        
            
            Randomize rand = new Randomize();
            rand.setInputFormat(all);
            for (int i = 0; i < all.numInstances(); ++i) {
                rand.input(all.get(i));
            }
            rand.batchFinished();
            
            int trainNum = (int) (all.numInstances() * 0.7);
            int testNum = all.numInstances() - trainNum;
            
            Instances train = new Instances(all, trainNum);
            for (int i = 0; i < trainNum; ++i) 
                train.add(rand.output());
            
            Instances test = new Instances(all, testNum);
            for (int i = 0; i < testNum; ++i) 
                test.add(rand.output());
            
            SAX_1NN saxc = new SAX_1NN(6,3);
            saxc.buildClassifier(train);
            
            System.out.println(saxc.SAXdata);
            
            System.out.println("\nACCURACY TEST");
            System.out.println(ClassifierTools.accuracy(test, saxc));

        }
        catch (Exception e) {
            System.out.println(e);
            e.printStackTrace();
        }
        
    }