Java Code Examples for weka.core.Instances#classIndex()

The following examples show how to use weka.core.Instances#classIndex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AddCluster.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Removes the attributes that are to be ignored: the configured ignore
  * range (when one is set) and the class attribute (when one is set).
  * The filter that is built is also stored in m_removeAttributes.
  * 
  * @param data	the data to filter
  * @return		the filtered data, or the input itself when there is
  * 			neither an ignore range nor a class attribute
  * @throws Exception	if filtering fails
  */
 protected Instances removeIgnored(Instances data) throws Exception {
   // nothing to strip: hand the input straight back
   if (m_IgnoreAttributesRange == null && data.classIndex() < 0) {
     return data;
   }

   Remove remover = new Remove();
   m_removeAttributes = remover;

   // comma-separated index list: the ignore range first, then the class
   // attribute position shifted up by one
   StringBuilder indexList = new StringBuilder();
   if (m_IgnoreAttributesRange != null) {
     indexList.append(m_IgnoreAttributesRange.getRanges());
   }
   if (data.classIndex() >= 0) {
     if (indexList.length() > 0) {
       indexList.append(",");
     }
     indexList.append(data.classIndex() + 1);
   }

   remover.setAttributeIndices(indexList.toString());
   remover.setInvertSelection(false);
   remover.setInputFormat(data);
   return Filter.useFilter(data, remover);
 }
 
Example 2
Source File: NormalizeCase.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Centres every instance (row) on zero by subtracting the mean of its own
 * attribute values, in place. The class attribute and all nominal attributes
 * are left untouched and contribute neither to the sum nor to the divisor.
 * Instances without any eligible attribute are left unchanged.
 *
 * @param r the instances to adjust in place
 * @throws Exception declared by the signature for callers
 */
public void standard(Instances r) throws Exception{
	int classIndex=r.classIndex();
	int numAtts=r.numAttributes();
	for(int i=0;i<r.numInstances();i++)
	{
		// First pass: sum and count exactly the attributes that will be adjusted,
		// so the divisor matches the sum (a class at index 0 and nominal
		// attributes are both excluded).
		double sum=0;
		int count=0;
		for(int j=0;j<numAtts;j++){
			if(j!=classIndex&& !r.attribute(j).isNominal()){
				sum+=r.instance(i).value(j);
				count++;
			}
		}
		if(count==0)
			continue;	// nothing to centre for this instance
		double mean=sum/count;
		// Second pass: shift the same attributes by the row mean.
		for(int j=0;j<numAtts;j++){
			if(j!=classIndex&& !r.attribute(j).isNominal()){
				double x=r.instance(i).value(j);
				r.instance(i).setValue(j,(x-mean));
			}
		}
	}
}
 
Example 3
Source File: AbstractVectorClusterer.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Standardises the data in place, attribute by attribute, and records the
 * mean and standard deviation used for each attribute in attributeMeans and
 * attributeStdDevs.
 *
 * Only attributes 0 .. numAttributes()-2 are processed; the final attribute
 * is never touched, whether or not a class index is set.
 * NOTE(review): there is no guard against a zero standard deviation.
 *
 * @param data the instances to standardise in place
 * @throws Exception if a class attribute is set and it is not the final attribute
 */
protected void normaliseData(Instances data) throws Exception{
    final boolean classSet = data.classIndex() >= 0;
    if (classSet && data.classIndex() != data.numAttributes()-1){
        throw new Exception("Class attribute is available and not the final attribute.");
    }

    final int featureCount = data.numAttributes()-1;
    attributeMeans = new double[featureCount];
    attributeStdDevs = new double[featureCount];

    for (int att = 0; att < featureCount; att++){
        attributeMeans[att] = data.attributeStats(att).numericStats.mean;
        attributeStdDevs[att] = data.attributeStats(att).numericStats.stdDev;

        final double mean = attributeMeans[att];
        final double stdDev = attributeStdDevs[att];
        for (Instance inst : data){
            inst.setValue(att, (inst.value(att) - mean)/stdDev);
        }
    }
}
 
Example 4
Source File: LabeledItemSet.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Separates the class attribute from the other attributes, working on a copy
 * of the given instances. Depending on the invert flag the copy keeps either
 * everything except the class attribute, or the class attribute alone.
 * 
 * @param instances the instances
 * @param invert flag; if true only the class attribute remains, otherwise
 *          the class attribute is the only attribute that is deleted.
 * @throws Exception if no class attribute is set on the given instances
 * @return Instances without the class attribute or instances with only the
 *         class attribute
 */
public static Instances divide(Instances instances, boolean invert)
    throws Exception {

  Instances copy = new Instances(instances);
  if (instances.classIndex() < 0) {
    throw new Exception(
        "For class association rule mining a class attribute has to be specified.");
  }

  if (!invert) {
    // unset the class first, then drop the attribute that used to be the class
    copy.setClassIndex(-1);
    copy.deleteAttributeAt(instances.classIndex());
    return copy;
  }

  // Delete everything but the class attribute. After a deletion the next
  // attribute slides into the same position, so the cursor only advances
  // when it is sitting on the class attribute.
  int pos = 0;
  while (pos < copy.numAttributes()) {
    if (pos == copy.classIndex()) {
      pos++;
    } else {
      copy.deleteAttributeAt(pos);
    }
  }
  return copy;
}
 
Example 5
Source File: MLUtils.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Get K - for each of the first L attributes (L being the class index of D),
 * count how many distinct values occur in D once each value is cast to int.
 * @param	D 	a dataset
 * @return	a vector of size L: K_1,...,K_L
 */
public int[] getK(Instances D) {
	final int numLabels = D.classIndex();
	final int[] distinctCounts = new int[numLabels];
	for (int label = 0; label < numLabels; label++) {
		// collect the distinct (int-cast) values seen for this label
		HashSet<Integer> seen = new HashSet<Integer>();
		for (Instance x : D) {
			seen.add((int) x.value(label));
		}
		distinctCounts[label] = seen.size();
	}
	return distinctCounts;
}
 
Example 6
Source File: StatUtils.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * GetApproxC - A fast version of getC(D), based on frequent sets.
 * Builds an L x L count matrix (L being the class index of D) from the label
 * combinations returned by MLUtils.countCombinationsSparse: each combination's
 * count is added to the diagonal cell of every index it contains and to the
 * cell [a][b] of every pair of its indices, with a listed before b.
 *
 * @param	D	dataset
 * @return	the L x L count matrix; only cells written by the above are non-zero
 */
public static int[][] getApproxC(Instances D) {
	final int numLabels = D.classIndex();
	final int[][] counts = new int[numLabels][numLabels];
	// @todo, can prune here to make even faster by pruning this.
	HashMap<LabelSet,Integer> combos = MLUtils.countCombinationsSparse(D,numLabels);

	for (LabelSet labels : combos.keySet()) {
		final int freq = combos.get(labels);
		final int len = labels.indices.length;
		for (int a = 0; a < len; a++) {
			final int row = labels.indices[a];
			counts[row][row] += freq;
			for (int b = a+1; b < len; b++) {
				counts[row][labels.indices[b]] += freq;
			}
		}
	}

	return counts;
}
 
Example 7
Source File: MLUtils.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/** 
 * LabelCardinalities - return the frequency of each label of dataset D.
 * For each of the first L attributes (L being the class index of D) this is
 * the sum of the non-missing values divided by the number of non-missing
 * values.
 *
 * @param	D	dataset
 * @return	an array of length L holding one average per label
 */
public static final double[] labelCardinalities(Instances D) {
	final int numLabels = D.classIndex();
	final double[] freq = new double[numLabels];
	for (int label = 0; label < numLabels; label++) {
		double total = 0.0;
		int present = 0;
		for (int i = 0; i < D.numInstances(); i++) {
			// missing values count neither towards the sum nor the divisor
			if (D.instance(i).isMissing(label)) {
				continue;
			}
			total += D.instance(i).value(label);
			present++;
		}
		freq[label] = total / present;
	}
	return freq;
}
 
Example 8
Source File: YeoJohnson.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Overwrites the class value of the first response.length instances with the
 * result of transform(lambda, response).
 *
 * @param data     the instances whose class values are overwritten in place
 * @param lambda   the parameter handed to transform
 * @param response the values handed to transform; entry i belongs to instance i
 */
static public void transformResponse(Instances data, double lambda, double[] response)
{
	final int classPos=data.classIndex();
	final double[] transformed=transform(lambda,response);
	for(int i=0;i<response.length;i++)
		data.instance(i).setValue(classPos,transformed[i]);
}
 
Example 9
Source File: PACF.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
@Override
protected Instances determineOutputFormat(Instances inputFormat)
                throws Exception {
    // Fail early if the filter's capabilities do not cover the input.
    getCapabilities().testWithFail(inputFormat);

    final int classIdx=inputFormat.classIndex();
    final boolean hasClass=classIdx>=0;

    // Series length is the attribute count minus the class attribute, if any.
    seriesLength=inputFormat.numAttributes();
    if(hasClass)
        seriesLength--;

    // Cap maxLag at seriesLength-endTerms; if that leaves it negative,
    // use numAttributes()-1 instead.
    if(maxLag>seriesLength-endTerms)
        maxLag=seriesLength-endTerms;
    if(maxLag<0)
        maxLag=inputFormat.numAttributes()-1;

    // One attribute per lag: PACF_0 .. PACF_(maxLag-1).
    ArrayList<Attribute> atts=new ArrayList<>();
    for(int lag=0;lag<maxLag;lag++)
        atts.add(new Attribute("PACF_"+lag));

    if(hasClass){
        // Rebuild the class attribute from the input's name and value list.
        Attribute target=inputFormat.attribute(classIdx);
        ArrayList<String> vals=new ArrayList<>(target.numValues());
        for(int v=0;v<target.numValues();v++)
            vals.add(target.value(v));
        atts.add(new Attribute(target.name(),vals));
    }

    Instances result=new Instances("PACF"+inputFormat.relationName(),atts,inputFormat.numInstances());
    if(hasClass)
        result.setClassIndex(result.numAttributes()-1);
    return result;
}
 
Example 10
Source File: ACF.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
* Deletes every attribute at position n or above, except the class
* attribute, which is skipped over and kept.
* @param d the instances to truncate in place
* @param n the position from which attributes are removed
*/    
   public void truncate(Instances d, int n){
       int pos=n;
       while(pos<d.numAttributes()){
           if(pos!=d.classIndex()){
               // the next attribute slides into this position, so stay put
               d.deleteAttributeAt(pos);
               continue;
           }
           pos++;
       }
   }
 
Example 11
Source File: CCp.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
@Override
public void buildClassifier(Instances D) throws Exception {
	testCapabilities(D);

	// the class index of D is handed to prepareChain
	final int numLabels = D.classIndex();
	prepareChain(numLabels);

	if (getDebug()) {
		System.out.print(":- Chain (");
	}
	root = new meka.classifiers.multitarget.CCp.Link(retrieveChain(),0,D);
	if (getDebug()) {
		System.out.println(" ) -:");
	}
}
 
Example 12
Source File: RnnTextFilesEmbeddingInstanceIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
@Override
public LabeledSentenceProvider getSentenceProvider(Instances data) {
  final int classPos = data.classIndex();
  // the path is read from attribute (1 - class index)
  final int pathPos = 1 - classPos;

  List<File> files = new ArrayList<>();
  List<String> labels = new ArrayList<>();
  for (int i = 0; i < data.numInstances(); i++) {
    Instance inst = data.instance(i);
    labels.add(String.valueOf(inst.value(classPos)));
    // resolve the stored path against the texts location
    final String relative = inst.stringValue(pathPos);
    files.add(Paths.get(textsLocation.getAbsolutePath(), relative).toFile());
  }

  return new FileLabeledSentenceProvider(files, labels, data.numClasses());
}
 
Example 13
Source File: RegOptimizer.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the optimizer state from the given data: copies C and the
 * kernel from m_SVM, builds the kernel on the data, caches the class values
 * in m_target, and resets the random generator, the alpha arrays, the
 * support vector set and the counters.
 * 
 * @param data	the data to work with
 * @throws Exception 	if m_SVM is null
 */
protected void init(Instances data) throws Exception {
  if (m_SVM == null) {
    throw new Exception ("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
  }

  // settings and data references
  m_C = m_SVM.getC();
  m_data = data;
  m_classIndex = data.classIndex();
  m_nInstances = data.numInstances();

  // a private copy of the SVM's kernel, built on this data
  m_kernel = Kernel.makeCopy(m_SVM.getKernel());
  m_kernel.buildKernel(data);

  // cache the class value of every instance
  m_target = new double[m_nInstances];
  int idx = 0;
  while (idx < m_nInstances) {
    m_target[idx] = data.instance(idx).classValue();
    idx++;
  }

  m_random = new Random(m_nSeed);

  // alpha and alpha* start at all zeros, one entry per target
  final int numTargets = m_target.length;
  m_alpha = new double[numTargets];
  m_alphaStar = new double[numTargets];

  m_supportVectors = new SMOset(m_nInstances);

  m_b = 0.0;
  m_nEvals = 0;
  m_nCacheHits = -1;
}
 
Example 14
Source File: WARAM.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Generates the classifier by feeding every instance of D to
  * updateClassifier. If the network has not been initialised yet
  * (numFeatures is -1) it is initialised from the dimensions of D first;
  * otherwise training is skipped silently when the dimensions of D do not
  * match numFeatures / numClasses.
  *
  * @param D set of instances serving as training data 
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {
	testCapabilities(D);

	final int numLabels = D.classIndex();
	// both lengths are twice the respective attribute counts
	final int featlength = (D.numAttributes() - numLabels) * 2;
	final int classlength = numLabels * 2;

	System.out.println("Using rho="+roa);
	if (numFeatures == -1) {
		initARAM(featlength, classlength, roa, threshold);
	} else if (featlength != numFeatures || classlength != numClasses) {
		// dimensions differ from the existing network: return without training
		return;
	}

	// Train incrementally, one instance at a time.
	for (Enumeration enumInsts = D.enumerateInstances(); enumInsts.hasMoreElements();) {
		updateClassifier((Instance) enumInsts.nextElement());
	}
	System.out.println("Training done, used "+numCategories+" neurons.");
 }
 
Example 15
Source File: RandomSubSpace.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Builds the ensemble: every base classifier is wrapped in a
  * FilteredClassifier whose Remove filter is configured with the attribute
  * range returned by randomSubSpace, then buildClassifiers() is called.
  * Falls back to a ZeroR model when the data holds a single attribute.
  *
  * @param data 	the training data to be used for generating the
  * 			classifier.
  * @throws Exception 	if the classifier could not be built successfully
  */
 public void buildClassifier(Instances data) throws Exception {

   // can classifier handle the data?
   getCapabilities().testWithFail(data);

   // work on a copy without the instances whose class is missing
   m_data = new Instances(data);
   m_data.deleteWithMissingClass();

   // only class? -> build ZeroR model
   if (m_data.numAttributes() == 1) {
     System.err.println(
  "Cannot build model (only class attribute present in data!), "
  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(m_data);
     return;
   }
   m_ZeroR = null;

   super.buildClassifier(data);

   // attribute positions shifted up by one, for every attribute but the class
   final int numAtts = data.numAttributes();
   final int classIndex = data.classIndex();
   Integer[] indices = new Integer[numAtts - 1];
   int next = 0;
   for (int att = 0; att < numAtts; att++) {
     if (att == classIndex) {
       continue;
     }
     indices[next++] = att + 1;
   }
   int subSpaceSize = numberOfAttributes(indices.length, getSubSpaceSize());
   Random random = data.getRandomNumberGenerator(m_Seed);

   final boolean seedable = m_Classifier instanceof Randomizable;
   for (int j = 0; j < m_Classifiers.length; j++) {
     if (seedable) {
       ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
     }
     // wrap the base classifier so it is handed filtered data
     FilteredClassifier wrapped = new FilteredClassifier();
     wrapped.setClassifier(m_Classifiers[j]);
     m_Classifiers[j] = wrapped;

     Remove subSpaceFilter = new Remove();
     subSpaceFilter.setOptions(new String[]{"-V", "-R", randomSubSpace(indices,subSpaceSize,classIndex+1,random)});
     wrapped.setFilter(subSpaceFilter);
   }

   buildClassifiers();

   // save memory
   m_data = null;
 }
 
Example 16
Source File: NormalizeAttribute.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Stores the given data and its class index, then calls findStats on it.
 *
 * @param data the data to keep as trainData and to pass to findStats
 */
public NormalizeAttribute(Instances data){
		this.trainData=data;
		this.classIndex=data.classIndex();
		// statistics are gathered up front, at construction time
		findStats(data);
	}
 
Example 17
Source File: MultilayerPerceptron.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Sets the m_numeric flag from the type of the class attribute. Also
  * records, for every attribute including the class, half its value range
  * and its midpoint (missing values are skipped) in m_attributeRanges and
  * m_attributeBases. If m_normalizeAttributes is set, every non-class
  * attribute is rescaled in place: the midpoint is subtracted and, when the
  * half-range is non-zero, the result is divided by the half-range.
  * @param inst the instances.
  * @return the same instances object that was passed in (null if null was
  * passed in).
  * @throws Exception declared by the signature for callers
  */
 private Instances setClassType(Instances inst) throws Exception {
   if (inst == null) {
     return inst;
   }

   m_attributeRanges = new double[inst.numAttributes()];
   m_attributeBases = new double[inst.numAttributes()];

   for (int att = 0; att < inst.numAttributes(); att++) {
     // bounds of this attribute over all non-missing values
     double lo = Double.POSITIVE_INFINITY;
     double hi = Double.NEGATIVE_INFINITY;
     for (int i = 0; i < inst.numInstances(); i++) {
       if (inst.instance(i).isMissing(att)) {
         continue;
       }
       final double v = inst.instance(i).value(att);
       if (v < lo) {
         lo = v;
       }
       if (v > hi) {
         hi = v;
       }
     }

     final double halfRange = (hi - lo) / 2;
     final double midpoint = (hi + lo) / 2;
     m_attributeRanges[att] = halfRange;
     m_attributeBases[att] = midpoint;

     // the class attribute itself is never rescaled here
     if (att == inst.classIndex() || !m_normalizeAttributes) {
       continue;
     }
     final boolean scalable = halfRange != 0;
     for (int i = 0; i < inst.numInstances(); i++) {
       final double current = inst.instance(i).value(att);
       if (scalable) {
         inst.instance(i).setValue(att, (current - midpoint) / halfRange);
       } else {
         inst.instance(i).setValue(att, current - midpoint);
       }
     }
   }

   m_numeric = inst.classAttribute().isNumeric();
   return inst;
 }
 
Example 18
Source File: GeneticSearch.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
* Searches the attribute subset space using a genetic algorithm. An initial
* population is created and evaluated, then up to m_maxGenerations further
* generations are produced, stopping early once checkBest() reports
* convergence. A population report is appended for generation 0, for the
* last generation, for every m_reportFrequency-th generation and for the
* generation in which the search converged.
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @throws Exception if the evaluator is not a SubsetEvaluator or the search
* can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
 throws Exception {

  m_best = null;
  m_generationReports = new StringBuffer();

  if (!(ASEval instanceof SubsetEvaluator)) {
    throw  new Exception(ASEval.getClass().getName() 
                         + " is not a Subset evaluator!");
  }

  // the class index is only recorded for evaluators that are not unsupervised
  m_hasClass = !(ASEval instanceof UnsupervisedSubsetEvaluator);
  if (m_hasClass) {
    m_classIndex = data.classIndex();
  }

  final SubsetEvaluator evaluator = (SubsetEvaluator) ASEval;
  m_numAttribs = data.numAttributes();

  m_startRange.setUpper(m_numAttribs-1);
  if (!getStartSet().equals("")) {
    m_starting = m_startRange.getSelection();
  }

  // fresh lookup table, random source and population storage
  m_lookupTable = new Hashtable(m_lookupTableSize);
  m_random = new Random(m_seed);
  m_population = new GABitSet [m_popSize];

  // generation 0: random initial population
  initPopulation();
  evaluatePopulation(evaluator);
  populationStatistics();
  scalePopulation();
  checkBest();
  m_generationReports.append(populationReport(0));

  for (int gen = 1; gen <= m_maxGenerations; gen++) {
    generation();
    evaluatePopulation(evaluator);
    populationStatistics();
    scalePopulation();
    // find the best pop member and check for convergence
    final boolean converged = checkBest();

    final boolean reportDue = (gen == m_maxGenerations)
        || ((gen % m_reportFrequency) == 0)
        || converged;
    if (reportDue) {
      m_generationReports.append(populationReport(gen));
    }
    if (converged) {
      break;
    }
  }
  return attributeList(m_best.getChromosome());
}
 
Example 19
Source File: StatUtils.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * CondDepMatrix - Get a Conditional Dependency Matrix.
 * Based on Zhang's 'LEAD' approach, where<br>
 * the probability of labels j and k both getting errors on the same instance is error(j)*error(k)
 * if the actual co-occurrence is otherwise. 
 * For every label pair (j,k) with j &lt; k, the number of instances where both,
 * exactly one, or neither label is predicted wrongly is counted and set
 * against the count expected from the per-label errors; both tables are
 * handed to StatUtils.chi2. As a side effect result.output is overwritten
 * with Result.getStats(result,"6").
 * @param	D	dataset
 * @param	result	the result holding the predictions for D
 * @return the value returned by StatUtils.chi2 for the observed and expected counts
 */
public static double[][] condDepMatrix(Instances D, Result result) {

	final int L = D.classIndex();
	final int N = D.numInstances();
	double[][] teacher = MLUtils.getYfromD(D);						// Output (TEACHER)
	double[][] predicted = MatrixUtils.threshold(result.allPredictions(), 0.5);	// Output (PREDICTED)
	result.output = Result.getStats(result,"6");	            // <-- high verbosity, because we need individual accuracies
	double[] expErr = fillError(result, L);						// Errors (EXPECTED)

	// Observed counts, first index: 0 = both wrong, 1 = exactly one wrong, 2 = both right
	double[][][] observed = new double[3][L][L];
	for (int i = 0; i < N; i++) {
		final int[] y = A.toIntArray(predicted[i],0.5); 		// predicted
		final int[] t = A.toIntArray(teacher[i],0.5);			// actual (teacher)
		for (int j = 0; j < L; j++) {
			for (int k = j+1; k < L; k++) {
				final boolean jWrong = y[j] != t[j];
				final boolean kWrong = y[k] != t[k];
				final int type;
				if (jWrong && kWrong) {
					type = 0;
				} else if (!jWrong && !kWrong) {
					type = 2;
				} else {
					type = 1;
				}
				observed[type][j][k]++;
			}
		}
	}

	// Expected counts for the same three cases, scaled up to N instances
	double[][][] expected = new double[3][L][L];
	for (int j = 0; j < L; j++) {
		for (int k = j+1; k < L; k++) {
			final double ej = expErr[j];
			final double ek = expErr[k];
			expected[0][j][k] = N * (ej * ek);
			expected[2][j][k] = N * ((1.0 - ek) * (1.0 - ej));
			expected[1][j][k] = N * ( (ej * (1.0 - ek)) + (1.0 - ej) * ek);
		}
	}
	return StatUtils.chi2(observed,expected);
}
 
Example 20
Source File: LabelTransformationClassifier.java    From meka with GNU General Public License v3.0 3 votes vote down vote up
@Override
   public void buildClassifier(Instances D) throws Exception {
      testCapabilities(D);

      // the class index is only used for the debug message below
      final int numLabels = D.classIndex();
      if (getDebug()) {
         System.out.print("transforming labels with size: "+numLabels+" baseModel: "+m_Classifier.getClass().getName()+" ");
      }

      // train the base classifier on the label-transformed data
      m_Classifier.buildClassifier(this.transformLabels(D));
   }