Java Code Examples for weka.core.Instances#numAttributes()

The following examples show how to use weka.core.Instances#numAttributes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: HillClimber.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Scans every (tail, head) attribute pair and keeps the arc addition with the
 * highest score.
 *
 * @param bayesNet Bayes network to add arc to
 * @param instances data set
 * @param oBestOperation best operation known so far; it is replaced only by a
 * candidate whose score is strictly higher and which is not tabu
 * @return the best operation seen, which is the passed-in {@code oBestOperation}
 * itself when no candidate beats it (for example because every addition is
 * rejected by {@code addArcMakesSense} or all parent sets are already filled up
 * to the maximum nr of parents)
 * @throws Exception if something goes wrong
 */
Operation findBestArcToAdd(BayesNet bayesNet, Instances instances, Operation oBestOperation) throws Exception {
	final int attributeCount = instances.numAttributes();
	for (int head = 0; head < attributeCount; head++) {
		// a head whose parent set is already full cannot receive another arc
		if (bayesNet.getParentSet(head).getNrOfParents() >= m_nMaxNrOfParents) {
			continue;
		}
		for (int tail = 0; tail < attributeCount; tail++) {
			if (!addArcMakesSense(bayesNet, instances, head, tail)) {
				continue;
			}
			Operation candidate = new Operation(tail, head, Operation.OPERATION_ADD);
			double candidateScore = calcScoreWithExtraParent(candidate.m_nHead, candidate.m_nTail);
			// the tabu check is only consulted for candidates that actually improve the score
			if (candidateScore > oBestOperation.m_fScore && isNotTabu(candidate)) {
				candidate.m_fScore = candidateScore;
				oBestOperation = candidate;
			}
		}
	}
	return oBestOperation;
}

Example 2

Source File: LibLINEAR.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Runs the data through a {@code NominalToBinary} filter when at least one
 * non-class attribute is not numeric; otherwise the data is returned untouched.
 *
 * @param insts the data to inspect
 * @return the filtered data, or {@code insts} itself if no filtering was needed
 * @throws Exception if setting up or applying the filter fails
 */
private Instances nominalToBinary( Instances insts ) throws Exception {
  final int classIdx = insts.classIndex();
  boolean needsFilter = false;
  // stop scanning as soon as the first non-numeric, non-class attribute is seen
  for (int att = 0; !needsFilter && att < insts.numAttributes(); att++) {
    needsFilter = att != classIdx && !insts.attribute(att).isNumeric();
  }

  if (needsFilter) {
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(insts);
    insts = Filter.useFilter(insts, m_NominalToBinary);
  }
  return insts;
}

Example 3

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Copies the attribute values of {@code train} followed by those of
 * {@code test} into one row-per-instance matrix.
 *
 * @param train instances written to the first rows; also fixes the column count
 * @param test instances written to the rows after the train rows
 * @return matrix with {@code train.numInstances() + test.numInstances()} rows
 * and {@code train.numAttributes()} columns
 */
public static double[][] create2DMatrixFromInstances(Instances train, Instances test) {
    final int trainRows = train.numInstances();
    final int testRows = test.numInstances();
    double[][] matrix = new double[trainRows + testRows][train.numAttributes()];

    for (int row = 0; row < trainRows; row++) {
        for (int col = 0; col < train.numAttributes(); col++) {
            matrix[row][col] = train.get(row).value(col);
        }
    }

    // test rows are appended directly below the train rows
    for (int testRow = 0; testRow < testRows; testRow++) {
        final int row = trainRows + testRow;
        for (int col = 0; col < test.numAttributes(); col++) {
            matrix[row][col] = test.get(testRow).value(col);
        }
    }

    return matrix;
}

Example 4

Source File: TweetToWordListCountFeatureVector.java From AffectiveTweets with GNU General Public License v3.0

6 votes

/**
 * Builds the output header: every input attribute in its original order plus
 * one trailing attribute named "wordListCount".
 *
 * @param inputFormat header of the incoming data
 * @return an empty data set with the extended attribute list and the same
 * class index as the input
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	final int inputAttCount = inputFormat.numAttributes();
	ArrayList<Attribute> outputAtts = new ArrayList<Attribute>();

	// carry over the input attributes unchanged
	int idx = 0;
	while (idx < inputAttCount) {
		outputAtts.add(inputFormat.attribute(idx));
		idx++;
	}

	// the new feature always goes last
	outputAtts.add(new Attribute("wordListCount"));

	Instances result = new Instances(inputFormat.relationName(), outputAtts, 0);
	result.setClassIndex(inputFormat.classIndex());
	return result;
}

Example 5

Source File: InstanceTools.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Deletes, from both data sets, every attribute whose value is the same in all
 * consecutive train instances. The last attribute is never examined
 * (presumably because it is the class attribute - confirm with callers).
 *
 * @param train data used to decide which attributes are constant; modified in place
 * @param test data from which the same attribute positions are deleted; modified in place
 */
public static void removeConstantTrainAttributes(Instances train, Instances test){
        int att = 0;
        while (att < train.numAttributes() - 1) { // last attribute is skipped
            // advance while each instance holds the same value as its predecessor
            int run = 1;
            for (; run < train.numInstances(); run++) {
                if (train.instance(run - 1).value(att) != train.instance(run).value(att)) {
                    break;
                }
            }
            if (run != train.numInstances()) {
                // values differ somewhere: keep the attribute and move on
                att++;
                continue;
            }
            // constant in train: drop it from train and from test; the index is
            // not advanced because the next attribute has shifted into this slot
            train.deleteAttributeAt(att);
            test.deleteAttributeAt(att);
        }
    }

Example 6

Source File: TunedXGBoost.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Trains the classifier on a copy of the given data and fills in
 * {@code trainResults} (build time, time unit, classifier name, data set name
 * and parameters).
 *
 * @param insts the training data
 * @throws Exception if building the classifier or estimating its accuracy fails
 */
@Override
    public void buildClassifier(Instances insts) throws Exception {
        final long startTime = System.nanoTime();

        booster = null;
        trainResults = new ClassifierResults();

        trainInsts = new Instances(insts);
        numTrainInsts = insts.numInstances();
        numAtts = insts.numAttributes();
        numClasses = insts.numClasses();

        // never ask for more folds than there are training instances
        if (numTrainInsts < cvFolds) {
            cvFolds = numTrainInsts;
        }

        buildActualClassifer();

        // when parameters are tuned, the cv results of the best parameter set are used instead
        if (getEstimateOwnPerformance() && !tuneParameters) {
            trainResults = estimateTrainAcc(trainInsts);
        }

        if (!saveEachParaAcc) {
            trainResults.setBuildTime(System.nanoTime() - startTime);
        } else {
            trainResults.setBuildTime(combinedBuildTime);
        }

        trainResults.setTimeUnit(TimeUnit.NANOSECONDS);
        trainResults.setClassifierName(tuneParameters ? "TunedXGBoost" : "XGBoost");
        trainResults.setDatasetName(trainInsts.relationName());
        trainResults.setParas(getParameters());
    }

Example 7

Source File: EnsembleEvaluatorTest.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Downloads the segment data set, builds a ReliefF attribute evaluator on the
 * first part of a stratified split and asserts that the mean attribute
 * evaluation is positive.
 *
 * @throws Exception if the data set cannot be fetched or read, or the
 * evaluator fails
 */
@Test
public void ensembleEvaluatorTest() throws Exception {
    logger.info("Starting cluster evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);
    Instances data;
    // release the file handle as soon as the ARFF content has been parsed,
    // even when parsing throws
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .05f);

    Instances insts = split.get(0);

    long timeStart = System.currentTimeMillis();

    ReliefFAttributeEval eval = new ReliefFAttributeEval();
    eval.buildEvaluator(insts);

    long timeStartEval = System.currentTimeMillis();

    // mean evaluation over all attributes
    double attEvalSum = 0;
    for (int i = 0; i < insts.numAttributes(); i++) {
        attEvalSum += eval.evaluateAttribute(i);
    }
    attEvalSum /= insts.numAttributes();

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.info("Value: " + attEvalSum);
    Assert.assertTrue(attEvalSum > 0);
    logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
    logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}

Example 8

Source File: PAA.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the output header: {@code numIntervals} numeric attributes named
 * "PAAInterval_" plus index, followed by a copy of the class attribute when the
 * input has a class index set.
 *
 * @param inputFormat header of the incoming data
 * @return the output data set structure, with the class as last attribute if
 * the input had a class
 * @throws Exception if any non-class input attribute is not numeric
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {

    final int classIdx = inputFormat.classIndex();

    // every attribute other than the class has to be numeric
    for (int att = 0; att < inputFormat.numAttributes(); att++) {
        if (att != classIdx && !inputFormat.attribute(att).isNumeric()) {
            throw new Exception("Non numeric attribute not allowed for PAA");
        }
    }

    ArrayList<Attribute> outputAtts = new ArrayList<>();

    int interval = 0;
    while (interval < numIntervals) {
        outputAtts.add(new Attribute("PAAInterval_" + interval));
        interval++;
    }

    final boolean hasClass = classIdx >= 0;
    if (hasClass) {
        // rebuild the class attribute from its name and its list of values
        Attribute target = inputFormat.attribute(classIdx);
        final int labelCount = target.numValues();
        ArrayList<String> labels = new ArrayList<>(labelCount);
        for (int v = 0; v < labelCount; v++) {
            labels.add(target.value(v));
        }
        outputAtts.add(new Attribute(target.name(), labels));
    }

    Instances result = new Instances("PAA" + inputFormat.relationName(), outputAtts, inputFormat.numInstances());
    if (hasClass) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}

Example 9

Source File: PartitionedMultiFilter.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Stores in {@code m_IndicesUnused} the indices of all non-class attributes
  * that are not inside any of the ranges returned by {@code getRanges()}.
  *
  * @param data	the data to base the determination on
  * @see 		#m_IndicesUnused
  */
 protected void determineUnusedIndices(Instances data) {
   Vector<Integer> unused = new Vector<Integer>();

   for (int att = 0; att < data.numAttributes(); att++) {
     // the class attribute is never reported as unused
     if (att == data.classIndex()) {
       continue;
     }

     // look for the first range that contains this attribute
     boolean inSomeRange = false;
     for (int r = 0; !inSomeRange && r < getRanges().length; r++) {
       inSomeRange = getRanges()[r].isInRange(att);
     }

     if (!inSomeRange) {
       unused.add(Integer.valueOf(att));
     }
   }

   // unbox the collected indices into the array field
   m_IndicesUnused = new int[unused.size()];
   int pos = 0;
   for (Integer idx : unused) {
     m_IndicesUnused[pos++] = idx.intValue();
   }

   if (getDebug()) {
     System.out.println(
	  "Unused indices: " + Utils.arrayToString(m_IndicesUnused));
   }
 }

Example 10

Source File: FilteredSubsetEval.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Initializes a filtered attribute evaluator: the data is passed through the
 * filter, the filtered structure is checked against the original one, and the
 * wrapped evaluator is built on the filtered data.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the filter changes the number, order or class of the
 * attributes, or if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
  // can evaluator handle data?
  getCapabilities().testWithFail(data);

  // header-only copy taken before filtering, used for the comparisons below
  final Instances originalHeader = new Instances(data, 0);

  m_filter.setInputFormat(data);
  data = Filter.useFilter(data, m_filter);

  final int originalAttCount = originalHeader.numAttributes();
  if (data.numAttributes() != originalAttCount) {
    throw new Exception("Filter must not alter the number of "
                        +"attributes in the data!");
  }

  // the class index is only compared when the original data had one
  final int originalClassIdx = originalHeader.classIndex();
  if (originalClassIdx >= 0 && data.classIndex() != originalClassIdx) {
    throw new Exception("Filter must not change the class attribute!");
  }

  // attribute order is verified by comparing names position by position
  for (int att = 0; att < originalAttCount; att++) {
    String filteredName = data.attribute(att).name();
    String originalName = originalHeader.attribute(att).name();
    if (!filteredName.equals(originalName)) {
      throw new Exception("Filter must not alter the order of the attributes!");
    }
  }

  // can the evaluator handle this data?
  ASEvaluation subsetEvaluator = (ASEvaluation) getSubsetEvaluator();
  subsetEvaluator.getCapabilities().testWithFail(data);
  m_filteredInstances = data.stringFreeStructure();

  ((ASEvaluation) m_evaluator).buildEvaluator(data);
}

Example 11

Source File: ModelFactory.java From AIDR with GNU Affero General Public License v3.0

5 votes

/**
 * Creates an empty data set named "spec" that has the same attributes as
 * {@code dataSet} and uses its last attribute as the class.
 *
 * @param dataSet data set whose attributes are reused
 * @return the empty template data set
 */
private static Instances getTemplateSet(Instances dataSet) {
	final int attCount = dataSet.numAttributes();
	ArrayList<Attribute> copied = new ArrayList<Attribute>(attCount);
	int idx = 0;
	while (idx < attCount) {
		copied.add(dataSet.attribute(idx));
		idx++;
	}
	Instances template = new Instances("spec", copied, 0);
	template.setClassIndex(template.numAttributes() - 1);
	return template;
}

Example 12

Source File: RotationForest.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Normalises m_MinGroup and m_MaxGroup: swaps them when they are in the wrong
 * order and caps both at the number of attributes minus one.
 *
 * @param data the dataset
 */
protected void checkMinMax(Instances data) {
  if( m_MaxGroup < m_MinGroup ) {
    int swapped = m_MinGroup;
    m_MinGroup = m_MaxGroup;
    m_MaxGroup = swapped;
  }

  final int upperBound = data.numAttributes() - 1;
  if( m_MaxGroup > upperBound ) {
    m_MaxGroup = upperBound;
  }
  if( m_MinGroup > upperBound ) {
    m_MinGroup = upperBound;
  }
}

Example 13

Source File: Tools.java From gsn with GNU General Public License v3.0

5 votes

/**
 * Appends a numeric attribute named "interpolate" to the dataset and fills it
 * with the value the classifier predicts for each instance. All predictions
 * are computed before the attribute is inserted.
 * @param c the classifier
 * @param i the dataset, modified in place
 * @throws Exception if classifying an instance fails
 */
public static void add_predictions(Classifier c, Instances i) throws Exception{

	final int rowCount = i.numInstances();
	double[] predicted = new double[rowCount];
	for (int row = 0; row < rowCount; row++) {
		predicted[row] = c.classifyInstance(i.instance(row));
	}

	// the new attribute goes after the current last attribute
	final int newAttIndex = i.numAttributes();
	i.insertAttributeAt(new Attribute("interpolate"), newAttIndex);

	int target = 0;
	for (double value : predicted) {
		i.instance(target).setValue(newAttIndex, value);
		target++;
	}
}

Example 14

Source File: sIB.java From tsml with GNU General Public License v3.0

5 votes

/**
  * Transpose the document-term matrix to term-document matrix. Only the values
  * each instance reports through its sparse accessors are copied; all other
  * cells keep the array default of zero.
  * @param data instances with document-term info
  * @return a term-document matrix transposed from the input dataset
  */
 private Matrix getTransposedMatrix(Instances data) {
   final int docCount = data.numInstances();
   double[][] termByDoc = new double[data.numAttributes()][docCount];
   for (int doc = 0; doc < docCount; doc++) {
     Instance row = data.instance(doc);
     final int stored = row.numValues();
     for (int pos = 0; pos < stored; pos++) {
       // index(pos) maps the position in the sparse storage to the attribute index
       termByDoc[row.index(pos)][doc] = row.valueSparse(pos);
     }
   }
   return new Matrix(termByDoc);
 }

Example 15

Source File: LinearModel.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Forms the response vector and the design matrix from the data.
 * Row 0 of the transposed design matrix is the constant term (all ones) and
 * row k (k >= 1) holds the values of attribute k-1, so the last attribute is
 * never used as a regressor (presumably it is expected to be the class -
 * confirm with callers).
 *
 * @param data the instances to build the model from
 */
public LinearModel(Instances data)
	{
		n = data.numInstances();
		m = data.numAttributes();	// one row per regressor, including the constant term
		y = data.attributeToDoubleArray(data.classIndex());
		Y = new Matrix(y, y.length);

		double[][] design = new double[m][n];
		int col = 0;
		while (col < n) {
			design[0][col] = 1;
			col++;
		}
		int row = 1;
		while (row < m) {
			design[row] = data.attributeToDoubleArray(row - 1);
			row++;
		}

		Xt = new Matrix(design);
		X = Xt.transpose();
	}

Example 16

Source File: ARAMNetwork.java From meka with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier by feeding every instance, in the order given by
  * {@code order}, to {@code updateClassifier}.
  * <p>
  * If the network has not been initialised yet ({@code numFeatures == -1}) it
  * is initialised from the layout of {@code D}. If it has been initialised and
  * the layout of {@code D} does not match {@code numFeatures} /
  * {@code numClasses}, the method returns without training and without
  * reporting an error.
  *
  * @param D set of instances serving as training data
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {

	// NOTE(review): the class index is used as the number of class attributes,
	// which presumably relies on the labels being the first L attributes - confirm.
	int L = D.classIndex();
	int featlength = (D.numAttributes() - L) * 2;
	int classlength = L * 2;

	// default presentation order: the instances as they appear in D
	if (this.order == null) {
		order = new ArrayList<Integer>();
		for (int j = 0; j < D.numInstances(); j++) {
			order.add(j);
		}
	}

	if (numFeatures == -1) {
		initARAM(featlength, classlength, roa, threshold);
	} else if (featlength != numFeatures || classlength != numClasses) {
		// data layout differs from the one the network was initialised with:
		// keep the existing behaviour and skip training silently
		return;
	}

	for (int i = 0; i < D.numInstances(); i++) {
		Instance instance = D.get(order.get(i));
		updateClassifier(instance);
	}
	System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+".");
 }

Example 17

Source File: ArffLexiconEvaluator.java From AffectiveTweets with GNU General Public License v3.0

4 votes

/**
 * Processes the lexicon file: registers a feature name for every numeric
 * attribute and for every value of every nominal attribute (the word attribute
 * is skipped), then maps each stemmed word to its numeric scores and nominal
 * associations, leaving out missing values.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
	Instances lexInstances;
	// the reader is only needed while the file is parsed; close it afterwards,
	// including when parsing fails
	try (BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile))) {
		lexInstances = new Instances(reader);
	}

	// set upper value for word index
	lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

	List<Attribute> numericAttributes = new ArrayList<Attribute>();
	List<Attribute> nominalAttributes = new ArrayList<Attribute>();

	// checks all numeric and nominal attributes and discards the word attribute
	for (int i = 0; i < lexInstances.numAttributes(); i++) {
		if (i == this.lexiconWordIndex.getIndex()) {
			continue;
		}
		Attribute attribute = lexInstances.attribute(i);
		if (attribute.isNumeric()) {
			numericAttributes.add(attribute);
			// adds the attribute name to the message-level features to be calculated
			this.featureNames.add(this.lexiconName + "-" + attribute.name());
		} else if (attribute.isNominal()) {
			nominalAttributes.add(attribute);
			// adds the attribute name together with the nominal value to the message-level features to be calculated
			int numValues = attribute.numValues();
			for (int j = 0; j < numValues; j++) {
				this.featureNames.add(this.lexiconName + "-" + attribute.name() + "-" + attribute.value(j));
			}
		}
	}

	// Maps all words with their affective scores discarding missing values
	for (Instance inst : lexInstances) {
		if (!inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
			continue;
		}
		String word = inst.stringValue(this.lexiconWordIndex.getIndex());
		// stems the word
		word = this.m_stemmer.stem(word);

		// map numeric scores
		if (!numericAttributes.isEmpty()) {
			Map<String,Double> wordVals = new HashMap<String,Double>();
			for (Attribute na : numericAttributes) {
				if (!weka.core.Utils.isMissingValue(inst.value(na))) {
					wordVals.put(na.name(), inst.value(na));
				}
			}
			this.numDict.put(word, wordVals);
		}

		// map nominal associations
		if (!nominalAttributes.isEmpty()) {
			Map<String,String> wordCounts = new HashMap<String,String>();
			for (Attribute no : nominalAttributes) {
				if (!weka.core.Utils.isMissingValue(inst.value(no))) {
					wordCounts.put(no.name(), no.value((int) inst.value(no)));
				}
			}
			// stored once after the map is filled; the list is non-empty here,
			// so this matches the previous put on every loop iteration
			this.nomDict.put(word, wordCounts);
		}
	}
}

Example 18

Source File: WAODE.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier: falls back to a ZeroR model when the data holds a
  * single attribute, otherwise collects class, attribute-value and
  * attribute-value-pair counts and the mutual information of each non-class
  * attribute.
  *
  * @param instances set of instances serving as training data
  * @throws Exception if the classifier has not been generated successfully
  */
 public void buildClassifier(Instances instances) throws Exception {

   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // only class? -> build ZeroR model
   if (instances.numAttributes() == 1) {
     System.err.println(
	  "Cannot build model (only class attribute present in data!), "
	  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(instances);
     return;
   }
   m_ZeroR = null;

   // reset variable
   m_NumClasses = instances.numClasses();
   m_ClassIndex = instances.classIndex();
   m_NumAttributes = instances.numAttributes();
   m_NumInstances = instances.numInstances();
   m_TotalAttValues = 0;

   // allocate space for attribute reference arrays
   m_StartAttIndex = new int[m_NumAttributes];
   m_NumAttValues = new int[m_NumAttributes];

   // give every non-class attribute a contiguous slice of the flat value
   // index space; the class gets start index -1 and is left out of the total
   for (int att = 0; att < m_NumAttributes; att++) {
     if (att == m_ClassIndex) {
       m_StartAttIndex[att] = -1;
       m_NumAttValues[att] = m_NumClasses;
       continue;
     }
     m_StartAttIndex[att] = m_TotalAttValues;
     m_NumAttValues[att] = instances.attribute(att).numValues();
     m_TotalAttValues += m_NumAttValues[att];
   }

   // allocate space for counts and frequencies
   m_ClassCounts = new double[m_NumClasses];
   m_AttCounts = new double[m_TotalAttValues];
   m_AttAttCounts = new double[m_TotalAttValues][m_TotalAttValues];
   m_ClassAttAttCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];
   m_Header = new Instances(instances, 0);

   // Calculate the counts
   for (int row = 0; row < m_NumInstances; row++) {
     int classVal = (int) instances.instance(row).classValue();
     m_ClassCounts[classVal]++;

     // flat index of the value each attribute takes in this instance (-1 for the class)
     int[] offsets = new int[m_NumAttributes];
     for (int att = 0; att < m_NumAttributes; att++) {
       if (att == m_ClassIndex) {
         offsets[att] = -1;
       } else {
         offsets[att] = m_StartAttIndex[att] + (int) instances.instance(row).value(att);
         m_AttCounts[offsets[att]]++;
       }
     }

     // count every ordered pair of non-class attribute values
     for (int first = 0; first < m_NumAttributes; first++) {
       if (offsets[first] == -1) {
         continue;
       }
       for (int second = 0; second < m_NumAttributes; second++) {
         if (offsets[second] == -1) {
           continue;
         }
         m_AttAttCounts[offsets[first]][offsets[second]]++;
         m_ClassAttAttCounts[classVal][offsets[first]][offsets[second]]++;
       }
     }
   }

   // compute mutual information between each attribute and class
   m_mutualInformation = new double[m_NumAttributes];
   for (int att = 0; att < m_NumAttributes; att++) {
     if (att != m_ClassIndex) {
       m_mutualInformation[att] = mutualInfo(att);
     }
   }
 }

Example 19

Source File: ARAMNetworkSparse.java From meka with GNU General Public License v3.0

4 votes

/**
  * Generates the classifier by feeding every instance, in the order given by
  * {@code order}, to {@code updateClassifier}.
  * <p>
  * If the network has not been initialised yet ({@code numFeatures == -1}) it
  * is initialised from the layout of {@code D}. If it has been initialised and
  * the layout of {@code D} does not match {@code numFeatures} /
  * {@code numClasses}, the method returns without training and without
  * reporting an error.
  *
  * @param D set of instances serving as training data
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {

	// NOTE(review): the class index is used as the number of class attributes,
	// which presumably relies on the labels being the first L attributes - confirm.
	int L = D.classIndex();
	int featlength = (D.numAttributes() - L) * 2;
	int classlength = L * 2;

	// default presentation order: the instances as they appear in D
	if (this.order == null) {
		order = new ArrayList<Integer>();
		for (int j = 0; j < D.numInstances(); j++) {
			order.add(j);
		}
	}

	if (numFeatures == -1) {
		initARAM(featlength, classlength, roa, threshold);
	} else if (featlength != numFeatures || classlength != numClasses) {
		// data layout differs from the one the network was initialised with:
		// keep the existing behaviour and skip training silently
		return;
	}

	for (int i = 0; i < D.numInstances(); i++) {
		Instance instance = D.get(order.get(i));
		updateClassifier(instance);
	}
	System.out.println("Training done, used "+numCategories+" neurons with rho ="+roa+".");
 }

Example 20

Source File: ArffLexiconWordLabeller.java From AffectiveTweets with GNU General Public License v3.0

2 votes

/**
 * Processes the lexicon file: collects every numeric or nominal attribute
 * other than the word attribute, then maps each stemmed word to the values the
 * instance holds for those attributes.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
	Instances lexInstances;
	// the reader is only needed while the file is parsed; close it afterwards,
	// including when parsing fails
	try (BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile))) {
		lexInstances = new Instances(reader);
	}

	// set upper value for word index
	lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

	// checks all numeric and nominal attributes and discards the word attribute
	for (int i = 0; i < lexInstances.numAttributes(); i++) {
		if (i == this.lexiconWordIndex.getIndex()) {
			continue;
		}
		Attribute attribute = lexInstances.attribute(i);
		if (attribute.isNumeric() || attribute.isNominal()) {
			this.attributes.add(attribute);
		}
	}

	// Maps all words with their attribute values; unlike the per-type maps used
	// elsewhere, missing values are stored as they are
	for (Instance inst : lexInstances) {
		if (!inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
			continue;
		}
		String word = inst.stringValue(this.lexiconWordIndex.getIndex());
		// stems the word
		word = this.m_stemmer.stem(word);

		if (!attributes.isEmpty()) {
			Map<Attribute,Double> wordVals = new HashMap<Attribute,Double>();
			for (Attribute na : attributes) {
				wordVals.put(na, inst.value(na));
			}
			this.attValMap.put(word, wordVals);
		}
	}
}