Java Code Examples for weka.core.Instances#enumerateAttributes()

The following examples show how to use weka.core.Instances#enumerateAttributes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Tells whether the given dataset contains at least one nominal attribute with three or more
 * values.
 *
 * @param wekaInstances
 *            dataset whose attributes are inspected
 * @param ignoreClassAttribute
 *            if true, the class attribute is never taken into account
 * @return true if a considered nominal attribute has more than 2 values, false otherwise
 */
public static boolean needsBinarization(final Instances wekaInstances, final boolean ignoreClassAttribute) {
	final Attribute classAttr = wekaInstances.classAttribute();

	// The class attribute is only examined when the caller did not ask to ignore it.
	if (!ignoreClassAttribute) {
		if (classAttr.isNominal() && classAttr.numValues() >= 3) {
			return true;
		}
	}

	// Walk over the enumerated attributes; only nominal ones other than the class attribute matter
	// here, because the class attribute was handled (or deliberately skipped) by the check above.
	final Enumeration<Attribute> remaining = wekaInstances.enumerateAttributes();
	while (remaining.hasMoreElements()) {
		final Attribute candidate = remaining.nextElement();
		final boolean relevant = candidate.isNominal() && candidate != classAttr;
		if (relevant && candidate.numValues() >= 3) {
			return true;
		}
	}
	return false;
}
 
Example 2
Source File: RuleStats.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Counts every condition that could possibly appear in a rule
  * built on the given data.  A nominal attribute contributes one
  * condition per value; any other attribute contributes two
  * conditions per distinct value in the data, because &lt;= and
  * &gt;= are counted as different conditions.
  *
  * @param data the given data
  * @return number of all conditions of the data
  */
 public static double numAllConditions(Instances data){
   double conditions = 0;
   for (Enumeration attributes = data.enumerateAttributes(); attributes.hasMoreElements();) {
     Attribute current = (Attribute) attributes.nextElement();
     double contribution = current.isNominal()
       ? (double) current.numValues()
       : 2.0 * (double) data.numDistinctValues(current);
     conditions += contribution;
   }
   return conditions;
 }
 
Example 3
Source File: BestConf.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Generates a new set of sample points over the attributes enumerated from the given header and
 * appends the header's class attribute as the last (class) attribute of the result.
 *
 * @param number     number of additional points requested
 * @param existedNum number of points that already exist; the sampler is asked for
 *                   {@code number + existedNum} points in total
 * @param header     dataset whose enumerated attributes and class attribute define the result's
 *                   structure
 * @return the sampled points, with the class index set to the last attribute
 */
public Instances generateMore(int number, int existedNum,
		Instances header) {
	// Gather the attributes delivered by the header's attribute enumeration.
	ArrayList<Attribute> attributes = new ArrayList<Attribute>();
	for (Enumeration<Attribute> it = header.enumerateAttributes(); it.hasMoreElements();) {
		attributes.add(it.nextElement());
	}

	// NOTE(review): the meaning of the trailing 'false' flag is defined by LHSInitializer - confirm there.
	int sampleCount = number + existedNum;
	Instances result = LHSInitializer.getMultiDimContinuous(attributes, sampleCount, false);

	// Append the class attribute at the end and mark it as the class.
	int classPosition = result.numAttributes();
	result.insertAttributeAt(header.classAttribute(), classPosition);
	result.setClassIndex(result.numAttributes() - 1);
	return result;
}
 
Example 4
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Collects the attributes delivered by the dataset's attribute enumeration into a new list and,
 * on request, appends the class attribute at the end of that list.
 *
 * @param inst
 *            dataset whose attributes are collected
 * @param includeClassAttribute
 *            if true, the class attribute is added as the last list element
 * @return a newly created list of attributes
 */
public static List<Attribute> getAttributes(final Instances inst, final boolean includeClassAttribute) {
	final List<Attribute> result = new ArrayList<>();
	for (Enumeration<Attribute> it = inst.enumerateAttributes(); it.hasMoreElements();) {
		result.add(it.nextElement());
	}
	if (!includeClassAttribute) {
		return result;
	}
	result.add(inst.classAttribute());
	return result;
}
 
Example 5
Source File: Id3.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Recursively builds the Id3 tree rooted at this node from the given data.
 * The node becomes a leaf when no instances reach it or when the best
 * information gain is zero; otherwise one successor per value of the
 * chosen attribute is created and built from the corresponding split.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

  // Nothing reached this node: leaf with a missing class value.
  if (data.numInstances() == 0) {
    m_Attribute = null;
    m_ClassValue = Utils.missingValue();
    m_Distribution = new double[data.numClasses()];
    return;
  }

  // Score every enumerated attribute by its information gain and pick the best one.
  double[] gains = new double[data.numAttributes()];
  for (Enumeration candidates = data.enumerateAttributes(); candidates.hasMoreElements();) {
    Attribute candidate = (Attribute) candidates.nextElement();
    gains[candidate.index()] = computeInfoGain(data, candidate);
  }
  m_Attribute = data.attribute(Utils.maxIndex(gains));

  // Positive gain: split on the chosen attribute and grow one subtree per value.
  if (!Utils.eq(gains[m_Attribute.index()], 0)) {
    Instances[] subsets = splitData(data, m_Attribute);
    m_Successors = new Id3[m_Attribute.numValues()];
    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
      m_Successors[branch] = new Id3();
      m_Successors[branch].makeTree(subsets[branch]);
    }
    return;
  }

  // Zero gain: turn this node into a leaf holding the normalized class distribution.
  m_Attribute = null;
  m_Distribution = new double[data.numClasses()];
  for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
    Instance row = (Instance) rows.nextElement();
    m_Distribution[(int) row.classValue()]++;
  }
  Utils.normalize(m_Distribution);
  m_ClassValue = Utils.maxIndex(m_Distribution);
  m_ClassAttribute = data.classAttribute();
}
 
Example 6
Source File: Ridor.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
    * Build one rule using the growing data.
    *
    * Repeatedly picks, among the attributes not yet marked as used, the
    * antecedent with the largest positive information gain, appends it to
    * m_Antds and continues with the data covered by it.  Growing stops when
    * no antecedent with positive gain exists, no data is left, the rule is
    * perfectly accurate, or the unused-attribute counter reaches zero.
    *
    * @param data the growing data used to build the rule
    */
   private void grow(Instances data){
     Instances growData = new Instances(data);

     m_AccuG = computeDefAccu(growData);
     m_CoverG = growData.sumOfWeights();
     /* Compute the default accurate rate of the growing data */
     double defAcRt = m_AccuG / m_CoverG;

     /* Keep the record of which attributes have already been used.
        The array is addressed by Attribute.index() everywhere; a freshly
        allocated boolean array is already all false. */
     boolean[] used = new boolean[growData.numAttributes()];
     int numUnused = used.length;

     boolean isContinue = true; // The stopping criterion of this rule

     while (isContinue){
       double maxInfoGain = 0;       // We require that infoGain be positive

       /* Best antecedent found in this round and the data it covers */
       Antd oneAntd = null;
       Instances coverData = null;
       Enumeration enumAttr = growData.enumerateAttributes();

       /* Build one condition based on all attributes not used yet */
       while (enumAttr.hasMoreElements()){
         Attribute att = (Attribute)(enumAttr.nextElement());

         Antd antd = null;
         if(att.isNumeric())
           antd = new NumericAntd(att);
         else
           antd = new NominalAntd(att);

         /* Look the attribute up by its own index.  The attribute
            enumeration leaves out the class attribute, so a running
            counter would drift away from Attribute.index() (which is
            what marks an attribute as used below) whenever the class
            attribute is not the last one. */
         if(!used[att.index()]){
           /* Compute the best information gain for each attribute,
              it's stored in the antecedent formed by this attribute.
              This procedure returns the data covered by the antecedent */
           Instances coveredData = computeInfoGain(growData, defAcRt, antd);
           if(coveredData != null){
             double infoGain = antd.getMaxInfoGain();
             if(Utils.gr(infoGain, maxInfoGain)){
               oneAntd = antd;
               coverData = coveredData;
               maxInfoGain = infoGain;
             }
           }
         }
       }

       /* No antecedent with positive gain: the rule cannot grow further */
       if(oneAntd == null) return;

       // Numeric attributes can be used more than once
       if(!oneAntd.getAttr().isNumeric()){
         used[oneAntd.getAttr().index()] = true;
         numUnused--;
       }

       m_Antds.addElement((Object)oneAntd);
       growData = coverData; // Grow data size is shrinking

       defAcRt = oneAntd.getAccuRate();

       /* Stop if no more data, rule perfect, no more attributes */
       if(Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0))
         isContinue = false;
     }
   }
 
Example 7
Source File: OneR.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Generates the classifier.  If the data consists of the class attribute
  * only, a ZeroR model is built instead; otherwise a rule is built for each
  * enumerated attribute and the one with the highest m_correct is kept.
  *
  * @param instances the instances to be used for building the classifier
  * @throws Exception if the classifier can't be built successfully
  */
 public void buildClassifier(Instances instances) 
   throws Exception {

   // can classifier handle the data?
   getCapabilities().testWithFail(instances);

   // work on a copy without the instances whose class is missing
   Instances data = new Instances(instances);
   data.deleteWithMissingClass();

   // only class? -> build ZeroR model
   if (data.numAttributes() == 1) {
     System.err.println(
	  "Cannot build model (only class attribute present in data!), "
	  + "using ZeroR model instead!");
     m_ZeroR = new weka.classifiers.rules.ZeroR();
     m_ZeroR.buildClassifier(data);
     return;
   }
   m_ZeroR = null;

   // try each attribute in turn and remember the best rule seen so far
   boolean foundRule = false;
   for (Enumeration attributes = instances.enumerateAttributes(); attributes.hasMoreElements();) {
     try {
       OneRRule candidate = newRule((Attribute) attributes.nextElement(), data);

       // the first rule is always taken; later ones only if strictly better
       if (!foundRule || candidate.m_correct > m_rule.m_correct) {
         m_rule = candidate;
       }
       foundRule = true;
     } catch (Exception ignored) {
       // best effort: an attribute for which no rule can be built is skipped
     }
   }

   if (!foundRule)
     throw new WekaException("No attributes found to work with!");
 }
 
Example 8
Source File: WEKAMetaminer.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds this meta miner once: stores the attribute enumeration of the data set meta features,
 * turns the meta features into a matrix, computes the relative rank matrix of the pipelines,
 * builds the pipeline characterizer and finally builds the similarity measure from all three.
 *
 * @param distinctPipelines
 *            pipelines handed to the pipeline characterizer
 * @param metaFeatureInformation
 *            data set meta features; each instance becomes one matrix row
 * @param performanceValues
 *            performance values from which the relative rank matrix is computed
 * @throws AlgorithmException
 *             if this meta miner has already been built
 * @throws InterruptedException
 *             declared by the signature; NOTE(review): presumably raised by one of the delegated
 *             build steps - confirm
 */
public void build(final List<ComponentInstance> distinctPipelines, final Instances metaFeatureInformation,
		final double[][][] performanceValues) throws AlgorithmException, InterruptedException {
	// Building twice is not allowed.
	if (this.hasBeenBuilt) {
		throw new AlgorithmException("MetaMiner has already been built!");
	}

	// ----- Data set Characterization -----

	// NOTE(review): an Enumeration can be traversed only once - confirm how this field is consumed.
	this.dataSetMetaFeaturesAttributes = metaFeatureInformation.enumerateAttributes();

	// Matrix X: one row per data set, one column per attribute.
	final int numDatasets = metaFeatureInformation.size();
	INDArray datasetMatrix = Nd4j.create(numDatasets, metaFeatureInformation.numAttributes());
	int row = 0;
	while (row < numDatasets) {
		double[] features = metaFeatureInformation.get(row).toDoubleArray();
		datasetMatrix.putRow(row, Nd4j.create(features));
		row++;
	}
	this.logger.debug("Dataset metafeatures: {} x {}", datasetMatrix.rows(), datasetMatrix.columns());

	// ----- Pipeline Characterization -----

	// Relative performance ranks of the pipelines on the data sets.
	this.logger.info("Computing relative performance Matrix.");
	INDArray ranks = this.similarityComputer.computeRelativeRankMatrix(performanceValues);
	this.logger.info("Rank matrix: {} x {}", ranks.rows(), ranks.columns());
	this.logger.debug("Rank Matrix: {}", ranks);

	// The characterizer is built on the list of distinct pipelines.
	this.logger.info("WEKAMetaminer: Initializing pipeline characterizer.");
	this.pipelineCharacterizer.build(distinctPipelines);

	// Matrix W: characterization of the base pipelines.
	INDArray pipelineMatrix = Nd4j.create(this.pipelineCharacterizer.getCharacterizationsOfTrainingExamples());
	this.logger.debug("WEKAMetaminer: Pipeline Metafeatures: {} x {}", pipelineMatrix.rows(), pipelineMatrix.columns());

	// Similarity measure over X, W and the rank matrix.
	this.logger.info("WEKAMetaminer: Create similarity measure.");
	this.similarityMeasure.build(datasetMatrix, pipelineMatrix, ranks);

	// Mark as built only after every step succeeded.
	this.hasBeenBuilt = true;
}
 
Example 9
Source File: Converter.java    From toolbox with Apache License 2.0 2 votes vote down vote up
/**
 * Converts the attributes of a {@link weka.core.Instances} object into a set of {@link Attributes}.
 * The work is delegated to the overload that takes the attribute enumeration together with the
 * class attribute of the given object.
 * @param modelContext a {@link weka.core.Instances} object.
 * @return a set of {@link Attributes}.
 */
public static Attributes convertAttributes(Instances modelContext){
    // Kept as a raw Enumeration so the call resolves to the overload exactly as before.
    final Enumeration wekaAttributeEnum = modelContext.enumerateAttributes();
    return convertAttributes(wekaAttributeEnum, modelContext.classAttribute());
}