Java Code Examples for weka.core.Attribute#isNominal()

The following examples show how to use weka.core.Attribute#isNominal(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: RotationForest.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds random instances to the dataset.
 *
 * <p>Numeric attributes receive {@code random.nextDouble()}, nominal
 * attributes receive a random value index below {@code att.numValues()}.
 * Attributes of any other type keep the array default of 0.
 *
 * @param dataset the dataset
 * @param numInstances the number of instances
 * @param random a random number generator
 */
protected void addRandomInstances( Instances dataset, int numInstances,
                                   Random random ) {
  int n = dataset.numAttributes();
  for( int i = 0; i < numInstances; i++ ) {
    // Allocate a fresh value array for every instance. Reusing a single
    // buffer is unsafe because DenseInstance keeps the reference to the
    // array it is given (and copies of it are shallow), so later iterations
    // would overwrite the values of instances that were already added.
    double [] v = new double[ n ];
    for( int j = 0; j < n; j++ ) {
      Attribute att = dataset.attribute( j );
      if( att.isNumeric() ) {
        v[ j ] = random.nextDouble();
      }
      else if ( att.isNominal() ) {
        v[ j ] = random.nextInt( att.numValues() );
      }
    }
    dataset.add( new DenseInstance( 1, v ) );
  }
}
 
Example 2
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Returns true if there is at least one nominal attribute in the given dataset that has more than 2 values.
 *
 * @param wekaInstances
 *            dataset that is checked
 * @param ignoreClassAttribute
 *            if true class attribute is ignored.
 * @return true if a nominal attribute (including the class attribute unless it is ignored) has at least 3 values
 */
public static boolean needsBinarization(final Instances wekaInstances, final boolean ignoreClassAttribute) {
	// Only ask for the class attribute when a class index is set, so datasets without a class can be checked as well.
	Attribute classAttribute = wekaInstances.classIndex() >= 0 ? wekaInstances.classAttribute() : null;
	if (!ignoreClassAttribute && classAttribute != null && classAttribute.isNominal() && classAttribute.numValues() >= 3) {
		return true;
	}
	// iterate over every attribute and check.
	for (Enumeration<Attribute> attributeEnum = wekaInstances.enumerateAttributes(); attributeEnum.hasMoreElements();) {
		Attribute currentAttr = attributeEnum.nextElement();
		if (!currentAttr.isNominal()) {
			continue; // ignore attributes that aren't nominal.
		}
		if (currentAttr == classAttribute) {
			// ignore class attribute (already checked above in case ignoreClassAttribute==false):
			continue;
		}
		if (currentAttr.numValues() >= 3) {
			return true;
		}
	}
	return false;
}
 
Example 3
Source File: OneR.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a rule that branches on the supplied attribute.
 *
 * @param attr the attribute to branch on
 * @param data the data to be used for creating the rule
 * @return the generated rule
 * @throws Exception if the rule can't be built successfully
 */
public OneRRule newRule(Attribute attr, Instances data) throws Exception {

  // one counter per class value; handed to the rule builder selected below
  final int[] missingPerClass = new int[data.classAttribute().numValues()];

  final OneRRule rule = attr.isNominal()
    ? newNominalRule(attr, data, missingPerClass)
    : newNumericRule(attr, data, missingPerClass);

  // the class with the highest missing-value count becomes the missing value class
  final int bestClass = Utils.maxIndex(missingPerClass);
  if (missingPerClass[bestClass] == 0) {
    rule.m_missingValueClass = -1; // signal for no missing value class
  } else {
    rule.m_missingValueClass = bestClass;
    rule.m_correct += missingPerClass[bestClass];
  }
  return rule;
}
 
Example 4
Source File: ContractRotationForest.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds random instances to the dataset.
 *
 * <p>Numeric attributes receive {@code random.nextDouble()}, nominal
 * attributes receive a random value index below {@code att.numValues()}.
 * Attributes of any other type keep the array default of 0.
 *
 * @param dataset the dataset
 * @param numInstances the number of instances
 * @param random a random number generator
 */
protected void addRandomInstances( Instances dataset, int numInstances,
                                   Random random ) {
  int n = dataset.numAttributes();
  for( int i = 0; i < numInstances; i++ ) {
    // Allocate a fresh value array for every instance. Reusing a single
    // buffer is unsafe because DenseInstance keeps the reference to the
    // array it is given (and copies of it are shallow), so later iterations
    // would overwrite the values of instances that were already added.
    double [] v = new double[ n ];
    for( int j = 0; j < n; j++ ) {
      Attribute att = dataset.attribute( j );
      if( att.isNumeric() ) {
        v[ j ] = random.nextDouble();
      }
      else if ( att.isNominal() ) {
        v[ j ] = random.nextInt( att.numValues() );
      }
    }
    dataset.add( new DenseInstance( 1, v ) );
  }
}
 
Example 5
Source File: Analyzer.java    From NLIWOD with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs every configured analyzer on the question and collects the results in a single instance.
 * @param q question string
 * @return feature vector for the input question
 */
public Instance analyze(String q) {
	final Instance features = new DenseInstance(fvWekaAttributes.size());

	for (IAnalyzer analyzer : analyzers) {
		if (analyzer instanceof PartOfSpeechTags) {
			// special case: part-of-speech tags set a whole group of attributes (36 per the original note)
			analyzePOS(features, (PartOfSpeechTags) analyzer, q);
		} else if (analyzer instanceof Dependencies) {
			// special case: dependencies set a whole group of attributes (18 per the original note)
			analyzeDeps(features, (Dependencies) analyzer, q);
		} else {
			// regular analyzers contribute exactly one attribute value
			final Attribute attribute = analyzer.getAttribute();
			if (attribute.isNumeric()) {
				features.setValue(attribute, (double) analyzer.analyze(q));
			} else if (attribute.isNominal() || attribute.isString()) {
				final String value = (String) analyzer.analyze(q);
				features.setValue(attribute, value);
				features.setDataset(null);
			}
		}
	}
	return features;
}
 
Example 6
Source File: BinaryItem.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructor.
 * 
 * @param att the attribute that backs this item.
 * @param valueIndex the index of the value for this item.
 * @throws Exception if the backing attribute is not binary or unary.
 */
public BinaryItem(Attribute att, int valueIndex) throws Exception {
  super(att, valueIndex);
  
  if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) {
    throw new Exception("BinaryItem must be constructed using a nominal attribute" +
    		" with at most 2 values!");
  }
}
 
Example 7
Source File: MekaInstance.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads the value of the given attribute from the wrapped element.
 *
 * @param att the attribute whose value is requested
 * @return the attribute's label at the stored index for nominal, string, relational, date or regular attributes;
 *         otherwise the stored value itself
 */
private Object transformAttributeValueToData(final Attribute att) {
	final boolean labelBacked = att.isNominal() || att.isString() || att.isRelationValued() || att.isDate() || att.isRegular();
	if (!labelBacked) {
		// hand the stored value back unchanged
		return this.getElement().value(att);
	}
	// the stored value is truncated to an int and used as an index into the attribute's values
	return att.value((int) this.getElement().value(att));
}
 
Example 8
Source File: MekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts a WEKA attribute into the corresponding attribute type.
 *
 * @param att the WEKA attribute to convert
 * @return a numeric attribute for numeric input, or a categorical attribute carrying all labels for nominal input
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String name = att.name();
	if (att.isNumeric()) {
		return new NumericAttribute(name);
	}
	if (!att.isNominal()) {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}

	// copy the labels in index order so the categorical domain mirrors the nominal attribute
	final List<String> labels = new LinkedList<>();
	int labelIndex = 0;
	while (labelIndex < att.numValues()) {
		labels.add(att.value(labelIndex));
		labelIndex++;
	}
	return new IntBasedCategoricalAttribute(name, labels);
}
 
Example 9
Source File: WekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts a WEKA attribute into the corresponding attribute type.
 *
 * @param att the WEKA attribute to convert
 * @return a numeric attribute for numeric input, or a categorical attribute carrying all labels for nominal input
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String name = att.name();
	if (att.isNumeric()) {
		return new NumericAttribute(name);
	}
	if (!att.isNominal()) {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}

	// copy the labels in index order so the categorical domain mirrors the nominal attribute
	final List<String> labels = new LinkedList<>();
	int labelIndex = 0;
	while (labelIndex < att.numValues()) {
		labels.add(att.value(labelIndex));
		labelIndex++;
	}
	return new IntBasedCategoricalAttribute(name, labels);
}
 
Example 10
Source File: WekaInstance.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads the value of the given attribute from the wrapped element.
 *
 * @param att the attribute whose value is requested
 * @return the attribute's label at the stored index for nominal, string, relational, date or regular attributes;
 *         otherwise the stored value itself
 */
private Object transformAttributeValueToData(final Attribute att) {
	final boolean labelBacked = att.isNominal() || att.isString() || att.isRelationValued() || att.isDate() || att.isRegular();
	if (!labelBacked) {
		// hand the stored value back unchanged
		return this.getElement().value(att);
	}
	// the stored value is truncated to an int and used as an index into the attribute's values
	return att.value((int) this.getElement().value(att));
}
 
Example 11
Source File: WekaPipelineValidityCheckingNodeEvaluator.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks whether the data contains a nominal attribute, other than the class attribute, with more than two values.
 *
 * @return true if such an attribute exists, false otherwise
 */
private boolean multiValuedNominalAttributesExist() {
	final Instances data = this.getData().getInstances();
	for (int idx = 0; idx < data.numAttributes(); idx++) {
		final Attribute candidate = data.attribute(idx);
		if (candidate == data.classAttribute()) {
			continue; // the class attribute is never counted
		}
		if (candidate.isNominal() && candidate.numValues() > 2) {
			return true;
		}
	}
	return false;
}
 
Example 12
Source File: NominalToBinary.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes, for every nominal attribute, the weighted average class value of
 * each attribute value and stores the sort order of those averages in
 * m_Indices. Entries for non-nominal attributes stay empty.
 */
private void computeAverageClassValues() {

  final double[][] avgClassValues = new double[getInputFormat().numAttributes()][0];
  m_Indices = new int[getInputFormat().numAttributes()][0];

  for (int attIdx = 0; attIdx < getInputFormat().numAttributes(); attIdx++) {
    final Attribute att = getInputFormat().attribute(attIdx);
    if (!att.isNominal()) {
      continue;
    }

    // weighted class-value sums and weight totals, one slot per attribute value
    final double[] classSums = new double[att.numValues()];
    final double[] weightSums = new double[att.numValues()];
    avgClassValues[attIdx] = classSums;

    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      final Instance inst = getInputFormat().instance(row);
      if (inst.classIsMissing() || inst.isMissing(attIdx)) {
        continue; // instances without class or attribute value contribute nothing
      }
      final int valueIdx = (int) inst.value(attIdx);
      weightSums[valueIdx] += inst.weight();
      classSums[valueIdx] += inst.weight() * inst.classValue();
    }

    final double overallSum = Utils.sum(classSums);
    final double overallWeight = Utils.sum(weightSums);
    if (Utils.gr(overallWeight, 0)) {
      for (int valueIdx = 0; valueIdx < att.numValues(); valueIdx++) {
        if (Utils.gr(weightSums[valueIdx], 0)) {
          classSums[valueIdx] /= weightSums[valueIdx];
        } else {
          // values never observed fall back to the overall average
          classSums[valueIdx] = overallSum / overallWeight;
        }
      }
    }
    m_Indices[attIdx] = Utils.sort(classSums);
  }
}
 
Example 13
Source File: RuleStats.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Counts all conditions that could possibly appear in a rule for the given
 * data. A nominal attribute contributes its number of values; any other
 * attribute contributes twice its number of distinct values, because
 * {@code <=} and {@code >=} are counted as different conditions.
 *
 * @param data the given data
 * @return number of all conditions of the data
 */
public static double numAllConditions(Instances data){
  double total = 0;
  for (Enumeration<?> atts = data.enumerateAttributes(); atts.hasMoreElements(); ) {
    final Attribute att = (Attribute) atts.nextElement();
    total += att.isNominal()
      ? att.numValues()
      : 2.0 * data.numDistinctValues(att);
  }
  return total;
}
 
Example 14
Source File: NaiveBayesSimple.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * <p>For each class the prior is multiplied with one factor per non-missing
 * attribute: the stored count entry for nominal attributes, the normal
 * density for all others. The result is normalized before it is returned.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  final double[] probs = new double[instance.numClasses()];

  for (int cls = 0; cls < instance.numClasses(); cls++) {
    double likelihood = 1;
    final Enumeration<?> atts = instance.enumerateAttributes();
    // attIndex advances for every enumerated attribute, including skipped ones
    for (int attIndex = 0; atts.hasMoreElements(); attIndex++) {
      final Attribute attribute = (Attribute) atts.nextElement();
      if (instance.isMissing(attribute)) {
        continue;
      }
      if (attribute.isNominal()) {
        likelihood *= m_Counts[cls][attIndex][(int) instance.value(attribute)];
      } else {
        likelihood *= normalDens(instance.value(attribute),
                                 m_Means[cls][attIndex],
                                 m_Devs[cls][attIndex]);
      }
    }
    probs[cls] = likelihood * m_Priors[cls];
  }

  // Normalize probabilities
  Utils.normalize(probs);

  return probs;
}
 
Example 15
Source File: DecisionStump.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Renders the value to use for the given distribution as a string: the index
 * of the largest entry for a nominal attribute, otherwise the first entry.
 *
 * @param c the attribute to get the value for
 * @param dist the distribution to extract the value
 * @return the value
 */
protected String sourceClass(Attribute c, double []dist) {
  return c.isNominal()
    ? Integer.toString(Utils.maxIndex(dist))
    : Double.toString(dist[0]);
}
 
Example 16
Source File: XMLInstances.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Adds the attribute to the XML structure. Sub-attributes of a
 * relation-valued attribute are added recursively.
 *
 * @param parent	the parent node to add the attribute node as child
 * @param att		the attribute to add
 */
protected void addAttribute(Element parent, Attribute att) {
  final Element node = m_Document.createElement(TAG_ATTRIBUTE);
  parent.appendChild(node);

  // XML attributes
  // name
  node.setAttribute(ATT_NAME, validContent(att.name()));

  // type ("???" marks a type that is not known here)
  final String typeValue;
  switch (att.type()) {
    case Attribute.NUMERIC:
      typeValue = VAL_NUMERIC;
      break;
    case Attribute.DATE:
      typeValue = VAL_DATE;
      break;
    case Attribute.NOMINAL:
      typeValue = VAL_NOMINAL;
      break;
    case Attribute.STRING:
      typeValue = VAL_STRING;
      break;
    case Attribute.RELATIONAL:
      typeValue = VAL_RELATIONAL;
      break;
    default:
      typeValue = "???";
  }
  node.setAttribute(ATT_TYPE, typeValue);

  // labels
  if (att.isNominal()) {
    final Element labels = m_Document.createElement(TAG_LABELS);
    node.appendChild(labels);
    for (Enumeration<?> values = att.enumerateValues(); values.hasMoreElements(); ) {
      final String labelText = values.nextElement().toString();
      final Element label = m_Document.createElement(TAG_LABEL);
      labels.appendChild(label);
      label.appendChild(m_Document.createTextNode(validContent(labelText)));
    }
  }

  // format
  if (att.isDate()) {
    node.setAttribute(ATT_FORMAT, validContent(att.getDateFormat()));
  }

  // class
  if (m_Instances.classIndex() > -1 && att == m_Instances.classAttribute()) {
    node.setAttribute(ATT_CLASS, VAL_YES);
  }

  // add meta-data
  if ( (att.getMetadata() != null) && (att.getMetadata().size() > 0) ) {
    final Element metadata = m_Document.createElement(TAG_METADATA);
    node.appendChild(metadata);
    for (Enumeration<?> keys = att.getMetadata().propertyNames(); keys.hasMoreElements(); ) {
      final String key = keys.nextElement().toString();
      final Element property = m_Document.createElement(TAG_PROPERTY);
      metadata.appendChild(property);
      property.setAttribute(ATT_NAME, validContent(key));
      property.appendChild(m_Document.createTextNode(validContent(att.getMetadata().getProperty(key, ""))));
    }
  }

  // relational attribute?
  if (att.isRelationValued()) {
    final Element subAttributes = m_Document.createElement(TAG_ATTRIBUTES);
    node.appendChild(subAttributes);
    for (int idx = 0; idx < att.relation().numAttributes(); idx++) {
      addAttribute(subAttributes, att.relation().attribute(idx));
    }
  }
}
 
Example 17
Source File: InputMappedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds an instance laid out according to the model header from an incoming
 * instance. The attribute mapping is regenerated first when no input header is
 * stored yet, when the incoming header differs from the stored one, or when no
 * attribute map exists.
 *
 * @param incoming the instance to map
 * @return a new instance attached to the model header; values that cannot be
 *         mapped are set to missing
 * @throws Exception propagated from the mapping helpers invoked here
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {

  final boolean headerChanged =
    m_inputHeader == null || !m_inputHeader.equalHeaders(incoming.dataset());
  if (headerChanged) {
    m_inputHeader = incoming.dataset();
  }

  if (headerChanged || m_attributeMap == null) {
    m_initialTestStructureKnown = false;
    regenerateMapping();
    m_vals = null;

    if (!m_suppressMappingReport) {
      System.out.println(createMappingReport().toString());
    }
  }

  m_vals = new double[m_modelHeader.numAttributes()];

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    final Attribute modelAtt = m_modelHeader.attribute(i);
    // lookup kept from the original code; its result is not used further
    final Attribute incomingAtt = m_inputHeader.attribute(m_attributeMap[i]);

    final double incomingValue = incoming.value(m_attributeMap[i]);
    if (Utils.isMissingValue(incomingValue)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingValue;
    } else if (modelAtt.isNominal()) {
      // translate the incoming value index into the model's value index
      final int mapped = m_nominalValueMap[i][(int) incomingValue];
      m_vals[i] = (mapped == NO_MATCH) ? Utils.missingValue() : mapped;
    }
  }

  final Instance mappedInstance = new DenseInstance(incoming.weight(), m_vals);
  mappedInstance.setDataset(m_modelHeader);

  return mappedInstance;
}
 
Example 18
Source File: NaiveBayesSimple.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns a description of the classifier: per class the prior, followed by
 * the stored counts of every nominal attribute or the mean and standard
 * deviation of every other attribute.
 *
 * @return a description of the classifier as a string.
 */
public String toString() {

  if (m_Instances == null) {
    return "Naive Bayes (simple): No model built yet.";
  }
  try {
    final StringBuilder text = new StringBuilder("Naive Bayes (simple)");

    for (int cls = 0; cls < m_Instances.numClasses(); cls++) {
      text.append("\n\nClass ")
          .append(m_Instances.classAttribute().value(cls))
          .append(": P(C) = ")
          .append(Utils.doubleToString(m_Priors[cls], 10, 8))
          .append("\n\n");

      final Enumeration<?> atts = m_Instances.enumerateAttributes();
      for (int attIndex = 0; atts.hasMoreElements(); attIndex++) {
        final Attribute attribute = (Attribute) atts.nextElement();
        text.append("Attribute ").append(attribute.name()).append("\n");

        if (attribute.isNominal()) {
          // header row with the labels, then a row with the matching counts
          for (int v = 0; v < attribute.numValues(); v++) {
            text.append(attribute.value(v)).append("\t");
          }
          text.append("\n");
          for (int v = 0; v < attribute.numValues(); v++) {
            text.append(Utils.doubleToString(m_Counts[cls][attIndex][v], 10, 8))
                .append("\t");
          }
        } else {
          text.append("Mean: ")
              .append(Utils.doubleToString(m_Means[cls][attIndex], 10, 8))
              .append("\t");
          text.append("Standard Deviation: ")
              .append(Utils.doubleToString(m_Devs[cls][attIndex], 10, 8));
        }
        text.append("\n\n");
      }
    }

    return text.toString();
  } catch (Exception e) {
    return "Can't print Naive Bayes classifier!";
  }
}
 
Example 19
Source File: DecisionStump.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns a description of the classifier: the classification for each of the
 * three branches (both sides of the split and the missing case) and, for a
 * nominal class attribute, the class distribution of each branch.
 *
 * @return a description of the classifier as a string.
 */
public String toString(){

  // only ZeroR model?
  if (m_ZeroR != null) {
    final String simpleName = this.getClass().getName().replaceAll(".*\\.", "");
    final StringBuilder buf = new StringBuilder();
    buf.append(simpleName).append("\n");
    // every character of the name becomes '=', giving an underline of equal length
    buf.append(simpleName.replaceAll(".", "=")).append("\n\n");
    buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
    buf.append(m_ZeroR.toString());
    return buf.toString();
  }

  if (m_Instances == null) {
    return "Decision Stump: No model built yet.";
  }
  try {
    final Attribute att = m_Instances.attribute(m_AttIndex);

    // textual form of the two split branches and the missing-value branch
    final String firstBranch;
    final String secondBranch;
    if (att.isNominal()) {
      firstBranch = att.name() + " = " + att.value((int)m_SplitPoint);
      secondBranch = att.name() + " != " + att.value((int)m_SplitPoint);
    } else {
      firstBranch = att.name() + " <= " + m_SplitPoint;
      secondBranch = att.name() + " > " + m_SplitPoint;
    }
    final String missingBranch = att.name() + " is missing";

    final StringBuilder text = new StringBuilder();
    text.append("Decision Stump\n\n");
    text.append("Classifications\n\n");
    text.append(firstBranch).append(" : ");
    text.append(printClass(m_Distribution[0]));
    text.append(secondBranch).append(" : ");
    text.append(printClass(m_Distribution[1]));
    text.append(missingBranch).append(" : ");
    text.append(printClass(m_Distribution[2]));

    if (m_Instances.classAttribute().isNominal()) {
      text.append("\nClass distributions\n\n");
      text.append(firstBranch).append("\n");
      text.append(printDist(m_Distribution[0]));
      text.append(secondBranch).append("\n");
      text.append(printDist(m_Distribution[1]));
      text.append(missingBranch).append("\n");
      text.append(printDist(m_Distribution[2]));
    }

    return text.toString();
  } catch (Exception e) {
    return "Can't print decision stump classifier!";
  }
}
 
Example 20
Source File: LabelWordVectors.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Builds the output format: all input attributes followed by one attribute
 * per numeric or nominal lexicon attribute, named
 * {@code <lexicon name>-<attribute name>}. The class index of the input is
 * carried over.
 *
 * @param inputFormat the format of the incoming data
 * @return the (empty) output structure
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	// Start with every attribute of the input format
	final ArrayList<Attribute> outputAtts = new ArrayList<Attribute>();
	for (int idx = 0; idx < inputFormat.numAttributes(); idx++) {
		outputAtts.add(inputFormat.attribute(idx));
	}

	// The dictionaries of the lexicons are initialized only in the first batch
	if (!this.isFirstBatchDone()) {
		this.initializeDicts();
	}

	for (ArffLexiconWordLabeller labeller : this.lexiconLabs) {
		for (Attribute att : labeller.getAttributes()) {
			if (att.isNumeric()) {
				outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + att.name()));
				continue;
			}
			if (!att.isNominal()) {
				continue; // other attribute types are not carried over
			}

			// copy the labels so the new nominal attribute has the same values
			final List<String> labels = new ArrayList<String>();
			for (int valueIdx = 0; valueIdx < att.numValues(); valueIdx++) {
				labels.add(att.value(valueIdx));
			}
			outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + att.name(), labels));
		}
	}

	final Instances result = new Instances(inputFormat.relationName(), outputAtts, 0);

	// set the class index
	result.setClassIndex(inputFormat.classIndex());

	return result;
}