Java Code Examples for weka.core.Attribute#numValues()

The following examples show how to use weka.core.Attribute#numValues(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EditableBayesNet.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Renames a single value of a node; all other values of the node are kept as they are.
 *
 * @param nTargetNode index of the node whose value is renamed
 * @param sValue current name of the value
 * @param sNewValue new name of the value
 */
public void renameNodeValue(int nTargetNode, String sValue, String sNewValue) {
	// record the change on the undo stack when undo tracking is enabled
	if (m_bNeedsUndoAction) {
		addUndoAction(new RenameValueAction(nTargetNode, sValue, sNewValue));
	}
	Attribute oldAtt = m_Instances.attribute(nTargetNode);
	int nValues = oldAtt.numValues();
	FastVector renamed = new FastVector(nValues);
	for (int i = 0; i < nValues; i++) {
		String current = oldAtt.value(i);
		// substitute the new name wherever the old one occurs
		renamed.addElement(current.equals(sValue) ? sNewValue : current);
	}
	// rebuild the attribute under its existing name with the updated value list
	replaceAtt(nTargetNode, oldAtt.name(), renamed);
}
 
Example 2
Source File: GeneralizedSequentialPatterns.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Extracts the data sequences out of the original data set according to 
  * their sequence ID attribute, which is removed after extraction.
  * <p>
  * Each sequence is taken as a run of consecutive instances that share the
  * same sequence ID value, so the data set is read as being grouped by
  * sequence ID. At most one sequence is extracted per declared value of the
  * sequence ID attribute; extraction stops early once every instance has
  * been consumed, which avoids reading past the end of the data set when
  * the attribute declares more values than the data actually contains.
  * 
  * @param originalDataSet 	the original data set
  * @param dataSeqID		the index of the sequence ID attribute
  * @return 			set of distinct data sequences
  */
 protected FastVector extractDataSequences (Instances originalDataSet, int dataSeqID) {
   FastVector dataSequences = new FastVector();
   int firstInstance = 0;
   int lastInstance = 0;
   Attribute seqIDAttribute = originalDataSet.attribute(dataSeqID);

   for (int i = 0; i < seqIDAttribute.numValues(); i++) {
     // nothing left to extract: fewer runs in the data than declared values
     if (firstInstance >= originalDataSet.numInstances()) {
       break;
     }
     double sequenceID = originalDataSet.instance(firstInstance).value(dataSeqID);
     // advance to the end of the run that shares this sequence ID
     while (lastInstance < originalDataSet.numInstances()
            && sequenceID == originalDataSet.instance(lastInstance).value(dataSeqID)) {
       lastInstance++;
     }
     // copy the run [firstInstance, lastInstance) and drop the ID attribute
     Instances dataSequence = new Instances(originalDataSet, firstInstance, lastInstance - firstInstance);
     dataSequence.deleteAttributeAt(dataSeqID);
     dataSequences.addElement(dataSequence);
     firstInstance = lastInstance;
   }
   return dataSequences;
 }
 
Example 3
Source File: ClusteredShapeletTransform.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the output header: one numeric attribute per cluster
 * ("CShapelet_0" .. "CShapelet_{noClust-1}") followed by a copy of the
 * input's class attribute, which becomes the class of the result.
 *
 * @param inputFormat - the format of the input data
 * @return a new Instances object in the desired output format
 * @throws Exception - if the number of shapelets is below 1 or below the number of clusters
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception{

    int s=st.getNumberOfShapelets();
    if(s < 1 || s<noClust){
        throw new Exception("ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1. You entered s="+s+" num clusters ="+noClust);
    }

    ArrayList<Attribute> atts = new ArrayList<>();
    for(int i = 0; i < noClust; i++){
        atts.add(new Attribute("CShapelet_" + i));
    }

    // copy the class attribute's name and nominal values into a fresh attribute
    Attribute target = inputFormat.attribute(inputFormat.classIndex());
    ArrayList<String> vals = new ArrayList<>(target.numValues());
    for(int i = 0; i < target.numValues(); i++){
        vals.add(target.value(i));
    }
    atts.add(new Attribute(target.name(), vals));

    Instances result = new Instances("CShapelets" + inputFormat.relationName(), atts, inputFormat.numInstances());
    // the class attribute was appended last
    result.setClassIndex(result.numAttributes() - 1);
    return result;
}
 
Example 4
Source File: WekaPipelineValidityCheckingNodeEvaluator.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Tells whether the data contains at least one nominal attribute, other than
 * the class attribute, that has more than two values.
 *
 * @return true if such an attribute exists, false otherwise
 */
private boolean multiValuedNominalAttributesExist() {
	Instances data = this.getData().getInstances();
	for (int idx = 0; idx < data.numAttributes(); idx++) {
		Attribute candidate = data.attribute(idx);
		// the class attribute itself is never counted
		if (candidate == data.classAttribute()) {
			continue;
		}
		if (candidate.isNominal() && candidate.numValues() > 2) {
			return true;
		}
	}
	return false;
}
 
Example 5
Source File: Utils.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a new nominal attribute carrying the same name and the same
 * values (in the same order) as the given attribute.
 *
 * @param oldAttribute attribute to copy
 * @return duplicated nominal attribute
 */
public static Attribute copyNominalAttribute(Attribute oldAttribute) {
  final int numValues = oldAttribute.numValues();
  final String[] classValues = new String[numValues];
  int pos = 0;
  while (pos < numValues) {
    classValues[pos] = oldAttribute.value(pos);
    pos++;
  }
  return new Attribute(oldAttribute.name(), Arrays.asList(classValues));
}
 
Example 6
Source File: PowerCepstrum.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the output header: half as many numeric attributes as the length
 * reported by the FFT filter, plus a copy of the class attribute when the
 * input has one.
 *
 * @param inputFormat the format of the input data
 * @return the header of the transformed data
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    // Only half of the length reported by the FFT filter is kept.
    int numTerms = fftFilter.findLength(inputFormat) / 2;

    ArrayList<Attribute> atts = new ArrayList<>();
    for (int term = 0; term < numTerms; term++) {
        atts.add(new Attribute("PowerSpectrum_" + term));
    }

    final int classIdx = inputFormat.classIndex();
    final boolean hasClass = classIdx >= 0;

    if (hasClass) {
        // Classification set: append a copy of the class attribute.
        Attribute target = inputFormat.attribute(classIdx);
        int numClassValues = target.numValues();
        ArrayList<String> vals = new ArrayList<>(numClassValues);
        for (int v = 0; v < numClassValues; v++) {
            vals.add(target.value(v));
        }
        atts.add(new Attribute(target.name(), vals));
    }

    Instances result = new Instances("Cepstrum" + inputFormat.relationName(), atts, inputFormat.numInstances());
    if (hasClass) {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}
 
Example 7
Source File: PAA.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the output header: one numeric attribute per interval
 * ("PAAInterval_0" ..) plus a copy of the class attribute when the input
 * has one.
 *
 * @param inputFormat the format of the input data
 * @return the header of the transformed data
 * @throws Exception if a non-class attribute of the input is not numeric
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {

    // Every attribute other than the class must be numeric.
    for (int idx = 0; idx < inputFormat.numAttributes(); idx++) {
        boolean isClass = inputFormat.classIndex() == idx;
        if (!isClass && !inputFormat.attribute(idx).isNumeric()) {
            throw new Exception("Non numeric attribute not allowed for PAA");
        }
    }

    // One output attribute per interval.
    ArrayList<Attribute> attributes = new ArrayList<>();
    int interval = 0;
    while (interval < numIntervals) {
        attributes.add(new Attribute("PAAInterval_" + interval));
        interval++;
    }

    final boolean hasClass = inputFormat.classIndex() >= 0;
    if (hasClass) {
        // Classification set: copy the class attribute's name and values.
        Attribute target = inputFormat.attribute(inputFormat.classIndex());
        int numClassValues = target.numValues();
        ArrayList<String> vals = new ArrayList<>(numClassValues);
        for (int v = 0; v < numClassValues; v++) {
            vals.add(target.value(v));
        }
        attributes.add(new Attribute(target.name(), vals));
    }

    Instances result = new Instances("PAA" + inputFormat.relationName(), attributes, inputFormat.numInstances());
    if (hasClass) {
        // the class attribute was appended last
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}
 
Example 8
Source File: ACF.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
     * Sets up the header info for the transformed series: one numeric
     * attribute per lag ("ACF_1" .. "ACF_maxLag") plus a copy of the class
     * attribute when the input has one.
     * <p>
     * Side effect: assigns {@code seriesLength} and may lower or reset
     * {@code maxLag} as part of clamping it to the series length.
     *
     * @param inputFormat the format of the input data
     * @return the header of the transformed data
     * @throws Exception if the capabilities test on the input fails
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat)  throws Exception {
    //Check capabilities for the filter. Can only handle real valued, no missing.       
        getCapabilities().testWithFail(inputFormat);
   
        //Series length is the attribute count, minus the class attribute if one is set.
        seriesLength=inputFormat.numAttributes();	
        if(inputFormat.classIndex()>=0)
            seriesLength--;
//Cannot include the final endTerms correlations, since they are based on too little data and hence unreliable.
        if(maxLag>seriesLength-endTerms)
            maxLag=seriesLength-endTerms;
        //A negative lag (either configured or produced by the clamp above) falls back to numAttributes-1.
        if(maxLag<0)
            maxLag=inputFormat.numAttributes()-1;
        //Set up instances size and format. 
        ArrayList<Attribute> atts=new ArrayList<>();
        String name;
        //Lags are numbered from 1, so the attributes are ACF_1 .. ACF_maxLag.
        for(int i=1;i<=maxLag;i++){
            name = "ACF_"+i;
            atts.add(new Attribute(name));
        }
        if(inputFormat.classIndex()>=0){
            //Get the class values as an ArrayList
            Attribute target =inputFormat.attribute(inputFormat.classIndex());

            ArrayList<String> vals=new ArrayList<>(target.numValues());
            for(int i=0;i<target.numValues();i++)
                    vals.add(target.value(i)+"");
            atts.add(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(),vals));
        }	
        Instances result = new Instances("ACF"+inputFormat.relationName(),atts,inputFormat.numInstances());
        //The class attribute, when present, was appended last.
        if(inputFormat.classIndex()>=0){
            result.setClassIndex(result.numAttributes()-1);
        }
        return result;
    }
 
Example 9
Source File: BagOfPatterns.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the output header: one numeric attribute per dictionary word,
 * plus a copy of the class attribute (inserted last) when the input has one.
 *
 * @param inputFormat the format of the input data
 * @return the header of the transformed data
 * @throws Exception if a non-class attribute of the input is not numeric
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {

    // Reject any attribute, other than the class, that is not numeric.
    for (int idx = 0; idx < inputFormat.numAttributes(); idx++) {
        if (idx == inputFormat.classIndex()) {
            continue;
        }
        if (!inputFormat.attribute(idx).isNumeric()) {
            throw new Exception("Non numeric attribute not allowed for BoP conversion");
        }
    }

    // One output attribute per word of the dictionary.
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (String word : dictionary) {
        attributes.add(new Attribute(word));
    }

    Instances result = new Instances("BagOfPatterns_" + inputFormat.relationName(), attributes, inputFormat.numInstances());

    if (inputFormat.classIndex() < 0) {
        // no class set on the input, nothing more to add
        return result;
    }

    // Classification set: copy the class attribute and append it as the class.
    Attribute target = inputFormat.attribute(inputFormat.classIndex());
    int numClassValues = target.numValues();
    ArrayList<String> vals = new ArrayList<>(numClassValues);
    for (int v = 0; v < numClassValues; v++) {
        vals.add(target.value(v));
    }
    result.insertAttributeAt(new Attribute(target.name(), vals), result.numAttributes());
    result.setClassIndex(result.numAttributes() - 1);

    return result;
}
 
Example 10
Source File: Test.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the right-hand part of the test's textual form, beginning with the
 * comparison symbol. Numeric attributes are rendered as a threshold test;
 * two-valued nominal attributes are always rendered with "=" (a negated test
 * names the other value); other nominal attributes use "=" or "!=".
 *
 * @return a string representing the test
 */
private String testComparisonString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);

  if (att.isNumeric()) {
    String symbol = m_Not ? ">= " : "< ";
    return symbol + Utils.doubleToString(m_Split, 3);
  }

  int splitIndex = (int) m_Split;
  if (att.numValues() != 2) {
    String symbol = m_Not ? "!= " : "= ";
    return symbol + att.value(splitIndex);
  }

  // binary attribute: a negated test is expressed as equality with the other value
  int shownIndex;
  if (m_Not) {
    shownIndex = (splitIndex == 0) ? 1 : 0;
  } else {
    shownIndex = splitIndex;
  }
  return "= " + att.value(shownIndex);
}
 
Example 11
Source File: PowerSpectrum.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the output header: half as many numeric attributes as the length
 * reported by the FFT filter ("PowerSpectrum_0" ..), plus a copy of the
 * class attribute when the input has one.
 *
 * @param inputFormat the format of the input data
 * @return the header of the transformed data
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {
    //Set up instances size and format: only half of the FFT length is kept.
    int length = fftFilter.findLength(inputFormat);
    length /= 2;
    ArrayList<Attribute> atts = new ArrayList<>();
    for (int i = 0; i < length; i++) {
        atts.add(new Attribute("PowerSpectrum_" + i));
    }

    if (inputFormat.classIndex() >= 0) {	//Classification set, set class
        //Copy the class attribute's name and nominal values
        Attribute target = inputFormat.attribute(inputFormat.classIndex());

        ArrayList<String> vals = new ArrayList<>(target.numValues());
        for (int i = 0; i < target.numValues(); i++) {
            vals.add(target.value(i));
        }
        atts.add(new Attribute(target.name(), vals));
    }
    Instances result = new Instances("PowerSpectrum" + inputFormat.relationName(), atts, inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0) {
        //the class attribute was appended last
        result.setClassIndex(result.numAttributes() - 1);
    }

    return result;
}
 
Example 12
Source File: MekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts a WEKA attribute into the corresponding attribute type: numeric
 * attributes become a NumericAttribute, nominal attributes become an
 * IntBasedCategoricalAttribute over the same values in the same order.
 *
 * @param att the WEKA attribute to convert
 * @return the converted attribute, carrying the same name
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	String attributeName = att.name();
	if (att.isNumeric()) {
		return new NumericAttribute(attributeName);
	}
	if (!att.isNominal()) {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}
	List<String> domain = new LinkedList<>();
	int numValues = att.numValues();
	for (int v = 0; v < numValues; v++) {
		domain.add(att.value(v));
	}
	return new IntBasedCategoricalAttribute(attributeName, domain);
}
 
Example 13
Source File: TwoWayNominalSplit.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Gets the string describing the comparison the split depends on for a particular
 * branch, i.e. the right hand side of the description of the split. For a
 * two-valued attribute both branches are rendered with "=", the second branch
 * naming the other value; otherwise branch 0 uses "=" and any other branch "!=".
 *
 * @param branchNum the branch of the split
 * @param dataset the dataset that the split is based on
 * @return a string describing the comparison
 */
public String comparisonString(int branchNum, Instances dataset) {

  Attribute att = dataset.attribute(attIndex);
  boolean firstBranch = (branchNum == 0);

  if (att.numValues() != 2) {
    String symbol = firstBranch ? "= " : "!= ";
    return symbol + att.value(trueSplitValue);
  }

  // binary attribute: the other branch is equality with the other value
  if (firstBranch) {
    return "= " + att.value(trueSplitValue);
  }
  return "= " + att.value(trueSplitValue == 0 ? 1 : 0);
}
 
Example 14
Source File: HoeffdingTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns class probabilities for an instance. When no tree has been built
 * yet, every class value is given the same probability; otherwise the
 * distribution comes from the node the instance is routed to.
 * 
 * @param inst the instance to compute the distribution for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  Attribute classAtt = inst.classAttribute();

  if (m_root == null) {
    // all class values equally likely
    double[] uniform = new double[classAtt.numValues()];
    for (int c = 0; c < uniform.length; c++) {
      uniform[c] = 1;
    }
    Utils.normalize(uniform);
    return uniform;
  }

  LeafNode leaf = m_root.leafForInstance(inst, null, null);
  // fall back to the parent when the leaf holds no node of its own
  HNode actualNode = (leaf.m_theNode != null) ? leaf.m_theNode : leaf.m_parentNode;

  return actualNode.getDistribution(inst, classAtt);
}
 
Example 15
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Lists the values declared by the class attribute of the given data, in
 * declaration order.
 *
 * @param data the data whose class attribute is inspected
 * @return a new list with the class attribute's values
 */
public static List<String> getClassesDeclaredInDataset(final Instances data) {
	List<String> declared = new ArrayList<>();
	Attribute classAttribute = data.classAttribute();
	int numClasses = classAttribute.numValues();
	int idx = 0;
	while (idx < numClasses) {
		declared.add(classAttribute.value(idx));
		idx++;
	}
	return declared;
}
 
Example 16
Source File: EditableBayesNet.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Renames a node while keeping all of its values.
 *
 * @param nTargetNode index of the node to set name for
 * @param sName new name to assign
 */
public void setNodeName(int nTargetNode, String sName) {
	// record the change on the undo stack when undo tracking is enabled
	if (m_bNeedsUndoAction) {
		addUndoAction(new RenameAction(nTargetNode, getNodeName(nTargetNode), sName));
	}
	Attribute node = m_Instances.attribute(nTargetNode);
	int nValues = node.numValues();
	FastVector copiedValues = new FastVector(nValues);
	int i = 0;
	while (i < nValues) {
		copiedValues.addElement(node.value(i));
		i++;
	}
	// rebuild the attribute under the new name with the same value list
	replaceAtt(nTargetNode, sName, copiedValues);
}
 
Example 17
Source File: NaiveBayesSimple.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Returns a description of the classifier: for every class its prior,
  * followed, per attribute, by either the per-value counts (nominal
  * attributes) or the mean and standard deviation (all other attributes).
  *
  * @return a description of the classifier as a string; a fixed message if
  *         no model has been built yet or if the description cannot be produced
  */
 @Override
 public String toString() {

   if (m_Instances == null) {
     return "Naive Bayes (simple): No model built yet.";
   }
   try {
     // local, single-threaded buffer: no need for a synchronized StringBuffer
     StringBuilder text = new StringBuilder("Naive Bayes (simple)");

     for (int i = 0; i < m_Instances.numClasses(); i++) {
       text.append("\n\nClass ").append(m_Instances.classAttribute().value(i))
           .append(": P(C) = ")
           .append(Utils.doubleToString(m_Priors[i], 10, 8))
           .append("\n\n");

       Enumeration<?> enumAtts = m_Instances.enumerateAttributes();
       // position within the enumeration, used to index the statistics arrays
       int attIndex = 0;
       while (enumAtts.hasMoreElements()) {
         Attribute attribute = (Attribute) enumAtts.nextElement();
         text.append("Attribute ").append(attribute.name()).append("\n");
         if (attribute.isNominal()) {
           // header row with the value names, then a row with their counts
           for (int j = 0; j < attribute.numValues(); j++) {
             text.append(attribute.value(j)).append("\t");
           }
           text.append("\n");
           for (int j = 0; j < attribute.numValues(); j++) {
             text.append(Utils.doubleToString(m_Counts[i][attIndex][j], 10, 8))
                 .append("\t");
           }
         } else {
           text.append("Mean: ")
               .append(Utils.doubleToString(m_Means[i][attIndex], 10, 8))
               .append("\t");
           text.append("Standard Deviation: ")
               .append(Utils.doubleToString(m_Devs[i][attIndex], 10, 8));
         }
         text.append("\n\n");
         attIndex++;
       }
     }

     return text.toString();
   } catch (Exception e) {
     // best effort: toString must not throw, so report a fixed message instead
     return "Can't print Naive Bayes classifier!";
   }
 }
 
Example 18
Source File: InputMappedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Rebuilds the mapping from the model's attributes to the attributes of the
 * incoming data, after (re)loading the model from {@code m_modelPath}.
 * <p>
 * For every model attribute the input header is searched for attributes
 * whose name matches according to {@code stringMatch}. The status of the
 * model attribute is NO_MATCH when none is found, TYPE_MISMATCH when a
 * name match has a different type, and OK otherwise. For nominal pairs a
 * per-attribute table is stored that maps each incoming value index to the
 * index of the same value in the model attribute, or NO_MATCH.
 *
 * @return false if no model header is available, true once the mapping has been rebuilt
 * @throws Exception declared for the model loading step
 */
private boolean regenerateMapping() throws Exception {
  loadModel(m_modelPath); // load a model (if specified)
  
  // without a model header there is nothing to map against
  if (m_modelHeader == null) {
    return false;
  }
  
  // all three tables are indexed by model attribute
  m_attributeMap = new int[m_modelHeader.numAttributes()];
  m_attributeStatus = new int[m_modelHeader.numAttributes()];
  m_nominalValueMap = new int[m_modelHeader.numAttributes()][];
  
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    String modelAttName = m_modelHeader.attribute(i).name();
    m_attributeStatus[i] = NO_MATCH;
    
    // NOTE: the scan only stops early on a type mismatch; after a successful
    // match it continues, so a later matching input attribute overwrites
    // the entries written for an earlier one
    for (int j = 0; j < m_inputHeader.numAttributes(); j++) {
      String incomingAttName = m_inputHeader.attribute(j).name();
      if (stringMatch(modelAttName, incomingAttName)) {
        m_attributeMap[i] = j;
        m_attributeStatus[i] = OK;
        
        Attribute modelAtt = m_modelHeader.attribute(i);
        Attribute incomingAtt = m_inputHeader.attribute(j);
        
        // check types
        if (modelAtt.type() != incomingAtt.type()) {
          m_attributeStatus[i] = TYPE_MISMATCH;
          break;
        }          
        
        // now check nominal values (number, names...)
        // a differing number of values only produces a warning; the mapping is still built
        if (modelAtt.numValues() != incomingAtt.numValues()) {
          System.out.println("[InputMappedClassifier] Warning: incoming nominal " +
          		"attribute " + incomingAttName + " does not have the same " +
          				"number of values as model attribute "
          		+ modelAttName);
          
        }
        
        if (modelAtt.isNominal() && incomingAtt.isNominal()) {
          // valuesMap[k] = index in the model attribute of incoming value k, or NO_MATCH
          int[] valuesMap = new int[incomingAtt.numValues()];
          for (int k = 0; k < incomingAtt.numValues(); k++) {
            String incomingNomValue = incomingAtt.value(k);
            int indexInModel = modelAtt.indexOfValue(incomingNomValue);
            if (indexInModel < 0) {
              valuesMap[k] = NO_MATCH;
            } else {
              valuesMap[k] = indexInModel;
            }
          }
          m_nominalValueMap[i] = valuesMap;
        }
      }
    }
  }

  
  return true;
}
 
Example 19
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates a sample set over the given attributes: several candidate sets
 * are produced, the one with the largest minimum pairwise distance is kept,
 * and it is turned into instances by picking one subdomain per attribute
 * and sample.
 * <p>
 * Assumptions: (1) Numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of samples to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" with one instance per sample
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//keep the first candidate as a fallback, so that a set is selected
			//even when no candidate has a minimum distance above 0
			setWithMaxMinDist = setPerm;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			//equally spaced subdomain borders between the lower and the upper bound
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain chosen for attribute j in sample i
			int slot = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				vals[j] = useMid?
						(bounds[j][slot]+bounds[j][slot+1])/2:
							bounds[j][slot]+
							(
								(bounds[j][slot+1]-bounds[j][slot])*uniRand.nextDouble()
							);
			}else{//isNominal()
				vals[j] = bounds[j][slot];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 20
Source File: LabelWordVectors.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Builds the output header: all attributes of the input, followed by one
 * attribute per numeric or nominal attribute of every lexicon labeller,
 * named "lexiconName-attributeName". The class index of the input is kept.
 *
 * @param inputFormat the format of the input data
 * @return the header of the transformed data, without instances
 * @throws Exception declared by the overridden method
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	ArrayList<Attribute> atts = new ArrayList<Attribute>();

	// Start from all attributes of the input format
	int numInputAtts = inputFormat.numAttributes();
	for (int idx = 0; idx < numInputAtts; idx++) {
		atts.add(inputFormat.attribute(idx));
	}

	// The dictionaries of the lexicons are initialized only in the first batch
	if (!this.isFirstBatchDone()) {
		this.initializeDicts();
	}

	for (ArffLexiconWordLabeller labeller : this.lexiconLabs) {
		for (Attribute lexAtt : labeller.getAttributes()) {
			if (lexAtt.isNumeric()) {
				atts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name()));
				continue;
			}
			if (!lexAtt.isNominal()) {
				// attributes that are neither numeric nor nominal are not carried over
				continue;
			}
			// nominal attribute: copy its values into the new attribute
			List<String> labels = new ArrayList<String>();
			for (int v = 0; v < lexAtt.numValues(); v++) {
				labels.add(lexAtt.value(v));
			}
			atts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name(), labels));
		}
	}

	Instances result = new Instances(inputFormat.relationName(), atts, 0);

	// set the class index
	result.setClassIndex(inputFormat.classIndex());

	return result;
}