Java Code Examples for weka.core.Instances#enumerateInstances()

The following examples show how to use weka.core.Instances#enumerateInstances(). Each example is taken from an open-source project; the source file, project name, and license are noted above the code.
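As a quick orientation before the project examples, the sketch below shows the typical pattern: obtain an Enumeration from enumerateInstances() and walk it with hasMoreElements()/nextElement(). Older Weka releases return a raw Enumeration (which is why several examples below cast to Instance), while recent releases return Enumeration<Instance>. This is a minimal, illustrative sketch; the ARFF path is a placeholder and not part of any example below.

import java.util.Enumeration;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnumerateInstancesDemo {

  public static void main(String[] args) throws Exception {
    // Load any ARFF file; the path is a placeholder.
    Instances data = new DataSource("data/iris.arff").getDataSet();
    data.setClassIndex(data.numAttributes() - 1);

    // Iterate over every instance in the dataset.
    Enumeration<Instance> en = data.enumerateInstances();
    while (en.hasMoreElements()) {
      Instance inst = en.nextElement();
      System.out.println(inst.classValue() + " : " + inst);
    }
  }
}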
Example 1
Source File: Id3.java    From tsml with GNU General Public License v3.0
/**
 * Computes the entropy of a dataset.
 * 
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

  double [] classCounts = new double[data.numClasses()];
  Enumeration instEnum = data.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    Instance inst = (Instance) instEnum.nextElement();
    classCounts[(int) inst.classValue()]++;
  }
  double entropy = 0;
  for (int j = 0; j < data.numClasses(); j++) {
    if (classCounts[j] > 0) {
      entropy -= classCounts[j] * Utils.log2(classCounts[j]);
    }
  }
  entropy /= (double) data.numInstances();
  return entropy + Utils.log2(data.numInstances());
}
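For readers comparing this to the textbook definition: the loop above accumulates raw counts rather than probabilities and relies on the identity -sum_j (c_j/N) * log2(c_j/N) = log2(N) - (1/N) * sum_j c_j * log2(c_j), so the division by numInstances() and the log2(N) correction happen once at the end. A direct, probability-based version (equivalent up to floating-point rounding; the method name is illustrative and not part of Id3.java) would look like this:

private double computeEntropyDirect(Instances data) {
  double[] classCounts = new double[data.numClasses()];
  Enumeration instEnum = data.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    classCounts[(int) ((Instance) instEnum.nextElement()).classValue()]++;
  }
  double entropy = 0;
  double n = data.numInstances();
  for (double count : classCounts) {
    if (count > 0) {
      double p = count / n;
      entropy -= p * Utils.log2(p); // -sum over classes of p * log2(p)
    }
  }
  return entropy;
}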
 
Example 2
Source File: LocalScoreSearchAlgorithm.java    From tsml with GNU General Public License v3.0
private double calcNodeScorePlain(int nNode) {
	Instances instances = m_BayesNet.m_Instances;
	ParentSet oParentSet = m_BayesNet.getParentSet(nNode);

	// determine cardinality of parent set & reserve space for frequency counts
	int nCardinality = oParentSet.getCardinalityOfParents();
	int numValues = instances.attribute(nNode).numValues();
	int[] nCounts = new int[nCardinality * numValues];

	// initialize (don't need this?)
	for (int iParent = 0; iParent < nCardinality * numValues; iParent++) {
		nCounts[iParent] = 0;
	}

	// estimate distributions
	Enumeration enumInsts = instances.enumerateInstances();

	while (enumInsts.hasMoreElements()) {
		Instance instance = (Instance) enumInsts.nextElement();

		// updateClassifier;
		double iCPT = 0;

		for (int iParent = 0; iParent < oParentSet.getNrOfParents(); iParent++) {
			int nParent = oParentSet.getParent(iParent);

			iCPT = iCPT * instances.attribute(nParent).numValues() + instance.value(nParent);
		}

		nCounts[numValues * ((int) iCPT) + (int) instance.value(nNode)]++;
	}

	return calcScoreOfCounts(nCounts, nCardinality, numValues, instances);
}
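The iCPT accumulator in the loop above is a mixed-radix index over the parent configuration: each nominal parent contributes one digit whose base is its number of values, so every combination of parent values maps to a distinct integer in [0, nCardinality), and nCounts is addressed as numValues * iCPT + childValue. A tiny stand-alone illustration with made-up cardinalities (not from the original source):

public class MixedRadixIndexDemo {
  public static void main(String[] args) {
    // Hypothetical cardinalities: parent A has 3 values, parent B has 2, the child node has 4.
    int aValues = 3, bValues = 2, childValues = 4;
    int[] counts = new int[aValues * bValues * childValues]; // nCardinality * numValues

    // One observed combination of values (made-up data).
    int a = 2, b = 1, child = 3;

    int iCPT = 0;
    iCPT = iCPT * aValues + a;             // fold in parent A
    iCPT = iCPT * bValues + b;             // fold in parent B
    counts[childValues * iCPT + child]++;  // same addressing as calcNodeScorePlain

    System.out.println("flat index = " + (childValues * iCPT + child));
  }
}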
 
Example 3
Source File: BestConf.java    From bestconf with Apache License 2.0
public static void testCOMT2() throws Exception{
	BestConf bestconf = new BestConf();
	Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
	trainingSet.setClassIndex(trainingSet.numAttributes()-1);
	
	Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false);
	samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
	samplePoints.setClassIndex(samplePoints.numAttributes()-1);
	
	COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
	
	comt.buildClassifier(trainingSet);
	
	Evaluation eval = new Evaluation(trainingSet);
	eval.evaluateModel(comt, trainingSet);
	System.err.println(eval.toSummaryString());
	
	Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
	Instances bestInstances = new Instances(trainingSet,2);
	bestInstances.add(best);
	DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);
	
	//now we output the training set with the class value updated as the predicted value
	Instances output = new Instances(trainingSet, trainingSet.numInstances());
	Enumeration<Instance> enu = trainingSet.enumerateInstances();
	while(enu.hasMoreElements()){
		Instance ins = enu.nextElement();
		double[] values = ins.toDoubleArray();
		values[values.length-1] = comt.classifyInstance(ins);
		output.add(ins.copy(values));
	}
	DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
 
Example 4
Source File: FarthestFirstClusterer.java    From mzmine2 with GNU General Public License v2.0
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  List<Integer> clusters = new ArrayList<Integer>();
  String[] options = new String[2];
  FarthestFirst clusterer = new FarthestFirst();

  int numberOfGroups =
      parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
  options[0] = "-N";
  options[1] = String.valueOf(numberOfGroups);

  try {
    clusterer.setOptions(options);
    clusterer.buildClusterer(dataset);
    Enumeration<?> e = dataset.enumerateInstances();
    while (e.hasMoreElements()) {
      clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
    }
    ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
    return result;
  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example 5
Source File: EMClusterer.java    From mzmine2 with GNU General Public License v2.0
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  List<Integer> clusters = new ArrayList<Integer>();
  String[] options = new String[2];
  EM clusterer = new EM();

  int numberOfIterations =
      parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
  options[0] = "-I";
  options[1] = String.valueOf(numberOfIterations);

  try {
    clusterer.setOptions(options);
    clusterer.buildClusterer(dataset);
    Enumeration<?> e = dataset.enumerateInstances();
    while (e.hasMoreElements()) {
      clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
    }
    ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
    return result;

  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example 6
Source File: COMT2.java    From bestconf with Apache License 2.0
private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception{
	double retval = 0., y;
	Enumeration<Instance> enu = omega.enumerateInstances();
	int idxClass = omega.classIndex();
	Instance ins;
	while(enu.hasMoreElements()){
		ins = enu.nextElement();
		y = ins.value(idxClass);
		retval += Math.pow(y-model.classifyInstance(ins), 2)-Math.pow(y-modelPi.classifyInstance(ins), 2);
	}
	return retval;
}
 
Example 7
Source File: Prism.java    From tsml with GNU General Public License v3.0
/**
  * Does E contain any examples in the class C?
  *
  * @param E the instances to be checked
  * @param C the class
  * @return true if there are any instances of class C
  * @throws Exception if something goes wrong
  */
 private static boolean contains(Instances E, int C) throws Exception {

   Enumeration enu = E.enumerateInstances();
   while (enu.hasMoreElements()) {
     if ((int) ((Instance) enu.nextElement()).classValue() == C) {
       return true;
     }
   }
   return false;
 }
 
Example 8
Source File: Prism.java    From tsml with GNU General Public License v3.0
/**
    * Returns the set of instances that are not covered by this rule.
    *
    * @param data the instances to be checked
    * @return the instances not covered
    */
   public Instances notCoveredBy(Instances data) {

     Instances r = new Instances(data, data.numInstances());
     Enumeration enu = data.enumerateInstances();
     while (enu.hasMoreElements()) {
       Instance i = (Instance) enu.nextElement();
       if (resultRule(i) == -1) {
         r.add(i);
       }
     }
     r.compactify();
     return r;
   }
 
Example 9
Source File: Prism.java    From tsml with GNU General Public License v3.0
/**
    * Returns the set of instances that are covered by this rule.
    *
    * @param data the instances to be checked
    * @return the instances covered
    */
   public Instances coveredBy(Instances data) {

     Instances r = new Instances(data, data.numInstances());
     Enumeration enu = data.enumerateInstances();
     while (enu.hasMoreElements()) {
       Instance i = (Instance) enu.nextElement();
       if (resultRule(i) != -1) {
         r.add(i);
       }
     }
     r.compactify();
     return r;
   }
 
Example 10
Source File: OneR.java    From tsml with GNU General Public License v3.0
/**
  * Create a rule branching on this nominal attribute.
  *
  * @param attr the attribute to branch on
  * @param data the data to be used for creating the rule
  * @param missingValueCounts to be filled in
  * @return the generated rule
  * @throws Exception if the rule can't be built successfully
  */
 public OneRRule newNominalRule(Attribute attr, Instances data,
                                int[] missingValueCounts) throws Exception {

   // ... create arrays to hold the counts
   int[][] counts = new int [attr.numValues()]
                            [data.classAttribute().numValues()];
     
   // ... calculate the counts
   Enumeration enu = data.enumerateInstances();
   while (enu.hasMoreElements()) {
     Instance i = (Instance) enu.nextElement();
     if (i.isMissing(attr)) {
       missingValueCounts[(int) i.classValue()]++;
     } else {
       counts[(int) i.value(attr)][(int) i.classValue()]++;
     }
   }

   OneRRule r = new OneRRule(data, attr); // create a new rule
   for (int value = 0; value < attr.numValues(); value++) {
     int best = Utils.maxIndex(counts[value]);
     r.m_classifications[value] = best;
     r.m_correct += counts[value][best];
   }
   return r;
 }
 
Example 11
Source File: EMClusterer.java    From mzmine3 with GNU General Public License v2.0
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  List<Integer> clusters = new ArrayList<Integer>();
  String[] options = new String[2];
  EM clusterer = new EM();

  int numberOfIterations =
      parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
  options[0] = "-I";
  options[1] = String.valueOf(numberOfIterations);

  try {
    clusterer.setOptions(options);
    clusterer.buildClusterer(dataset);
    Enumeration<?> e = dataset.enumerateInstances();
    while (e.hasMoreElements()) {
      clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
    }
    ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
    return result;

  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example 12
Source File: ADTree.java    From tsml with GNU General Public License v3.0
/**
 * Simultaneously sums the weights of all attribute values for all instances.
 *
 * @param instances the instances to get the weights from 
 * @param attIndex index of the attribute to be evaluated
 * @return a double array containing the weight of each attribute value
 */    
private double[] attributeValueWeights(Instances instances, int attIndex)
{
  
  double[] weights = new double[instances.attribute(attIndex).numValues()];
  for(int i = 0; i < weights.length; i++) weights[i] = 0.0;

  for (Enumeration e = instances.enumerateInstances(); e.hasMoreElements(); ) {
    Instance inst = (Instance) e.nextElement();
    if (!inst.isMissing(attIndex)) weights[(int)inst.value(attIndex)] += inst.weight();
  }
  return weights;
}
 
Example 13
Source File: ADTree.java    From tsml with GNU General Public License v3.0
/**
  * Sets up the tree ready to be trained, using the two-class optimized method.
  *
  * @param instances the instances to train the tree with
  * @exception Exception if training data is unsuitable
  */
 public void initClassifier(Instances instances) throws Exception {

   // clear stats
   m_nodesExpanded = 0;
   m_examplesCounted = 0;
   m_lastAddedSplitNum = 0;

   // prepare the random generator
   m_random = new Random(m_randomSeed);

   // create training set
   m_trainInstances = new Instances(instances);

   // create positive/negative subsets
   m_posTrainInstances = new ReferenceInstances(m_trainInstances,
					 m_trainInstances.numInstances());
   m_negTrainInstances = new ReferenceInstances(m_trainInstances,
					 m_trainInstances.numInstances());
   for (Enumeration e = m_trainInstances.enumerateInstances(); e.hasMoreElements(); ) {
     Instance inst = (Instance) e.nextElement();
     if ((int) inst.classValue() == 0)
       m_negTrainInstances.addReference(inst); // belongs in negative class
     else
       m_posTrainInstances.addReference(inst); // belongs in positive class
   }
   m_posTrainInstances.compactify();
   m_negTrainInstances.compactify();

   // create the root prediction node
   double rootPredictionValue = calcPredictionValue(m_posTrainInstances,
					     m_negTrainInstances);
   m_root = new PredictionNode(rootPredictionValue);

   // pre-adjust weights
   updateWeights(m_posTrainInstances, m_negTrainInstances, rootPredictionValue);
   
   // pre-calculate what we can
   generateAttributeIndicesSingle();
 }
 
Example 14
Source File: Distribution.java    From tsml with GNU General Public License v3.0
/**
 * Creates a distribution with only one bag according
 * to instances in source.
 *
 * @exception Exception if something goes wrong
 */
public Distribution(Instances source) throws Exception {
  
  m_perClassPerBag = new double [1][0];
  m_perBag = new double [1];
  totaL = 0;
  m_perClass = new double [source.numClasses()];
  m_perClassPerBag[0] = new double [source.numClasses()];
  Enumeration enu = source.enumerateInstances();
  while (enu.hasMoreElements())
    add(0,(Instance) enu.nextElement());
}
 
Example 15
Source File: WekaNeurophSample.java    From NeurophFramework with Apache License 2.0
/**
 * Prints Weka data set
 *
 * @param wekaDataset Instances Weka data set
 */
private static void printDataSet(Instances wekaDataset) {
    System.out.println("Weka dataset");
    Enumeration en = wekaDataset.enumerateInstances();
    while (en.hasMoreElements()) {
        Instance instance = (Instance) en.nextElement();
        double[] values = instance.toDoubleArray();
        System.out.println(Arrays.toString(values));
        System.out.println(instance.stringValue(instance.classIndex()));
    }
}
 
Example 16
Source File: SimpleKMeansClusterer.java    From mzmine3 with GNU General Public License v2.0
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  List<Integer> clusters = new ArrayList<Integer>();
  String[] options = new String[2];
  SimpleKMeans clusterer = new SimpleKMeans();

  int numberOfGroups =
      parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
  options[0] = "-N";
  options[1] = String.valueOf(numberOfGroups);

  try {
    clusterer.setOptions(options);
    clusterer.buildClusterer(dataset);
    Enumeration<?> e = dataset.enumerateInstances();
    while (e.hasMoreElements()) {
      clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
    }
    ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
    return result;

  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example 17
Source File: Id3.java    From tsml with GNU General Public License v3.0
/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

  // Check if no instances have reached this node.
  if (data.numInstances() == 0) {
    m_Attribute = null;
    m_ClassValue = Utils.missingValue();
    m_Distribution = new double[data.numClasses()];
    return;
  }

  // Compute attribute with maximum information gain.
  double[] infoGains = new double[data.numAttributes()];
  Enumeration attEnum = data.enumerateAttributes();
  while (attEnum.hasMoreElements()) {
    Attribute att = (Attribute) attEnum.nextElement();
    infoGains[att.index()] = computeInfoGain(data, att);
  }
  m_Attribute = data.attribute(Utils.maxIndex(infoGains));
  
  // Make leaf if information gain is zero. 
  // Otherwise create successors.
  if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
    m_Attribute = null;
    m_Distribution = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      m_Distribution[(int) inst.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();
  } else {
    Instances[] splitData = splitData(data, m_Attribute);
    m_Successors = new Id3[m_Attribute.numValues()];
    for (int j = 0; j < m_Attribute.numValues(); j++) {
      m_Successors[j] = new Id3();
      m_Successors[j].makeTree(splitData[j]);
    }
  }
}
 
Example 18
Source File: ADTree.java    From tsml with GNU General Public License v3.0
/**
  * Recursive function that carries out the search for the best test (splitter) to add to
  * this part of the tree, aiming to minimize the Z value. Performs a Z-pure cutoff to
  * reduce the search space.
  *
  * @param currentNode the root of the subtree to be searched, and the current node 
  * being considered as parent of a new split
  * @param posInstances the positive-class instances that apply at this node
  * @param negInstances the negative-class instances that apply at this node
  * @exception Exception if search fails
  */
 private void searchForBestTestSingle(PredictionNode currentNode,
			       Instances posInstances, Instances negInstances)
   throws Exception {

   // don't investigate pure or empty nodes any further
   if (posInstances.numInstances() == 0 || negInstances.numInstances() == 0) return;

   // do z-pure cutoff
   if (calcZpure(posInstances, negInstances) >= m_search_smallestZ) return;

   // keep stats
   m_nodesExpanded++;
   m_examplesCounted += posInstances.numInstances() + negInstances.numInstances();

   // evaluate static splitters (nominal)
   for (int i=0; i<m_nominalAttIndices.length; i++)
     evaluateNominalSplitSingle(m_nominalAttIndices[i], currentNode,
			 posInstances, negInstances);

   // evaluate dynamic splitters (numeric)
   if (m_numericAttIndices.length > 0) {

     // merge the two sets of instances into one
     Instances allInstances = new Instances(posInstances);
     for (Enumeration e = negInstances.enumerateInstances(); e.hasMoreElements(); )
       allInstances.add((Instance) e.nextElement());
   
     // use method of finding the optimal Z split-point
     for (int i=0; i<m_numericAttIndices.length; i++)
       evaluateNumericSplitSingle(m_numericAttIndices[i], currentNode,
                                  posInstances, negInstances, allInstances);
   }

   if (currentNode.getChildren().size() == 0) return;

   // keep searching
   switch (m_searchPath) {
   case SEARCHPATH_ALL:
     goDownAllPathsSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_HEAVIEST: 
     goDownHeaviestPathSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_ZPURE: 
     goDownZpurePathSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_RANDOM: 
     goDownRandomPathSingle(currentNode, posInstances, negInstances);
     break;
   }
 }
 
Example 19
Source File: ConfigSampler.java    From bestconf with Apache License 2.0
private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center){
	ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
	int attNum = center.numAttributes();
	
	int pos = previousSet.attribute(PerformanceAttName).index();
	
	//traverse each dimension
	Enumeration<Instance> enu;
	double minDis;
	for(int i=0;i<attNum;i++){
		if(i==pos)
			continue;
		
		enu = previousSet.enumerateInstances();
		minDis = Double.MAX_VALUE;
		
		while(enu.hasMoreElements()){
			Instance ins = enu.nextElement();
			if(!ins.equals(center))
				minDis = Math.min((double)((int)(Math.abs(ins.value(i)-center.value(i))*1000))/1000.0, minDis);
		}
		
		//now we set the range
		Properties p1 = new Properties();
		double upper = center.value(i)+minDis, lower=center.value(i)-minDis;
		
		TreeSet<Double> detourSet = new TreeSet<Double>();
		detourSet.add(upper);
		detourSet.add(lower);
		detourSet.add(previousSet.attribute(i).getUpperNumericBound());
		detourSet.add(previousSet.attribute(i).getLowerNumericBound());
		switch(detourSet.size()){
		case 1:
			upper=lower=detourSet.first();
			break;
		case 2:
			upper = detourSet.last();
			lower = detourSet.first();
			break;
		case 3:
			upper=lower=detourSet.higher(detourSet.first());
			break;
		default://case 4:
			upper=detourSet.lower(detourSet.last());
			lower=detourSet.higher(detourSet.first());
			break;
		}
		
		p1.setProperty("range", "["+String.valueOf(lower)+","+String.valueOf(upper)+"]");
		ProtectedProperties prop1 = new ProtectedProperties(p1);
		
		localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
	}
	
	return localAtts;
}
 
Example 20
Source File: WARAM.java    From meka with GNU General Public License v3.0
/**
  * Generates the classifier.
  *
  * @param D set of instances serving as training data
  * @exception Exception if the classifier has not been generated 
  * successfully
  */
 public void buildClassifier(Instances D) throws Exception {
     // swap attributes to fit MEKA
	testCapabilities(D);

	int L = D.classIndex();
	int featlength =  (D.numAttributes() -L)*2;
	int numSamples = D.numInstances();
	int classlength = L * 2;

	System.out.println("Using rho="+roa);
	if (numFeatures==-1){
		initARAM( featlength,classlength ,roa , threshold );
	} else {
		if (featlength != numFeatures) {
			return ;
		}
		if (classlength != numClasses) {
			return ;
		}
	}

	// Copy the instances so we don't mess up the original data.
	// Function calls do not deep copy the arguments.
	//Instances m_Instances = new Instances(instances);

	// Use the enumeration of instances to train the classifier.
	// Do any sanity checks (e.g., missing attributes, etc.) here
	// before calling updateClassifier for the actual learning.
	Enumeration enumInsts = D.enumerateInstances();
	while (enumInsts.hasMoreElements()) {
		Instance instance = (Instance) enumInsts.nextElement();
		updateClassifier(instance);
	}
	System.out.println("Training done, used "+numCategories+" neurons.");

	// Alternatively, you can put the training logic within this method,
	// rather than updateClassifier(...). However, if you omit the
	// updateClassifier(...) method, you should remove
	// UpdateableClassifier from the class declaration above.
 }