Java Code Examples for weka.core.Instance#setWeight()

The following examples show how to use weka.core.Instance#setWeight(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GlobalScoreSearchAlgorithm.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Scores a Bayes network by leave-one-out cross validation over the
 * network's own data set (bayesNet.m_Instances).
 * <p>
 * For every instance the sign of its weight is flipped and the network is
 * updated with it, the accuracy increase for that instance is measured, and
 * then the sign is flipped back and the network is updated once more.
 * Note that the weight added to the running total is read while it is
 * still negated.
 *
 * @param bayesNet : Bayes Network containing structure to evaluate
 * @return accuracy (in interval 0..1) measured using leave one out cv.
 * @throws Exception passed on by updateClassifier
 */
public double leaveOneOutCV(BayesNet bayesNet) throws Exception {
	m_BayesNet = bayesNet;
	Instances data = bayesNet.m_Instances;
	bayesNet.estimateCPTs();

	double accuracySum = 0.0;
	double weightSum = 0.0;
	int index = 0;
	while (index < data.numInstances()) {
		Instance heldOut = data.instance(index);

		// flip the sign of the weight and let the network absorb the change
		heldOut.setWeight(-heldOut.weight());
		bayesNet.updateClassifier(heldOut);

		// both terms are taken while the weight is negated
		accuracySum += accuracyIncrease(heldOut);
		weightSum += heldOut.weight();

		// flip the sign back and update the network again
		heldOut.setWeight(-heldOut.weight());
		bayesNet.updateClassifier(heldOut);

		index++;
	}
	return accuracySum / weightSum;
}
 
Example 2
Source File: PartitionMembership.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts one instance into its membership-value representation and
 * pushes the result onto the output queue.
 * <p>
 * The membership values are computed on a copy whose weight has been set
 * to one; the pushed instance carries the weight of the original instance.
 *
 * @param instance the instance to convert
 * @throws Exception if something goes wrong
 */
protected void convertInstance(Instance instance) throws Exception {

  // Work on a unit-weight copy so the caller's instance is left untouched
  Instance unitWeightCopy = (Instance) instance.copy();
  unitWeightCopy.setWeight(1.0);

  // One slot per output attribute; membership values fill the front
  double[] converted = new double[outputFormatPeek().numAttributes()];
  double[] membership = m_partitionGenerator.getMembershipValues(unitWeightCopy);
  System.arraycopy(membership, 0, converted, 0, membership.length);

  // When a class attribute is set, its value goes into the last slot
  boolean hasClass = instance.classIndex() >= 0;
  if (hasClass) {
    int lastSlot = converted.length - 1;
    converted[lastSlot] = instance.classValue();
  }

  SparseInstance result = new SparseInstance(instance.weight(), converted);
  push(result);
}
 
Example 3
Source File: BaggingMLUpdateable.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Feeds one instance to each of the first m_NumIterations ensemble members.
 * For every member a count is drawn via poisson(1.0, random); when
 * m_BagSizePercent is 100 the count is forced to 1. A member is updated
 * only when its count is positive, using a copy of the instance whose
 * weight is the original weight multiplied by the count.
 *
 * @param x the instance to learn from
 * @throws Exception passed on by the member's updateClassifier
 */
@Override
public void updateClassifier(Instance x) throws Exception {

	for (int member = 0; member < m_NumIterations; member++) {
		// Oza-Bag style; the draw is made even when it is overridden below
		int drawn = poisson(1.0, random);

		// Train on all instances when the bag size is 100 percent
		int copies = (m_BagSizePercent == 100) ? 1 : drawn;
		if (copies <= 0) {
			// Nothing to train on for this member
			continue;
		}

		Instance weighted = (Instance) x.copy();
		weighted.setWeight(x.weight() * (double) copies);
		UpdateableClassifier learner = (UpdateableClassifier) m_Classifiers[member];
		learner.updateClassifier(weighted);
	}
}
 
Example 4
Source File: RotationForest.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the instance that the i-th base classifier sees.
 * <p>
 * For every attribute group of classifier i, the grouped attribute values
 * plus the class value are copied into a temporary instance, which is run
 * through that group's projection filter; all filtered values except the
 * last one are appended to the result. The result keeps the weight and
 * class value of the original instance and belongs to m_Headers[i].
 *
 * @param instance the instance to be transformed
 * @param i the base classifier number
 * @return the transformed instance
 * @throws Exception if the instance can't be converted successfully
 */
protected Instance convertInstance( Instance instance, int i )
throws Exception {
  Instance transformed = new DenseInstance( m_Headers[ i ].numAttributes( ) );
  transformed.setWeight( instance.weight() );
  transformed.setDataset( m_Headers[ i ] );

  // next free attribute position in the transformed instance
  int writePos = 0;

  for( int group = 0; group < m_Groups[i].length; group++ ) {
    int groupSize = m_Groups[i][group].length;

    // grouped attribute values first, class value in the extra last slot
    Instance reduced = new DenseInstance( groupSize + 1 );
    for( int pos = 0; pos < groupSize; pos++ ) {
      reduced.setValue( pos, instance.value( m_Groups[i][group][pos] ) );
    }
    reduced.setValue( groupSize, instance.classValue( ) );
    reduced.setDataset( m_ReducedHeaders[ i ][ group ] );

    // push through the group's projection filter and collect the output
    m_ProjectionFilters[i][group].input( reduced );
    Instance projected = m_ProjectionFilters[i][group].output( );
    m_ProjectionFilters[i][group].batchFinished();

    // the last attribute of the filtered instance is not copied
    int source = 0;
    while( source < projected.numAttributes() - 1 ) {
      transformed.setValue( writePos, projected.value( source ) );
      writePos++;
      source++;
    }
  }

  transformed.setClassValue( instance.classValue() );
  return transformed;
}
 
Example 5
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a new bag from the given instances and appends it to the output.
 * <p>
 * Relational and string values of every bag instance are copied first;
 * the bag is then registered as a relation value of output attribute 1
 * and a new instance holding (bag index, relation value, class value) is
 * added to the output with the given weight.
 *
 * @param input       the input dataset
 * @param output      the dataset this bag is added to
 * @param bagInsts    the instances in this bag
 * @param bagIndex    the bagIndex of this bag
 * @param classValue  the associated class value
 * @param bagWeight   the weight of the bag
 */
protected void addBag(
    Instances input,
    Instances output,
    Instances bagInsts,
    int bagIndex,
    double classValue,
    double bagWeight) {

  // copy strings/relational values
  int bagSize = 0;
  while (bagSize < bagInsts.numInstances()) {
    Instance member = bagInsts.instance(bagSize);
    RelationalLocator.copyRelationalValues(
        member, false,
        input, m_InputRelAtts,
        bagInsts, m_BagRelAtts);
    StringLocator.copyStringValues(
        member, false,
        input, m_InputStringAtts,
        bagInsts, m_BagStringAtts);
    bagSize++;
  }

  // register the bag contents with the relational attribute of the output
  int relationValue = output.attribute(1).addRelation(bagInsts);

  // attribute 0: bag index, attribute 1: relation value, attribute 2: class
  Instance bag = new DenseInstance(output.numAttributes());
  bag.setValue(0, bagIndex);
  bag.setValue(1, relationValue);
  bag.setValue(2, classValue);
  bag.setWeight(bagWeight);
  bag.setDataset(output);

  output.add(bag);
}
 
Example 6
Source File: ContractRotationForest.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the instance that the i-th base classifier sees.
 * <p>
 * For every attribute group of classifier i, the grouped attribute values
 * plus the class value are copied into a temporary instance, which is run
 * through that group's projection filter; all filtered values except the
 * last one are appended to the result. The result keeps the weight and
 * class value of the original instance and belongs to headers.get(i).
 *
 * @param instance the instance to be transformed
 * @param i the base classifier number
 * @return the transformed instance
 * @throws Exception if the instance can't be converted successfully
 */
protected Instance convertInstance( Instance instance, int i )
throws Exception {
  Instance transformed = new DenseInstance( headers.get(i).numAttributes( ) );
  transformed.setWeight( instance.weight() );
  transformed.setDataset( headers.get(i) );

  // next free attribute position in the transformed instance
  int writePos = 0;

  int[][] attributeGroups = groups.get(i);
  for( int grp = 0; grp < attributeGroups.length; grp++ ) {
    int[] members = attributeGroups[grp];

    // grouped attribute values first, class value in the extra last slot
    Instance reduced = new DenseInstance( members.length + 1 );
    for( int m = 0; m < members.length; m++ ) {
      reduced.setValue( m, instance.value( members[m] ) );
    }
    reduced.setValue( members.length, instance.classValue( ) );
    reduced.setDataset( reducedHeaders.get(i)[ grp ] );

    // push through the group's projection filter and collect the output
    Filter groupFilter = projectionFilters.get(i)[ grp ];
    groupFilter.input( reduced );
    Instance projected = groupFilter.output( );
    groupFilter.batchFinished();

    // the last attribute of the filtered instance is not copied
    int source = 0;
    while( source < projected.numAttributes() - 1 ) {
      transformed.setValue( writePos, projected.value( source ) );
      writePos++;
      source++;
    }
  }

  transformed.setClassValue( instance.classValue() );
  return transformed;
}
 
Example 7
Source File: LPS.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Distributes the instances over one subset per entry of m_Prop,
 * according to the split on m_Attribute.
 * <p>
 * An instance whose split attribute is missing is copied into every
 * subset with a positive proportion, each copy weighted by that
 * proportion. Otherwise a nominal attribute sends the instance to the
 * subset indexed by its value, and a numeric attribute sends it to subset
 * 0 when the value is below m_SplitPoint and to subset 1 otherwise.
 *
 * @param data the data to work with
 * @return the subsets of instances
 * @throws Exception if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

  // One empty subset per branch, each with room for all instances
  Instances[] subsets = new Instances[m_Prop.length];
  for (int branch = 0; branch < subsets.length; branch++) {
    subsets[branch] = new Instances(data, data.numInstances());
  }

  for (int row = 0; row < data.numInstances(); row++) {
    Instance inst = data.instance(row);

    if (inst.isMissing(m_Attribute)) {
      // Missing value: spread weighted copies over the branches
      for (int branch = 0; branch < m_Prop.length; branch++) {
        if (m_Prop[branch] > 0) {
          Instance fraction = (Instance) inst.copy();
          fraction.setWeight(m_Prop[branch] * inst.weight());
          subsets[branch].add(fraction);
        }
      }
    } else if (data.attribute(m_Attribute).isNominal()) {
      // Nominal attribute: the value itself is the branch index
      int branch = (int) inst.value(m_Attribute);
      subsets[branch].add(inst);
    } else if (data.attribute(m_Attribute).isNumeric()) {
      // Numeric attribute: below the split point goes left, the rest right
      if (inst.value(m_Attribute) < m_SplitPoint) {
        subsets[0].add(inst);
      } else {
        subsets[1].add(inst);
      }
    } else {
      throw new IllegalArgumentException("Unknown attribute type");
    }
  }

  // Save memory
  for (Instances subset : subsets) {
    subset.compactify();
  }

  return subsets;
}
 
Example 8
Source File: RacedIncrementalLogitBoost.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/** 
    * Performs a boosting iteration, returning a new model for the committee.
    * <p>
    * A copy of the data (without instances whose class is missing) gets its
    * class attribute replaced by a numeric 'pseudo class' attribute. For
    * each class j in turn, every instance of that copy is given a
    * pseudo-class value z and a weight derived from the current committee
    * output, and the j-th copy of m_Classifier is built on it (or on a
    * weight-based resample when m_UseResampling is set).
    * 
    * @param data the data to boost on
    * @return the new model: an array with one classifier per class
    * @throws Exception if anything goes wrong
    */
   protected Classifier[] boost(Instances data) throws Exception {
     
     // one classifier per class, requested from m_Classifier
     Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses);
     
     // Create a copy of the data with the class transformed into numeric
     Instances boostData = new Instances(data);
     boostData.deleteWithMissingClass();
     int numInstances = boostData.numInstances();
     
     // Temporarily unset the class index
     // (the class attribute is swapped for a new attribute named
     // 'pseudo class' at the same position, which then becomes the class)
     int classIndex = data.classIndex();
     boostData.setClassIndex(-1);
     boostData.deleteAttributeAt(classIndex);
     boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
     boostData.setClassIndex(classIndex);
     double [][] trainFs = new double [numInstances][m_NumClasses];
     double [][] trainYs = new double [numInstances][m_NumClasses];
     // trainYs[i][j] is 1 when the i-th instance has class j, otherwise 0.
     // i indexes boostData, k indexes the original data; k skips instances
     // with a missing class because those were deleted from boostData.
     for (int j = 0; j < m_NumClasses; j++) {
for (int i = 0, k = 0; i < numInstances; i++, k++) {
  while (data.instance(k).classIsMissing()) k++;
  trainYs[i][j] = (data.instance(k).classValue() == j) ? 1 : 0;
}
     }
     
     // Evaluate / increment trainFs from the classifiers
     // Each entry of m_models is an array with one classifier per class.
     // Its per-class predictions are centred on their mean over the
     // classes and scaled by (m_NumClasses-1)/m_NumClasses before being
     // added to trainFs.
     for (int x = 0; x < m_models.size(); x++) {
for (int i = 0; i < numInstances; i++) {
  double [] pred = new double [m_NumClasses];
  double predSum = 0;
  Classifier[] model = (Classifier[]) m_models.elementAt(x);
  for (int j = 0; j < m_NumClasses; j++) {
    pred[j] = model[j].classifyInstance(boostData.instance(i));
    predSum += pred[j];
  }
  // predSum now holds the mean prediction over the classes
  predSum /= m_NumClasses;
  for (int j = 0; j < m_NumClasses; j++) {
    trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) 
      / m_NumClasses;
  }
}
     }

     // boostData is shared by all classes: its pseudo-class values and
     // weights are overwritten for each class j before that class's
     // classifier is built.
     for (int j = 0; j < m_NumClasses; j++) {

// Set instance pseudoclass and weights
for (int i = 0; i < numInstances; i++) {
  // RtoP is defined elsewhere; presumably it turns the F values of
  // instance i into a probability for class j -- TODO confirm
  double p = RtoP(trainFs[i], j);
  Instance current = boostData.instance(i);
  double z, actual = trainYs[i][j];
  if (actual == 1) {
    z = 1.0 / p;
    if (z > Z_MAX) { // threshold
      z = Z_MAX;
    }
  } else if (actual == 0) {
    z = -1.0 / (1.0 - p);
    if (z < -Z_MAX) { // threshold
      z = -Z_MAX;
    }
  } else {
    // only reachable if actual is neither 1 nor 0; trainYs is only ever
    // assigned 1 or 0 above
    z = (actual - p) / (p * (1 - p));
  }

  // the weight is computed from the (possibly thresholded) z and scaled
  // by the number of instances when stored
  double w = (actual - p) / z;
  current.setValue(classIndex, z);
  current.setWeight(numInstances * w);
}

// By default the classifier is built on the weighted data itself; with
// m_UseResampling it is built on the result of resampleWithWeights,
// called with the weights just assigned.
Instances trainData = boostData;
if (m_UseResampling) {
  double[] weights = new double[boostData.numInstances()];
  for (int kk = 0; kk < weights.length; kk++) {
    weights[kk] = boostData.instance(kk).weight();
  }
  trainData = boostData.resampleWithWeights(m_RandomInstance, 
					    weights);
}

// Build the classifier
newModel[j].buildClassifier(trainData);
     }      
     
     return newModel;
   }
 
Example 9
Source File: RandomTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Partitions the data into one subset per entry of m_Prop, following the
 * split on m_Attribute.
 * <p>
 * Instances with a known split value go to exactly one subset: the one
 * indexed by the value for a nominal attribute, or subset 0 / 1 for a
 * numeric value below / not below m_SplitPoint. Instances with a missing
 * split value are copied into every subset whose proportion is positive,
 * with the copy's weight scaled by that proportion.
 *
 * @param data the data to work with
 * @return the subsets of instances
 * @throws Exception if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

  final int numSubsets = m_Prop.length;

  // Allocate the (still empty) subsets
  Instances[] subsets = new Instances[numSubsets];
  for (int s = 0; s < numSubsets; s++) {
    subsets[s] = new Instances(data, data.numInstances());
  }

  // Assign every instance
  for (int n = 0; n < data.numInstances(); n++) {
    Instance inst = data.instance(n);

    // Known split value: the instance belongs to a single subset
    if (!inst.isMissing(m_Attribute)) {
      int target;
      if (data.attribute(m_Attribute).isNominal()) {
        target = (int) inst.value(m_Attribute);
      } else if (data.attribute(m_Attribute).isNumeric()) {
        target = (inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1;
      } else {
        throw new IllegalArgumentException("Unknown attribute type");
      }
      subsets[target].add(inst);
      continue;
    }

    // Missing split value: hand out proportionally weighted copies
    for (int s = 0; s < m_Prop.length; s++) {
      if (!(m_Prop[s] > 0)) {
        continue;
      }
      Instance share = (Instance) inst.copy();
      share.setWeight(m_Prop[s] * inst.weight());
      subsets[s].add(share);
    }
  }

  // Save memory
  for (int s = 0; s < m_Prop.length; s++) {
    subsets[s].compactify();
  }

  return subsets;
}
 
Example 10
Source File: NBTreeSplit.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Creates split on enumerated attribute.
  * <p>
  * A C45Split is built on the attribute first; if it yields no subsets the
  * method returns without touching m_errors or m_numSubsets. Otherwise the
  * training instances are distributed over m_complexityIndex training
  * sets: an instance that the C4.5 split assigns to a subset is copied
  * there, any other instance is copied into every set with its weight
  * scaled by the split's weights (or divided evenly when the number of
  * weights does not match m_complexityIndex). Each set with at least five
  * instances is discretized and evaluated with
  * NBTreeNoSplit.crossValidate; the instances of smaller sets are counted
  * as errors. m_numSubsets is only set when at least two sets were large
  * enough.
  * <p>
  * Failures while distributing instances propagate to the caller instead
  * of terminating the JVM.
  *
  * @param trainInstances the training instances to split
  * @exception Exception if something goes wrong
  */
 private void handleEnumeratedAttribute(Instances trainInstances)
      throws Exception {

   m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true);
   m_c45S.buildClassifier(trainInstances);
   if (m_c45S.numSubsets() == 0) {
     return;
   }
   m_errors = 0;

   Instances[] trainingSets = new Instances[m_complexityIndex];
   for (int i = 0; i < m_complexityIndex; i++) {
     trainingSets[i] = new Instances(trainInstances, 0);
   }

   // populate the subsets
   for (int i = 0; i < trainInstances.numInstances(); i++) {
     Instance instance = trainInstances.instance(i);
     int subset = m_c45S.whichSubset(instance);
     if (subset > -1) {
       trainingSets[subset].add((Instance) instance.copy());
     } else {
       // no single subset: give every training set a re-weighted copy
       double[] weights = m_c45S.weights(instance);
       for (int j = 0; j < m_complexityIndex; j++) {
         Instance temp = (Instance) instance.copy();
         if (weights.length == m_complexityIndex) {
           temp.setWeight(temp.weight() * weights[j]);
         } else {
           temp.setWeight(temp.weight() / m_complexityIndex);
         }
         trainingSets[j].add(temp);
       }
     }
   }

   // a fixed seed keeps randomisation and cross-validation repeatable
   Random r = new Random(1);
   int minNumCount = 0;
   for (int i = 0; i < m_complexityIndex; i++) {
     if (trainingSets[i].numInstances() >= 5) {
       minNumCount++;

       // Discretize the sets
       Discretize disc = new Discretize();
       disc.setInputFormat(trainingSets[i]);
       trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

       trainingSets[i].randomize(r);
       trainingSets[i].stratify(5);
       NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
       fullModel.buildClassifier(trainingSets[i]);

       // add the errors for this branch of the split
       m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
     } else {
       // if fewer than min obj then just count them as errors
       for (int j = 0; j < trainingSets[i].numInstances(); j++) {
         m_errors += trainingSets[i].instance(j).weight();
       }
     }
   }

   // Check if there are at least five instances in at least two of the
   // subsets.
   if (minNumCount > 1) {
     m_numSubsets = m_complexityIndex;
   }
 }
 
Example 11
Source File: NBTreeSplit.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Creates split on numeric attribute.
  * <p>
  * A C45Split is built on the attribute first; if it yields no subsets the
  * method returns immediately. Otherwise the training instances are
  * distributed over the training sets: an instance that the C4.5 split
  * assigns to a subset is copied there, any other instance is copied into
  * every set with its weight scaled by the split's weights (or divided
  * evenly when the number of weights does not match m_complexityIndex).
  * Each set with more than five instances is discretized and evaluated
  * with NBTreeNoSplit.crossValidate; the instances of the remaining sets
  * are counted as errors.
  *
  * @param trainInstances the training instances to split
  * @exception Exception if something goes wrong
  */
 private void handleNumericAttribute(Instances trainInstances)
      throws Exception {

   m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true);
   m_c45S.buildClassifier(trainInstances);
   if (m_c45S.numSubsets() == 0) {
     return;
   }
   m_errors = 0;

   // Only the first two slots are allocated.
   // NOTE(review): the loops below run up to m_complexityIndex, so this
   // presumably relies on m_complexityIndex being 2 here -- confirm.
   Instances[] trainingSets = new Instances[m_complexityIndex];
   trainingSets[0] = new Instances(trainInstances, 0);
   trainingSets[1] = new Instances(trainInstances, 0);

   // populate the subsets
   int total = trainInstances.numInstances();
   for (int row = 0; row < total; row++) {
     Instance current = trainInstances.instance(row);
     int branch = m_c45S.whichSubset(current);
     if (branch == -1) {
       // no single subset: give every training set a re-weighted copy
       double[] shares = m_c45S.weights(current);
       for (int j = 0; j < m_complexityIndex; j++) {
         Instance part = (Instance) current.copy();
         double scaled = (shares.length == m_complexityIndex)
           ? part.weight() * shares[j]
           : part.weight() / m_complexityIndex;
         part.setWeight(scaled);
         trainingSets[j].add(part);
       }
     } else {
       trainingSets[branch].add((Instance) current.copy());
     }
   }

   // a fixed seed keeps randomisation and cross-validation repeatable
   Random rng = new Random(1);
   int bigEnough = 0;
   for (int s = 0; s < m_complexityIndex; s++) {
     // NOTE(review): strictly more than five instances are required
     // here -- confirm that this threshold is intended.
     if (!(trainingSets[s].numInstances() > 5)) {
       // too small to evaluate: every instance counts as an error
       for (int j = 0; j < trainingSets[s].numInstances(); j++) {
         m_errors += trainingSets[s].instance(j).weight();
       }
       continue;
     }

     bigEnough++;

     // Discretize the sets
     Discretize discretizer = new Discretize();
     discretizer.setInputFormat(trainingSets[s]);
     trainingSets[s] = Filter.useFilter(trainingSets[s], discretizer);

     trainingSets[s].randomize(rng);
     trainingSets[s].stratify(5);
     NaiveBayesUpdateable branchModel = new NaiveBayesUpdateable();
     branchModel.buildClassifier(trainingSets[s]);

     // add the errors for this branch of the split
     m_errors += NBTreeNoSplit.crossValidate(branchModel, trainingSets[s], rng);
   }

   // Check if minimum number of Instances in at least two
   // subsets.
   if (bigEnough > 1) {
     m_numSubsets = m_complexityIndex;
   }
 }
 
Example 12
Source File: DTNB.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Classifies an instance for internal leave one out cross validation
  * of feature sets.
  * <p>
  * The instance's own weight is subtracted from a copy of the class counts
  * stored in the table for its feature values, and temporarily from the
  * class prior counts. When m_NB is set, the instance is additionally
  * taken out of and put back into the naive Bayes model by updating it
  * with a negated weight, and the two distributions are combined in log
  * space. The resulting distribution is passed to m_evaluation.
  *
  * @param instance instance to be "left out" and classified
  * @param instA feature values of the selected features for the instance
  * @return the classification of the instance (Utils.maxIndex of the
  * resulting distribution)
  * @throws Exception if something goes wrong
  * @throws Error if the table has no entry for the given feature values
  */
 double evaluateInstanceLeaveOneOut(Instance instance, double [] instA)
 throws Exception {

   DecisionTableHashKey thekey;
   double [] tempDist;
   double [] normDist;

   thekey = new DecisionTableHashKey(instA);

   // if this one is not in the table
   if ((tempDist = (double [])m_entries.get(thekey)) == null) {
     throw new Error("This should never happen!");
   } else {
     // work on a copy so that the stored table entry stays untouched,
     // then take this instance's weight out of its own class count
     normDist = new double [tempDist.length];
     System.arraycopy(tempDist,0,normDist,0,tempDist.length);
     normDist[(int)instance.classValue()] -= instance.weight();

     // update the table
     // first check to see if the class counts are all zero now
     // NOTE(review): the test below asks whether any count is greater
     // than 1.0 (Utils.gr), not merely non-zero -- confirm that this is
     // the intended threshold.
     boolean ok = false;
     for (int i=0;i<normDist.length;i++) {
if (Utils.gr(normDist[i],1.0)) {
  ok = true;
  break;
}
     }

     // downdate the class prior counts
     // (temporary: the same amount is added back further down, once a
     // normalized clone has been taken)
     m_classPriorCounts[(int)instance.classValue()] -= 
instance.weight(); 
     double [] classPriors = m_classPriorCounts.clone();
     Utils.normalize(classPriors);
     // Fall back to the class priors when no count passed the check.
     // In that case normDist and classPriors refer to the same array.
     if (!ok) { // majority class	
normDist = classPriors;
     } else {
Utils.normalize(normDist);
     }

     // restore the class prior counts
     m_classPriorCounts[(int)instance.classValue()] += 
     instance.weight();

     if (m_NB != null){
// downdate NaiveBayes

// Negating the weight and updating takes the instance out of the model;
// negating it again and updating a second time puts it back once the
// distribution has been obtained.
instance.setWeight(-instance.weight());
m_NB.updateClassifier(instance);
double [] nbDist = m_NB.distributionForInstance(instance);
instance.setWeight(-instance.weight());
m_NB.updateClassifier(instance);

// Combine in log space: log(normDist) - log(classPriors) + log(nbDist).
// When normDist aliases classPriors each element is still read before
// it is overwritten.
for (int i = 0; i < normDist.length; i++) {
  normDist[i] = (Math.log(normDist[i]) - Math.log(classPriors[i]));
  normDist[i] += Math.log(nbDist[i]);
}
normDist = Utils.logs2probs(normDist);
// Utils.normalize(normDist);
     }

     // record the prediction; the AUC measure uses the recording variant
     if (m_evaluationMeasure == EVAL_AUC) {
m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance);						
     } else {
m_evaluation.evaluateModelOnce(normDist, instance);
     }
     return Utils.maxIndex(normDist);
   }
 }
 
Example 13
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Signify that this batch of input to the filter is finished. 
 * If the filter requires all instances prior to filtering,
 * output() may now be called to retrieve the filtered instances.
 * <p>
 * The input is sorted on attribute 0 (the bag ID); consecutive instances
 * with the same bag ID are collected into one bag, whose weight is the sum
 * of the weights of its instances and whose class value is taken from the
 * first instance of the bag. An empty batch produces no bags.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  Instances input = getInputFormat();

  // The bag bookkeeping is seeded from the first instance, so an empty
  // batch must skip the conversion instead of dereferencing instance(0).
  if (input.numInstances() > 0) {
    input.sort(0);   // make sure that bagID is sorted
    Instances output = getOutputFormat();
    Instances bagInsts = output.attribute(1).relation();

    // Scratch instance that is refilled for every input instance
    // (relies on Instances.add storing a copy -- TODO confirm)
    Instance inst = new DenseInstance(bagInsts.numAttributes());
    inst.setDataset(bagInsts);

    double bagIndex   = input.instance(0).value(0);
    double classValue = input.instance(0).classValue(); 
    double bagWeight  = 0.0;

    // Convert pending input instances
    for (int i = 0; i < input.numInstances(); i++) {
      double currentBagIndex = input.instance(i).value(0);

      // copy the propositional instance value, except the bagIndex and the class value
      for (int j = 0; j < input.numAttributes() - 2; j++) {
        inst.setValue(j, input.instance(i).value(j + 1));
      }
      inst.setWeight(input.instance(i).weight());

      if (currentBagIndex == bagIndex) {
        // still inside the current bag
        bagInsts.add(inst);
        bagWeight += inst.weight();
      } else {
        // bag ID changed: emit the finished bag and start a new one
        addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

        bagInsts   = bagInsts.stringFreeStructure();  
        bagInsts.add(inst);
        bagIndex   = currentBagIndex;
        classValue = input.instance(i).classValue();
        bagWeight  = inst.weight();
      }
    }

    // reach the last instance, create and add the last bag
    addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

    if (getRandomize()) {
      output.randomize(new Random(getSeed()));
    }

    for (int i = 0; i < output.numInstances(); i++) {
      push(output.instance(i));
    }
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  m_FirstBatchDone = true;

  return (numPendingOutput() != 0);
}
 
Example 14
Source File: NSR.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts a dataset whose first L attributes are class labels into a
 * dataset with a single nominal class attribute named "CLASS" at index 0,
 * whose values are the label combinations that remain after pruning with
 * m_P.
 * <p>
 * An instance whose label combination survived pruning gets that
 * combination as its class value. Otherwise, when m_N is positive, one
 * copy of the instance is appended for each subset returned by
 * SuperLabelUtils.getTopNSubsets, with weight 1 / (number of subsets).
 * Instances left without a class value are removed at the end. The header
 * of the result is stored in m_InstancesTemplate.
 *
 * @param D the original dataset
 * @param L the number of label attributes
 * @return the converted dataset
 * @throws Exception if the conversion fails
 */
public Instances convertInstances(Instances D, int L) throws Exception {

		//Gather combinations
		HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
		if (getDebug()) {
			System.out.println("Found "+distinctCombinations.size()+" unique combinations");
		}

		//Prune combinations
		MLUtils.pruneCountHashMap(distinctCombinations,m_P);
		if (getDebug()) {
			System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P);
		}

		// Remove all class attributes
		Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L));
		// Add a new class attribute whose values are the remaining combinations
		D_.insertAttributeAt(new Attribute("CLASS", new ArrayList<String>(distinctCombinations.keySet())),0);
		D_.setClassIndex(0);

		//Add class values
		// (the bound is the size of D: copies appended to D_ below are not revisited)
		for (int i = 0; i < D.numInstances(); i++) {
			String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L));
			if (distinctCombinations.containsKey(y)) {
				// its class value exists: use it directly
				D_.instance(i).setClassValue(y);
			} else if (m_N > 0) {
				// decompose into subsets and add one equally weighted copy per subset
				String[] d_subsets = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
				for (String s : d_subsets) {
					Instance copy = (Instance) D_.instance(i).copy();
					copy.setClassValue(s);
					copy.setWeight(1.0 / d_subsets.length);
					D_.add(copy);
				}
			}
		}

		// remove with missing class
		D_.deleteWithMissingClass();

		// keep the header of new dataset for classification
		m_InstancesTemplate = new Instances(D_, 0);

		if (getDebug()) {
			System.out.println(""+D_);
		}

		return D_;
	}