Java Code Examples for weka.core.Instance#valueSparse()

The following examples show how to use weka.core.Instance#valueSparse(). Each example notes, directly above the code, the project and source file it was taken from.
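
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the attribute names and values are invented for illustration) of how valueSparse() pairs with numValues() and index() to visit only the stored entries of a SparseInstance:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

public class ValueSparseDemo {

  public static void main(String[] args) {
    // A small header with three numeric attributes, needed only so the
    // instance can resolve attribute information.
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("a"));
    atts.add(new Attribute("b"));
    atts.add(new Attribute("c"));
    Instances data = new Instances("demo", atts, 1);

    // A sparse instance stores only the non-zero values (here a=2.0 and c=5.0).
    Instance inst = new SparseInstance(1.0, new double[]{2.0, 0.0, 5.0});
    inst.setDataset(data);

    // numValues() counts only the stored entries; index(i) maps the i-th
    // stored entry back to its attribute index, and valueSparse(i) returns
    // the stored value itself.
    for (int i = 0; i < inst.numValues(); i++) {
      System.out.println("attribute " + inst.index(i) + " = " + inst.valueSparse(i));
    }
  }
}

Because the loop touches only the stored values, this pattern is what the classifiers below rely on to stay fast on high-dimensional sparse data such as bag-of-words text.
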
Example 1
Source File: Winnow.java    From tsml with GNU General Public License v3.0
/** 
  * Computes the actual prediction for a prefiltered instance
  *
  * @param inst the instance for which prediction is to be computed
  * @return the prediction
  * @throws Exception if something goes wrong
  */
 private double makePrediction(Instance inst) throws Exception {

   double total = 0;

   int n1 = inst.numValues(); int classIndex = m_Train.classIndex();

   for(int i=0;i<n1;i++) {
     if(inst.index(i) != classIndex && inst.valueSparse(i)==1) {
       total += m_predPosVector[inst.index(i)];
     }
   }
   
   if(total > m_actualThreshold) {
     return(1);
   } else {
     return(0);
   }
 }
 
Example 2
Source File: SPegasos.java    From tsml with GNU General Public License v3.0
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double result = 0;

  int n1 = inst1.numValues();
  int n2 = weights.length - 1; 

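  // Two-pointer merge: p1 walks the stored (non-zero) entries of the sparse
  // instance, p2 walks the dense weight vector; the class attribute and
  // missing values are skipped.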
  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
    int ind1 = inst1.index(p1);
    int ind2 = p2;
    if (ind1 == ind2) {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        result += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    } else if (ind1 > ind2) {
      p2++;
    } else {
      p1++;
    }
  }
  return (result);
}
 
Example 3
Source File: NaiveBayesMultinomial.java    From tsml with GNU General Public License v3.0
/**
  * log(N!) + (for all the words)(log(Pi^ni) - log(ni!))
  *  
  *  where 
  *      N is the total number of words
  *      Pi is the probability of obtaining word i
  *      ni is the number of times the word at index i occurs in the document
  *
  * @param inst       The instance to be classified
  * @param classIndex The index of the class we are calculating the probability with respect to
  *
  * @return The log of the probability of the document occurring given the class
  */
   
 private double probOfDocGivenClass(Instance inst, int classIndex)
 {
   double answer = 0;
   //double totalWords = 0; //no need as we are not calculating the factorial at all.

   double freqOfWordInDoc;  //should be double
    for(int i = 0; i < inst.numValues(); i++)
      if(inst.index(i) != inst.classIndex()) {
        freqOfWordInDoc = inst.valueSparse(i);
        //totalWords += freqOfWordInDoc;
        answer += (freqOfWordInDoc * m_probOfWordGivenClass[classIndex][inst.index(i)]); //- lnFactorial(freqOfWordInDoc));
      }

   //answer += lnFactorial(totalWords);//The factorial terms don't make 
   //any difference to the classifier's
   //accuracy, so not needed.

   return answer;
 }
 
Example 4
Source File: SGD.java    From tsml with GNU General Public License v3.0
protected static double dotProd(Instance inst1, double[] weights,
    int classIndex) {
  double result = 0;

  int n1 = inst1.numValues();
  int n2 = weights.length - 1;

  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
    int ind1 = inst1.index(p1);
    int ind2 = p2;
    if (ind1 == ind2) {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        result += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    } else if (ind1 > ind2) {
      p2++;
    } else {
      p1++;
    }
  }
  return (result);
}
 
Example 5
Source File: RandomProjection.java    From tsml with GNU General Public License v3.0
/**
 * computes one random projection for a given instance (skip missing values)
 *
 * @param rpIndex     the offset (index) of the new random projection attribute
 * @param classIndex  classIndex of the input instance
 * @param instance    the instance to convert
 * @return    the random sum
 */

protected double computeRandomProjection(int rpIndex, int classIndex, Instance instance) {

  double sum = 0.0;
  for(int i = 0; i < instance.numValues(); i++) {
    int index = instance.index(i);
    if (index != classIndex) {
      double value = instance.valueSparse(i);
      if (!Utils.isMissingValue(value)) {
        sum += m_rmatrix[rpIndex][index] * value;
      }
    }
  }
  return sum;
}
 
Example 6
Source File: DMNBtext.java    From tsml with GNU General Public License v3.0
/**
 * Calculates the (unnormalised) log probability that the given instance
 * belongs to the target class.
 *
 * @param ins the instance to be classified
 * @return the log probability for the target class
 * @exception Exception if there is a problem generating the prediction
 */
public double getLogProbForTargetClass(Instance ins) throws Exception {

  double probLog = m_classRatio;
  for (int a = 0; a < ins.numValues(); a++) {
    if (ins.index(a) != m_classIndex )
      {

        if (!m_MultinomialWord) {
          if (ins.valueSparse(a) > 0) {
            probLog += m_coefficient[ins.index(a)] -
              m_wordRatio;
          }
        } else {
          probLog += ins.valueSparse(a) *
            (m_coefficient[ins.index(a)] - m_wordRatio);
        }
      }
  }
  return probLog;
}
 
Example 7
Source File: CachedKernel.java    From tsml with GNU General Public License v3.0
/**
  * Calculates a dot product between two instances
  * 
  * @param inst1	the first instance
  * @param inst2	the second instance
  * @return 		the dot product of the two instances.
  * @throws Exception	if an error occurs
  */
 protected final double dotProd(Instance inst1, Instance inst2)
   throws Exception {

   double result = 0;

   // we can do a fast dot product
   int n1 = inst1.numValues();
   int n2 = inst2.numValues();
   int classIndex = m_data.classIndex();
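    // Merge the stored (non-zero) values of the two sparse instances in
    // attribute-index order; only attributes present in both contribute.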
   for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
     int ind1 = inst1.index(p1);
     int ind2 = inst2.index(p2);
     if (ind1 == ind2) {
        if (ind1 != classIndex) {
          result += inst1.valueSparse(p1) * inst2.valueSparse(p2);
        }
        p1++;
        p2++;
      } else if (ind1 > ind2) {
        p2++;
      } else {
        p1++;
     }
   }
   return (result);
 }
 
Example 8
Source File: Winnow.java    From tsml with GNU General Public License v3.0
/**
  * Actual update routine for prefiltered instances
  *
  * @param inst the instance to update the classifier with
  * @throws Exception if something goes wrong
  */
 private void actualUpdateClassifier(Instance inst) throws Exception {
   
   double posmultiplier;

   if (!inst.classIsMissing()) {
     double prediction = makePrediction(inst);
  
     if (prediction != inst.classValue()) {
        m_Mistakes++;

        if(prediction == 0) {
          /* false neg: promote */
          posmultiplier = m_Alpha;
        } else {
          /* false pos: demote */
          posmultiplier = m_Beta;
        }
        int n1 = inst.numValues(); int classIndex = m_Train.classIndex();
        for(int l = 0; l < n1; l++) {
          if(inst.index(l) != classIndex && inst.valueSparse(l) == 1) {
            m_predPosVector[inst.index(l)] *= posmultiplier;
          }
        }
        //Utils.normalize(m_predPosVector);
     }
   }
   else {
     System.out.println("CLASS MISSING");
   }
 }
 
Example 9
Source File: VotedPerceptron.java    From tsml with GNU General Public License v3.0
/** 
 * Computes the inner product of two instances
 * 
 * @param i1 first instance
 * @param i2 second instance
 * @return the inner product
 * @throws Exception if computation fails
 */
private double innerProduct(Instance i1, Instance i2) throws Exception {

  // we can do a fast dot product
  double result = 0;
  int n1 = i1.numValues(); int n2 = i2.numValues();
  int classIndex = m_Train.classIndex();
  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
      int ind1 = i1.index(p1);
      int ind2 = i2.index(p2);
      if (ind1 == ind2) {
          if (ind1 != classIndex) {
              result += i1.valueSparse(p1) *
                        i2.valueSparse(p2);
          }
          p1++; p2++;
      } else if (ind1 > ind2) {
          p2++;
      } else {
          p1++;
      }
  }
  result += 1.0;
  
  if (m_Exponent != 1) {
    return Math.pow(result, m_Exponent);
  } else {
    return result;
  }
}
 
Example 10
Source File: Utils.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Converts a set of training instances to a DataSet. Assumes that the instances have been
 * suitably preprocessed - i.e. missing values replaced and nominals converted to binary/numeric.
 * Also assumes that the class index has been set
 *
 * @param insts the instances to convert
 * @return a DataSet
 */
public static DataSet instancesToDataSet(Instances insts) {
  INDArray data = Nd4j.zeros(insts.numInstances(), insts.numAttributes() - 1);
  INDArray outcomes = Nd4j.zeros(insts.numInstances(), insts.numClasses());

  for (int i = 0; i < insts.numInstances(); i++) {
    double[] independent = new double[insts.numAttributes() - 1];
    double[] dependent = new double[insts.numClasses()];
    Instance current = insts.instance(i);
    for (int j = 0; j < current.numValues(); j++) {
      int index = current.index(j);
      double value = current.valueSparse(j);

      if (index < insts.classIndex()) {
        independent[index] = value;
      } else if (index > insts.classIndex()) {
        // Shift by -1, since the class is left out from the feature matrix and put into a separate
        // outcomes matrix
        independent[index - 1] = value;
      }
    }

    // Set class values
    if (insts.numClasses() > 1) { // Classification
      final int oneHotIdx = (int) current.classValue();
      dependent[oneHotIdx] = 1.0;
    } else { // Regression (currently only single class)
      dependent[0] = current.classValue();
    }

    INDArray row = Nd4j.create(independent);
    data.putRow(i, row);
    outcomes.putRow(i, Nd4j.create(dependent));
  }
  return new DataSet(data, outcomes);
}
 
Example 11
Source File: ComplementNaiveBayes.java    From tsml with GNU General Public License v3.0
/**
    * Classifies a given instance. <p>
    *
    * The classification rule is: <br>
    *     MinC(forAllWords(ti*Wci)) <br>
    *      where <br>
    *         ti is the frequency of word i in the given instance <br>
    *         Wci is the weight of word i in Class c. <p>
    *
    * For more information see section 4.4 of the paper mentioned above
    * in the classifiers description.
    *
    * @param instance the instance to classify
    * @return the index of the class to which the instance is most likely to belong.
    * @throws Exception if the classifier has not been built yet.
    */
   public double classifyInstance(Instance instance) throws Exception {

       if(wordWeights==null)
           throw new Exception("Error. The classifier has not been built "+
                               "properly.");
       
        double[] valueForClass = new double[numClasses];
        double sumOfClassValues = 0;

        for(int c = 0; c < numClasses; c++) {
            double sumOfWordValues = 0;
            for(int w = 0; w < instance.numValues(); w++) {
                if(instance.index(w) != instance.classIndex()) {
                    double freqOfWordInDoc = instance.valueSparse(w);
                    sumOfWordValues += freqOfWordInDoc *
                                       wordWeights[c][instance.index(w)];
                }
            }
            //valueForClass[c] = Math.log(probOfClass[c]) - sumOfWordValues;
            valueForClass[c] = sumOfWordValues;
            sumOfClassValues += valueForClass[c];
        }

        int minidx = 0;
        for(int i = 0; i < numClasses; i++)
            if(valueForClass[i] < valueForClass[minidx])
                minidx = i;

        return minidx;
   }
 
Example 12
Source File: DMNBtext.java    From tsml with GNU General Public License v3.0
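/**
 * Updates the per-class word counts, per-word coefficients and class
 * distribution statistics with a single training instance.
 */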
public void updateClassifier(Instance ins) throws
  Exception {
  //c=0 is 1, which is the target class, and c=1 is the rest
  int classIndex = 0;
  if (ins.value(ins.classIndex()) != m_targetClass)
    classIndex = 1;
  double prob = 1 - distributionForInstance(ins)[classIndex];

  double weight = prob * ins.weight();

  for (int a = 0; a < ins.numValues(); a++) {
    if (ins.index(a) != m_classIndex )
      {

        if (!m_MultinomialWord) {
          if (ins.valueSparse(a) > 0) {
            m_wordsPerClass[classIndex] += weight;
            m_perWordPerClass[classIndex][ins.index(a)] += weight;
          }
        } else {
          double t = ins.valueSparse(a) * weight;
          m_wordsPerClass[classIndex] += t;
          m_perWordPerClass[classIndex][ins.index(a)] += t;
        }
        //update coefficient
        m_coefficient[ins.index(a)] =
          Math.log(m_perWordPerClass[0][ins.index(a)] /
                   m_perWordPerClass[1][ins.index(a)]);
      }
  }
  m_wordRatio = Math.log(m_wordsPerClass[0] / m_wordsPerClass[1]);
  m_classDistribution[classIndex] += weight;
  m_classRatio = Math.log(m_classDistribution[0] /
                          m_classDistribution[1]);
}
 
Example 13
Source File: NaiveBayesMultinomialUpdateable.java    From tsml with GNU General Public License v3.0
/**
  * Calculates the class membership probabilities for the given test
  * instance.
  *
  * @param instance 	the instance to be classified
  * @return 		predicted class probability distribution
  * @throws Exception 	if there is a problem generating the prediction
  */
 public double[] distributionForInstance(Instance instance) throws Exception {
   double[] probOfClassGivenDoc = new double[m_numClasses];

   // calculate the array of log(Pr[D|C])
   double[] logDocGivenClass = new double[m_numClasses];
   for (int c = 0; c < m_numClasses; c++) {
     logDocGivenClass[c] += Math.log(m_probOfClass[c]);
     int allWords = 0;
     for (int i = 0; i < instance.numValues(); i++) {
        if (instance.index(i) == instance.classIndex())
          continue;
        double frequencies = instance.valueSparse(i);
        allWords += frequencies;
        logDocGivenClass[c] += frequencies *
          Math.log(m_probOfWordGivenClass[c][instance.index(i)]);
     }
     logDocGivenClass[c] -= allWords * Math.log(m_wordsPerClass[c]);
   }

   double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
   for (int i = 0; i < m_numClasses; i++)
     probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max);

   Utils.normalize(probOfClassGivenDoc);

   return probOfClassGivenDoc;
 }
 
Example 14
Source File: ReliefFAttributeEval.java    From tsml with GNU General Public License v3.0
/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new instance.
 *
 * @param instance the new instance
 */
private void updateMinMax (Instance instance) {
  //    for (int j = 0; j < m_numAttribs; j++) {
  try {
    for (int j = 0; j < instance.numValues(); j++) {
      if ((instance.attributeSparse(j).isNumeric()) && 
          (!instance.isMissingSparse(j))) {
        if (Double.isNaN(m_minArray[instance.index(j)])) {
          m_minArray[instance.index(j)] = instance.valueSparse(j);
          m_maxArray[instance.index(j)] = instance.valueSparse(j);
        } else {
          if (instance.valueSparse(j) < m_minArray[instance.index(j)]) {
            m_minArray[instance.index(j)] = instance.valueSparse(j);
          } else {
            if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) {
              m_maxArray[instance.index(j)] = instance.valueSparse(j);
            }
          }
        }
      }
    }
  } catch (Exception ex) {
    System.err.println(ex);
    ex.printStackTrace();
  }
}
 
Example 15
Source File: sIB.java    From tsml with GNU General Public License v3.0
/**
  * Transpose the document-term matrix to term-document matrix
  * @param data instances with document-term info
  * @return a term-document matrix transposed from the input dataset
  */
 private Matrix getTransposedMatrix(Instances data) {
   double[][] temp = new double[data.numAttributes()][data.numInstances()];
   for (int i = 0; i < data.numInstances(); i++) {
     Instance inst = data.instance(i);
     for (int v = 0; v < inst.numValues(); v++) {
       temp[inst.index(v)][i] = inst.valueSparse(v);
     }
   }
   Matrix My_x = new Matrix(temp);
   return My_x;
 }
 
Example 16
Source File: LibSVM.java    From tsml with GNU General Public License v3.0
/**
  * Converts an instance into a sparse libsvm array
  * 
  * @param instance	the instance to work on
  * @return		the libsvm array
  * @throws Exception	if setup of array fails
  */
 protected Object instanceToArray(Instance instance) throws Exception {
   int		index;
   int		count;
   int 	i;
   Object 	result;
   
   // determine number of non-zero attributes
   /*for (i = 0; i < instance.numAttributes(); i++) {
     if (i == instance.classIndex())
continue;
     if (instance.value(i) != 0)
count++;
   } */
   count = 0;
   for (i = 0; i < instance.numValues(); i++) {
     if (instance.index(i) == instance.classIndex())
       continue;
     if (instance.valueSparse(i) != 0)
       count++;
   }

   // fill array
   /* result = Array.newInstance(Class.forName(CLASS_SVMNODE), count);
   index  = 0;
   for (i = 0; i < instance.numAttributes(); i++) {
     if (i == instance.classIndex())
continue;
     if (instance.value(i) == 0)
continue;

     Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
     setField(Array.get(result, index), "index", new Integer(i + 1));
     setField(Array.get(result, index), "value", new Double(instance.value(i)));
     index++;
   } */
   
   result = Array.newInstance(Class.forName(CLASS_SVMNODE), count);
   index  = 0;
   for (i = 0; i < instance.numValues(); i++) {
     
     int idx = instance.index(i);
     if (idx == instance.classIndex())
       continue;
     if (instance.valueSparse(i) == 0)
       continue;

     Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
     setField(Array.get(result, index), "index", new Integer(idx + 1));
     setField(Array.get(result, index), "value", new Double(instance.valueSparse(i)));
     index++;
   }
   
   return result;
 }
 
Example 17
Source File: SGD.java    From tsml with GNU General Public License v3.0
/**
 * Updates the classifier with the given instance.
 * 
 * @param instance the new training instance to include in the model
 * @param filter true if the instance should pass through any of the filters
 *          set up in buildClassifier(). When batch training buildClassifier()
 *          already batch filters all training instances so don't need to
 *          filter them again here.
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
protected void updateClassifier(Instance instance, boolean filter)
    throws Exception {

  if (!instance.classIsMissing()) {
    if (filter) {
      if (m_replaceMissing != null) {
        m_replaceMissing.input(instance);
        instance = m_replaceMissing.output();
      }

      if (m_nominalToBinary != null) {
        m_nominalToBinary.input(instance);
        instance = m_nominalToBinary.output();
      }

      if (m_normalize != null) {
        m_normalize.input(instance);
        instance = m_normalize.output();
      }
    }

    double wx = dotProd(instance, m_weights, instance.classIndex());

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
      y = (instance.classValue() == 0) ? -1 : 1;
      z = y * (wx + m_weights[m_weights.length - 1]);
    } else {
      y = instance.classValue();
      z = y - (wx + m_weights[m_weights.length - 1]);
      y = 1;
    }

    // Compute multiplier for weight decay
    double multiplier = 1.0;
    if (m_numInstances == 0) {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
    } else {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    }
    for (int i = 0; i < m_weights.length - 1; i++) {
      m_weights[i] *= multiplier;
    }

    // Only need to do the following if the loss is non-zero
    // if (m_loss != HINGE || (z < 1)) {
    if (m_loss == SQUAREDLOSS || m_loss == LOGLOSS || m_loss == HUBER
        || (m_loss == HINGE && (z < 1))
        || (m_loss == EPSILON_INSENSITIVE && Math.abs(z) > m_epsilon)) {

      // Compute Factor for updates
      double factor = m_learningRate * y * dloss(z);

      // Update coefficients for attributes
      int n1 = instance.numValues();
      for (int p1 = 0; p1 < n1; p1++) {
        int indS = instance.index(p1);
        if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
          m_weights[indS] += factor * instance.valueSparse(p1);
        }
      }

      // update the bias
      m_weights[m_weights.length - 1] += factor;
    }
    m_t++;
  }
}
 
Example 18
Source File: SPegasos.java    From tsml with GNU General Public License v3.0
/**
 * Updates the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model 
 * @exception Exception if the instance could not be incorporated in
 * the model.
 */
public void updateClassifier(Instance instance) throws Exception {
  if (!instance.classIsMissing()) {
    
    double learningRate = 1.0 / (m_lambda * m_t);
    //double scale = 1.0 - learningRate * m_lambda;
    double scale = 1.0 - 1.0 / m_t;
    double y = (instance.classValue() == 0) ? -1 : 1;
    double wx = dotProd(instance, m_weights, instance.classIndex());
    double z = y * (wx + m_weights[m_weights.length - 1]);        
    
    for (int j = 0; j < m_weights.length - 1; j++) {
      if (j != instance.classIndex()) {
        m_weights[j] *= scale;
      }
    }
    
    if (m_loss == LOGLOSS || (z < 1)) {
      double loss = dloss(z);
      int n1 = instance.numValues();
      for (int p1 = 0; p1 < n1; p1++) {
        int indS = instance.index(p1);
        if (indS != instance.classIndex() &&  !instance.isMissingSparse(p1)) {
          double m = learningRate * loss * (instance.valueSparse(p1) * y);
          m_weights[indS] += m;
        }
      }
      
      // update the bias
      m_weights[m_weights.length - 1] += learningRate * loss * y;
    }
    
    double norm = 0;
    for (int k = 0; k < m_weights.length - 1; k++) {
      if (k != instance.classIndex()) {
        norm += (m_weights[k] * m_weights[k]);
      }
    }
    
    double scale2 = Math.min(1.0, (1.0 / (m_lambda * norm)));
    if (scale2 < 1.0) {
      scale2 = Math.sqrt(scale2);
      for (int j = 0; j < m_weights.length - 1; j++) {
        if (j != instance.classIndex()) {
          m_weights[j] *= scale2;
        }
      }
    }
    m_t++;
  }
}
 
Example 19
Source File: ItemSet.java    From tsml with GNU General Public License v3.0
/**
 * Checks if an instance contains an item set.
 * 
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedByTreatZeroAsMissing(Instance instance) {

  if (instance instanceof weka.core.SparseInstance) {
    int numInstVals = instance.numValues();
    int numItemSetVals = m_items.length;

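    // Walk the stored (non-zero) entries and the item-set entries together:
    // any attribute the item set constrains (m_items[p2] > -1) must appear
    // among the instance's stored values with exactly the required value,
    // because unstored zeros are treated as missing here.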
    for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
      int instIndex = Integer.MAX_VALUE;
      if (p1 < numInstVals) {
        instIndex = instance.index(p1);
      }
      int itemIndex = p2;

      if (m_items[itemIndex] > -1) {
        if (itemIndex != instIndex) {
          return false;
        } else {
          if (instance.isMissingSparse(p1)) {
            return false;
          }
          if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
            return false;
          }
        }

        p1++;
        p2++;
      } else {
        if (itemIndex < instIndex) {
          p2++;
        } else if (itemIndex == instIndex) {
          p2++;
          p1++;
        }
      }
    }
  } else {
    for (int i = 0; i < instance.numAttributes(); i++)
      if (m_items[i] > -1) {
        if (instance.isMissing(i) || (int) instance.value(i) == 0)
          return false;
        if (m_items[i] != (int) instance.value(i))
          return false;
      }
  }

  return true;
}