Java Code Examples for weka.core.Instance#numValues()

The following examples show how to use weka.core.Instance#numValues(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RegOptimizer.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the output value of the machine for a single instance.
 *
 * <p>The result starts at {@code -m_b}. If a weight vector is present, the
 * weighted sum over the instance's stored non-class values is added;
 * otherwise the kernel contributions of the support vectors are added.
 *
 * @param inst the instance to evaluate
 * @return the accumulated output value
 * @throws Exception if the output cannot be computed
 */
public double SVMOutput(Instance inst) throws Exception {
  double output = -m_b;

  if (m_weights == null) {
    // No weight vector available: walk the support vectors and sum
    // their kernel contributions.
    int sv = m_supportVectors.getNext(-1);
    while (sv != -1) {
      output += (m_alpha[sv] - m_alphaStar[sv]) * m_kernel.eval(-1, sv, inst);
      sv = m_supportVectors.getNext(sv);
    }
    return output;
  }

  // Linear machine: only the values stored in the instance contribute.
  for (int pos = 0; pos < inst.numValues(); pos++) {
    if (inst.index(pos) == m_classIndex) {
      continue;
    }
    output += m_weights[inst.index(pos)] * inst.valueSparse(pos);
  }
  return output;
}
 
Example 2
Source File: CLOPE.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Removes the given instance from this cluster.
 *
 * <p>For a {@code SparseInstance} every stored attribute index is deleted as
 * an item. For any other instance, each non-missing attribute is deleted as
 * an item formed from the attribute index followed by its string value.
 * Afterwards {@code W} is set to the size of {@code occ} and {@code N} is
 * decremented.
 *
 * @param inst the instance to remove
 */
public void DeleteInstance(Instance inst) {
  if (!(inst instanceof SparseInstance)) {
    for (int att = 0; att < inst.numAttributes(); att++) {
      if (inst.isMissing(att)) {
        continue;
      }
      DeleteItem(att + inst.toString(att));
    }
  } else {
    for (int pos = 0; pos < inst.numValues(); pos++) {
      DeleteItem(inst.index(pos));
    }
  }

  this.W = this.occ.size();
  this.N--;
}
 
Example 3
Source File: Winnow.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the (balanced) prediction for an already filtered instance.
 *
 * <p>For every stored non-class value equal to 1, the difference between
 * the positive and negative prediction vectors at that attribute is summed.
 * The prediction is 1 if the sum exceeds {@code m_actualThreshold}, else 0.
 *
 * @param inst the instance for which the prediction is to be computed
 * @return 1 or 0, depending on whether the sum exceeds the threshold
 * @throws Exception if something goes wrong
 */
private double makePredictionBalanced(Instance inst) throws Exception {
  final int stored = inst.numValues();
  final int classAtt = m_Train.classIndex();

  double score = 0;
  for (int pos = 0; pos < stored; pos++) {
    boolean active = inst.index(pos) != classAtt && inst.valueSparse(pos) == 1;
    if (active) {
      score += (m_predPosVector[inst.index(pos)] - m_predNegVector[inst.index(pos)]);
    }
  }

  return (score > m_actualThreshold) ? 1 : 0;
}
 
Example 4
Source File: SGD.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the dot product between an instance and a weight array.
 *
 * <p>The stored values of the instance are walked in step with the weight
 * positions; a product is added only where the attribute index equals the
 * weight position, the attribute is not the class and the value is not
 * missing. The last element of {@code weights} is never used here.
 *
 * @param inst1 the instance
 * @param weights the weight array (its last element is skipped)
 * @param classIndex the index of the class attribute, which is excluded
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights,
    int classIndex) {
  final int numStored = inst1.numValues();
  final int numWeights = weights.length - 1;

  double sum = 0;
  int sparsePos = 0;
  int weightPos = 0;
  while (sparsePos < numStored && weightPos < numWeights) {
    final int attIndex = inst1.index(sparsePos);
    if (attIndex > weightPos) {
      weightPos++;
    } else if (attIndex < weightPos) {
      sparsePos++;
    } else {
      if (attIndex != classIndex && !inst1.isMissingSparse(sparsePos)) {
        sum += inst1.valueSparse(sparsePos) * weights[weightPos];
      }
      sparsePos++;
      weightPos++;
    }
  }
  return sum;
}
 
Example 5
Source File: sIB.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the JS divergence between an instance and a cluster; used for
 * test data.
 *
 * <p>The instance values are normalized by their total. If the smaller of
 * the two weights is not positive, a warning is printed and 0 is returned.
 *
 * @param inst instance to be clustered
 * @param t index of the cluster
 * @param pi1 weight applied to the instance term
 * @param pi2 weight applied to the cluster term
 * @return the JS divergence
 */
private double JS(Instance inst, int t, double pi1, double pi2) {
  if (Math.min(pi1, pi2) <= 0) {
    System.out.format("Warning: zero or negative weights in JS calculation! (pi1 %s, pi2 %s)\n", pi1, pi2);
    return 0;
  }

  final double total = Utils.sum(inst.toDoubleArray());

  // Term of the normalized instance against the mixture.
  double klInstance = 0.0;
  for (int pos = 0; pos < inst.numValues(); pos++) {
    final double p = inst.valueSparse(pos) / total;
    if (p != 0) {
      klInstance += p * Math.log(p / (p * pi1 + pi2 * bestT.Py_t.get(inst.index(pos), t)));
    }
  }

  // Term of the cluster against the mixture.
  double klCluster = 0.0;
  for (int att = 0; att < m_numAttributes; att++) {
    final double q = bestT.Py_t.get(att, t);
    if (q != 0) {
      klCluster += q * Math.log(q / (inst.value(att) * pi1  / total + pi2 * q));
    }
  }

  return pi1 * klInstance + pi2 * klCluster;
}
 
Example 6
Source File: DMNBtext.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the log score of the target class for the given instance.
 *
 * <p>Starting from {@code m_classRatio}, every stored non-class value adds
 * the attribute's coefficient minus {@code m_wordRatio}: once if the value
 * is positive (when {@code m_MultinomialWord} is off), or multiplied by the
 * value (when {@code m_MultinomialWord} is on).
 *
 * @param ins the instance to score
 * @return the accumulated log score
 * @exception Exception if there is a problem generating the prediction
 */
public double getLogProbForTargetClass(Instance ins) throws Exception {
  double logScore = m_classRatio;

  for (int pos = 0; pos < ins.numValues(); pos++) {
    final int att = ins.index(pos);
    if (att == m_classIndex) {
      continue;
    }

    if (m_MultinomialWord) {
      logScore += ins.valueSparse(pos) * (m_coefficient[att] - m_wordRatio);
    } else if (ins.valueSparse(pos) > 0) {
      logScore += m_coefficient[att] - m_wordRatio;
    }
  }
  return logScore;
}
 
Example 7
Source File: FPGrowth.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Increases the frequency of every singleton item that occurs in the
 * given instance.
 *
 * <p>For a {@code SparseInstance} each stored attribute index counts as an
 * occurrence. For other instances a non-missing attribute counts when it
 * has exactly one declared value or when its value equals
 * {@code m_positiveIndex - 1}.
 *
 * @param current the instance to process
 * @param singletons the singleton items, indexed by attribute
 * @throws Exception if a problem occurs
 */
private void processSingleton(Instance current,
    ArrayList<BinaryItem> singletons) throws Exception {

  if (!(current instanceof SparseInstance)) {
    for (int att = 0; att < current.numAttributes(); att++) {
      if (current.isMissing(att)) {
        continue;
      }
      boolean present = current.attribute(att).numValues() == 1
          || current.value(att) == m_positiveIndex - 1;
      if (present) {
        singletons.get(att).increaseFrequency();
      }
    }
    return;
  }

  for (int pos = 0; pos < current.numValues(); pos++) {
    singletons.get(current.index(pos)).increaseFrequency();
  }
}
 
Example 8
Source File: RandomProjection.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes one random projection for a given instance. The class attribute
 * and missing values are skipped.
 *
 * @param rpIndex     row of the random matrix to project with
 * @param classIndex  class index of the input instance
 * @param instance    the instance to convert
 * @return the weighted sum of the instance's stored values
 */
protected double computeRandomProjection(int rpIndex, int classIndex, Instance instance) {
  double projection = 0.0;

  for (int pos = 0; pos < instance.numValues(); pos++) {
    final int att = instance.index(pos);
    if (att == classIndex) {
      continue;
    }
    final double v = instance.valueSparse(pos);
    if (Utils.isMissingValue(v)) {
      continue;
    }
    projection += m_rmatrix[rpIndex][att] * v;
  }
  return projection;
}
 
Example 9
Source File: NaiveBayesMultinomial.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the score of a document given a class as
 * (for all the words) ni * m_probOfWordGivenClass[class][i],
 *
 *  where
 *      ni is the number of times the word at index i occurs in the document
 *
 * The factorial terms of the full multinomial formula
 * (log(N!) and log(ni!)) are left out because they make no difference to
 * the classifier's accuracy.
 *
 * @param inst       The instance to be classified
 * @param classIndex The index of the class we are calculating the probability with respect to
 *
 * @return The log of the probability of the document occurring given the class
 */
private double probOfDocGivenClass(Instance inst, int classIndex) {
  double logProb = 0;

  for (int pos = 0; pos < inst.numValues(); pos++) {
    if (inst.index(pos) == inst.classIndex()) {
      continue;
    }
    // word frequencies are kept as doubles
    final double freq = inst.valueSparse(pos);
    logProb += (freq * m_probOfWordGivenClass[classIndex][inst.index(pos)]);
  }

  return logProb;
}
 
Example 10
Source File: NaiveBayesMultinomialUpdateable.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Updates the classifier with the given instance.
 *
 * <p>The instance weight is added to the count of its class, and for every
 * stored non-class, non-missing value the weighted occurrence count is
 * added to the per-class and per-word-per-class totals.
 *
 * @param instance 	the new training instance to include in the model
 * @throws Exception 	if the instance could not be incorporated in
 * 			the model, i.e. a running total becomes negative.
 */
public void updateClassifier(Instance instance) throws Exception {
  int classIndex = (int) instance.value(instance.classIndex());
  m_probOfClass[classIndex] += instance.weight();

  for (int a = 0; a < instance.numValues(); a++) {
    // 'a' is a position within the stored values, not an attribute index,
    // so the missing check has to go through the sparse accessor.
    if (instance.index(a) == instance.classIndex()
        || instance.isMissingSparse(a)) {
      continue;
    }

    double numOccurences = instance.valueSparse(a) * instance.weight();
    m_wordsPerClass[classIndex] += numOccurences;
    if (m_wordsPerClass[classIndex] < 0) {
      throw new Exception("Can't have a negative number of words for class "
          + (classIndex + 1));
    }
    m_probOfWordGivenClass[classIndex][instance.index(a)] += numOccurences;
    if (m_probOfWordGivenClass[classIndex][instance.index(a)] < 0) {
      throw new Exception("Can't have a negative conditional sum for attribute "
          + instance.index(a));
    }
  }
}
 
Example 11
Source File: FPGrowth.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Inserts a single instance into the FPTree.
 *
 * <p>The items of the instance whose singleton frequency reaches
 * {@code minSupport} are collected, sorted and added to the tree as one
 * item set with a count of 1.
 *
 * @param current the instance to insert
 * @param singletons the singleton item sets
 * @param tree the tree to insert into
 * @param minSupport the minimum support threshold
 */
private void insertInstance(Instance current, ArrayList<BinaryItem> singletons,
    FPTreeRoot tree, int minSupport) {
  ArrayList<BinaryItem> frequentItems = new ArrayList<BinaryItem>();

  if (current instanceof SparseInstance) {
    // every stored index is an item that is present
    for (int pos = 0; pos < current.numValues(); pos++) {
      BinaryItem item = singletons.get(current.index(pos));
      if (item.getFrequency() >= minSupport) {
        frequentItems.add(item);
      }
    }
  } else {
    for (int att = 0; att < current.numAttributes(); att++) {
      if (current.isMissing(att)) {
        continue;
      }
      boolean present = current.attribute(att).numValues() == 1
          || current.value(att) == m_positiveIndex - 1;
      if (present && singletons.get(att).getFrequency() >= minSupport) {
        frequentItems.add(singletons.get(att));
      }
    }
  }

  Collections.sort(frequentItems);
  tree.addItemSet(frequentItems, 1);
}
 
Example 12
Source File: Utils.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts a set of training instances to a DataSet. Assumes that the instances have been
 * suitably preprocessed - i.e. missing values replaced and nominals converted to binary/numeric.
 * Also assumes that the class index has been set.
 *
 * <p>The class attribute is left out of the feature matrix and written to a separate outcomes
 * matrix: one-hot encoded when there is more than one class, the raw class value otherwise.
 *
 * @param insts the instances to convert
 * @return a DataSet
 */
public static DataSet instancesToDataSet(Instances insts) {
  INDArray features = Nd4j.zeros(insts.numInstances(), insts.numAttributes() - 1);
  INDArray labels = Nd4j.zeros(insts.numInstances(), insts.numClasses());

  for (int row = 0; row < insts.numInstances(); row++) {
    Instance current = insts.instance(row);

    double[] featureRow = new double[insts.numAttributes() - 1];
    double[] labelRow = new double[insts.numClasses()];

    for (int pos = 0; pos < current.numValues(); pos++) {
      final int att = current.index(pos);
      final double v = current.valueSparse(pos);
      if (att == insts.classIndex()) {
        continue;
      }
      // Attributes after the class move one slot to the left, since the class is not part of
      // the feature matrix
      final int target = (att < insts.classIndex()) ? att : att - 1;
      featureRow[target] = v;
    }

    if (insts.numClasses() > 1) {
      // Classification: one-hot encode the class value
      labelRow[(int) current.classValue()] = 1.0;
    } else {
      // Regression (currently only single class)
      labelRow[0] = current.classValue();
    }

    features.putRow(row, Nd4j.create(featureRow));
    labels.putRow(row, Nd4j.create(labelRow));
  }
  return new DataSet(features, labels);
}
 
Example 13
Source File: ComplementNaiveBayes.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a given instance. <p>
 *
 * The classification rule is: <br>
 *     MinC(forAllWords(ti*Wci)) <br>
 *      where <br>
 *         ti is the frequency of word i in the given instance <br>
 *         Wci is the weight of word i in Class c. <p>
 *
 * For more information see section 4.4 of the paper mentioned above
 * in the classifiers description.
 *
 * @param instance the instance to classify
 * @return the index of the class the instance is most likely to belong.
 * @throws Exception if the classifier has not been built yet.
 */
public double classifyInstance(Instance instance) throws Exception {

  if (wordWeights == null) {
    throw new Exception("Error. The classifier has not been built "
        + "properly.");
  }

  double[] valueForClass = new double[numClasses];

  for (int c = 0; c < numClasses; c++) {
    double sumOfWordValues = 0;
    for (int w = 0; w < instance.numValues(); w++) {
      if (instance.index(w) != instance.classIndex()) {
        double freqOfWordInDoc = instance.valueSparse(w);
        sumOfWordValues += freqOfWordInDoc
            * wordWeights[c][instance.index(w)];
      }
    }
    valueForClass[c] = sumOfWordValues;
  }

  // The class with the smallest weighted sum wins; ties keep the lowest index.
  int minidx = 0;
  for (int i = 1; i < numClasses; i++) {
    if (valueForClass[i] < valueForClass[minidx]) {
      minidx = i;
    }
  }

  return minidx;
}
 
Example 14
Source File: DMNBtext.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Updates the model with one training instance.
 *
 * <p>The instance contributes with a weight of (1 - current predicted
 * probability of its own slot) times the instance weight. Slot 0 stands for
 * the target class, slot 1 for everything else. Word counts, per-word
 * coefficients, the word ratio and the class ratio are refreshed.
 *
 * @param ins the training instance
 * @throws Exception if the instance cannot be processed
 */
public void updateClassifier(Instance ins) throws Exception {
  // slot 0 is the target class, slot 1 is the rest
  final int slot = (ins.value(ins.classIndex()) != m_targetClass) ? 1 : 0;

  final double prob = 1 - distributionForInstance(ins)[slot];
  final double weight = prob * ins.weight();

  for (int pos = 0; pos < ins.numValues(); pos++) {
    final int att = ins.index(pos);
    if (att == m_classIndex) {
      continue;
    }

    if (m_MultinomialWord) {
      final double increment = ins.valueSparse(pos) * weight;
      m_wordsPerClass[slot] += increment;
      m_perWordPerClass[slot][att] += increment;
    } else if (ins.valueSparse(pos) > 0) {
      m_wordsPerClass[slot] += weight;
      m_perWordPerClass[slot][att] += weight;
    }

    // update coefficient
    m_coefficient[att] =
        Math.log(m_perWordPerClass[0][att] / m_perWordPerClass[1][att]);
  }

  m_wordRatio = Math.log(m_wordsPerClass[0] / m_wordsPerClass[1]);
  m_classDistribution[slot] += weight;
  m_classRatio = Math.log(m_classDistribution[0] / m_classDistribution[1]);
}
 
Example 15
Source File: CoverTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks if there is any instance with missing values. Throws an
 * exception if there is, as CoverTree does not handle missing values.
 * The class attribute is not checked.
 *
 * @param instances 	the instances to check
 * @throws Exception 	if missing values are encountered
 */
protected void checkMissing(Instances instances) throws Exception {
  for (int i = 0; i < instances.numInstances(); i++) {
    Instance ins = instances.instance(i);
    for (int j = 0; j < ins.numValues(); j++) {
      if (ins.index(j) != ins.classIndex() && ins.isMissingSparse(j)) {
        throw new Exception("ERROR: CoverTree can not deal with missing "
            + "values. Please run ReplaceMissingValues filter "
            + "on the dataset before passing it on to the CoverTree.");
      }
    }
  }
}
 
Example 16
Source File: VotedPerceptron.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the inner product of two instances, adds 1 and, if
 * {@code m_Exponent} differs from 1, raises the result to that power.
 * The class attribute is excluded from the product.
 *
 * @param i1 first instance
 * @param i2 second instance
 * @return the (possibly exponentiated) inner product plus one
 * @throws Exception if computation fails
 */
private double innerProduct(Instance i1, Instance i2) throws Exception {
  final int len1 = i1.numValues();
  final int len2 = i2.numValues();
  final int classAtt = m_Train.classIndex();

  // Walk both stored-value lists in parallel; only matching indices contribute.
  double dot = 0;
  int a = 0;
  int b = 0;
  while (a < len1 && b < len2) {
    final int idxA = i1.index(a);
    final int idxB = i2.index(b);
    if (idxA > idxB) {
      b++;
    } else if (idxA < idxB) {
      a++;
    } else {
      if (idxA != classAtt) {
        dot += i1.valueSparse(a) * i2.valueSparse(b);
      }
      a++;
      b++;
    }
  }
  dot += 1.0;

  return (m_Exponent != 1) ? Math.pow(dot, m_Exponent) : dot;
}
 
Example 17
Source File: sIB.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Transposes the document-term matrix into a term-document matrix.
 *
 * @param data instances with document-term info
 * @return a matrix with one row per attribute and one column per instance
 */
private Matrix getTransposedMatrix(Instances data) {
  double[][] termByDoc = new double[data.numAttributes()][data.numInstances()];

  for (int doc = 0; doc < data.numInstances(); doc++) {
    Instance inst = data.instance(doc);
    // entries that are not stored in the instance keep the array default of 0
    for (int pos = 0; pos < inst.numValues(); pos++) {
      termByDoc[inst.index(pos)][doc] = inst.valueSparse(pos);
    }
  }
  return new Matrix(termByDoc);
}
 
Example 18
Source File: SGD.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Updates the classifier with the given instance. Instances with a missing
 * class value are ignored.
 *
 * @param instance the new training instance to include in the model
 * @param filter true if the instance should pass through any of the filters
 *          set up in buildClassifier(). When batch training buildClassifier()
 *          already batch filters all training instances so don't need to
 *          filter them again here.
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
protected void updateClassifier(Instance instance, boolean filter)
    throws Exception {

  if (instance.classIsMissing()) {
    return;
  }

  if (filter) {
    if (m_replaceMissing != null) {
      m_replaceMissing.input(instance);
      instance = m_replaceMissing.output();
    }
    if (m_nominalToBinary != null) {
      m_nominalToBinary.input(instance);
      instance = m_nominalToBinary.output();
    }
    if (m_normalize != null) {
      m_normalize.input(instance);
      instance = m_normalize.output();
    }
  }

  // the bias is kept in the last slot of the weight array
  final int biasPos = m_weights.length - 1;
  final double wx = dotProd(instance, m_weights, instance.classIndex());

  double y;
  double z;
  if (instance.classAttribute().isNominal()) {
    y = (instance.classValue() == 0) ? -1 : 1;
    z = y * (wx + m_weights[biasPos]);
  } else {
    // numeric class: z is the residual and y is fixed to 1
    z = instance.classValue() - (wx + m_weights[biasPos]);
    y = 1;
  }

  // Compute multiplier for weight decay
  final double multiplier = (m_numInstances == 0)
      ? 1.0 - (m_learningRate * m_lambda) / m_t
      : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
  for (int w = 0; w < biasPos; w++) {
    m_weights[w] *= multiplier;
  }

  // Only need to do the following if the loss is non-zero
  final boolean needsUpdate = m_loss == SQUAREDLOSS || m_loss == LOGLOSS
      || m_loss == HUBER
      || (m_loss == HINGE && (z < 1))
      || (m_loss == EPSILON_INSENSITIVE && Math.abs(z) > m_epsilon);

  if (needsUpdate) {
    // Compute factor for updates
    final double factor = m_learningRate * y * dloss(z);

    // Update coefficients for attributes
    final int stored = instance.numValues();
    for (int pos = 0; pos < stored; pos++) {
      final int att = instance.index(pos);
      if (att == instance.classIndex() || instance.isMissingSparse(pos)) {
        continue;
      }
      m_weights[att] += factor * instance.valueSparse(pos);
    }

    // update the bias
    m_weights[biasPos] += factor;
  }
  m_t++;
}
 
Example 19
Source File: XMLInstances.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Adds the instance to the XML structure.
 *
 * <p>One value element is written per stored value of the instance. Sparse
 * instances are marked as such and carry a 1-based attribute index on every
 * value element. Relational values are written recursively as nested
 * instances.
 *
 * @param parent	the parent node to add the instance node as child
 * @param inst	the instance to add
 */
protected void addInstance(Element parent, Instance inst) {
  Element		node;
  Element		value;
  Element		child;
  boolean		sparse;
  int			i;
  int			n;
  int			index;

  node = m_Document.createElement(TAG_INSTANCE);
  parent.appendChild(node);

  // sparse?
  sparse = (inst instanceof SparseInstance);
  if (sparse) {
    node.setAttribute(ATT_TYPE, VAL_SPARSE);
  }

  // weight (only written when it differs from 1.0)
  if (inst.weight() != 1.0) {
    node.setAttribute(ATT_WEIGHT, Utils.doubleToString(inst.weight(), m_Precision));
  }

  // values
  for (i = 0; i < inst.numValues(); i++) {
    // i is the position within the stored values, index the attribute it
    // belongs to; all attribute-based accessors below must use index
    index = inst.index(i);

    value = m_Document.createElement(TAG_VALUE);
    node.appendChild(value);

    if (inst.isMissing(index)) {
      value.setAttribute(ATT_MISSING, VAL_YES);
    } else if (inst.attribute(index).isRelationValued()) {
      child = m_Document.createElement(TAG_INSTANCES);
      value.appendChild(child);
      for (n = 0; n < inst.relationalValue(index).numInstances(); n++) {
        addInstance(child, inst.relationalValue(index).instance(n));
      }
    } else if (inst.attribute(index).type() == Attribute.NUMERIC) {
      value.appendChild(m_Document.createTextNode(Utils.doubleToString(inst.value(index), m_Precision)));
    } else {
      value.appendChild(m_Document.createTextNode(validContent(inst.stringValue(index))));
    }

    if (sparse) {
      value.setAttribute(ATT_INDEX, "" + (index+1));
    }
  }
}
 
Example 20
Source File: SPegasos.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Updates the classifier with the given instance. Instances with a missing
 * class value are ignored.
 *
 * <p>The weights (excluding the class slot and the trailing bias) are first
 * scaled, then moved along the loss derivative when the loss is log loss or
 * {@code z < 1}, and finally rescaled if that shrinks them.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in
 * the model.
 */
public void updateClassifier(Instance instance) throws Exception {
  if (instance.classIsMissing()) {
    return;
  }

  // the bias is kept in the last slot of the weight array
  final int biasPos = m_weights.length - 1;

  final double learningRate = 1.0 / (m_lambda * m_t);
  final double scale = 1.0 - 1.0 / m_t;
  final double y = (instance.classValue() == 0) ? -1 : 1;
  final double wx = dotProd(instance, m_weights, instance.classIndex());
  final double z = y * (wx + m_weights[biasPos]);

  for (int w = 0; w < biasPos; w++) {
    if (w != instance.classIndex()) {
      m_weights[w] *= scale;
    }
  }

  if (m_loss == LOGLOSS || (z < 1)) {
    final double loss = dloss(z);
    final int stored = instance.numValues();
    for (int pos = 0; pos < stored; pos++) {
      final int att = instance.index(pos);
      if (att == instance.classIndex() || instance.isMissingSparse(pos)) {
        continue;
      }
      final double step = learningRate * loss * (instance.valueSparse(pos) * y);
      m_weights[att] += step;
    }

    // update the bias
    m_weights[biasPos] += learningRate * loss * y;
  }

  // squared norm of the weights, class slot and bias excluded
  double norm = 0;
  for (int w = 0; w < biasPos; w++) {
    if (w != instance.classIndex()) {
      norm += (m_weights[w] * m_weights[w]);
    }
  }

  double shrink = Math.min(1.0, (1.0 / (m_lambda * norm)));
  if (shrink < 1.0) {
    shrink = Math.sqrt(shrink);
    for (int w = 0; w < biasPos; w++) {
      if (w != instance.classIndex()) {
        m_weights[w] *= shrink;
      }
    }
  }
  m_t++;
}