Java Code Examples for weka.core.Instance#isMissingSparse()

The following examples show how to use weka.core.Instance#isMissingSparse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SGD.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the dot product between the values stored in an instance and a
 * weight vector, advancing through both in step by attribute index.
 * The last element of {@code weights} is never read, and the value at
 * {@code classIndex} as well as missing values are left out of the sum.
 *
 * @param inst1 the instance supplying the stored values
 * @param weights the weight vector; its final element is excluded
 * @param classIndex the attribute index to leave out of the sum
 * @return the sum of value * weight over the matching indices
 */
protected static double dotProd(Instance inst1, double[] weights,
    int classIndex) {
  final int storedCount = inst1.numValues();
  final int weightCount = weights.length - 1;

  double sum = 0;
  int instPos = 0;
  int weightPos = 0;
  while (instPos < storedCount && weightPos < weightCount) {
    final int attIndex = inst1.index(instPos);
    if (attIndex < weightPos) {
      // stored value lies behind the weight cursor: skip it
      instPos++;
    } else if (attIndex > weightPos) {
      // no stored value for this weight: move on to the next weight
      weightPos++;
    } else {
      if (attIndex != classIndex && !inst1.isMissingSparse(instPos)) {
        sum += inst1.valueSparse(instPos) * weights[weightPos];
      }
      instPos++;
      weightPos++;
    }
  }
  return sum;
}
 
Example 2
Source File: SPegasos.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the dot product of an instance's stored values with a weight
 * vector. Both are traversed together and a product is only added where the
 * stored attribute index equals the current weight position. The final
 * element of {@code weights}, the entry at {@code classIndex} and missing
 * values do not contribute.
 *
 * @param inst1 the instance whose stored values are used
 * @param weights the weights; the last element is not read
 * @param classIndex the attribute index that is skipped
 * @return the resulting dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double total = 0;

  final int numStored = inst1.numValues();
  final int numWeights = weights.length - 1;

  int i = 0;
  int w = 0;
  while (i < numStored && w < numWeights) {
    final int attr = inst1.index(i);
    if (attr != w) {
      // cursors disagree: advance whichever one is behind
      if (attr > w) {
        w++;
      } else {
        i++;
      }
      continue;
    }

    final boolean usable = attr != classIndex && !inst1.isMissingSparse(i);
    if (usable) {
      total += inst1.valueSparse(i) * weights[w];
    }
    i++;
    w++;
  }
  return total;
}
 
Example 3
Source File: ReliefFAttributeEval.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Widens the recorded per-attribute minimum and maximum so that they cover
 * the numeric, non-missing values stored in the given instance.
 * Any exception raised along the way is printed to standard error and is
 * not propagated to the caller.
 *
 * @param instance the new instance
 */
private void updateMinMax (Instance instance) {
  try {
    // Only the values actually stored in the instance are visited.
    for (int pos = 0; pos < instance.numValues(); pos++) {
      if (!instance.attributeSparse(pos).isNumeric()
          || instance.isMissingSparse(pos)) {
        continue;
      }

      final int att = instance.index(pos);
      final double val = instance.valueSparse(pos);

      if (Double.isNaN(m_minArray[att])) {
        // a NaN minimum means both bounds are taken from this value
        m_minArray[att] = val;
        m_maxArray[att] = val;
      } else if (val < m_minArray[att]) {
        m_minArray[att] = val;
      } else if (val > m_maxArray[att]) {
        m_maxArray[att] = val;
      }
    }
  } catch (Exception ex) {
    System.err.println(ex);
    ex.printStackTrace();
  }
}
 
Example 4
Source File: CoverTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks if there is any instance with missing values. Throws an
 * exception if there is, as CoverTree does not handle missing values.
 * The value stored at the class index of an instance is not checked.
 *
 * @param instances the instances to check
 * @throws Exception if missing values are encountered
 */
protected void checkMissing(Instances instances) throws Exception {
  for (int i = 0; i < instances.numInstances(); i++) {
    Instance ins = instances.instance(i);
    for (int j = 0; j < ins.numValues(); j++) {
      // j is a position among the stored values; index(j) maps it to the
      // attribute index so the class attribute can be skipped
      if (ins.index(j) != ins.classIndex() && ins.isMissingSparse(j)) {
        throw new Exception("ERROR: CoverTree can not deal with missing "
            + "values. Please run ReplaceMissingValues filter "
            + "on the dataset before passing it on to the CoverTree.");
      }
    }
  }
}
 
Example 5
Source File: KDTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Scans every instance of the given dataset for missing values and fails
 * on the first one found, since KDTree does not handle missing values.
 * The value stored at an instance's class index is ignored.
 *
 * @param instances	the instances to check
 * @throws Exception	if missing values are encountered
 */
protected void checkMissing(Instances instances) throws Exception {
  int row = 0;
  while (row < instances.numInstances()) {
    final Instance current = instances.instance(row);
    for (int pos = 0; pos < current.numValues(); pos++) {
      if (current.index(pos) == current.classIndex()) {
        // the class value is allowed to be missing
        continue;
      }
      if (!current.isMissingSparse(pos)) {
        continue;
      }
      throw new Exception("ERROR: KDTree can not deal with missing "
          + "values. Please run ReplaceMissingValues filter "
          + "on the dataset before passing it on to the KDTree.");
    }
    row++;
  }
}
 
Example 6
Source File: KDTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Verifies that the given instance stores no missing value, other than
 * possibly at its class index.
 *
 * @param ins The instance to check missing values in.
 * @throws Exception If there is a missing value in the
 * instance.
 */
protected void checkMissing(Instance ins) throws Exception {
  for (int pos = 0; pos < ins.numValues(); pos++) {
    final boolean isClassValue = ins.index(pos) == ins.classIndex();
    if (!isClassValue && ins.isMissingSparse(pos)) {
      final String message = "ERROR: KDTree can not deal with missing "
          + "values. Please run ReplaceMissingValues filter "
          + "on the dataset before passing it on to the KDTree.";
      throw new Exception(message);
    }
  }
}
 
Example 7
Source File: ItemSet.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Checks if an instance contains this item set. An item is any position of
 * {@code m_items} holding a value greater than -1. For a sparse instance an
 * item must have a stored, non-missing value equal to it; for any other
 * instance the value must additionally not be zero.
 * 
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedByTreatZeroAsMissing(Instance instance) {

  if (!(instance instanceof weka.core.SparseInstance)) {
    final int numAtts = instance.numAttributes();
    for (int att = 0; att < numAtts; att++) {
      if (m_items[att] <= -1) {
        // position is not part of the item set
        continue;
      }
      if (instance.isMissing(att) || (int) instance.value(att) == 0) {
        return false;
      }
      if (m_items[att] != (int) instance.value(att)) {
        return false;
      }
    }
    return true;
  }

  final int storedCount = instance.numValues();
  final int itemCount = m_items.length;

  int instPos = 0;
  int itemPos = 0;
  while (instPos < storedCount || itemPos < itemCount) {
    // once the stored values are exhausted no item position can match
    final int attIndex =
        (instPos < storedCount) ? instance.index(instPos) : Integer.MAX_VALUE;

    if (m_items[itemPos] > -1) {
      // an item needs a stored, non-missing, equal value at its position
      if (itemPos != attIndex
          || instance.isMissingSparse(instPos)
          || m_items[itemPos] != (int) instance.valueSparse(instPos)) {
        return false;
      }
      instPos++;
      itemPos++;
    } else if (itemPos < attIndex) {
      itemPos++;
    } else if (itemPos == attIndex) {
      itemPos++;
      instPos++;
    }
  }

  return true;
}
 
Example 8
Source File: SGD.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Performs one update of the weight vector with the given instance.
 * Instances whose class value is missing are ignored entirely.
 * 
 * @param instance the new training instance to include in the model
 * @param filter true if the instance should first be passed through those
 *          of the replace-missing, nominal-to-binary and normalize filters
 *          that are set (non-null), in that order. Batch training already
 *          filters all instances in buildClassifier(), so they need not be
 *          filtered again here.
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
protected void updateClassifier(Instance instance, boolean filter)
    throws Exception {

  if (instance.classIsMissing()) {
    return;
  }

  if (filter) {
    if (m_replaceMissing != null) {
      m_replaceMissing.input(instance);
      instance = m_replaceMissing.output();
    }
    if (m_nominalToBinary != null) {
      m_nominalToBinary.input(instance);
      instance = m_nominalToBinary.output();
    }
    if (m_normalize != null) {
      m_normalize.input(instance);
      instance = m_normalize.output();
    }
  }

  final double wx = dotProd(instance, m_weights, instance.classIndex());

  // the last slot of the weight vector is added to the dot product
  final int biasPos = m_weights.length - 1;

  double y;
  double z;
  if (instance.classAttribute().isNominal()) {
    y = (instance.classValue() == 0) ? -1 : 1;
    z = y * (wx + m_weights[biasPos]);
  } else {
    z = instance.classValue() - (wx + m_weights[biasPos]);
    y = 1;
  }

  // Weight decay: every weight except the last slot is shrunk
  final double multiplier = (m_numInstances == 0)
      ? 1.0 - (m_learningRate * m_lambda) / m_t
      : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
  for (int w = 0; w < biasPos; w++) {
    m_weights[w] *= multiplier;
  }

  // The gradient step is only needed if the loss is non-zero
  final boolean lossActive = m_loss == SQUAREDLOSS || m_loss == LOGLOSS
      || m_loss == HUBER
      || (m_loss == HINGE && (z < 1))
      || (m_loss == EPSILON_INSENSITIVE && Math.abs(z) > m_epsilon);

  if (lossActive) {
    final double factor = m_learningRate * y * dloss(z);

    // Adjust the weight of every stored, non-missing, non-class value
    final int storedCount = instance.numValues();
    for (int pos = 0; pos < storedCount; pos++) {
      final int att = instance.index(pos);
      if (att == instance.classIndex() || instance.isMissingSparse(pos)) {
        continue;
      }
      m_weights[att] += factor * instance.valueSparse(pos);
    }

    // update the bias
    m_weights[m_weights.length - 1] += factor;
  }

  m_t++;
}
 
Example 9
Source File: SPegasos.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Performs one update of the weight vector with the given instance:
 * the weights are scaled, a gradient step is taken when the loss calls for
 * it, and the weights are rescaled if their squared norm is large enough.
 * Instances whose class value is missing are ignored entirely.
 *
 * @param instance the new training instance to include in the model 
 * @exception Exception if the instance could not be incorporated in
 * the model.
 */
public void updateClassifier(Instance instance) throws Exception {
  if (instance.classIsMissing()) {
    return;
  }

  final int classPos = instance.classIndex();

  final double learningRate = 1.0 / (m_lambda * m_t);
  final double scale = 1.0 - 1.0 / m_t;
  final double y = (instance.classValue() == 0) ? -1 : 1;
  final double wx = dotProd(instance, m_weights, classPos);

  // the last slot of the weight vector is added to the dot product
  final int biasPos = m_weights.length - 1;
  final double z = y * (wx + m_weights[biasPos]);

  // shrink every weight except the class slot and the last slot
  for (int w = 0; w < biasPos; w++) {
    if (w != classPos) {
      m_weights[w] *= scale;
    }
  }

  if (m_loss == LOGLOSS || (z < 1)) {
    final double loss = dloss(z);
    final int storedCount = instance.numValues();
    for (int pos = 0; pos < storedCount; pos++) {
      final int att = instance.index(pos);
      if (att == classPos || instance.isMissingSparse(pos)) {
        continue;
      }
      m_weights[att] += learningRate * loss * (instance.valueSparse(pos) * y);
    }

    // update the bias
    m_weights[biasPos] += learningRate * loss * y;
  }

  // squared norm of the weights, class slot and last slot excluded
  double norm = 0;
  for (int w = 0; w < biasPos; w++) {
    if (w != classPos) {
      norm += (m_weights[w] * m_weights[w]);
    }
  }

  final double shrink = Math.min(1.0, (1.0 / (m_lambda * norm)));
  if (shrink < 1.0) {
    final double root = Math.sqrt(shrink);
    for (int w = 0; w < biasPos; w++) {
      if (w != classPos) {
        m_weights[w] *= root;
      }
    }
  }

  m_t++;
}
 
Example 10
Source File: ReplaceMissingValues.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Signals that the current batch of input to the filter is complete.
 * If the modes and means have not been computed yet, they are derived from
 * the buffered input instances, which are then converted. Afterwards
 * output() may be called to retrieve the filtered instances.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (m_ModesAndMeans == null) {
    final int numAtts = getInputFormat().numAttributes();
    final double sumOfWeights = getInputFormat().sumOfWeights();

    // Slot 0 of every nominal count array and every entry of sums start at
    // the total weight; the stored values seen below are subtracted from it.
    final double[][] counts = new double[numAtts][];
    final double[] sums = new double[numAtts];
    for (int att = 0; att < numAtts; att++) {
      sums[att] = sumOfWeights;
      if (getInputFormat().attribute(att).isNominal()) {
        counts[att] = new double[getInputFormat().attribute(att).numValues()];
        if (counts[att].length > 0) {
          counts[att][0] = sumOfWeights;
        }
      }
    }

    // Accumulate nominal value counts and weighted numeric sums
    final double[] results = new double[numAtts];
    final int numInsts = getInputFormat().numInstances();
    for (int row = 0; row < numInsts; row++) {
      final Instance inst = getInputFormat().instance(row);
      for (int pos = 0; pos < inst.numValues(); pos++) {
        final boolean missing = inst.isMissingSparse(pos);
        final int att = inst.index(pos);

        if (inst.attributeSparse(pos).isNominal()) {
          if (counts[att].length > 0) {
            if (!missing) {
              counts[att][(int) inst.valueSparse(pos)] += inst.weight();
            }
            counts[att][0] -= inst.weight();
          }
        } else if (inst.attributeSparse(pos).isNumeric()) {
          if (missing) {
            sums[att] -= inst.weight();
          } else {
            results[att] += inst.weight() * inst.valueSparse(pos);
          }
        }
      }
    }

    // Nominal: index of the largest count; numeric: weighted mean
    m_ModesAndMeans = new double[getInputFormat().numAttributes()];
    for (int att = 0; att < getInputFormat().numAttributes(); att++) {
      if (getInputFormat().attribute(att).isNominal()) {
        m_ModesAndMeans[att] = (counts[att].length == 0)
            ? Utils.missingValue()
            : (double) Utils.maxIndex(counts[att]);
      } else if (getInputFormat().attribute(att).isNumeric()) {
        if (Utils.gr(sums[att], 0)) {
          m_ModesAndMeans[att] = results[att] / sums[att];
        }
      }
    }

    // Convert pending input instances
    for (int row = 0; row < getInputFormat().numInstances(); row++) {
      convertInstance(getInputFormat().instance(row));
    }
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  return numPendingOutput() != 0;
}