Java Code Examples for weka.core.Utils#missingValue()

The following examples show how to use weka.core.Utils#missingValue(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: RacedIncrementalLogitBoost.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Picks the committee's predicted class for an instance described by its Fs values.
 *
 * @param Fs the Fs values
 * @return the index of the class with the largest strictly positive probability,
 *         or Utils.missingValue() if no class has a positive probability
 * @throws Exception if the distribution cannot be computed
 */
public double classifyInstance(double[] Fs) throws Exception {
  final double[] probs = distributionForInstance(Fs);

  int bestClass = 0;
  double bestProb = 0;
  for (int k = 0; k < probs.length; k++) {
    // strict comparison: ties keep the earliest class, zeros never win
    if (probs[k] > bestProb) {
      bestProb = probs[k];
      bestClass = k;
    }
  }

  return (bestProb > 0) ? bestClass : Utils.missingValue();
}
 
Example 2
Source File: FilteredClassifier.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the partition membership values for an instance, delegating to the
 * base classifier after the instance has been passed through the filter.
 *
 * @param inst the instance to compute membership values for
 * @return the membership values from the base classifier, or an array of
 *         missing values (one per partition element) if filtering yields no instance
 * @throws Exception if the base classifier is not a PartitionGenerator
 */
public double[] getMembershipValues(Instance inst) throws Exception {

  if (!(m_Classifier instanceof PartitionGenerator)) {
    throw new Exception("Classifier: " + getClassifierSpec()
                        + " cannot generate a partition");
  }

  final Instance filtered = filterInstance(inst);
  if (filtered != null) {
    return ((PartitionGenerator) m_Classifier).getMembershipValues(filtered);
  }

  // the filter produced nothing: report every element as missing
  final double[] allMissing = new double[numElements()];
  for (int pos = 0; pos < allMissing.length; pos++) {
    allMissing[pos] = Utils.missingValue();
  }
  return allMissing;
}
 
Example 3
Source File: MajorityConfidenceVote.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * For a nominal class the index of the largest distribution entry is returned
 * (missing if that entry is zero); for a numeric class the first distribution
 * entry is returned; for any other class type the result is missing.
 */
@Override
public double classifyInstance(final Instance instance) throws Exception {
	final double[] votes = this.distributionForInstance(instance);

	if (instance.classAttribute().isNominal()) {
		final int best = Utils.maxIndex(votes);
		// an all-zero distribution means no prediction could be made
		return (votes[best] == 0) ? Utils.missingValue() : best;
	}
	if (instance.classAttribute().isNumeric()) {
		return votes[0];
	}
	return Utils.missingValue();
}
 
Example 4
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the area under the precision-recall curve (AUPRC) from a
 * previously extracted threshold curve.
 *
 * @param tcurve a previously extracted threshold curve Instances.
 * @return the PRC area; Double.NaN if the data is empty or its relation name
 *         is not the one used for threshold curves; Utils.missingValue() if
 *         the accumulated area is exactly zero
 */
public static double getPRCArea(Instances tcurve) {
  final int count = tcurve.numInstances();
  final boolean usable = RELATION_NAME.equals(tcurve.relationName()) && (count != 0);
  if (!usable) {
    return Double.NaN;
  }

  final double[] precision =
    tcurve.attributeToDoubleArray(tcurve.attribute(PRECISION_NAME).index());
  final double[] recall =
    tcurve.attributeToDoubleArray(tcurve.attribute(RECALL_NAME).index());

  // Walk backwards from the second-to-last row; the last row only seeds the
  // starting recall (original note: it is the artificial zero point).
  double total = 0;
  double previousRecall = recall[count - 1];
  int row = count - 2;
  while (row >= 0) {
    total += precision[row] * (recall[row] - previousRecall);
    previousRecall = recall[row];
    row--;
  }

  return (total == 0) ? Utils.missingValue() : total;
}
 
Example 5
Source File: AddUserFields.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Get the value of the attribute as a number or Utils.missingValue() if the
 * attribute is not numeric.
 *
 * The resolved type is lower-cased with the root locale so that the
 * "numeric" prefix check does not depend on the JVM's default locale (for
 * example, Turkish casing rules map 'I' to a dotless 'i').
 *
 * @return the value of the attribute as a number
 * @throws NumberFormatException if the type is numeric but the resolved
 *           value cannot be parsed as a double
 */
public double getNumericValue() {
  final String type = getResolvedType().toLowerCase(java.util.Locale.ROOT);
  if (type.startsWith("numeric")) {
    return Double.parseDouble(getResolvedValue());
  }

  return Utils.missingValue(); // not a numeric attribute
}
 
Example 6
Source File: DataTableModel.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Inserts a fresh, empty instance into the underlying data.
 *
 * @param index  position to insert at; a negative value appends at the end
 * @param notify whether to fire an INSERT table model event afterwards
 */
public void insertInstance(int index, boolean notify) {
	if (!m_IgnoreChanges) {
		addUndoPoint();
	}

	final int numAtts = m_Data.numAttributes();
	final double[] values = new double[numAtts];

	// String and relational attributes start out as missing, in case this is
	// the very first instance in the dataset.
	for (int col = 0; col < numAtts; col++) {
		final boolean needsMissing = m_Data.attribute(col).isString()
			|| m_Data.attribute(col).isRelationValued();
		if (needsMissing) {
			values[col] = Utils.missingValue();
		}
	}

	final Instance fresh = new DenseInstance(1.0, values);
	if (index >= 0) {
		m_Data.add(index, fresh);
	} else {
		m_Data.add(fresh);
	}

	if (!notify) {
		return;
	}
	// the event always reports the last row of the data
	final int lastRow = m_Data.numInstances() - 1;
	notifyListener(new TableModelEvent(this, lastRow, lastRow,
		TableModelEvent.ALL_COLUMNS, TableModelEvent.INSERT));
}
 
Example 7
Source File: Prism.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a given instance using the rule set.
 *
 * @param inst the instance to be classified
 * @return the class index produced by the rules, or Utils.missingValue()
 *         when the rules yield -1
 */
public double classifyInstance(Instance inst) {
  final int ruleResult = m_rules.resultRules(inst);
  // -1 is the rule set's "no result" marker
  return (ruleResult == -1) ? Utils.missingValue() : (double) ruleResult;
}
 
Example 8
Source File: Evaluation.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the area under the precision-recall curve (AUPRC) for the
 * predictions collected so far. Returns Utils.missingValue() if no
 * predictions have been collected.
 *
 * @param classIndex the index of the class to consider as "positive"
 * @return the area under the precision-recall curve or not a number
 */
public double areaUnderPRC(int classIndex) {
  // nothing collected yet: no area can be computed
  if (m_Predictions == null) {
    return Utils.missingValue();
  }

  final Instances curve = new ThresholdCurve().getCurve(m_Predictions, classIndex);
  return ThresholdCurve.getPRCArea(curve);
}
 
Example 9
Source File: Evaluation.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the area under the ROC curve for the predictions collected so
 * far. Returns Utils.missingValue() if no predictions have been collected.
 *
 * @param classIndex the index of the class to consider as "positive"
 * @return the area under the ROC curve or not a number
 */
public double areaUnderROC(int classIndex) {
  // nothing collected yet: no area can be computed
  if (m_Predictions == null) {
    return Utils.missingValue();
  }

  final Instances curve = new ThresholdCurve().getCurve(m_Predictions, classIndex);
  return ThresholdCurve.getROCArea(curve);
}
 
Example 10
Source File: ThresholdCurve.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a 13-value instance (weight 1.0) from two-class statistics and a
 * probability threshold.
 *
 * @param tc the statistics
 * @param prob the probability
 * @return the generated instance
 */
private Instance makeInstance(TwoClassStats tc, double prob) {
  final double[] row = new double[13];

  row[0] = tc.getTruePositive();
  row[1] = tc.getFalseNegative();
  row[2] = tc.getFalsePositive();
  row[3] = tc.getTrueNegative();
  row[4] = tc.getFalsePositiveRate();
  row[5] = tc.getTruePositiveRate();
  row[6] = tc.getPrecision();
  row[7] = tc.getRecall();
  row[8] = tc.getFallout();
  row[9] = tc.getFMeasure();

  // fraction of all cases that were predicted positive
  final double predictedPositive = tc.getTruePositive() + tc.getFalsePositive();
  final double sampleSize =
    predictedPositive / (predictedPositive + tc.getTrueNegative() + tc.getFalseNegative());
  row[10] = sampleSize;

  // ratio of true positives to the number expected by chance; left missing
  // when fewer than one true positive would be expected by chance
  final double expectedByChance =
    sampleSize * (tc.getTruePositive() + tc.getFalseNegative());
  row[11] = (expectedByChance < 1)
    ? Utils.missingValue()
    : tc.getTruePositive() / expectedByChance;

  row[12] = prob;
  return new DenseInstance(1.0, row);
}
 
Example 11
Source File: CollectiveTree.java    From collective-classification-weka-package with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Determines the class of the instance. If the class is not missing it is
 * returned as is; otherwise a random class is drawn with probability
 * proportional to the given class distribution.
 *
 * The draw is a value in [0, sum(classDist)) that is matched against the
 * running (cumulative) totals of the distribution, so class i is selected
 * when the value falls into the i-th cumulative interval.
 *
 * @param inst        the instance to get the class for
 * @param classDist   the class distribution (non-negative weights, need not
 *                    be normalized)
 * @return            the class for the instance; Utils.missingValue() if the
 *                    class is missing and no interval matches (e.g. the
 *                    distribution sums to zero)
 */
protected double determineClass(Instance inst, double[] classDist) {
  if (!inst.classIsMissing()) {
    return inst.classValue();
  }

  final double val = m_RandomClass.nextDouble() * Utils.sum(classDist);

  // determine the cumulative interval the random number fits into
  double lower = 0;
  for (int i = 0; i < classDist.length; i++) {
    final double upper = lower + classDist[i];
    if ((val >= lower) && (val < upper)) {
      return i;
    }
    lower = upper;
  }

  return Utils.missingValue();
}
 
Example 12
Source File: Neighbors.java    From collective-classification-weka-package with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Marks this object as not updated and resets the rank to -1. The class
 * value is reset to missing only when the wrapped instance's class is
 * missing, i.e. the label was not already set via
 * <code>updateClassValue()</code>.
 *
 * @see     #updateClassValue()
 */
public void invalidate() {
  m_Rank    = -1;
  m_Updated = false;

  final boolean unlabeled = getInstance().classIsMissing();
  if (unlabeled) {
    m_ClassValue = Utils.missingValue();
  }
}
 
Example 13
Source File: Vote.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies the given test instance according to the configured
 * combination rule.
 *
 * @param instance the instance to be classified
 * @return the predicted most likely class for the instance or
 *         Utils.missingValue() if no prediction is made
 * @throws Exception if an error occurred during the prediction
 * @throws IllegalStateException if the combination rule is unknown
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  switch (m_CombinationRule) {
  case AVERAGE_RULE:
  case PRODUCT_RULE:
  case MAJORITY_VOTING_RULE:
  case MIN_RULE:
  case MAX_RULE: {
    // all distribution-based rules derive the class from the combined distribution
    final double[] combined = distributionForInstance(instance);
    if (instance.classAttribute().isNominal()) {
      final int best = Utils.maxIndex(combined);
      // an all-zero distribution means no prediction
      return (combined[best] == 0) ? Utils.missingValue() : best;
    }
    if (instance.classAttribute().isNumeric()) {
      return combined[0];
    }
    return Utils.missingValue();
  }
  case MEDIAN_RULE:
    return classifyInstanceMedian(instance);
  default:
    throw new IllegalStateException("Unknown combination rule '"
        + m_CombinationRule + "'!");
  }
}
 
Example 14
Source File: LPS.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes the class distribution of an instance by descending the tree
 * from this node.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution; may be null when an empty leaf
 *         is reached and unclassified instances are not allowed
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] fromChildren = null;

  if (m_Attribute > -1) {
    // inner node: route the instance to one or more successors
    if (instance.isMissing(m_Attribute)) {
      // split value unknown: blend all successors, weighted by m_Prop
      fromChildren = new double[m_Info.numClasses()];
      for (int branch = 0; branch < m_Successors.length; branch++) {
        final double[] childDist = m_Successors[branch].distributionForInstance(instance);
        if (childDist == null) {
          continue;
        }
        for (int cls = 0; cls < childDist.length; cls++) {
          fromChildren[cls] += m_Prop[branch] * childDist[cls];
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      // nominal split: the attribute value is the successor index
      final int branch = (int) instance.value(m_Attribute);
      fromChildren = m_Successors[branch].distributionForInstance(instance);
    } else {
      // numeric split: below the split point goes left (0), otherwise right (1)
      final int branch = (instance.value(m_Attribute) < m_SplitPoint) ? 0 : 1;
      fromChildren = m_Successors[branch].distributionForInstance(instance);
    }
  }

  // an inner node with a usable successor result simply passes it on
  if ((m_Attribute != -1) && (fromChildren != null)) {
    return fromChildren;
  }

  // leaf node, or the successor had nothing to offer: answer from this node
  lastNode = leafNodeID;

  if (m_ClassDistribution != null) {
    final double[] own = m_ClassDistribution.clone();
    if (m_Info.classAttribute().isNominal()) {
      Utils.normalize(own);
    }
    return own;
  }

  // this node is empty
  if (!getAllowUnclassifiedInstances()) {
    return null;
  }
  final double[] unclassified = new double[m_Info.numClasses()];
  if (m_Info.classAttribute().isNumeric()) {
    unclassified[0] = Utils.missingValue();
  }
  return unclassified;
}
 
Example 15
Source File: HTML.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Appends one HTML table row (&lt;tr&gt;...&lt;/tr&gt;) describing the prediction
 * for a single instance.
 *
 * The row contains, in order: the 1-based index, the actual value, the
 * predicted value, an error cell, then (nominal class only) either the full
 * distribution or the probability of the predicted class, and finally the
 * attribute values string.
 *
 * @param dist        the distribution to use
 * @param inst        the instance to generate text from
 * @param index       the 0-based index in the dataset (printed 1-based)
 * @throws Exception  if something goes wrong
 */
protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception {
  int prec = m_NumDecimals;
  
  // copy of the instance, attached to the same dataset, used only for
  // printing the attribute values at the end of the row
  Instance withMissing = (Instance)inst.copy();
  withMissing.setDataset(inst.dataset());
  
  // derive the predicted value: missing if the distribution sums to zero,
  // otherwise the most probable class index (nominal) or dist[0] (other)
  double predValue = 0;
  if (Utils.sum(dist) == 0) {
    predValue = Utils.missingValue();
  } else {
    if (inst.classAttribute().isNominal()) {
      predValue = Utils.maxIndex(dist);
    } else {
      predValue = dist[0];                         
    }
  }
  
  // index (1-based)
  append("<tr>");
  append("<td>" + (index+1) + "</td>");

  if (inst.dataset().classAttribute().isNumeric()) {
    // actual
    if (inst.classIsMissing())
      append("<td align=\"right\">" + "?" + "</td>");
    else
      append("<td align=\"right\">" + Utils.doubleToString(inst.classValue(), prec) + "</td>");
    // predicted
    if (Utils.isMissingValue(predValue))
      append("<td align=\"right\">" + "?" + "</td>");
    else
      append("<td align=\"right\">" + Utils.doubleToString(predValue, prec) + "</td>");
    // error (predicted minus actual); unknown if either side is missing
    if (Utils.isMissingValue(predValue) || inst.classIsMissing())
      append("<td align=\"right\">" + "?" + "</td>");
    else
      append("<td align=\"right\">" + Utils.doubleToString(predValue - inst.classValue(), prec) + "</td>");
  } else {
    // actual, printed as "<1-based index>:<label>"
    append("<td>" + ((int) inst.classValue()+1) + ":" + sanitize(inst.toString(inst.classIndex())) + "</td>");
    // predicted, same "<1-based index>:<label>" format
    if (Utils.isMissingValue(predValue))
      append("<td>" + "?" + "</td>");
    else
      append("<td>" + ((int) predValue+1) + ":" + sanitize(inst.dataset().classAttribute().value((int)predValue)) + "</td>");
    // error? "+" marks a misclassification; only flagged when both values are known
    if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1))
      append("<td>" + "+" + "</td>");
    else
      append("<td>" + "&nbsp;" + "</td>");
    // prediction/distribution
    if (m_OutputDistribution) {
      if (Utils.isMissingValue(predValue)) {
        append("<td>" + "?" + "</td>");
      }
      else {
        // one cell per class; the predicted class is prefixed with "*"
        append("<td align=\"right\">");
        for (int n = 0; n < dist.length; n++) {
          if (n > 0)
            append("</td><td align=\"right\">");
          if (n == (int) predValue)
            append("*");
          append(Utils.doubleToString(dist[n], prec));
        }
        append("</td>");
      }
    }
    else {
      // only the probability of the predicted class
      if (Utils.isMissingValue(predValue))
        append("<td align=\"right\">" + "?" + "</td>");
      else
        append("<td align=\"right\">" + Utils.doubleToString(dist[(int)predValue], prec) + "</td>");
    }
  }

  // attributes, followed by the end of the row
  append(attributeValuesString(withMissing) + "</tr>\n");    
}
 
Example 16
Source File: CSV.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Store the prediction made by the classifier as a string: appends one
 * delimiter-separated line describing the prediction for a single instance.
 *
 * The line contains, in order: the 1-based index, the actual value, the
 * predicted value, an error field, then (nominal class only) either the full
 * distribution or the probability of the predicted class, and finally the
 * attribute values string when m_Attributes is set.
 * 
 * @param dist        the distribution to use
 * @param inst        the instance to generate text from
 * @param index       the 0-based index in the dataset (printed 1-based)
 * @throws Exception  if something goes wrong
 */
protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception {
  int prec = m_NumDecimals;

  // copy of the instance, attached to the same dataset, used only for
  // printing the attribute values at the end of the line
  Instance withMissing = (Instance)inst.copy();
  withMissing.setDataset(inst.dataset());
  
  // derive the predicted value: missing if the distribution sums to zero,
  // otherwise the most probable class index (nominal) or dist[0] (other)
  double predValue = 0;
  if (Utils.sum(dist) == 0) {
    predValue = Utils.missingValue();
  } else {
    if (inst.classAttribute().isNominal()) {
      predValue = Utils.maxIndex(dist);
    } else {
      predValue = dist[0];                         
    }
  }
  
  // index (1-based)
  append("" + (index+1));

  if (inst.dataset().classAttribute().isNumeric()) {
    // actual
    if (inst.classIsMissing())
      append(m_Delimiter + "?");
    else
      append(m_Delimiter + Utils.doubleToString(inst.classValue(), prec));
    // predicted
    if (Utils.isMissingValue(predValue))
      append(m_Delimiter + "?");
    else
      append(m_Delimiter + Utils.doubleToString(predValue, prec));
    // error (predicted minus actual); unknown if either side is missing
    if (Utils.isMissingValue(predValue) || inst.classIsMissing())
      append(m_Delimiter + "?");
    else
      append(m_Delimiter + Utils.doubleToString(predValue - inst.classValue(), prec));
  } else {
    // actual, printed as "<1-based index>:<label>"
    append(m_Delimiter + ((int) inst.classValue()+1) + ":" + inst.toString(inst.classIndex()));
    // predicted, same "<1-based index>:<label>" format
    if (Utils.isMissingValue(predValue))
      append(m_Delimiter + "?");
    else
      append(m_Delimiter + ((int) predValue+1) + ":" + inst.dataset().classAttribute().value((int)predValue));
    // error? "+" marks a misclassification; only flagged when both values are known
    if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1))
      append(m_Delimiter + "+");
    else
      append(m_Delimiter + "");
    // prediction/distribution
    if (m_OutputDistribution) {
      if (Utils.isMissingValue(predValue)) {
        append(m_Delimiter + "?");
      }
      else {
        // one field per class; the predicted class is prefixed with "*"
        append(m_Delimiter);
        for (int n = 0; n < dist.length; n++) {
          if (n > 0)
            append(m_Delimiter);
          if (n == (int) predValue)
            append("*");
          append(Utils.doubleToString(dist[n], prec));
        }
      }
    }
    else {
      // only the probability of the predicted class
      if (Utils.isMissingValue(predValue))
        append(m_Delimiter + "?");
      else
        append(m_Delimiter + Utils.doubleToString(dist[(int)predValue], prec));
    }
  }

  // attributes (only when attribute output was requested), then end of line
  if (m_Attributes != null)
    append(m_Delimiter + attributeValuesString(withMissing));
  append("\n");
}
 
Example 17
Source File: XML.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Store the prediction made by the classifier as a string: appends one XML
 * prediction element describing the prediction for a single instance.
 *
 * For a numeric class the element holds the actual value, the predicted
 * value and the error; for a nominal class it holds the actual label, the
 * predicted label, an error flag and either the full distribution or the
 * probability of the predicted class. Unknown values are written as "?".
 *
 * The predicted value is written as "?" only when the prediction itself is
 * missing; a missing actual class does not hide a valid prediction.
 * 
 * @param dist        the distribution to use
 * @param inst        the instance to generate text from
 * @param index       the 0-based index in the dataset (printed 1-based)
 * @throws Exception  if something goes wrong
 */
protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception {
  int prec = m_NumDecimals;

  // copy of the instance, attached to the same dataset, used only for
  // printing the attribute values at the end of the element
  Instance withMissing = (Instance)inst.copy();
  withMissing.setDataset(inst.dataset());
  
  // derive the predicted value: missing if the distribution sums to zero,
  // otherwise the most probable class index (nominal) or dist[0] (other)
  double predValue = 0;
  if (Utils.sum(dist) == 0) {
    predValue = Utils.missingValue();
  } else {
    if (inst.classAttribute().isNominal()) {
      predValue = Utils.maxIndex(dist);
    } else {
      predValue = dist[0];
    }
  }
  
  // opening tag
  append("  <" + TAG_PREDICTION + " " + ATT_INDEX + "=\"" + (index+1) + "\">\n");

  if (inst.dataset().classAttribute().isNumeric()) {
    // actual
    append("    <" + TAG_ACTUAL_VALUE + ">");
    if (inst.classIsMissing())
      append("?");
    else
      append(Utils.doubleToString(inst.classValue(), prec));
    append("</" + TAG_ACTUAL_VALUE + ">\n");
    // predicted: depends on the prediction, not on the actual class
    append("    <" + TAG_PREDICTED_VALUE + ">");
    if (Utils.isMissingValue(predValue))
      append("?");
    else
      append(Utils.doubleToString(predValue, prec));
    append("</" + TAG_PREDICTED_VALUE + ">\n");
    // error (predicted minus actual); unknown if either side is missing
    append("    <" + TAG_ERROR + ">");
    if (Utils.isMissingValue(predValue) || inst.classIsMissing())
      append("?");
    else
      append(Utils.doubleToString(predValue - inst.classValue(), prec));
    append("</" + TAG_ERROR + ">\n");
  } else {
    // actual
    append("    <" + TAG_ACTUAL_LABEL + " " + ATT_INDEX + "=\"" + ((int) inst.classValue()+1) + "\"" + ">");
    append(sanitize(inst.toString(inst.classIndex())));
    append("</" + TAG_ACTUAL_LABEL + ">\n");
    // predicted
    append("    <" + TAG_PREDICTED_LABEL + " " + ATT_INDEX + "=\"" + ((int) predValue+1) + "\"" + ">");
    if (Utils.isMissingValue(predValue))
      append("?");
    else
      append(sanitize(inst.dataset().classAttribute().value((int)predValue)));
    append("</" + TAG_PREDICTED_LABEL + ">\n");
    // error? only flagged when both values are known and differ
    append("    <" + TAG_ERROR + ">");
    if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1))
      append(VAL_YES);
    else
      append(VAL_NO);
    append("</" + TAG_ERROR + ">\n");
    // prediction/distribution
    if (m_OutputDistribution) {
      // one element per class; the predicted class carries an extra attribute
      append("    <" + TAG_DISTRIBUTION + ">\n");
      for (int n = 0; n < dist.length; n++) {
        append("      <" + TAG_CLASS_LABEL + " " + ATT_INDEX + "=\"" + (n+1) + "\"");
        if (!Utils.isMissingValue(predValue) && (n == (int) predValue))
          append(" " + ATT_PREDICTED + "=\"" + VAL_YES + "\"");
        append(">");
        append(Utils.doubleToString(dist[n], prec));
        append("</" + TAG_CLASS_LABEL + ">\n");
      }
      append("    </" + TAG_DISTRIBUTION + ">\n");
    }
    else {
      // only the probability of the predicted class
      append("    <" + TAG_PREDICTION + ">");
      if (Utils.isMissingValue(predValue))
        append("?");
      else
        append(Utils.doubleToString(dist[(int)predValue], prec));
      append("</" + TAG_PREDICTION + ">\n");
    }
  }

  // attributes (only when attribute output was requested)
  if (m_Attributes != null)
    append(attributeValuesString(withMissing));
  
  // closing tag
  append("  </" + TAG_PREDICTION + ">\n");
}
 
Example 18
Source File: Id3.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Recursively builds the Id3 tree rooted at this node from the given data.
 *
 * A node becomes a leaf when no instances reach it or when the best
 * attribute's information gain is zero; otherwise one successor is built
 * per value of the chosen attribute.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

  // Empty node: no attribute, missing class value, all-zero distribution.
  if (data.numInstances() == 0) {
    m_Attribute = null;
    m_ClassValue = Utils.missingValue();
    m_Distribution = new double[data.numClasses()];
    return;
  }

  // Score every enumerated attribute and pick the one with the highest gain.
  final double[] gains = new double[data.numAttributes()];
  for (Enumeration atts = data.enumerateAttributes(); atts.hasMoreElements();) {
    final Attribute candidate = (Attribute) atts.nextElement();
    gains[candidate.index()] = computeInfoGain(data, candidate);
  }
  m_Attribute = data.attribute(Utils.maxIndex(gains));

  // Positive gain: split on the chosen attribute and recurse into each subset.
  if (!Utils.eq(gains[m_Attribute.index()], 0)) {
    final Instances[] subsets = splitData(data, m_Attribute);
    m_Successors = new Id3[m_Attribute.numValues()];
    for (int branch = 0; branch < m_Attribute.numValues(); branch++) {
      m_Successors[branch] = new Id3();
      m_Successors[branch].makeTree(subsets[branch]);
    }
    return;
  }

  // Zero gain: make a leaf holding the normalized class counts.
  m_Attribute = null;
  m_Distribution = new double[data.numClasses()];
  for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
    final Instance row = (Instance) rows.nextElement();
    m_Distribution[(int) row.classValue()]++;
  }
  Utils.normalize(m_Distribution);
  m_ClassValue = Utils.maxIndex(m_Distribution);
  m_ClassAttribute = data.classAttribute();
}
 
Example 19
Source File: InputMappedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds an instance in the model's format from an incoming instance,
 * regenerating the attribute mapping first when the incoming structure has
 * changed or no mapping exists yet.
 *
 * Model attributes whose status is not OK, whose incoming value is missing,
 * or whose nominal value has no match in the model are set to missing.
 * Model attributes that are neither numeric nor nominal keep the array's
 * default value of 0.
 *
 * @param incoming the instance to map; its dataset supplies the input header
 * @return a new DenseInstance with the incoming weight, attached to the
 *         model header
 * @throws Exception if the mapping cannot be regenerated
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {

  // The stored input header is stale if none was seen yet or if the incoming
  // structure no longer matches it.
  final boolean headerStale = (m_inputHeader == null)
    || !m_inputHeader.equalHeaders(incoming.dataset());
  if (headerStale) {
    m_inputHeader = incoming.dataset();
  }

  if (headerStale || (m_attributeMap == null)) {
    m_initialTestStructureKnown = false;
    regenerateMapping();
    m_vals = null;

    if (!m_suppressMappingReport) {
      StringBuffer result = createMappingReport();
      System.out.println(result.toString());
    }
  }

  m_vals = new double[m_modelHeader.numAttributes()];

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      // no usable counterpart in the incoming data
      m_vals[i] = Utils.missingValue();
      continue;
    }

    final double incomingValue = incoming.value(m_attributeMap[i]);
    if (Utils.isMissingValue(incomingValue)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    final Attribute modelAtt = m_modelHeader.attribute(i);
    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingValue;
    } else if (modelAtt.isNominal()) {
      // translate the incoming label index into the model's label index
      final int mapVal = m_nominalValueMap[i][(int) incomingValue];
      m_vals[i] = (mapVal == NO_MATCH) ? Utils.missingValue() : mapVal;
    }
  }

  Instance newInst = new DenseInstance(incoming.weight(), m_vals);
  newInst.setDataset(m_modelHeader);

  return newInst;
}
 
Example 20
Source File: AccessibleRandomTree.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Computes the class distribution of an instance by descending the tree
 * from this node.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution
 * @throws PredictionException if an empty leaf is reached and unclassified
 *           instances are not allowed
 * @throws Exception if computation fails
 */
@Override
public double[] distributionForInstance(final Instance instance) throws Exception {
	double[] fromChildren = null;

	if (this.m_Attribute > -1) {
		// inner node: route the instance to one or more successors
		if (instance.isMissing(this.m_Attribute)) {
			// split value unknown: blend all successors, weighted by m_Prop
			fromChildren = new double[AccessibleRandomTree.this.m_Info.numClasses()];
			for (int branch = 0; branch < this.successors.length; branch++) {
				final double[] childDist = this.successors[branch].distributionForInstance(instance);
				if (childDist == null) {
					continue;
				}
				for (int cls = 0; cls < childDist.length; cls++) {
					fromChildren[cls] += this.m_Prop[branch] * childDist[cls];
				}
			}
		} else if (AccessibleRandomTree.this.m_Info.attribute(this.m_Attribute).isNominal()) {
			// nominal split: the attribute value is the successor index
			final int branch = (int) instance.value(this.m_Attribute);
			fromChildren = this.successors[branch].distributionForInstance(instance);
		} else {
			// numeric split: below the split point goes to successor 0, otherwise 1
			final int branch = (instance.value(this.m_Attribute) < this.m_SplitPoint) ? 0 : 1;
			fromChildren = this.successors[branch].distributionForInstance(instance);
		}
	}

	// an inner node with a usable successor result simply passes it on
	if ((this.m_Attribute != -1) && (fromChildren != null)) {
		return fromChildren;
	}

	// leaf node, or the successor had nothing to offer: answer from this node
	AccessibleRandomTree.this.lastNode = this.leafNodeID;

	if (this.m_ClassDistribution != null) {
		final double[] own = this.m_ClassDistribution.clone();
		if (AccessibleRandomTree.this.m_Info.classAttribute().isNominal()) {
			Utils.normalize(own);
		}
		return own;
	}

	// this node is empty
	if (!AccessibleRandomTree.this.getAllowUnclassifiedInstances()) {
		throw new PredictionException("Could not obtain a prediction.");
	}
	final double[] unclassified = new double[AccessibleRandomTree.this.m_Info.numClasses()];
	if (AccessibleRandomTree.this.m_Info.classAttribute().isNumeric()) {
		unclassified[0] = Utils.missingValue();
	}
	return unclassified;
}