Java Code Examples for weka.core.Utils#normalize()

The following examples show how to use weka.core.Utils#normalize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MOA.java    From moa with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the class distribution that the wrapped classifier predicts
 * for the given instance.
 * <p>
 * The votes obtained from the wrapped classifier are padded with zeros
 * until there is one entry per class value and are then normalized. If
 * normalization fails, an all-zero array with one entry per class value
 * is returned instead.
 *
 * @param instance the instance to be classified
 * @return the normalized votes, or an all-zero array when they cannot
 *         be normalized
 * @throws Exception if the votes could not be computed
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] votes = m_ActualClassifier.getVotesForInstance(
      instanceConverter.samoaInstance(instance));

  // the classifier may return fewer votes than there are class values;
  // pad the tail with zeros in that case
  if (votes.length < instance.numClasses()) {
    double[] padded = new double[instance.numClasses()];
    System.arraycopy(votes, 0, padded, 0, votes.length);
    votes = padded;
  }

  try {
    Utils.normalize(votes);
    return votes;
  } catch (Exception e) {
    // normalization failed: report "unclassified" as all zeros
    return new double[instance.numClasses()];
  }
}
 
Example 2
Source File: Bagging.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Combines the predictions of the ensemble members for the given test
 * instance.
 * <p>
 * For a numeric class, element 0 holds the mean of the members'
 * predictions. Otherwise the members' distributions are summed and the
 * sum is normalized; a sum of zero is returned unchanged.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] totals = new double[instance.numClasses()];
  boolean numericClass = instance.classAttribute().isNumeric();

  for (int member = 0; member < m_NumIterations; member++) {
    if (numericClass) {
      totals[0] += m_Classifiers[member].classifyInstance(instance);
      continue;
    }
    double[] memberDist = m_Classifiers[member].distributionForInstance(instance);
    for (int cls = 0; cls < memberDist.length; cls++) {
      totals[cls] += memberDist[cls];
    }
  }

  if (numericClass) {
    totals[0] /= (double) m_NumIterations;
  } else if (!Utils.eq(Utils.sum(totals), 0)) {
    // only normalize when there is something to normalize
    Utils.normalize(totals);
  }
  return totals;
}
 
Example 3
Source File: END.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance by pooling the predictions of all ensemble members.
 * <p>
 * Numeric class: the members' predictions are averaged into element 0.
 * Nominal class: the members' distributions are added up and normalized,
 * unless they sum to zero, in which case the zero array is returned.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] pooled = new double[instance.numClasses()];
  boolean isRegression = instance.classAttribute().isNumeric();

  for (int k = 0; k < m_NumIterations; k++) {
    if (isRegression) {
      pooled[0] += m_Classifiers[k].classifyInstance(instance);
    } else {
      double[] dist = m_Classifiers[k].distributionForInstance(instance);
      for (int c = 0; c < dist.length; c++) {
        pooled[c] += dist[c];
      }
    }
  }

  if (isRegression) {
    pooled[0] /= (double) m_NumIterations;
    return pooled;
  }
  if (Utils.eq(Utils.sum(pooled), 0)) {
    return pooled;
  }
  Utils.normalize(pooled);
  return pooled;
}
 
Example 4
Source File: MIBoost.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the class distribution for a given exemplar (bag).
 * <p>
 * The exemplar is converted into single instances; every model then
 * votes for each of them, with each vote contributing the model's beta
 * value divided by the number of instances. The accumulated scores are
 * exponentiated and normalized.
 *
 * @param exmp the exemplar for which distribution is computed
 * @return the normalized class distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance exmp)
  throws Exception {

  double[] scores = new double[m_NumClasses];

  // wrap the exemplar in an empty copy of its dataset so it can be filtered
  Instances bag = new Instances(exmp.dataset(), 0);
  bag.add(exmp);

  // convert the bag into a single-instance dataset
  bag = Filter.useFilter(bag, m_ConvertToSI);
  // drop the first attribute (the bag index)
  bag.deleteAttributeAt(0);

  // instance count is taken before the optional discretization step
  double n = bag.numInstances();

  if (m_DiscretizeBin > 0) {
    bag = Filter.useFilter(bag, m_Filter);
  }

  for (int row = 0; row < n; row++) {
    Instance single = bag.instance(row);
    for (int model = 0; model < m_NumIterations; model++) {
      int predicted = (int) m_Models[model].classifyInstance(single);
      scores[predicted] += m_Beta[model] / n;
    }
  }

  for (int cls = 0; cls < scores.length; cls++) {
    scores[cls] = Math.exp(scores[cls]);
  }

  Utils.normalize(scores);
  return scores;
}
 
Example 5
Source File: NaiveBayesMultinomialUpdateable.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * For every class a log score is built from the log class prior, the
 * frequency-weighted log word statistics and a length term proportional
 * to the document's total word frequency. The scores are shifted by their
 * maximum before exponentiation and then normalized.
 *
 * @param instance 	the instance to be classified
 * @return 		predicted class probability distribution
 * @throws Exception 	if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probOfClassGivenDoc = new double[m_numClasses];

  // calculate the array of log(Pr[D|C])
  double[] logDocGivenClass = new double[m_numClasses];
  for (int c = 0; c < m_numClasses; c++) {
    logDocGivenClass[c] += Math.log(m_probOfClass[c]);

    // Total frequency of the document. This must be a double: with an int
    // accumulator the compound assignment narrows on every addition, so
    // fractional frequencies would be truncated (e.g. 0.5 + 0.5 -> 0).
    double allWords = 0;
    for (int i = 0; i < instance.numValues(); i++) {
      // the class attribute is not a word
      if (instance.index(i) == instance.classIndex()) {
        continue;
      }
      double frequencies = instance.valueSparse(i);
      allWords += frequencies;
      logDocGivenClass[c] += frequencies
        * Math.log(m_probOfWordGivenClass[c][instance.index(i)]);
    }
    logDocGivenClass[c] -= allWords * Math.log(m_wordsPerClass[c]);
  }

  // shift by the largest log score so that exp() does not underflow for
  // every class at once
  double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
  for (int i = 0; i < m_numClasses; i++) {
    probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max);
  }

  Utils.normalize(probOfClassGivenDoc);

  return probOfClassGivenDoc;
}
 
Example 6
Source File: ContractRotationForest.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * The instance is first passed through the remove-useless and normalize
 * filters. Each classifier then predicts on its own converted version of
 * the instance. Numeric class: predictions are averaged into element 0.
 * Nominal class: distributions are summed and normalized unless the sum
 * is zero.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

  // same filter call order as during prediction elsewhere: input, output,
  // batchFinished
  removeUseless.input(instance);
  instance = removeUseless.output();
  removeUseless.batchFinished();

  normalize.input(instance);
  instance = normalize.output();
  normalize.batchFinished();

  double[] totals = new double[instance.numClasses()];

  for (int member = 0; member < classifiers.size(); member++) {
    Instance memberView = convertInstance(instance, member);
    if (instance.classAttribute().isNumeric()) {
      totals[0] += classifiers.get(member).classifyInstance(memberView);
      continue;
    }
    double[] memberDist = classifiers.get(member).distributionForInstance(memberView);
    for (int cls = 0; cls < memberDist.length; cls++) {
      totals[cls] += memberDist[cls];
    }
  }

  if (instance.classAttribute().isNumeric()) {
    totals[0] /= (double) classifiers.size();
  } else if (!Utils.eq(Utils.sum(totals), 0)) {
    Utils.normalize(totals);
  }
  return totals;
}
 
Example 7
Source File: DynamicWeightedMajority.java    From moa with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the normalized weighted vote of all experts for the given
 * instance. Each expert contributes its weight to the class for which its
 * own votes are largest.
 *
 * @param inst the instance to vote on
 * @return the normalized weighted votes, one entry per class
 */
@Override
public double[] getVotesForInstance(Instance inst) {
    double[] weightedVotes = new double[inst.numClasses()];
    int expertCount = this.experts.size();
    for (int e = 0; e < expertCount; e++) {
        double[] expertVotes = this.experts.get(e).getVotesForInstance(inst);
        // the expert's predicted class receives the expert's full weight
        weightedVotes[Utils.maxIndex(expertVotes)] += this.weights.get(e);
    }
    Utils.normalize(weightedVotes);
    return weightedVotes;
}
 
Example 8
Source File: Vote.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a given instance using the selected combination rule.
 * <p>
 * The rule-specific helper produces the raw result. For the median rule
 * the value of {@code classifyInstance} is stored in element 0. For a
 * non-numeric class the result is normalized when its sum is positive.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] combined = new double[instance.numClasses()];

  switch (m_CombinationRule) {
  case AVERAGE_RULE:
    combined = distributionForInstanceAverage(instance);
    break;
  case PRODUCT_RULE:
    combined = distributionForInstanceProduct(instance);
    break;
  case MAJORITY_VOTING_RULE:
    combined = distributionForInstanceMajorityVoting(instance);
    break;
  case MIN_RULE:
    combined = distributionForInstanceMin(instance);
    break;
  case MAX_RULE:
    combined = distributionForInstanceMax(instance);
    break;
  case MEDIAN_RULE:
    // the median rule yields a single value, kept in the first slot
    combined[0] = classifyInstance(instance);
    break;
  default:
    throw new IllegalStateException("Unknown combination rule '"
        + m_CombinationRule + "'!");
  }

  // numeric predictions are returned as they are
  if (!instance.classAttribute().isNumeric()) {
    if (Utils.sum(combined) > 0) {
      Utils.normalize(combined);
    }
  }

  return combined;
}
 
Example 9
Source File: StackingC.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies a given instance using the stacked classifier.
 * <p>
 * One meta classifier is evaluated per iteration of the outer loop; its
 * numeric prediction is clamped to [0, 1] and stored as the score of the
 * class with the same index. The scores are normalized by their sum
 * unless that sum is zero.
 *
 * @param instance the instance to be classified
 * @return the distribution
 * @throws Exception if instance could not be classified
 * successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  // attribute indices handed to m_attrFilter; the last slot always refers
  // to the last attribute of the meta format
  int [] arrIdc = new int[m_Classifiers.length+1];
  arrIdc[m_Classifiers.length] = m_MetaFormat.numAttributes() - 1;
  double [] classProbs = new double[m_BaseFormat.numClasses()];
  Instance newInst;
  double sum = 0;

  for (int i = 0; i < m_MetaClassifiers.length; i++) {
    // for every base classifier j, pick the attribute at offset i within
    // that classifier's group of numClasses() attributes
    for (int j = 0; j < m_Classifiers.length; j++) {
        arrIdc[j] = m_BaseFormat.numClasses() * j + i;
    }
    // reconfigure the indicator filter for value index i of the meta class
    // attribute (the attribute index is passed 1-based, as a string) and
    // push the meta-level instance through it
    m_makeIndicatorFilter.setAttributeIndex("" + (m_MetaFormat.classIndex() + 1));
    m_makeIndicatorFilter.setNumeric(true);
    m_makeIndicatorFilter.setValueIndex(i);
    m_makeIndicatorFilter.setInputFormat(m_MetaFormat);
    m_makeIndicatorFilter.input(metaInstance(instance));
    m_makeIndicatorFilter.batchFinished();
    newInst = m_makeIndicatorFilter.output();

    // apply the attribute filter with the indices collected above and the
    // selection inverted
    // NOTE(review): presumably this keeps only the attributes in arrIdc --
    // confirm against the filter type of m_attrFilter
    m_attrFilter.setAttributeIndicesArray(arrIdc);
    m_attrFilter.setInvertSelection(true);
    m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat());
    m_attrFilter.input(newInst);
    m_attrFilter.batchFinished();
    newInst = m_attrFilter.output();

    // clamp the meta classifier's prediction into [0, 1] before summing
    classProbs[i]=m_MetaClassifiers[i].classifyInstance(newInst);
    if (classProbs[i] > 1) { classProbs[i] = 1; }
    if (classProbs[i] < 0) { classProbs[i] = 0; }
    sum += classProbs[i];
  }

  // an all-zero result is returned unnormalized
  if (sum!=0) Utils.normalize(classProbs,sum);

  return classProbs;
}
 
Example 10
Source File: LBR.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance, restricted to the attributes listed in the given index
 * object.
 * <p>
 * Each class starts with a prior smoothed by adding one to its count;
 * the prior is then multiplied by a likewise smoothed conditional
 * estimate for every listed attribute whose value is not missing. The
 * result is written to {@code posteriorsArray} and normalized.
 *
 * @param instance the instance to be classified
 * @param instanceIndex supplies the attribute indexes to use; it is
 *        switched to sequential mode by this method
 *
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed
 */
public double[] localDistributionForInstance(Instance instance, Indexes instanceIndex) throws Exception {

  int classCount = instance.numClasses();

  instanceIndex.setSequentialDataset(true);

  // denominator of the smoothed class prior
  double priorTotal = Utils.sum(m_Priors) + classCount;

  for (int cls = 0; cls < classCount; cls++) {
    // local reference to this class's counts for cheaper access in the loop
    int[][] classCounts = m_Counts[cls];
    posteriorsArray[cls] = (m_Priors[cls] + 1) / (priorTotal);

    for (int pos = 0; pos < instanceIndex.m_NumSeqAttsSet; pos++) {
      int att = instanceIndex.m_SequentialAttIndexes[pos];
      double countTotal = Utils.sum(classCounts[att]);
      if (instance.isMissing(att)) {
        // missing values do not contribute a factor
        continue;
      }
      posteriorsArray[cls] *= ((classCounts[att][(int) instance.value(att)] + 1)
          / (countTotal + (double) instance.attribute(att).numValues()));
    }
  }

  // Normalize probabilities
  Utils.normalize(posteriorsArray);

  return posteriorsArray;
}
 
Example 11
Source File: RotationForest.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * The instance is first passed through the remove-useless and normalize
 * filters. Each classifier then predicts on its own converted version of
 * the instance. Numeric class: the predictions are averaged into
 * element 0. Nominal class: the distributions are summed and normalized
 * unless the sum is zero.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  m_RemoveUseless.input(instance);
  instance = m_RemoveUseless.output();
  m_RemoveUseless.batchFinished();

  m_Normalize.input(instance);
  instance = m_Normalize.output();
  m_Normalize.batchFinished();

  double[] sums = new double[instance.numClasses()], newProbs;

  for (int i = 0; i < m_Classifiers.length; i++) {
    Instance convertedInstance = convertInstance(instance, i);
    if (instance.classAttribute().isNumeric()) {
      sums[0] += m_Classifiers[i].classifyInstance(convertedInstance);
    } else {
      newProbs = m_Classifiers[i].distributionForInstance(convertedInstance);
      for (int j = 0; j < newProbs.length; j++) {
        sums[j] += newProbs[j];
      }
    }
  }

  if (instance.classAttribute().isNumeric()) {
    // Divide by the number of predictions that were actually summed above.
    // Using m_NumIterations here gives a wrong mean whenever that setting
    // differs from the number of built classifiers.
    sums[0] /= (double) m_Classifiers.length;
    return sums;
  } else if (Utils.eq(Utils.sum(sums), 0)) {
    // nothing to normalize
    return sums;
  } else {
    Utils.normalize(sums);
    return sums;
  }
}
 
Example 12
Source File: HoeffdingTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns class probabilities for an instance.
 * <p>
 * Without a tree, every class value gets the same probability. Otherwise
 * the distribution comes from the node reached by the instance; when the
 * reached leaf has no node of its own, its parent node is used. The
 * node's distribution is returned without further normalization.
 *
 * @param inst the instance to compute the distribution for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  Attribute classAtt = inst.classAttribute();
  double[] pred = new double[classAtt.numValues()];

  if (m_root == null) {
    // all class values equally likely
    for (int v = 0; v < classAtt.numValues(); v++) {
      pred[v] = 1;
    }
    Utils.normalize(pred);
    return pred;
  }

  LeafNode leaf = m_root.leafForInstance(inst, null, null);
  HNode node = (leaf.m_theNode != null) ? leaf.m_theNode : leaf.m_parentNode;

  return node.getDistribution(inst, classAtt);
}
 
Example 13
Source File: RandomTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes class distribution of an instance using the decision tree.
 * <p>
 * Inner nodes delegate to their successors: a missing split value blends
 * the distributions of all successors weighted by {@code m_Prop}; a
 * nominal value selects the successor with that index; a numeric value
 * selects successor 0 below the split point and successor 1 otherwise.
 * Leaves, and inner nodes whose successor returned nothing, answer with a
 * normalized copy of their own class distribution.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution; for a node without a class
 *         distribution either an all-zero array or {@code null},
 *         depending on {@code getAllowUnclassifiedInstances()}
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] fromSubtree = null;

  if (m_Attribute > -1) {
    // inner node
    if (instance.isMissing(m_Attribute)) {
      // split value unknown: blend all branches by their proportions
      fromSubtree = new double[m_Info.numClasses()];
      for (int branch = 0; branch < m_Successors.length; branch++) {
        double[] branchDist = m_Successors[branch].distributionForInstance(instance);
        if (branchDist == null) {
          continue;
        }
        for (int cls = 0; cls < branchDist.length; cls++) {
          fromSubtree[cls] += m_Prop[branch] * branchDist[cls];
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      int branch = (int) instance.value(m_Attribute);
      fromSubtree = m_Successors[branch].distributionForInstance(instance);
    } else {
      int branch = (instance.value(m_Attribute) < m_SplitPoint) ? 0 : 1;
      fromSubtree = m_Successors[branch].distributionForInstance(instance);
    }
  }

  // an inner node with an answer from below simply passes it on
  if ((m_Attribute != -1) && (fromSubtree != null)) {
    return fromSubtree;
  }

  // leaf, or the successor was empty: fall back to this node's own counts
  if (m_ClassDistribution != null) {
    double[] normalizedDistribution = m_ClassDistribution.clone();
    Utils.normalize(normalizedDistribution);
    return normalizedDistribution;
  }

  // empty node
  if (getAllowUnclassifiedInstances()) {
    return new double[m_Info.numClasses()];
  }
  return null;
}
 
Example 14
Source File: LPS.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes class distribution of an instance using the decision tree.
 * <p>
 * Inner nodes delegate to their successors: a missing split value blends
 * the distributions of all successors weighted by {@code m_Prop}; a
 * nominal value selects the successor with that index; a numeric value
 * selects successor 0 below the split point and successor 1 otherwise.
 * When this node answers itself (leaf, or the successor returned
 * nothing) it records its id in {@code lastNode} and returns a copy of
 * its class distribution, normalized only for a nominal class.
 *
 * @param instance the instance to compute the distribution for
 * @return the computed class distribution; for a node without a class
 *         distribution either {@code null} or, if unclassified instances
 *         are allowed, a zero array whose first element is the missing
 *         value when the class is numeric
 * @throws Exception if computation fails
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  double[] fromSubtree = null;

  if (m_Attribute > -1) {
    // inner node
    if (instance.isMissing(m_Attribute)) {
      // split value unknown: blend all branches by their proportions
      fromSubtree = new double[m_Info.numClasses()];
      for (int branch = 0; branch < m_Successors.length; branch++) {
        double[] branchDist = m_Successors[branch].distributionForInstance(instance);
        if (branchDist == null) {
          continue;
        }
        for (int cls = 0; cls < branchDist.length; cls++) {
          fromSubtree[cls] += m_Prop[branch] * branchDist[cls];
        }
      }
    } else if (m_Info.attribute(m_Attribute).isNominal()) {
      int branch = (int) instance.value(m_Attribute);
      fromSubtree = m_Successors[branch].distributionForInstance(instance);
    } else {
      int branch = (instance.value(m_Attribute) < m_SplitPoint) ? 0 : 1;
      fromSubtree = m_Successors[branch].distributionForInstance(instance);
    }
  }

  // an inner node with an answer from below simply passes it on
  if ((m_Attribute != -1) && (fromSubtree != null)) {
    return fromSubtree;
  }

  // this node answers itself: remember which node that was
  lastNode = leafNodeID;

  if (m_ClassDistribution == null) {
    // empty node
    if (!getAllowUnclassifiedInstances()) {
      return null;
    }
    double[] unclassified = new double[m_Info.numClasses()];
    if (m_Info.classAttribute().isNumeric()) {
      unclassified[0] = Utils.missingValue();
    }
    return unclassified;
  }

  double[] ownDistribution = m_ClassDistribution.clone();
  if (m_Info.classAttribute().isNominal()) {
    Utils.normalize(ownDistribution);
  }
  return ownDistribution;
}
 
Example 15
Source File: IBk.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Turn the list of nearest neighbors into a probability distribution.
 * <p>
 * Every neighbour contributes its instance weight, scaled according to
 * the configured distance weighting. For a nominal class the weight is
 * added to the neighbour's class entry; for a numeric class the weighted
 * class value is added to element 0. The result is divided by the total
 * weight when that total is positive.
 * <p>
 * Note that the entries of {@code distances} are overwritten in place
 * with their rescaled values.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors; modified by this call
 * @return the probability distribution
 * @throws Exception if computation goes wrong or has no class attribute
 */
protected double [] makeDistribution(Instances neighbours, double[] distances)
  throws Exception {

  double total = 0, weight;
  double [] distribution = new double [m_NumClasses];

  // Set up a correction to the estimator: for a nominal class every entry
  // starts with a small pseudo-count of 1 / (number of training instances)
  if (m_ClassType == Attribute.NOMINAL) {
    for (int i = 0; i < m_NumClasses; i++) {
      distribution[i] = 1.0 / Math.max(1, m_Train.numInstances());
    }
    total = (double) m_NumClasses / Math.max(1, m_Train.numInstances());
  }

  for (int i = 0; i < neighbours.numInstances(); i++) {
    // Collect class counts
    Instance current = neighbours.instance(i);
    // rescale the distance by the number of attributes used
    distances[i] = distances[i] * distances[i];
    distances[i] = Math.sqrt(distances[i] / m_NumAttributesUsed);
    switch (m_DistanceWeighting) {
      case WEIGHT_INVERSE:
        weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero
        break;
      case WEIGHT_SIMILARITY:
        weight = 1.0 - distances[i];
        break;
      default:                                 // WEIGHT_NONE:
        weight = 1.0;
        break;
    }
    weight *= current.weight();
    try {
      switch (m_ClassType) {
        case Attribute.NOMINAL:
          distribution[(int) current.classValue()] += weight;
          break;
        case Attribute.NUMERIC:
          distribution[0] += current.classValue() * weight;
          break;
      }
    } catch (Exception ex) {
      // keep the original failure as the cause so its stack trace is not lost
      throw new Error("Data has no class attribute!", ex);
    }
    total += weight;
  }

  // Normalise distribution
  if (total > 0) {
    Utils.normalize(distribution, total);
  }
  return distribution;
}
 
Example 16
Source File: DTNB.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given
 * test instance.
 * <p>
 * The instance is transformed and looked up in the decision table. A hit
 * yields a normalized copy of the stored distribution; a miss falls back
 * to the IBk prediction or, without IBk, to a copy of the class priors.
 * That distribution is then combined in log space with the naive Bayes
 * distribution, with the log class priors subtracted, converted back to
 * probabilities and normalized.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if distribution can't be computed
 */
public double [] distributionForInstance(Instance instance)
  throws Exception {

  m_disTransform.input(instance);
  m_disTransform.batchFinished();
  instance = m_disTransform.output();

  m_delTransform.input(instance);
  m_delTransform.batchFinished();
  Instance dtInstance = m_delTransform.output();

  DecisionTableHashKey key =
    new DecisionTableHashKey(dtInstance, dtInstance.numAttributes(), false);

  double[] tableDist;
  double[] stored = (double []) m_entries.get(key);
  if (stored != null) {
    // found in the table: work on a normalized copy, not the stored array
    tableDist = new double[stored.length];
    System.arraycopy(stored, 0, tableDist, 0, stored.length);
    Utils.normalize(tableDist);
  } else if (m_useIBk) {
    tableDist = m_ibk.distributionForInstance(dtInstance);
  } else {
    tableDist = m_classPriors.clone();
  }

  // combine in log space: log(table) - log(prior) + log(naive Bayes)
  double[] nbDist = m_NB.distributionForInstance(instance);
  for (int cls = 0; cls < nbDist.length; cls++) {
    tableDist[cls] = Math.log(tableDist[cls]) - Math.log(m_classPriors[cls])
      + Math.log(nbDist[cls]);
  }

  double[] combined = Utils.logs2probs(tableDist);
  Utils.normalize(combined);

  return combined;
}
 
Example 17
Source File: HNB.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the class membership probabilities for the given test instance.
 * <p>
 * For every class value the smoothed class prior is multiplied by one
 * factor per non-class attribute (the "son"). That factor is an average,
 * weighted by {@code m_condiMutualInfo}, of smoothed count ratios taken
 * over all other non-class attributes (the "parents"). If the weights sum
 * to zero or less, a smoothed estimate that uses no parent is taken
 * instead. The products are normalized before being returned.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @exception Exception if there is a problem generating the prediction
 */
public double[] distributionForInstance(Instance instance) throws Exception {

  //Definition of local variables
  double[] probs = new double[m_NumClasses];
  int sIndex;
  double prob;
  double condiMutualInfoSum;

  // store instance's att values in an int array: each entry is the
  // attribute's start offset plus the instance's value; the class
  // attribute is marked with -1 so the loops below skip it
  int[] attIndex = new int[m_NumAttributes];
  for(int att = 0; att < m_NumAttributes; att++) {
    if(att == m_ClassIndex)
      attIndex[att] = -1;
    else
      attIndex[att] = m_StartAttIndex[att] + (int)instance.value(att);
  }

  // calculate probabilities for each possible class value
  for(int classVal = 0; classVal < m_NumClasses; classVal++) {
    // smoothed class prior
    probs[classVal]=(m_ClassCounts[classVal]+1.0/m_NumClasses)/(m_NumInstances+1.0);
    for(int son = 0; son < m_NumAttributes; son++) {
      if(attIndex[son]==-1) continue;
      // temporarily mark the son with -1 so that the parent loop skips it
      // together with the class attribute; restored at the end of the
      // iteration
      sIndex=attIndex[son];
      attIndex[son]=-1;
      prob=0;
      condiMutualInfoSum=0;
      for(int parent=0; parent<m_NumAttributes; parent++) {
        if(attIndex[parent]==-1) continue;
        condiMutualInfoSum+=m_condiMutualInfo[son][parent];
        // weighted, smoothed ratio of the (class, parent value, son value)
        // count to the (class, parent value, parent value) count
        prob+=m_condiMutualInfo[son][parent]*(m_ClassAttAttCounts[classVal][attIndex[parent]][sIndex]+1.0/m_NumAttValues[son])/(m_ClassAttAttCounts[classVal][attIndex[parent]][attIndex[parent]] + 1.0);
      }
      if(condiMutualInfoSum>0){
        // turn the weighted sum into a weighted average
        prob=prob/condiMutualInfoSum;
        probs[classVal] *= prob;
      }
      else{
        // no usable parent weights: estimate the son's value from the
        // class alone
        prob=(m_ClassAttAttCounts[classVal][sIndex][sIndex]+1.0/m_NumAttValues[son])/(m_ClassCounts[classVal]+1.0);
        probs[classVal]*= prob;
      }
      // undo the temporary masking of the son
      attIndex[son] = sIndex;
    }
  }
  Utils.normalize(probs);
  return probs;
}
 
Example 18
Source File: CollectiveTree.java    From collective-classification-weka-package with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Determines the weight distribution of the instances that have a
 * non-missing value at the given attribute position.
 * <p>
 * Nominal attribute: one entry per attribute value. Numeric attribute:
 * two entries, the weight below the median and the weight at or above
 * it. Scanning stops at the first instance whose value is missing. The
 * result is normalized when its sum is greater than zero.
 *
 * @param data        the instances to work on
 * @param indices     the sorted indices
 * @param att         the attribute to determine the distribution for
 * @return            the distribution
 */
protected double[] determineAttributeDistribution( Instances data,
                                                   int[] indices,
                                                   int att) {
  double[] dist;

  if (data.attribute(att).isNominal()) {
    // accumulate weights per attribute value (needed to distribute
    // instances with no class and missing attribute)
    dist = new double[data.attribute(att).numValues()];
    for (int pos = 0; pos < indices.length; pos++) {
      Instance current = data.instance(indices[pos]);
      if (current.isMissing(att)) {
        break;
      }
      dist[(int) current.value(att)] += current.weight();
    }
  } else {
    // numeric: [0] = less than median, [1] = greater or equal
    dist = new double[2];

    // number of leading instances w/o missing attribute
    int known = 0;
    for (int pos = 0; pos < indices.length; pos++) {
      if (data.instance(indices[pos]).isMissing(att)) {
        break;
      }
      known++;
    }

    // determine median of the known values
    double[] knownValues = new double[known];
    for (int pos = 0; pos < known; pos++) {
      knownValues[pos] = data.instance(indices[pos]).value(att);
    }
    double median;
    if (knownValues.length == 0) {
      median = 0;
    } else if (knownValues.length == 1) {
      median = knownValues[0];
    } else {
      median = Utils.kthSmallestValue(knownValues, knownValues.length / 2);
    }

    // distribute the weights around the median
    for (int pos = 0; pos < known; pos++) {
      Instance current = data.instance(indices[pos]);
      int side = Utils.sm(current.value(att), median) ? 0 : 1;
      dist[side] += current.weight();
    }
  }

  if (Utils.gr(Utils.sum(dist), 0)) {
    Utils.normalize(dist);
  }

  return dist;
}
 
Example 19
Source File: MultiClassClassifierUpdateable.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the distribution for an instance.
 * <p>
 * With a single classifier its distribution is returned directly. For
 * the 1-against-1 method every usable pairwise classifier is consulted:
 * with pairwise coupling enabled and more than two classes its output
 * feeds the coupling computation, otherwise the winner of each pair
 * receives one vote and the votes are normalized. Any other method is
 * handled by the superclass.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {

  if (m_Classifiers.length == 1) {
    return m_Classifiers[0].distributionForInstance(inst);
  }

  double[] votes = new double[inst.numClasses()];
  if (m_Method != METHOD_1_AGAINST_1) {
    return super.distributionForInstance(inst);
  }

  double[][] r = new double[inst.numClasses()][inst.numClasses()];
  double[][] n = new double[inst.numClasses()][inst.numClasses()];

  for (int i = 0; i < m_ClassFilters.length; i++) {
    // skip pairs without a classifier or without any weight
    if (m_Classifiers[i] == null || !(m_SumOfWeights[i] > 0)) {
      continue;
    }

    Instance tempInst = (Instance) inst.copy();
    tempInst.setDataset(m_TwoClassDataset);
    double[] current = m_Classifiers[i].distributionForInstance(tempInst);

    // recover the two class indices this classifier separates
    Range range = new Range(
        ((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
    range.setUpper(m_ClassAttribute.numValues());
    int[] pair = range.getSelection();

    if (m_pairwiseCoupling && inst.numClasses() > 2) {
      r[pair[0]][pair[1]] = current[0];
      n[pair[0]][pair[1]] = m_SumOfWeights[i];
    } else {
      int winner = (current[0] > current[1]) ? pair[0] : pair[1];
      votes[winner] += 1.0;
    }
  }

  if (m_pairwiseCoupling && inst.numClasses() > 2) {
    try {
      return pairwiseCoupling(n, r);
    } catch (IllegalArgumentException ignored) {
      // coupling failed; fall through and return the vote array instead
    }
  }

  if (Utils.gr(Utils.sum(votes), 0)) {
    Utils.normalize(votes);
  }
  return votes;
}
 
Example 20
Source File: SimpleKMeansWithSilhouette.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize using the k-means++ method.
 * <p>
 * The first center is drawn uniformly at random. Every further center is
 * drawn with a probability proportional to each instance's distance to
 * its closest already chosen center. Chosen centers are appended to
 * {@code m_ClusterCentroids}. Selection stops early once as many centers
 * have been drawn as there are instances.
 * 
 * @param data
 *            the training data
 * @throws Exception
 *             if a problem occurs
 */
protected void kMeansPlusPlusInit(Instances data) throws Exception {
	Random randomO = new Random(getSeed());
	// used as a set of the centers chosen so far; the map values are unused
	HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();

	// choose initial center uniformly at random
	int index = randomO.nextInt(data.numInstances());
	m_ClusterCentroids.add(data.instance(index));
	DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true);
	initC.put(hk, null);

	// index of the most recently added centroid
	int iteration = 0;
	int remainingInstances = data.numInstances() - 1;
	if (m_NumClusters > 1) {
		// proceed with selecting the rest

		// distances to the initial randomly chosen center
		double[] distances = new double[data.numInstances()];
		double[] cumProbs = new double[data.numInstances()];
		for (int i = 0; i < data.numInstances(); i++) {
			distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration));
		}

		// now choose the remaining cluster centers
		for (int i = 1; i < m_NumClusters; i++) {

			// distances converted to probabilities (on a copy, so that
			// the raw distances survive for the update step below)
			// NOTE(review): textbook k-means++ weights by squared distance;
			// whether m_DistanceFunction already returns squared values is
			// not visible here -- confirm
			double[] weights = new double[data.numInstances()];
			System.arraycopy(distances, 0, weights, 0, distances.length);
			Utils.normalize(weights);

			// cumulative distribution over the instances
			double sumOfProbs = 0;
			for (int k = 0; k < data.numInstances(); k++) {
				sumOfProbs += weights[k];
				cumProbs[k] = sumOfProbs;
			}

			// make sure there are no rounding issues
			cumProbs[data.numInstances() - 1] = 1.0;

			// choose a random instance: the first one whose cumulative
			// probability exceeds the drawn number
			double prob = randomO.nextDouble();
			for (int k = 0; k < cumProbs.length; k++) {
				if (prob < cumProbs[k]) {
					Instance candidateCenter = data.instance(k);
					hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
					if (!initC.containsKey(hk)) {
						initC.put(hk, null);
						m_ClusterCentroids.add(candidateCenter);
					} else {
						// we shouldn't get here because any instance that is a
						// duplicate of an already chosen cluster center should
						// have zero distance (and hence zero probability of
						// getting chosen) to that center.
						System.err.println("We shouldn't get here....");
					}
					remainingInstances--;
					break;
				}
			}
			iteration++;

			// every instance has been drawn once: nothing left to choose
			if (remainingInstances == 0) {
				break;
			}

			// prepare to choose the next cluster center.
			// check distances against the new cluster center to see if it is closer
			for (int k = 0; k < data.numInstances(); k++) {
				// a distance of zero cannot get any smaller
				if (distances[k] > 0) {
					double newDist = m_DistanceFunction.distance(data.instance(k),
							m_ClusterCentroids.instance(iteration));
					if (newDist < distances[k]) {
						distances[k] = newDist;
					}
				}
			}
		}
	}
}