Java Code Examples for weka.core.Instances#numClasses()

The following examples show how to use weka.core.Instances#numClasses(). Each example notes the project it is drawn from, its source file, and its license.
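Before the examples, here is a minimal sketch of the call itself (the file name is a placeholder; assumes Weka 3.8). numClasses() reads the attribute at the current class index, so the class index must be set first; for a nominal class it returns the number of labels, and for a numeric class it returns 1.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumClassesDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        // numClasses() requires the class index to be set first
        data.setClassIndex(data.numAttributes() - 1);
        // number of labels for a nominal class; 1 for a numeric class
        System.out.println("Classes: " + data.numClasses());
    }
}
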
Example 1
Source File: TTC.java    From tsml with GNU General Public License v3.0
public static void main(String[] args) throws Exception{
        String dataset = "Trace";
        Instances inst = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "\\" + dataset + "_TRAIN.arff");
        Instances inst2 = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "\\" + dataset + "_TEST.arff");
//        Instances inst = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "\\" + dataset + "_TRAIN.arff");
//        Instances inst2 = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "\\" + dataset + "_TEST.arff");
        inst.setClassIndex(inst.numAttributes()-1);
        inst.addAll(inst2);

        TTC k = new TTC();
        k.seed = 0;
        k.k = inst.numClasses();
        k.buildClusterer(inst);

        System.out.println(k.clusters.length);
        System.out.println(Arrays.toString(k.clusters));
        System.out.println(randIndex(k.assignments, inst));
    }
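A recurring tsml pattern in the example above: pool the train and test splits, then seed the clusterer with k equal to the labelled class count. A stripped-down sketch of just that setup (paths are placeholders; assumes Weka 3.8, where Instances implements List<Instance>, so addAll works):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class PooledClusterSetup {
    public static void main(String[] args) throws Exception {
        Instances train = DataSource.read("Trace_TRAIN.arff"); // placeholder path
        Instances test = DataSource.read("Trace_TEST.arff");   // placeholder path
        train.setClassIndex(train.numAttributes() - 1);
        train.addAll(test);         // pool both splits for clustering
        int k = train.numClasses(); // seed the clusterer with the class count
        System.out.println("k = " + k);
    }
}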
 
Example 2
Source File: ShapeletTransformTimingUtilities.java    From tsml with GNU General Public License v3.0
public static ShapeletFilter createTransform(Instances train){
    int numClasses = train.numClasses();
    int numInstances = train.numInstances() <= 2000 ? train.numInstances() : 2000;
    int numAttributes = train.numAttributes()-1;
    
    ShapeletFilter transform;
    if(numClasses == 2){
        transform = new ShapeletFilter();
    }else{
        transform = new BalancedClassShapeletFilter();
        transform.setClassValue(new BinaryClassValue());
    }
    
    //transform.setSubSeqDistance(new ImprovedOnlineShapeletDistance());
    transform.setShapeletMinAndMax(3, numAttributes);
    transform.setNumberOfShapelets(numInstances);
    transform.useCandidatePruning();
    transform.turnOffLog();
    transform.setRoundRobin(true);
    transform.supressOutput();
    
    return transform;
}
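A hedged usage sketch for the factory above, assuming (without checking) that ShapeletFilter follows Weka's standard Filter contract of setInputFormat plus Filter.useFilter; imports are omitted and train is the data passed to createTransform:

ShapeletFilter transform = createTransform(train);
transform.setInputFormat(train);                  // assumed: standard Weka filter setup
Instances shapeletSpace = Filter.useFilter(train, transform);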
 
Example 3
Source File: FlipHistory.java    From collective-classification-weka-package with GNU General Public License v3.0
/**
 * initializes the history
 * 
 * @param inst	the instances to initialize the history with
 */
public FlipHistory(Instances inst) {
  int       i;
  
  // create arrays
  m_Instances = new Instance[inst.numInstances()];
  m_Last      = new double[inst.numInstances()][inst.numClasses()];
  m_Average   = new double[inst.numInstances()][inst.numClasses()];
  m_Count     = new int[inst.numInstances()];

  // sort
  for (i = 0; i < inst.numInstances(); i++)
    m_Instances[i] = (Instance) inst.instance(i).copy();
  Arrays.sort(m_Instances, m_Comparator);

  // init
  for (i = 0; i < m_Instances.length; i++) {
    m_Last[i][(int) m_Instances[i].classValue()]    = 1.0;
    m_Average[i][(int) m_Instances[i].classValue()] = 1.0;
  }
}
 
Example 4
Source File: UnsupervisedShapelets.java    From tsml with GNU General Public License v3.0
public static void main(String[] args) throws Exception{
    String dataset = "Trace";
    Instances inst = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\"+dataset+"\\"+dataset+"_TRAIN.arff");
    Instances inst2 = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\"+dataset+"\\"+dataset+"_TEST.arff");
    inst.setClassIndex(inst.numAttributes()-1);
    inst.addAll(inst2);

    UnsupervisedShapelets us = new UnsupervisedShapelets();
    us.seed = 0;
    us.k = inst.numClasses();
    us.buildClusterer(inst);

    System.out.println(us.clusters.length);
    System.out.println(Arrays.toString(us.assignments));
    System.out.println(Arrays.toString(us.clusters));
    System.out.println(randIndex(us.assignments, inst));
}
 
Example 5
Source File: LPS.java    From tsml with GNU General Public License v3.0
/**
 * Backfits the given data into the tree.
 */
public void backfitData(Instances data) throws Exception {

  double totalWeight = 0;
  double totalSumSquared = 0;

  // Compute initial class counts
  double[] classProbs = new double[data.numClasses()];
  for (int i = 0; i < data.numInstances(); i++) {
    Instance inst = data.instance(i);
    if (data.classAttribute().isNominal()) {
      classProbs[(int) inst.classValue()] += inst.weight();
      totalWeight += inst.weight();
    } else {
      classProbs[0] += inst.classValue() * inst.weight();
      totalSumSquared += inst.classValue() * inst.classValue()
        * inst.weight();
      totalWeight += inst.weight();
    }
  }

  double trainVariance = 0;
  if (data.classAttribute().isNumeric()) {
    trainVariance = RandomRegressionTree.singleVariance(classProbs[0],
      totalSumSquared, totalWeight) / totalWeight;
    classProbs[0] /= totalWeight;
  }

  // Fit data into tree
  backfitData(data, classProbs, totalWeight);
}
 
Example 6
Source File: Sampling.java    From tsml with GNU General Public License v3.0
/** 
 * Reorders the dataset so that classes appear in descending order of size.
 * @param data the dataset to reorder
 * @return a copy of the data with instances grouped by class, largest class first
 */
public static Instances orderByLargestClass(Instances data) {
	Instances newData = new Instances(data, data.numInstances());
	
	// get the number of classes in the data
	int nbClass = data.numClasses();
	int[] instancePerClass = new int[nbClass];
	int[] labels = new int[nbClass];
	int[] classIndex = new int[nbClass];
	
	// sort the data based on its class attribute
	data.sort(data.classAttribute());
	
	// get the number of instances per class in the data
	for (int i = 0; i < nbClass; i++) {
		instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i];
		labels[i] = i;
		if (i > 0)
			classIndex[i] = classIndex[i-1] + instancePerClass[i-1];
	}
	QuickSort.sort(instancePerClass, labels);
	
	for (int i = nbClass-1; i >=0 ; i--) {
		for (int j = 0; j < instancePerClass[i]; j++) {
			newData.add(data.instance(classIndex[labels[i]] + j));
		}
	}
	
	return newData;
}
 
Example 7
Source File: Ridor.java    From tsml with GNU General Public License v3.0
/**
 * Builds a single rule learner with reduced-error pruning (REP), dealing with 2 classes.
 * This rule learner always tries to predict the class with label m_Class.
 *
 * @param instances the training data
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal())
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  if (instances.numClasses() != 2)
    throw new Exception(" Only 2 classes, please.");

  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0))
    throw new Exception(" No training data.");

  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0))
    throw new Exception(" The class labels of all the training data are missing.");

  if (data.numInstances() < m_Folds)
    throw new Exception(" Not enough data for REP.");

  m_Antds = new FastVector();

  /* Split data into Grow and Prune */
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
  Instances pruneData = data.testCV(m_Folds, m_Folds - 1);

  grow(growData);    // Build this rule

  prune(pruneData);  // Prune this rule
}
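The grow/prune split at the end is plain Weka plumbing; the same idea in isolation (class name and fold count are illustrative, and stratify() requires a nominal class):

import java.util.Random;
import weka.core.Instances;

public final class GrowPruneSplit {
    /** Returns {growSet, pruneSet} from a stratified fold split. */
    static Instances[] split(Instances data, int folds, long seed) {
        Instances d = new Instances(data);
        d.randomize(new Random(seed));
        d.stratify(folds);
        return new Instances[] {
            d.trainCV(folds, folds - 1), // all but the last fold: grow set
            d.testCV(folds, folds - 1)   // the last fold: prune set
        };
    }
}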
 
Example 8
Source File: TunedXGBoost.java    From tsml with GNU General Public License v3.0
@Override
    public void buildClassifier(Instances insts) throws Exception {
        long startTime = System.nanoTime();

        booster = null;
        trainResults =new ClassifierResults();

        trainInsts = new Instances(insts);
        numTrainInsts = insts.numInstances();
        numAtts = insts.numAttributes();
        numClasses = insts.numClasses();

        if(cvFolds>numTrainInsts)
            cvFolds=numTrainInsts;
//        rng = new Random(seed); //for tie resolution etc if needed

        buildActualClassifer();

        if(getEstimateOwnPerformance()&& !tuneParameters) //if tuneparas, will take the cv results of the best para set
            trainResults = estimateTrainAcc(trainInsts);

        if(saveEachParaAcc)
            trainResults.setBuildTime(combinedBuildTime);
        else
            trainResults.setBuildTime(System.nanoTime()-startTime);
//            trainResults.buildTime=System.nanoTime()-startTime;

        trainResults.setTimeUnit(TimeUnit.NANOSECONDS);
        trainResults.setClassifierName(tuneParameters ? "TunedXGBoost" : "XGBoost");
        trainResults.setDatasetName(trainInsts.relationName());
        trainResults.setParas(getParameters());
    }
 
Example 9
Source File: RnnTextEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
@Test
public void testOutputFormat() throws Exception {
  Instances data = DatasetLoader.loadReutersMinimal();
  for (int tl : Arrays.asList(10, 50, 200)) {
    tii.setTruncateLength(tl);
    for (int bs : Arrays.asList(1, 4, 8, 16)) {
      final DataSetIterator it = tii.getDataSetIterator(data, TestUtil.SEED, bs);
      assertEquals(bs, it.batch());
      assertEquals(Arrays.asList("0", "1"), it.getLabels());
      final DataSet next = Utils.getNext(it);

      // Check feature shape, expect: (batchsize x wordvecsize x sequencelength)
      final long[] shapeFeats = next.getFeatures().shape();
      final long[] expShapeFeats = {bs, WORD_VEC_SIZE, tl};
      assertEquals(expShapeFeats[0], shapeFeats[0]);
      assertEquals(expShapeFeats[1], shapeFeats[1]);
      assertTrue(expShapeFeats[2] >= shapeFeats[2]);

      // Check label shape, expect: (batchsize x numclasses x sequencelength)
      final long[] shapeLabels = next.getLabels().shape();
      final long[] expShapeLabels = {bs, data.numClasses(), tl};
      assertEquals(expShapeLabels[0], shapeLabels[0]);
      assertEquals(expShapeLabels[1], shapeLabels[1]);
      assertTrue(expShapeLabels[2] >= shapeLabels[2]);
    }
  }
}
 
Example 10
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0
/**
 * by Tony
 * Public method to calculate the class distributions of a dataset.
 */
public static double[] findClassDistributions(Instances data)
{
    double[] dist = new double[data.numClasses()];
    for (Instance d : data)
        dist[(int) d.classValue()]++;
    for (int i = 0; i < dist.length; i++)
        dist[i] /= data.numInstances();
    return dist;
}
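A quick usage sketch for the helper above (train is illustrative; assumes a nominal class with the class index already set):

double[] dist = InstanceTools.findClassDistributions(train);
for (int c = 0; c < train.numClasses(); c++) {
    System.out.printf("%s: %.3f%n", train.classAttribute().value(c), dist[c]);
}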
 
Example 11
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0
public static double[] classDistribution(Instances instances) {
    double[] distribution = new double[instances.numClasses()];
    for(Instance instance : instances) {
        distribution[(int) instance.classValue()]++;
    }
    normalise(distribution);
    return distribution;
}
 
Example 12
Source File: MLPipeline.java    From AILibs with GNU Affero General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {

	/* reduce dimensionality */
	long start;
	int numAttributesBefore = data.numAttributes();
	logger.info("Starting to build the preprocessors of the pipeline.");

	for (SupervisedFilterSelector pp : this.preprocessors) {

		/* if the filter has not been trained yet, do so now and store it */
		if (!pp.isPrepared()) {
			try {
				start = System.currentTimeMillis();
				pp.prepare(data);
				this.timeForTrainingPreprocessors = (int) (System.currentTimeMillis() - start);
				int newNumberOfClasses = pp.apply(data).numClasses();
				if (data.numClasses() != newNumberOfClasses) {
					logger.info("{} changed number of classes from {} to {}", pp.getSelector(), data.numClasses(), newNumberOfClasses);
				}
			} catch (NullPointerException e) {
				logger.error("Could not apply preprocessor", e);
			}
		}

		/* now apply the attribute selector */
		data = pp.apply(data);
	}
	logger.info("Reduced number of attributes from {} to {}", numAttributesBefore, data.numAttributes());

	/* build classifier based on reduced data */
	start = System.currentTimeMillis();
	super.getClassifier().buildClassifier(data);
	this.timeForTrainingClassifier = (int) (System.currentTimeMillis() - start);
	this.trained = true;
	this.timeForExecutingPreprocessors = new DescriptiveStatistics();
	this.timeForExecutingClassifier = new DescriptiveStatistics();
}
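The numClasses() comparison above is the key check: a supervised preprocessor can merge or drop class values. The same check in isolation, against Weka's generic Filter API (the helper name is hypothetical; the filter is assumed to have had setInputFormat called during preparation):

import weka.core.Instances;
import weka.filters.Filter;

public final class ClassCountCheck {
    /** Applies a prepared filter and reports any change in the class count. */
    static Instances applyAndCheck(Instances data, Filter f) throws Exception {
        Instances out = Filter.useFilter(data, f);
        if (data.numClasses() != out.numClasses()) {
            System.out.printf("classes changed: %d -> %d%n",
                    data.numClasses(), out.numClasses());
        }
        return out;
    }
}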
 
Example 13
Source File: AbstractTextEmbeddingIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Create a sentence provider from the given data.
 *
 * @param data Data
 * @return Sentence provider
 */
public LabeledSentenceProvider getSentenceProvider(Instances data) {
  List<String> sentences = new ArrayList<>();
  List<String> labels = new ArrayList<>();
  final int clsIdx = data.classIndex();
  for (Instance inst : data) {
    labels.add(String.valueOf(inst.value(clsIdx)));
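    // with exactly two attributes (text and class), 1 - clsIdx picks the text attribute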
    sentences.add(inst.stringValue(1 - clsIdx));
  }
  return new CollectionLabeledSentenceProvider(sentences, labels, data.numClasses());
}
 
Example 14
Source File: Utils.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Converts a set of training instances to a DataSet. Assumes that the instances have been
 * suitably preprocessed - i.e. missing values replaced and nominals converted to binary/numeric.
 * Also assumes that the class index has been set
 *
 * @param insts the instances to convert
 * @return a DataSet
 */
public static DataSet instancesToDataSet(Instances insts) {
  INDArray data = Nd4j.zeros(insts.numInstances(), insts.numAttributes() - 1);
  INDArray outcomes = Nd4j.zeros(insts.numInstances(), insts.numClasses());

  for (int i = 0; i < insts.numInstances(); i++) {
    double[] independent = new double[insts.numAttributes() - 1];
    double[] dependent = new double[insts.numClasses()];
    Instance current = insts.instance(i);
    for (int j = 0; j < current.numValues(); j++) {
      int index = current.index(j);
      double value = current.valueSparse(j);

      if (index < insts.classIndex()) {
        independent[index] = value;
      } else if (index > insts.classIndex()) {
        // Shift by -1, since the class is left out from the feature matrix and put into a separate
        // outcomes matrix
        independent[index - 1] = value;
      }
    }

    // Set class values
    if (insts.numClasses() > 1) { // Classification
      final int oneHotIdx = (int) current.classValue();
      dependent[oneHotIdx] = 1.0;
    } else { // Regression (currently only single class)
      dependent[0] = current.classValue();
    }

    INDArray row = Nd4j.create(independent);
    data.putRow(i, row);
    outcomes.putRow(i, Nd4j.create(dependent));
  }
  return new DataSet(data, outcomes);
}
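A usage sketch for the conversion above, continuing inside the same class (placeholder path; imports omitted): the feature matrix comes out as numInstances x (numAttributes - 1), the outcome matrix as numInstances x numClasses().

Instances insts = DataSource.read("train.arff"); // placeholder path
insts.setClassIndex(insts.numAttributes() - 1);
DataSet ds = instancesToDataSet(insts);
System.out.println(Arrays.toString(ds.getFeatures().shape())); // [n, numAttributes - 1]
System.out.println(Arrays.toString(ds.getLabels().shape()));   // [n, numClasses]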
 
Example 15
Source File: Ridor.java    From tsml with GNU General Public License v3.0
/**
 * Builds a ripple-down manner rule learner.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // remove instances with missing class
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();
  
  int numCl = data.numClasses();
  m_Root = new Ridor_node();
  m_Class = instances.classAttribute();     // The original class label
	
  int index = data.classIndex();
  m_Cover = data.sumOfWeights();

  m_Random = new Random(m_Seed);
	
  /* Create a binary attribute */
  FastVector binary_values = new FastVector(2);
  binary_values.addElement("otherClasses");
  binary_values.addElement("defClass");
  Attribute attr = new Attribute ("newClass", binary_values);
  data.insertAttributeAt(attr, index);	
  data.setClassIndex(index);                 // The new class label

  /* Partition the data into bags according to their original class values */
  Instances[] dataByClass = new Instances[numCl];
  for(int i=0; i < numCl; i++)
    dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
  for(int i=0; i < data.numInstances(); i++){ // Partitioning
    Instance inst = data.instance(i);
    inst.setClassValue(0);           // Set new class value to be 0
    dataByClass[(int)inst.value(index+1)].add(inst); 
  }	
	
  for(int i=0; i < numCl; i++)    
    dataByClass[i].deleteAttributeAt(index+1);   // Delete original class
	
  m_Root.findRules(dataByClass, 0);
  
}
 
Example 16
Source File: FastShapelets.java    From tsml with GNU General Public License v3.0
public void train(Instances data, int R, int top_k) {
    int sax_max_len, sax_len, w;
    int max_len = data.numAttributes() - 1, min_len = 10, step = 1; //consider whole search space.

    double percent_mask;
    Shapelet sh;

    rand = new Random(seed);

    numClass = data.numClasses();
    numObj = data.numInstances();

    sax_max_len = 15;
    percent_mask = 0.25;
    //R = 10;
    //top_k = 10;

    readTrainData(data);

    //initialise our data structures.
    nodeObjList = new ArrayList<>();
    finalSh = new ArrayList<>();
    uSAXMap = new HashMap<>();
    scoreList = new ArrayList<>();
    classifyList = new ArrayList<>();

    /// Find Shapelet
    for (int node_id = 1; (node_id == 1) || (node_id < nodeObjList.size()); node_id++) {
        Shapelet bsf_sh = new Shapelet();
        if (node_id <= 1) {
            setCurData(node_id);
        } else if (classifyList.get(node_id) == -1) { /// non-leaf node (-1:body node, -2:unused node)
            setCurData(node_id);
        } else {
            continue;
        }

        // candidate subsequence lengths, from min_len up to the full series length
        for (subseqLength = min_len; subseqLength <= max_len; subseqLength += step) {
            /// Shapelet cannot be too short, e.g. len=1.
            if (subseqLength < SH_MIN_LEN) {
                continue;
            }

            sax_len = sax_max_len;
            /// Make w and sax_len both integer
            w = (int) Math.ceil(1.0 * subseqLength / sax_len);
            sax_len = (int) Math.ceil(1.0 * subseqLength / w);

            createSAXList(subseqLength, sax_len, w);

            randomProjection(R, percent_mask, sax_len);
            scoreAllSAX(R);

            sh = findBestSAX(top_k);

            if (bsf_sh.lessThan(sh)) {
                bsf_sh = sh;
            }

            uSAXMap.clear();
            scoreList.clear();
        }

        if (bsf_sh.len > 0) {
            double[] query = new double[bsf_sh.len];
            for (int i = 0; i < bsf_sh.len; i++) {
                query[i] = this.data.get(bsf_sh.obj).get(bsf_sh.pos + i);
            }

            bsf_sh.setTS(query);
            finalSh.add(bsf_sh);
            /// post-processing: create tree
            setNextNodeObj(node_id, bsf_sh);
        }
    }
}
 
Example 17
Source File: Sampling.java    From tsml with GNU General Public License v3.0
/** 
 * Reorders the data by the compactness of each class, using Euclidean distance.
 * @param data the dataset to reorder
 * @return a copy of the data with instances grouped by class, ordered by class compactness
 */
public static Instances orderByCompactClass(Instances data) {
	Instances newData = new Instances(data, data.numInstances());
	
	// get the number of classes in the data
	int nbClass = data.numClasses();
	int[] instancePerClass = new int[nbClass];
	int[] labels = new int[nbClass];
	int[] classIndex = new int[nbClass];
	double[] compactness = new double[nbClass];
	
	// sort the data based on its class attribute
	data.sort(data.classAttribute());
	
	int start = 0;
	// get the number of instances per class in the data
	for (int i = 0; i < nbClass; i++) {
		instancePerClass[i] = data.attributeStats(data.classIndex()).nominalCounts[i];
		labels[i] = i;
		if (i > 0) 
			classIndex[i] = classIndex[i-1] + instancePerClass[i-1];
		int end = start + instancePerClass[i];
		int counter = 0;
		double[][] dataPerClass = new double[instancePerClass[i]][data.numAttributes()-1];
		for (int j = start; j < end; j++) {
			dataPerClass[counter++] = data.instance(j).toDoubleArray();
		}
		double[] mean = arithmeticMean(dataPerClass);
		double d = 0;
		for (int j = 0; j < instancePerClass[i]; j++) {
			double temp = euclideanDistance(mean, dataPerClass[j]);
			temp *= temp;
			temp -= (mean[0] - dataPerClass[j][0]) * (mean[0] - dataPerClass[j][0]);
			d += temp;
		}
		compactness[i] = d / instancePerClass[i];
		start = end;
	}
	
	QuickSort.sort(compactness, labels);
	
	for (int i = nbClass-1; i >=0 ; i--) {
		for (int j = 0; j < instancePerClass[labels[i]]; j++) {
			newData.add(data.instance(classIndex[labels[i]] + j));
		}
	}
	
	return newData;
}
 
Example 18
Source File: BFTree.java    From tsml with GNU General Public License v3.0
 /**
  * Generate successor nodes for a node and put them into BestFirstElements
  * according to gini gain or information gain, in descending order.
  *
  * @param BestFirstElements	list to store BestFirst nodes
  * @param data	training instances
  * @param subsetSortedIndices	sorted indices of instances of successor nodes
  * @param subsetWeights	weights of instances of successor nodes
  * @param dists	class distributions of successor nodes
  * @param att	attribute used to split the node
  * @param useHeuristic	whether to use heuristic search for nominal attributes in multi-class problems
  * @param useGini	whether to use the Gini index as the splitting criterion
  * @throws Exception	if something goes wrong
  */
 protected void makeSuccessors(FastVector BestFirstElements, Instances data,
     int[][][] subsetSortedIndices, double[][][] subsetWeights,
     double[][][] dists,
     Attribute att, boolean useHeuristic, boolean useGini) throws Exception {

   m_Successors = new BFTree[2];

   for (int i = 0; i < 2; i++) {
     m_Successors[i] = new BFTree();
     m_Successors[i].m_isLeaf = true;

     // class probability and distribution for this successor node
     m_Successors[i].m_ClassProbs = new double[data.numClasses()];
     m_Successors[i].m_Distribution = new double[data.numClasses()];
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs,
         0, m_Successors[i].m_ClassProbs.length);
     System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_Distribution,
         0, m_Successors[i].m_Distribution.length);
     if (Utils.sum(m_Successors[i].m_ClassProbs) != 0)
       Utils.normalize(m_Successors[i].m_ClassProbs);

     // split information for this successor node
     double[][] props = new double[data.numAttributes()][2];
     double[][][] subDists = new double[data.numAttributes()][2][data.numClasses()];
     double[][] totalSubsetWeights = new double[data.numAttributes()][2];
     FastVector splitInfo = m_Successors[i].computeSplitInfo(m_Successors[i], data,
         subsetSortedIndices[i], subsetWeights[i], subDists, props,
         totalSubsetWeights, useHeuristic, useGini);

     // branch proportion for this successor node
     int splitIndex = ((Attribute) splitInfo.elementAt(1)).index();
     m_Successors[i].m_Props = new double[2];
     System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0,
         m_Successors[i].m_Props.length);

     // sorted indices and weights of each attribute for this successor node
     m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0];
     m_Successors[i].m_Weights = new double[data.numAttributes()][0];
     for (int j = 0; j < m_Successors[i].m_SortedIndices.length; j++) {
       m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j];
       m_Successors[i].m_Weights[j] = subsetWeights[i][j];
     }

     // distribution of each attribute for this successor node
     m_Successors[i].m_Dists = new double[data.numAttributes()][2][data.numClasses()];
     for (int j = 0; j < subDists.length; j++) {
       m_Successors[i].m_Dists[j] = subDists[j];
     }

     // total weights for this successor node
     m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]);

     // insert this successor node into BestFirstElements, ordered by
     // gini gain or information gain in descending order
     if (BestFirstElements.size() == 0) {
       BestFirstElements.addElement(splitInfo);
     } else {
       double gGain = ((Double) (splitInfo.elementAt(3))).doubleValue();
       int vectorSize = BestFirstElements.size();
       FastVector lastNode = (FastVector) BestFirstElements.elementAt(vectorSize - 1);

       // if gini gain is less than that of the last node in the vector, append
       if (gGain < ((Double) (lastNode.elementAt(3))).doubleValue()) {
         BestFirstElements.insertElementAt(splitInfo, vectorSize);
       } else {
         for (int j = 0; j < vectorSize; j++) {
           FastVector node = (FastVector) BestFirstElements.elementAt(j);
           double nodeGain = ((Double) (node.elementAt(3))).doubleValue();
           if (gGain >= nodeGain) {
             BestFirstElements.insertElementAt(splitInfo, j);
             break;
           }
         }
       }
     }
   }
 }
 
Example 19
Source File: WindowSearcher.java    From tsml with GNU General Public License v3.0
/**
 * Similar to buildClassifier, but only an estimate.
 * This is used for large datasets, where a full run takes very long; the main
 * purpose is to measure the run time rather than actually search for the best window.
 * We used this to draw Figure 1 of our SDM18 paper.
 *
 * @param data the training data
 * @param estimate the number of training instances to scan
 * @throws Exception if training fails
 */
public void buildClassifierEstimate(Instances data, int estimate) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    classedData = new HashMap <>();
    classedDataIndices = new HashMap <>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList <SymbolicSequence>());
        classedDataIndices.put(data.classAttribute().value(c), new ArrayList <Integer>());
    }

    train = new SymbolicSequence[data.numInstances()];
    classMap = new String[train.length];
    maxLength = 0;
    for (int i = 0; i < train.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, sequence.length);
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        train[i] = new SymbolicSequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(train[i]);
        classedDataIndices.get(clas).add(i);
    }

    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    maxWindow = Math.round(1 * maxLength);
    searchResults = new String[maxWindow + 1];
    nns = new int[maxWindow + 1][train.length];
    dist = new double[maxWindow + 1][train.length];

    int[] nErrors = new int[maxWindow + 1];
    double[] score = new double[maxWindow + 1];
    double bestScore = Double.MAX_VALUE;
    double minD;
    bestWarpingWindow = -1;

    // Start searching for the best window.
    // Only loop through a given size of the dataset, but still search for NN from the whole train
    // for every sequence in train, we find NN for all window
    // then in the end, update the best score
    for (int i = 0; i < estimate; i++) {
        SymbolicSequence testSeq = train[i];

        for (int w = 0; w <= maxWindow; w++) {
            testSeq.LB_KeoghFillUL(w, U, L);

            minD = Double.MAX_VALUE;
            String classValue = null;
            for (int j = 0; j < train.length; j++) {
                if (i == j)
                    continue;
                SymbolicSequence trainSeq = train[j];
                if (SymbolicSequence.LB_KeoghPreFilled(trainSeq, U, L) < minD) {
                    double tmpD = testSeq.DTW(trainSeq, w, warpingMatrix);
                    if (tmpD < minD) {
                        minD = tmpD;
                        classValue = classMap[j];
                        nns[w][i] = j;
                    }
                    dist[w][j] = tmpD * tmpD;
                }
            }
            if (classValue == null || !classValue.equals(classMap[i])) {
                nErrors[w]++;
            }
            score[w] = 1.0 * nErrors[w] / train.length;
        }
    }

    for (int w = 0; w <= maxWindow; w++) {
        if (score[w] < bestScore) {
            bestScore = score[w];
            bestWarpingWindow = w;
        }
    }

    // Saving best windows found
    System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1 - bestScore));
}
 
Example 20
Source File: RnnSequenceClassifier.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * The method to use when making predictions for test instances.
 *
 * @param insts the instances to get predictions for
 * @return the class probability estimates (if the class is nominal) or the numeric predictions
 * (if it is numeric)
 * @throws Exception if something goes wrong at prediction time
 */
@Override
public double[][] distributionsForInstances(Instances insts) throws Exception {

  log.info("Calc. dist for {} instances", insts.numInstances());

  // Do we only have a ZeroR model?
  if (zeroR != null) {
    return zeroR.distributionsForInstances(insts);
  }

  // Process input data to have the same filters applied as the training data
  insts = applyFilters(insts);

  // Get predictions
  final DataSetIterator it = getDataSetIterator(insts, CacheMode.NONE);
  double[][] preds = new double[insts.numInstances()][insts.numClasses()];

  if (it.resetSupported()) {
    it.reset();
  }

  int offset = 0;
  boolean next = it.hasNext();

  // Get predictions batch-wise
  while (next) {
    final DataSet ds = Utils.getNext(it);
    final INDArray features = ds.getFeatures();
    final INDArray labelsMask = ds.getLabelsMaskArray();
    INDArray lastTimeStepIndices;
    if (labelsMask != null) {
      lastTimeStepIndices = Nd4j.argMax(labelsMask, 1);
    } else {
      lastTimeStepIndices = Nd4j.zeros(features.size(0), 1);
    }
    INDArray predBatch = model.outputSingle(features);
    int currentBatchSize = (int) predBatch.size(0);
    for (int i = 0; i < currentBatchSize; i++) {
      int thisTimeSeriesLastIndex = lastTimeStepIndices.getInt(i);
      INDArray thisExampleProbabilities =
          predBatch.get(
              NDArrayIndex.point(i),
              NDArrayIndex.all(),
              NDArrayIndex.point(thisTimeSeriesLastIndex));
      for (int j = 0; j < insts.numClasses(); j++) {
        preds[i + offset][j] = thisExampleProbabilities.getDouble(j);
      }
    }

    offset += currentBatchSize; // add batchsize as offset
    boolean iteratorHasInstancesLeft = offset < insts.numInstances();
    next = it.hasNext() || iteratorHasInstancesLeft;
  }

  // Fix classes
  for (int i = 0; i < preds.length; i++) {
    if (preds[i].length > 1) {
      weka.core.Utils.normalize(preds[i]);
    } else {
      // Rescale numeric classes with the computed coefficients in the initialization phase
      preds[i][0] = preds[i][0] * x1 + x0;
    }
  }
  return preds;
}