weka.core.Instances#instance

Source File: DataProcessing.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Walks over the multivariate problems in DatasetLists.mtscProblems2018,
 * starting at index 15. For each one it loads the TRAIN and TEST files,
 * prints basic statistics about the data and its first case, then builds an
 * NN_ED_I classifier on the training data and prints its test accuracy.
 *
 * @throws Exception if building or evaluating the classifier fails
 */
public static void testSimpleClassifier() throws Exception{
    final String root="Z:\\Data\\MultivariateTSCProblems\\";
    for(int p=15;p<DatasetLists.mtscProblems2018.length;p++){
        final String name=DatasetLists.mtscProblems2018[p];
        System.out.println("PROBLEM "+name);

        // both splits live in <root>/<name>/<name>_TRAIN|_TEST
        final String stem=root+name+"\\"+name;
        Instances trainSet=DatasetLoading.loadDataNullable(stem+"_TRAIN");
        Instances testSet=DatasetLoading.loadDataNullable(stem+"_TEST");

        System.out.println("Num train instances ="+trainSet.numInstances());
        System.out.println("Num test instances ="+testSet.numInstances());
        System.out.println("num attributes (should be 2!)="+trainSet.numAttributes());
        System.out.println("num classes="+trainSet.numClasses());

        // the relational attribute at index 0 of the first case holds one row per dimension
        Instances dimensions=trainSet.instance(0).relationalValue(0);
        System.out.println(" number of dimensions "+dimensions.numInstances());
        System.out.println(" number of attributes per dimension "+dimensions.numAttributes());

        NN_ED_I nearestNeighbour=new NN_ED_I();
        nearestNeighbour.buildClassifier(trainSet);
        double acc=ClassifierTools.accuracy(testSet, nearestNeighbour);
        System.out.println("Problem ="+name+" 1-NN ED accuracy  ="+acc);
    }
}

Source File: Tools.java From gsn with GNU General Public License v3.0

6 votes

/**
* Pre-processes the data in place by removing unused attributes and rescaling
* some of the remaining ones. The class index is set to 9 first, then
* attributes 8, 7, 6, 2 and 1 are deleted in that order.
* @param i the data set to modify
* @return the same data set object that was passed in
*/
public static Instances prepareInstances(Instances i){

	//select features to use
	i.setClassIndex(9);
	final int[] unused = {8, 7, 6, 2, 1};
	for(int attIndex : unused){
		i.deleteAttributeAt(attIndex);
	}

	//scale the values (attribute indices refer to the layout after the deletions)
	int row = 0;
	while(row < i.numInstances()){
		Instance current = i.instance(row);
		current.setValue(0, current.value(0)/1400.0);
		current.setValue(2, current.value(2)/50);
		current.setValue(3, current.value(3)/100.0);
		current.setValue(4, current.value(4)/100.0 - 4);
		row++;
	}

	return i;
}

Source File: CitationKNN.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Extends the stored per-attribute minimum, maximum and range so that they
 * also cover the instances held in the given bag.
 *
 * @param bag the exemplar to update the normalization for; its relational
 *            value at attribute index 1 is scanned
 */
public void updateNormalization(Instance bag){
  // compute the min/max of each feature
  for (int att = 0; att < m_TrainBags.attribute(1).relation().numAttributes(); att++) {
    // divide the norm factors back out to compare against raw values
    double lowest = m_Min[att] / m_MinNorm;
    double highest = m_Max[att] / m_MaxNorm;

    Instances content = bag.relationalValue(1);
    for (int row = 0; row < content.numInstances(); row++) {
      Instance member = content.instance(row);
      if (member.value(att) < lowest) {
        lowest = member.value(att);
      }
      if (member.value(att) > highest) {
        highest = member.value(att);
      }
    }

    // store the (possibly widened) extremes with the norm factors re-applied
    m_Min[att] = lowest * m_MinNorm;
    m_Max[att] = highest * m_MaxNorm;
    m_Diffs[att] = highest * m_MaxNorm - lowest * m_MinNorm;
  }
}

Source File: MergeInfrequentNominalValues.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Processes the given data. Every instance is copied into the output format;
 * for attributes flagged in m_AttToBeModified a non-missing value is replaced
 * by its entry in m_NewValues, all other values are carried over unchanged.
 * The weight of each source instance is preserved on its copy.
 *
 * @param instances   the data to process
 * @return            the modified data
 * @throws Exception  in case the processing goes wrong
 */
public Instances process(Instances instances) throws Exception {

  // Generate the output and return it
  Instances result = new Instances(getOutputFormat(), instances.numInstances());
  for (int i = 0; i < instances.numInstances(); i++) {
    Instance inst = instances.instance(i);
    double[] newData = new double[instances.numAttributes()];
    for (int j = 0; j < instances.numAttributes(); j++) {
      if (m_AttToBeModified[j] && !inst.isMissing(j)) {
        // value is the index of the old nominal label; look up its replacement
        newData[j] = m_NewValues[j][(int)inst.value(j)];
      } else {
        newData[j] = inst.value(j);
      }
    }
    // Keep the weight of the source instance instead of resetting it to 1.0
    DenseInstance instNew = new DenseInstance(inst.weight(), newData);
    instNew.setDataset(result);

    // copy possible strings, relational values...
    copyValues(instNew, false, inst.dataset(), getOutputFormat());

    // Add instance to output
    result.add(instNew);
  }
  return result;
}

Source File: AbstractEnsemble.java From tsml with GNU General Public License v3.0

6 votes

/**
     * Asks every module for its prediction on a single instance. When a transform is set,
     * the instance is first placed in a one-element data set, run through the transform,
     * and the transformed instance is classified instead.
     *
     * @param instance the case to classify
     * @return the predictions of each individual module, i.e [0] = first module's vote, [1] = second...
     * @throws Exception if the transform or any module's classifier fails
     */
    public double[] classifyInstanceByConstituents(Instance instance) throws Exception{
        Instance target;
        if(this.transform==null){
            target = instance;
        }else{
            // header-only copy of the source data set, holding just this instance
            Instances holder = new Instances(instance.dataset(),0);
            holder.add(instance);
            target = transform.process(holder).instance(0);
        }

        final int count = modules.length;
        double[] votes = new double[count];
        int m = 0;
        while(m<count){
            votes[m] = modules[m].getClassifier().classifyInstance(target);
            m++;
        }
        return votes;
    }

Source File: MultivariateProcessing.java From tsml with GNU General Public License v3.0

6 votes

/**Getting started with relational attributes in Weka: loads a univariate and a
         * multivariate example file, prints both, then takes the first multivariate
         * case apart into its dimensions. Once you have the basics
         * there are a range of tools for manipulating them in
         * package utilities.multivariate_tools
         *
         * See https://weka.wikispaces.com/Multi-instance+classification
         * for more
         * */
        public static void gettingStarted(){
            // Load the univariate example first, purely to show its layout
            String location="\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\Multivariate\\univariateConcatExample";
            Instances data=DatasetLoading.loadData(location);
            System.out.println(" univariate data = "+data);

            // Now the multivariate version
            location="\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\Multivariate\\multivariateConcatExample";
            data=DatasetLoading.loadData(location);
            System.out.println(" multivariate data = "+data);

            // The relational attribute at index 0 of the first case holds its dimensions
            Instances dimensions=data.instance(0).relationalValue(0);
            System.out.println(" A single multivariate case split into 3 instances with no class values= "+dimensions);
            for(int k=0;k<dimensions.numInstances();k++){
                System.out.println("Dimension of first case =" +dimensions.instance(k));
            }

            // Extract as arrays, one row per dimension
            final int rows=dimensions.numInstances();
            double[][] values = new double[rows][];
            int r=0;
            while(r<dimensions.numInstances()){
                values[r]=dimensions.instance(r).toDoubleArray();
                r++;
            }
        }

Source File: CoverTree.java From tsml with GNU General Public License v3.0

5 votes

/** 
 * Builds the tree on the given set of instances.
 * P.S.: For internal use only. Outside classes 
 * should call setInstances(). 
 * 
 * The first instance is used as the root point; every other instance is
 * wrapped in a DistanceNode carrying its distance to that point, and the
 * resulting set is handed to batch_insert.
 * 
 * @param insts The instances on which to build 
 * the cover tree.
 * @throws Exception If the supplied set of 
 * Instances is empty, or if there are missing
 * values. 
 */
protected void buildCoverTree(Instances insts) throws Exception {
  if (insts.numInstances() == 0)
    throw new Exception(
 "CoverTree: Empty set of instances. Cannot build tree.");
  checkMissing(insts);
  if (m_EuclideanDistance == null)
    m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts);
  else
    m_EuclideanDistance.setInstances(insts);
  
  Stack<DistanceNode> point_set = new Stack<DistanceNode>();
  Stack<DistanceNode> consumed_set = new Stack<DistanceNode>();

  Instance point_p = insts.instance(0);
  int p_idx = 0;
  
  // Record the distance of every other instance to the root point.
  // (The maximum is taken from max_set below, so it is not tracked here.)
  for (int i = 1; i < insts.numInstances(); i++) {
    DistanceNode temp = new DistanceNode();
    temp.dist = new Stack<Double>();
    double dist = Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY));
    temp.dist.push(dist);
    temp.idx = i;
    point_set.push(temp);
  }
  
  double max_dist = max_set(point_set);
  m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist),
                        point_set, consumed_set);
}

Source File: NormalizeCase.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Standardises every instance (row) in place: each numeric, non-class value
 * x becomes (x - mean) / stdev, where mean and the population standard
 * deviation are computed over that instance's numeric, non-class attributes.
 * Nominal attributes and the class attribute are neither used nor changed.
 *
 * An instance whose standard deviation is zero either raises an exception
 * (when throwErrorOnZeroVariance is set) or is left untouched with a warning.
 *
 * @param r the data to normalise in place
 * @throws Exception if an instance has zero variance and
 *                   throwErrorOnZeroVariance is set
 */
public void standardNorm(Instances r) throws Exception{
	int classIndex=r.classIndex();

	// Count only the attributes that actually enter the sums below. Using the
	// full attribute count would skew mean/stdev whenever nominal attributes
	// are present, because those are skipped when summing.
	int size=0;
	for(int j=0;j<r.numAttributes();j++){
		if(j!=classIndex && !r.attribute(j).isNominal()){
			size++;
		}
	}

	for(int i=0;i<r.numInstances();i++)
	{
		double sum=0;
		double sumSq=0;
		for(int j=0;j<r.numAttributes();j++){
			if(j!=classIndex && !r.attribute(j).isNominal()){// Ignore all nominal atts
				double x=r.instance(i).value(j);
				sum+=x;
				sumSq+=x*x;
			}
		}

		double mean=0;
		double variance=0;
		if(size>0){
			mean=sum/size;
			variance=(sumSq-sum*sum/size)/size;
		}
		// Rounding can push the variance of a (near-)constant series slightly
		// below zero; clamp it so sqrt yields 0 rather than NaN.
		double stdev=Math.sqrt(Math.max(variance,0));

		if(stdev==0){
			if (throwErrorOnZeroVariance){
				throw new Exception("Cannot normalise a series with zero variance! Instance number ="+i+" mean ="+mean+" sum = "+sum+" sum sq = "+sumSq+" instance ="+r.instance(i));
			}
			else {
				System.out.println("Warning: instance with zero variance found, leaving it alone. relation="+r.relationName()+" instInd="+i+" inst=\n"+r.get(i));
				continue;
			}
		}

		for(int j=0;j<r.numAttributes();j++){
			if(j!=classIndex&& !r.attribute(j).isNominal()){
				double x=r.instance(i).value(j);
				r.instance(i).setValue(j,(x-mean)/(stdev));
			}
		}
	}

}

Source File: DataProcessing.java From tsml with GNU General Public License v3.0

5 votes

/**
 * For each named problem, splits the multivariate TRAIN/TEST data into one
 * data set per dimension and writes each of them to
 * path/prob/probDimension&lt;k&gt;_TRAIN.arff and ..._TEST.arff (k starting at 1).
 * Three problems (InsectWingbeat, FaceDetection, DuckDuckGeese) are always skipped.
 *
 * @param path      directory containing one sub-directory per problem
 * @param probs     names of the problems to process
 * @param overwrite if false, a problem whose Dimension1 TRAIN file already
 *                  exists is skipped
 */
public static void makeSingleDimensionFiles(String path, String[] probs,boolean overwrite){
        for(int p=0;p<probs.length;p++){
            final String prob=probs[p];
            System.out.println("Processing "+prob);

            boolean excluded=prob.equals("InsectWingbeat")||prob.equals("FaceDetection")|| prob.equals("DuckDuckGeese");
            if(excluded){
                continue;
            }

            // common prefix of every file belonging to this problem
            final String stem=path+prob+"\\"+prob;
            File firstDimension= new File(stem+"Dimension"+(1)+"_TRAIN.arff");
            if(!overwrite&&firstDimension.exists()){
                continue;
            }

            Instances train =DatasetLoading.loadDataNullable(stem+"_TRAIN");
            Instances test =DatasetLoading.loadDataNullable(stem+"_TEST");
            System.out.println("PROBLEM "+prob);
            System.out.println("Num train instances ="+train.numInstances());
            System.out.println("Num test instances ="+test.numInstances());
            System.out.println("num attributes (should be 2!)="+train.numAttributes());
            System.out.println("num classes="+train.numClasses());

            Instances firstCase= train.instance(0).relationalValue(0);
            System.out.println(" number of dimensions "+firstCase.numInstances());
            System.out.println(" number of attributes per dimension "+firstCase.numAttributes());

            Instances[] splitTest=MultivariateInstanceTools.splitMultivariateInstances(test);
            Instances[] splitTrain=MultivariateInstanceTools.splitMultivariateInstances(train);
            System.out.println(" Num split files ="+splitTest.length);

            int dim=0;
            while(dim<splitTrain.length){
                System.out.println("Number of test instances = "+splitTest[dim].numInstances());
                final String dimensionStem=stem+"Dimension"+(dim+1);
                OutFile outTrain=new OutFile(dimensionStem+"_TRAIN.arff");
                outTrain.writeLine(splitTrain[dim].toString()+"");
                OutFile outTest=new OutFile(dimensionStem+"_TEST.arff");
                outTest.writeLine(splitTest[dim].toString()+"");
                dim++;
            }
        }
}

Source File: CitationKNN.java From tsml with GNU General Public License v3.0

5 votes

/** 
 * Calculates the normalization of each attribute: scans every instance of
 * every training bag and stores, per attribute, the scaled minimum, the
 * scaled maximum and their difference.
 */
public void preprocessData(){
  // compute the min/max of each feature
  for (int att = 0; att < m_Attributes.numAttributes(); att++) {
    double lowest = Double.POSITIVE_INFINITY;
    double highest = Double.NEGATIVE_INFINITY;

    for (int bag = 0; bag < m_TrainBags.numInstances(); bag++) {
      // the bag content is the relational value at attribute index 1
      Instances content = m_TrainBags.instance(bag).relationalValue(1);
      for (int row = 0; row < content.numInstances(); row++) {
        Instance member = content.instance(row);
        if (member.value(att) < lowest) {
          lowest = member.value(att);
        }
        if (member.value(att) > highest) {
          highest = member.value(att);
        }
      }
    }

    m_Min[att] = lowest * m_MinNorm;
    m_Max[att] = highest * m_MaxNorm;
    m_Diffs[att] = highest * m_MaxNorm - lowest * m_MinNorm;
  }
}

Source File: Utils.java From wekaDeeplearning4j with GNU General Public License v3.0

5 votes

/**
 * Appends the class values of the input Instances to the INDArray as one
 * extra column (concatenation along dimension 1).
 * @param result activations; its first dimension determines how many class values are read
 * @param input original Instances
 * @return activations with class value appended
 */
public static INDArray appendClasses(INDArray result, Instances input) {
  // single-column array with one entry per row of the activations
  INDArray labelColumn = Nd4j.zeros(result.shape()[0], 1);
  int row = 0;
  while (row < labelColumn.length()) {
    labelColumn.putScalar(row, input.instance(row).classValue());
    row++;
  }
  return Nd4j.concat(1, result, labelColumn);
}

Source File: FastElasticEnsemble.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Classifies an instance by weighted vote of the constituent classifiers.
 * Each classifier's prediction adds that classifier's cvAccs entry to the
 * total of the predicted class; the class with the largest running total
 * wins. Ties are broken with a Random seeded with 46. The individual
 * predictions are also stored in previousPredictions.
 *
 * @param instance   the case to classify
 * @param queryIndex passed through to the constituent classifiers
 * @param cache      passed through to the constituent classifiers
 * @return the winning class value
 * @throws Exception if the ensemble has not been built, or a constituent fails
 */
public double classifyInstance(final Instance instance, final int queryIndex, final SequenceStatsCache cache) throws Exception{
    if(classifiers==null){
        throw new Exception("Error: classifier not built");
    }

    // derivative version of the query, only produced when some constituent needs it
    Instance derIns = null;
    if(this.usesDer){
        Instances wrapper = new Instances(derTrain,1);
        wrapper.add(instance);
        wrapper = df.process(wrapper);
        derIns = wrapper.instance(0);
    }

    double bestVote = -1;
    double[] classTotals = new double[train.numClasses()];
    ArrayList<Double> leaders = null;

    this.previousPredictions = new double[this.classifiers.length];

    for(int c = 0; c < classifiers.length; c++){
        final double pred = isDerivative(classifiersToUse[c])
                ? classifiers[c].classifyInstance(derTrain, derIns, queryIndex, cache)
                : classifiers[c].classifyInstance(train, instance, queryIndex, cache);
        previousPredictions[c] = pred;

        final int label = (int)pred;
        try{
            classTotals[label] += cvAccs[c];
        }catch(Exception e){
            System.out.println("cv accs "+cvAccs.length);
            System.out.println(pred);
            throw e;
        }

        final double running = classTotals[label];
        if(running > bestVote){
            // new outright leader: restart the list of tied classes
            leaders = new ArrayList<>();
            leaders.add(pred);
            bestVote = running;
        }else if(running == bestVote){
            leaders.add(pred);
        }
    }

    final int pick = leaders.size()>1 ? new Random(46).nextInt(leaders.size()) : 0;
    return leaders.get(pick);
}

Source File: PLST.java From meka with GNU General Public License v3.0

4 votes

/**
    * Transforms the instance in the prediction process before given to the internal multi-label
    * or multi-target classifier. The instance is passed having the original set of labels, these
    * must be replaced with the transformed labels (attributes) so that the internal classifier
    * can predict them.
    *
    * @param x The instance to transform. Consists of features and labels.
    * @return The transformed instance. Consists of features and transformed labels.
    * @throws Exception if extracting the features or merging fails
    */
   @Override
   public Instance transformInstance(Instance x) throws Exception{
      // Header-only copy of x's data set; avoids copying every instance just to delete it again
      Instances tmpInst = new Instances(x.dataset(), 0);
      tmpInst.add(x);

      Instances features = this.extractPart(tmpInst, false);

      // Transformed-label header with one instance of default values appended
      Instances labels = new Instances(this.m_PatternInstances);
      labels.add(new DenseInstance(labels.numAttributes()));

      Instances result = Instances.mergeInstances(labels, features);
      result.setClassIndex(labels.numAttributes());

      return result.instance(0);
   }

Source File: IBk.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Turn the list of nearest neighbors into a probability distribution.
  * Note that the entries of the distances array are rescaled in place.
  *
  * @param neighbours the list of nearest neighboring instances
  * @param distances the distances of the neighbors; overwritten with
  *                  sqrt(d*d / m_NumAttributesUsed) for each neighbour
  * @return the probability distribution
  * @throws Exception if computation goes wrong or has no class attribute
  */
 protected double [] makeDistribution(Instances neighbours, double[] distances)
   throws Exception {

   double total = 0, weight;
   double [] distribution = new double [m_NumClasses];
   
   // Set up a correction to the estimator
   if (m_ClassType == Attribute.NOMINAL) {
     for(int i = 0; i < m_NumClasses; i++) {
       distribution[i] = 1.0 / Math.max(1,m_Train.numInstances());
     }
     total = (double)m_NumClasses / Math.max(1,m_Train.numInstances());
   }

   for(int i=0; i < neighbours.numInstances(); i++) {
     // Collect class counts
     Instance current = neighbours.instance(i);
     distances[i] = distances[i]*distances[i];
     distances[i] = Math.sqrt(distances[i]/m_NumAttributesUsed);
     switch (m_DistanceWeighting) {
       case WEIGHT_INVERSE:
         weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero
         break;
       case WEIGHT_SIMILARITY:
         weight = 1.0 - distances[i];
         break;
       default:                                 // WEIGHT_NONE:
         weight = 1.0;
         break;
     }
     weight *= current.weight();
     try {
       switch (m_ClassType) {
         case Attribute.NOMINAL:
           distribution[(int)current.classValue()] += weight;
           break;
         case Attribute.NUMERIC:
           distribution[0] += current.classValue() * weight;
           break;
       }
     } catch (Exception ex) {
       // keep the original failure attached so the real cause is not lost
       throw new Error("Data has no class attribute!", ex);
     }
     total += weight;      
   }

   // Normalise distribution
   if (total > 0) {
     Utils.normalize(distribution, total);
   }
   return distribution;
 }

Source File: SimpleKMeansWithSilhouette.java From apogen with Apache License 2.0

4 votes

/**
 * Initialize using the k-means++ method: the first center is drawn uniformly
 * at random, each further center is drawn with probability proportional to
 * the instance's current distance to its nearest already-chosen center.
 * Chosen centers are appended to m_ClusterCentroids.
 * 
 * @param data
 *            the training data
 * @throws Exception
 *             if a problem occurs
 */
protected void kMeansPlusPlusInit(Instances data) throws Exception {
	// seeded generator, so the initialisation is repeatable for a given seed
	Random randomO = new Random(getSeed());
	// hash keys of the centers chosen so far; used only to detect duplicates
	HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();

	// choose initial center uniformly at random
	int index = randomO.nextInt(data.numInstances());
	m_ClusterCentroids.add(data.instance(index));
	DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true);
	initC.put(hk, null);

	// index into m_ClusterCentroids of the most recently chosen center
	int iteration = 0;
	int remainingInstances = data.numInstances() - 1;
	if (m_NumClusters > 1) {
		// proceed with selecting the rest

		// distances to the initial randomly chose center
		double[] distances = new double[data.numInstances()];
		double[] cumProbs = new double[data.numInstances()];
		for (int i = 0; i < data.numInstances(); i++) {
			distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration));
		}

		// now choose the remaining cluster centers
		for (int i = 1; i < m_NumClusters; i++) {

			// distances converted to probabilities
			double[] weights = new double[data.numInstances()];
			System.arraycopy(distances, 0, weights, 0, distances.length);
			Utils.normalize(weights);

			// running sum of the weights, i.e. a cumulative distribution over instances
			double sumOfProbs = 0;
			for (int k = 0; k < data.numInstances(); k++) {
				sumOfProbs += weights[k];
				cumProbs[k] = sumOfProbs;
			}

			cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no
														// rounding issues

			// choose a random instance: the first one whose cumulative
			// probability exceeds the drawn value
			double prob = randomO.nextDouble();
			for (int k = 0; k < cumProbs.length; k++) {
				if (prob < cumProbs[k]) {
					Instance candidateCenter = data.instance(k);
					hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
					if (!initC.containsKey(hk)) {
						initC.put(hk, null);
						m_ClusterCentroids.add(candidateCenter);
					} else {
						// we shouldn't get here because any instance that is a duplicate
						// of
						// an already chosen cluster center should have zero distance (and
						// hence
						// zero probability of getting chosen) to that center.
						System.err.println("We shouldn't get here....");
					}
					remainingInstances--;
					break;
				}
			}
			// NOTE(review): iteration is advanced even when the duplicate branch
			// above added no centroid; in that case the lookup
			// m_ClusterCentroids.instance(iteration) below looks like it would
			// index past the last centroid - confirm whether that path is reachable.
			iteration++;

			if (remainingInstances == 0) {
				break;
			}

			// prepare to choose the next cluster center.
			// check distances against the new cluster center to see if it is closer
			for (int k = 0; k < data.numInstances(); k++) {
				// instances at distance 0 already coincide with a center; skip them
				if (distances[k] > 0) {
					double newDist = m_DistanceFunction.distance(data.instance(k),
							m_ClusterCentroids.instance(iteration));
					if (newDist < distances[k]) {
						distances[k] = newDist;
					}
				}
			}
		}
	}
}

Source File: NBTreeSplit.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Creates split on numeric attribute. A C45Split is built on the attribute
  * first; the training data is then distributed over the resulting branches
  * and the error of each branch is accumulated into m_errors.
  *
  * @param trainInstances the training data to split
  * @exception Exception if something goes wrong
  */
 private void handleNumericAttribute(Instances trainInstances)
      throws Exception {

   m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true);
   m_c45S.buildClassifier(trainInstances);
   if (m_c45S.numSubsets() == 0) {
     return;
   }
   m_errors = 0;

   Instances[] branches = new Instances[m_complexityIndex];
   branches[0] = new Instances(trainInstances, 0);
   branches[1] = new Instances(trainInstances, 0);

   // populate the subsets
   for (int i = 0; i < trainInstances.numInstances(); i++) {
     Instance current = trainInstances.instance(i);
     int branch = m_c45S.whichSubset(current);
     if (branch == -1) {
       // no single branch applies: put a re-weighted copy into every branch
       double[] weights = m_c45S.weights(current);
       for (int j = 0; j < m_complexityIndex; j++) {
         Instance fraction = (Instance) current.copy();
         if (weights.length == m_complexityIndex) {
           fraction.setWeight(fraction.weight() * weights[j]);
         } else {
           fraction.setWeight(fraction.weight() / m_complexityIndex);
         }
         branches[j].add(fraction);
       }
     } else {
       branches[branch].add((Instance) current.copy());
     }
   }

   /*    // compute weights (weights of instances per subset
   m_weights = new double [m_complexityIndex];
   for (int i = 0; i < m_complexityIndex; i++) {
     m_weights[i] = trainingSets[i].sumOfWeights();
   }
   Utils.normalize(m_weights); */

   Random r = new Random(1);
   int largeEnough = 0;
   for (int i = 0; i < m_complexityIndex; i++) {
     if (branches[i].numInstances() <= 5) {
       // too few instances to cross-validate: count their total weight as error
       for (int j = 0; j < branches[i].numInstances(); j++) {
         m_errors += branches[i].instance(j).weight();
       }
       continue;
     }

     largeEnough++;

     // Discretize the sets
     Discretize disc = new Discretize();
     disc.setInputFormat(branches[i]);
     branches[i] = Filter.useFilter(branches[i], disc);

     branches[i].randomize(r);
     branches[i].stratify(5);
     NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
     fullModel.buildClassifier(branches[i]);

     // add the errors for this branch of the split
     m_errors += NBTreeNoSplit.crossValidate(fullModel, branches[i], r);
   }

   // Check if minimum number of Instances in at least two
   // subsets.
   if (largeEnough > 1) {
     m_numSubsets = m_complexityIndex;
   }
 }

Source File: NaiveDTW.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Converts the training data into SymbolicSequence objects grouped by class
 * label, allocates the working arrays sized by the longest sequence, and
 * then searches for the best warping window.
 *
 * @param data the training data; the class attribute is expected to be
 *             either the first or the last attribute
 * @throws Exception if the window search fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = data.classAttribute().value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		// skip the class value when it sits at position 0
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		int t = 0;
		while (t < sequence.length) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + offset));
			t++;
		}
		train[i] = new SymbolicSequence(sequence);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	maxWindow = Math.round(1 * maxLength);
	final int windowCount = maxWindow + 1;
	searchResults = new String[windowCount];
	nns = new int[windowCount][train.length];
	dist = new double[windowCount][train.length];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}

Source File: SimpleCart.java From tsml with GNU General Public License v3.0

4 votes

/**
  * Split data into two subsets and store sorted indices and weights for two
  * successor nodes.
  * 
  * @param subsetIndices 	sorted indices of instances for each attribute 
  * 				for two successor node
  * @param subsetWeights 	weights of instances for each attribute for 
  * 				two successor node
  * @param att 		attribute the split based on
  * @param splitPoint 		split point the split based on if att is numeric
  * @param splitStr 		split subset the split based on if att is nominal
  * @param sortedIndices 	sorted indices of the instances to be split
  * @param weights 		weights of the instances to be split
  * @param data 		training data
  * @throws Exception 		if something goes wrong  
  */
 protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
     Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
     double[][] weights, Instances data) throws Exception {

   // For each attribute
   for (int i = 0; i < data.numAttributes(); i++) {
     if (i == data.classIndex()) {
       continue;
     }

     // start with full-size buffers; filled[k] counts the entries used in subset k
     int[] filled = new int[2];
     for (int k = 0; k < 2; k++) {
       subsetIndices[k][i] = new int[sortedIndices[i].length];
       subsetWeights[k][i] = new double[weights[i].length];
     }

     for (int pos = 0; pos < sortedIndices[i].length; pos++) {
       Instance inst = data.instance(sortedIndices[i][pos]);

       if (!inst.isMissing(att)) {
         boolean first;
         if (att.isNumeric()) {
           first = inst.value(att) < splitPoint;
         } else { // nominal attribute
           String label = "(" + att.value((int) inst.value(att.index())) + ")";
           first = splitStr.indexOf(label) != -1;
         }
         int subset = first ? 0 : 1;
         subsetIndices[subset][i][filled[subset]] = sortedIndices[i][pos];
         subsetWeights[subset][i][filled[subset]] = weights[i][pos];
         filled[subset]++;
         continue;
       }

       // Split instance up: it goes into every subset with a positive
       // proportion, with its weight scaled by that proportion
       for (int k = 0; k < 2; k++) {
         if (m_Props[k] > 0) {
           subsetIndices[k][i][filled[k]] = sortedIndices[i][pos];
           subsetWeights[k][i][filled[k]] = m_Props[k] * weights[i][pos];
           filled[k]++;
         }
       }
     }

     // Trim arrays
     for (int k = 0; k < 2; k++) {
       int used = filled[k];

       int[] trimmedIndices = new int[used];
       System.arraycopy(subsetIndices[k][i], 0, trimmedIndices, 0, used);
       subsetIndices[k][i] = trimmedIndices;

       double[] trimmedWeights = new double[used];
       System.arraycopy(subsetWeights[k][i], 0, trimmedWeights, 0, used);
       subsetWeights[k][i] = trimmedWeights;
     }
   }
 }

Source File: UCRSuite.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Converts the training data into SymbolicSequence objects grouped by class
 * label, allocates the working arrays, the sequence statistics cache and one
 * LazyAssessNNEarlyAbandon per pair of training sequences, and then searches
 * for the best warping window.
 *
 * @param data the training data; the class attribute is expected to be
 *             either the first or the last attribute
 * @throws Exception if the window search fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = data.classAttribute().value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		// skip the class value when it sits at position 0
		final int offset;
		if (sample.classIndex() == 0) {
			offset = 1;
		} else {
			offset = 0;
		}
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + offset));
		}
		train[i] = new SymbolicSequence(sequence);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	U1 = new double[maxLength];
	L1 = new double[maxLength];

	maxWindow = Math.round(1 * maxLength);
	searchResults = new String[maxWindow+1];
	nns = new int[maxWindow+1][train.length];
	dist = new double[maxWindow+1][train.length];

	cache = new SequenceStatsCache(train, maxWindow);

	// one lazy assessor for every ordered pair of training sequences
	lazyUCR = new LazyAssessNNEarlyAbandon[train.length][train.length];
	for (LazyAssessNNEarlyAbandon[] row : lazyUCR) {
		for (int j = 0; j < row.length; j++) {
			row[j] = new LazyAssessNNEarlyAbandon(cache);
		}
	}

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}

Source File: LbKeoghPrunedDTW.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Converts the training data into SymbolicSequence objects grouped by class
 * label, allocates the working arrays (including a train-by-train distance
 * matrix), and then searches for the best warping window.
 *
 * @param data the training data; the class attribute is expected to be
 *             either the first or the last attribute
 * @throws Exception if the window search fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	int c = 0;
	while (c < data.numClasses()) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
		c++;
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] items = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, items.length);
		// skip the class value when it sits at position 0
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < items.length; t++) {
			double value = sample.value(t + offset);
			items[t] = new MonoDoubleItemSet(value);
		}
		SymbolicSequence series = new SymbolicSequence(items);
		train[i] = series;

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(series);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	maxWindow = Math.round(1 * maxLength);
	searchResults = new String[maxWindow+1];
	nns = new int[maxWindow+1][train.length];
	dist = new double[train.length][train.length];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}

Java Code Examples for weka.core.Instances#instance()