Java Code Examples for weka.core.Instances#instance()

The following examples show how to use weka.core.Instances#instance(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: DataProcessing.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Smoke test over the multivariate problem list, starting at index 15: loads
 * each train/test pair, prints basic statistics about the data, builds an
 * {@code NN_ED_I} classifier on the train split and prints its test accuracy.
 *
 * @throws Exception if building or evaluating the classifier fails
 */
public static void testSimpleClassifier() throws Exception{
    final String root="Z:\\Data\\MultivariateTSCProblems\\";
    for(int idx=15;idx<DatasetLists.mtscProblems2018.length;idx++){
        final String prob=DatasetLists.mtscProblems2018[idx];
        System.out.println("PROBLEM "+prob);

        // Both splits live in <root>/<problem>/<problem>_{TRAIN,TEST}
        final String stem=root+prob+"\\"+prob;
        final Instances train=DatasetLoading.loadDataNullable(stem+"_TRAIN");
        final Instances test=DatasetLoading.loadDataNullable(stem+"_TEST");

        System.out.println("Num train instances ="+train.numInstances());
        System.out.println("Num test instances ="+test.numInstances());
        System.out.println("num attributes (should be 2!)="+train.numAttributes());
        System.out.println("num classes="+train.numClasses());

        // Attribute 0 of each case is relational: one row per dimension
        final Instances dimensions=train.instance(0).relationalValue(0);
        System.out.println(" number of dimensions "+dimensions.numInstances());
        System.out.println(" number of attributes per dimension "+dimensions.numAttributes());

        final NN_ED_I classifier=new NN_ED_I();
        classifier.buildClassifier(train);
        final double accuracy=ClassifierTools.accuracy(test, classifier);
        System.out.println("Problem ="+prob+" 1-NN ED accuracy  ="+accuracy);
    }
}
 
Example 2
Source File: Tools.java    From gsn with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Pre-processes the data in place by removing unused attributes and
 * rescaling the remaining values.
 *
 * @param i the dataset to modify
 * @return the same dataset object that was passed in, after modification
 */
public static Instances prepareInstances(Instances i){

	// select features to use: attribute 9 is the class; the listed
	// attributes are dropped from highest to lowest index
	i.setClassIndex(9);
	final int[] dropped = {8, 7, 6, 2, 1};
	for(int attIndex : dropped){
		i.deleteAttributeAt(attIndex);
	}

	// scale the values
	int row = 0;
	while(row < i.numInstances()){
		final Instance current = i.instance(row);
		current.setValue(0, current.value(0)/1400.0);
		current.setValue(2, current.value(2)/50);
		current.setValue(3, current.value(3)/100.0);
		current.setValue(4, current.value(4)/100.0 - 4);
		row++;
	}

	return i;
}
 
Example 3
Source File: CitationKNN.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Folds the instances of one bag into the stored per-attribute minimum and
 * maximum and refreshes the derived differences.
 *
 * @param bag the exemplar to update the normalization for
 */
public void updateNormalization(Instance bag){
  for (int att = 0; att < m_TrainBags.attribute(1).relation().numAttributes(); att++) {
    // start from the stored bounds with their norm factors divided out
    double lowest = m_Min[att] / m_MinNorm;
    double highest = m_Max[att] / m_MaxNorm;

    // widen the bounds with every instance inside the bag
    final Instances members = bag.relationalValue(1);
    for (int m = 0; m < members.numInstances(); m++) {
      final double v = members.instance(m).value(att);
      if (v < lowest) {
        lowest = v;
      }
      if (v > highest) {
        highest = v;
      }
    }

    m_Min[att] = lowest * m_MinNorm;
    m_Max[att] = highest * m_MaxNorm;
    m_Diffs[att] = highest * m_MaxNorm - lowest * m_MinNorm;
  }
}
 
Example 4
Source File: MergeInfrequentNominalValues.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a new dataset in the output format, replacing the values of the
 * attributes flagged for modification with their mapped values.
 *
 * @param instances   the data to process
 * @return            a new dataset holding the modified data
 * @throws Exception  in case the processing goes wrong
 */
public Instances process(Instances instances) throws Exception {

  Instances result = new Instances(getOutputFormat(), instances.numInstances());

  for (int row = 0; row < instances.numInstances(); row++) {
    final Instance source = instances.instance(row);
    final double[] values = new double[instances.numAttributes()];

    for (int att = 0; att < instances.numAttributes(); att++) {
      // missing values and untouched attributes are copied through as-is
      final boolean remap = m_AttToBeModified[att] && !source.isMissing(att);
      if (remap) {
        values[att] = m_NewValues[att][(int) source.value(att)];
      } else {
        values[att] = source.value(att);
      }
    }

    final DenseInstance converted = new DenseInstance(1.0, values);
    converted.setDataset(result);

    // copy possible strings, relational values...
    copyValues(converted, false, source.dataset(), getOutputFormat());

    result.add(converted);
  }

  return result;
}
 
Example 5
Source File: AbstractEnsemble.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
     * Classifies the given instance with every module separately. When a
     * transform is set, the instance is first wrapped in a single-element
     * dataset and passed through it.
     *
     * @param instance the case to classify
     * @return the predictions of each individual module, i.e [0] = first module's vote, [1] = second...
     * @throws Exception if the transform or any module's classifier fails
     */
    public double[] classifyInstanceByConstituents(Instance instance) throws Exception{
        final Instance toClassify;
        if(this.transform==null){
            toClassify = instance;
        }else{
            // The transform works on datasets, so wrap the single case first
            final Instances rawContainer = new Instances(instance.dataset(),0);
            rawContainer.add(instance);
            final Instances converted = transform.process(rawContainer);
            toClassify = converted.instance(0);
        }

        final double[] predsByClassifier = new double[modules.length];
        int m = 0;
        while(m<modules.length){
            predsByClassifier[m] = modules[m].getClassifier().classifyInstance(toClassify);
            m++;
        }
        return predsByClassifier;
    }
 
Example 6
Source File: MultivariateProcessing.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
         * Getting started with relational attributes in Weka. Once you have the basics
         * there are a range of tools for manipulating them in
         * package utilities.multivariate_tools
         *
         * See https://weka.wikispaces.com/Multi-instance+classification
         * for more
         */
        public static void gettingStarted(){
            final String dataDir="\\\\cmptscsvr.cmp.uea.ac.uk\\ueatsc\\Data\\Multivariate\\";

            // Load and print the univariate example
            final Instances univariate=DatasetLoading.loadData(dataDir+"univariateConcatExample");
            System.out.println(" univariate data = "+univariate);

            // Load and print the multivariate example
            final Instances train=DatasetLoading.loadData(dataDir+"multivariateConcatExample");
            System.out.println(" multivariate data = "+train);

            // Recover the first instance and split it into separate dimensions
            final Instances split=train.instance(0).relationalValue(0);
            System.out.println(" A single multivariate case split into 3 instances with no class values= "+split);
            for(Instance dimension:split){
                System.out.println("Dimension of first case =" +dimension);
            }

            // Extract as arrays, one row per dimension
            final int numDimensions=split.numInstances();
            final double[][] d = new double[numDimensions][];
            for(int row=0;row<split.numInstances();row++){
                d[row]=split.instance(row).toDoubleArray();
            }
        }
 
Example 7
Source File: CoverTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the tree on the given set of instances, using the first instance
 * as the root point.
 * P.S.: For internal use only. Outside classes
 * should call setInstances().
 *
 * @param insts The instances on which to build
 * the cover tree.
 * @throws Exception If the supplied set of
 * Instances is empty, or if there are missing
 * values.
 */
protected void buildCoverTree(Instances insts) throws Exception {
  if (insts.numInstances() == 0) {
    throw new Exception(
 "CoverTree: Empty set of instances. Cannot build tree.");
  }
  checkMissing(insts);

  // Create the distance function on first use, otherwise re-point it at the new data
  if (m_EuclideanDistance != null) {
    m_EuclideanDistance.setInstances(insts);
  } else {
    m_EuclideanDistance = new EuclideanDistance(insts);
    m_DistanceFunction = m_EuclideanDistance;
  }

  final Stack<DistanceNode> point_set = new Stack<DistanceNode>();
  final Stack<DistanceNode> consumed_set = new Stack<DistanceNode>();

  final int rootIdx = 0;
  final Instance root = insts.instance(rootIdx);

  // Every other instance becomes a node carrying its distance to the root
  for (int idx = 1; idx < insts.numInstances(); idx++) {
    final double toRoot = Math.sqrt(
        m_DistanceFunction.distance(root, insts.instance(idx), Double.POSITIVE_INFINITY));
    final DistanceNode node = new DistanceNode();
    node.dist = new Stack<Double>();
    node.dist.push(toRoot);
    node.idx = idx;
    point_set.push(node);
  }

  // The largest distance in the point set fixes the scale passed to batch_insert
  final double max_dist = max_set(point_set);
  m_Root = batch_insert(rootIdx, get_scale(max_dist), get_scale(max_dist),
                        point_set, consumed_set);
}
 
Example 8
Source File: NormalizeCase.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Standardises every instance (row) in place: each non-class, non-nominal
 * value {@code x} is replaced by {@code (x-mean)/stdev}, where mean and
 * standard deviation are computed over that instance's own non-class,
 * non-nominal values.
 *
 * <p>The divisor used for mean and variance is the number of attributes that
 * actually enter the sums. Previously nominal non-class attributes were
 * skipped when summing but still counted in the divisor, which skewed both
 * statistics for mixed-type data.
 *
 * @param r the dataset to normalise; modified in place
 * @throws Exception if an instance has zero variance and
 *                   {@code throwErrorOnZeroVariance} is set
 */
public void standardNorm(Instances r) throws Exception{
	final int classIndex=r.classIndex();
	final int numAtts=r.numAttributes();

	// Count only the attributes that take part in the statistics
	int size=0;
	for(int j=0;j<numAtts;j++){
		if(j!=classIndex && !r.attribute(j).isNominal())
			size++;
	}
	// Nothing to normalise (also avoids dividing by zero below)
	if(size==0)
		return;

	for(int i=0;i<r.numInstances();i++)
	{
		final Instance inst=r.instance(i);
		double sum=0,sumSq=0;
		for(int j=0;j<numAtts;j++){
			if(j!=classIndex && !r.attribute(j).isNominal()){// Ignore all nominal atts
				final double x=inst.value(j);
				sum+=x;
				sumSq+=x*x;
			}
		}
		final double mean=sum/size;
		final double stdev=Math.sqrt((sumSq-sum*sum/size)/size);
		if(stdev==0){
			if (throwErrorOnZeroVariance)
				throw new Exception("Cannot normalise a series with zero variance! Instance number ="+i+" mean ="+mean+" sum = "+sum+" sum sq = "+sumSq+" instance ="+inst);
			// Best effort: report the constant series and leave it untouched
			System.out.println("Warning: instance with zero variance found, leaving it alone. relation="+r.relationName()+" instInd="+i+" inst=\n"+inst);
			continue;
		}

		for(int j=0;j<numAtts;j++){
			if(j!=classIndex && !r.attribute(j).isNominal()){
				inst.setValue(j,(inst.value(j)-mean)/stdev);
			}
		}
	}
}
 
Example 9
Source File: DataProcessing.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * For each named problem, splits the multivariate train and test data into
 * one dataset per dimension and writes each to
 * {@code <path><prob>\<prob>Dimension<k>_TRAIN.arff} / {@code _TEST.arff}.
 * Three problems are always skipped by name.
 *
 * @param path      root directory containing one folder per problem
 * @param probs     names of the problems to process
 * @param overwrite when false, a problem whose first-dimension train file
 *                  already exists is skipped
 */
public static void makeSingleDimensionFiles(String path, String[] probs,boolean overwrite){
        for(String prob: probs){
            System.out.println("Processing "+prob);
            final boolean excluded=prob.equals("InsectWingbeat")
                    ||prob.equals("FaceDetection")
                    ||prob.equals("DuckDuckGeese");
            if(excluded){
                continue;
            }

            // All files for a problem share the prefix <path><prob>\<prob>
            final String stem=path+prob+"\\"+prob;
            final File firstDimension=new File(stem+"Dimension"+1+"_TRAIN.arff");
            if(firstDimension.exists()&&!overwrite){
                continue;
            }

            final Instances train=DatasetLoading.loadDataNullable(stem+"_TRAIN");
            final Instances test=DatasetLoading.loadDataNullable(stem+"_TEST");
            System.out.println("PROBLEM "+prob);
            System.out.println("Num train instances ="+train.numInstances());
            System.out.println("Num test instances ="+test.numInstances());
            System.out.println("num attributes (should be 2!)="+train.numAttributes());
            System.out.println("num classes="+train.numClasses());

            final Instances dims=train.instance(0).relationalValue(0);
            System.out.println(" number of dimensions "+dims.numInstances());
            System.out.println(" number of attributes per dimension "+dims.numAttributes());

            final Instances[] splitTest=MultivariateInstanceTools.splitMultivariateInstances(test);
            final Instances[] splitTrain=MultivariateInstanceTools.splitMultivariateInstances(train);
            System.out.println(" Num split files ="+splitTest.length);

            // One train and one test file per dimension, numbered from 1
            for(int dim=0;dim<splitTrain.length;dim++){
                System.out.println("Number of test instances = "+splitTest[dim].numInstances());
                final String dimStem=stem+"Dimension"+(dim+1);
                OutFile outTrain=new OutFile(dimStem+"_TRAIN.arff");
                outTrain.writeLine(splitTrain[dim].toString()+"");
                OutFile outTest=new OutFile(dimStem+"_TEST.arff");
                outTest.writeLine(splitTest[dim].toString()+"");
            }
        }
    }
 
Example 10
Source File: CitationKNN.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Calculates the normalization of each attribute: scans every instance of
 * every training bag for the per-attribute minimum and maximum, then stores
 * the scaled bounds and their difference.
 */
public void preprocessData(){
  for (int att = 0; att < m_Attributes.numAttributes(); att++) {
    double lowest = Double.POSITIVE_INFINITY;
    double highest = Double.NEGATIVE_INFINITY;

    for (int b = 0; b < m_TrainBags.numInstances(); b++) {
      // attribute 1 of a bag is the relational attribute holding its instances
      final Instances members = m_TrainBags.instance(b).relationalValue(1);
      for (int m = 0; m < members.numInstances(); m++) {
        final double v = members.instance(m).value(att);
        if (v < lowest) {
          lowest = v;
        }
        if (v > highest) {
          highest = v;
        }
      }
    }

    m_Min[att] = lowest * m_MinNorm;
    m_Max[att] = highest * m_MaxNorm;
    m_Diffs[att] = highest * m_MaxNorm - lowest * m_MinNorm;
  }
}
 
Example 11
Source File: Utils.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Appends the class values of the input Instances to the INDArray as one
 * extra column.
 *
 * @param result activations, one row per instance
 * @param input original Instances supplying the class values
 * @return activations with class value appended
 */
public static INDArray appendClasses(INDArray result, Instances input) {
  // Column vector with one entry per row of the activations
  final INDArray classColumn = Nd4j.zeros(result.shape()[0], 1);
  int row = 0;
  while (row < classColumn.length()) {
    classColumn.putScalar(row, input.instance(row).classValue());
    row++;
  }
  // Concatenate along dimension 1
  return Nd4j.concat(1, result, classColumn);
}
 
Example 12
Source File: FastElasticEnsemble.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Classifies an instance by weighted vote of the constituent classifiers,
 * each vote weighted by that classifier's entry in {@code cvAccs}. Ties for
 * the best total are broken with a fixed-seed random draw. The individual
 * predictions are stored in {@code previousPredictions}.
 *
 * @param instance   the case to classify
 * @param queryIndex passed through to each constituent classifier
 * @param cache      passed through to each constituent classifier
 * @return the winning class value
 * @throws Exception if the ensemble has not been built or a constituent fails
 */
public double classifyInstance(final Instance instance, final int queryIndex, final SequenceStatsCache cache) throws Exception{
    if(classifiers==null){
        throw new Exception("Error: classifier not built");
    }

    // Derivative version of the query, only produced when the ensemble uses one
    Instance derIns = null;
    if(this.usesDer){
        Instances holder = new Instances(derTrain,1);
        holder.add(instance);
        holder = df.process(holder);
        derIns = holder.instance(0);
    }

    final double[] classTotals = new double[train.numClasses()];
    ArrayList<Double> bsfClassVal = null;
    double bsfVote = -1;

    this.previousPredictions = new double[this.classifiers.length];

    for(int c = 0; c < classifiers.length; c++){
        final double pred;
        if(isDerivative(classifiersToUse[c])){
            pred = classifiers[c].classifyInstance(derTrain, derIns, queryIndex, cache);
        }else{
            pred = classifiers[c].classifyInstance(train, instance, queryIndex, cache);
        }
        previousPredictions[c] = pred;

        final int votedClass = (int)pred;
        try{
            classTotals[votedClass] += cvAccs[c];
        }catch(Exception e){
            System.out.println("cv accs "+cvAccs.length);
            System.out.println(pred);
            throw e;
        }

        // Track every class value currently tied for the best total
        final double runningTotal = classTotals[votedClass];
        if(runningTotal > bsfVote){
            bsfClassVal = new ArrayList<>();
            bsfClassVal.add(pred);
            bsfVote = runningTotal;
        }else if(runningTotal == bsfVote){
            bsfClassVal.add(pred);
        }
    }

    final int numTied = bsfClassVal.size();
    if(numTied>1){
        return bsfClassVal.get(new Random(46).nextInt(numTied));
    }
    return bsfClassVal.get(0);
}
 
Example 13
Source File: PLST.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
    * Transforms the instance in the prediction process before it is given to the internal
    * multi-label or multi-target classifier. The instance is passed in with the original set
    * of labels; these must be replaced with the transformed labels (attributes) so that the
    * internal classifier can predict them.
    *
    * @param x The instance to transform. Consists of features and labels.
    * @return The transformed instance. Consists of features and transformed labels.
    * @throws Exception if extracting the feature part or merging fails
    */
   @Override
   public Instance transformInstance(Instance x) throws Exception{
       // Header-only copy of x's dataset: avoids deep-copying every instance
       // just to delete them again.
       Instances tmpInst = new Instances(x.dataset(), 0);
       tmpInst.add(x);

       // Keep only the feature attributes of x
       Instances features = this.extractPart(tmpInst, false);

       // One placeholder row in the transformed label space
       Instances labels = new Instances(this.m_PatternInstances);
       labels.add(new DenseInstance(labels.numAttributes()));

       // Labels first, then features; the class index is set to the label count
       Instances result = Instances.mergeInstances(labels, features);
       result.setClassIndex(labels.numAttributes());

       return result.instance(0);
   }
 
Example 14
Source File: IBk.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Turn the list of nearest neighbors into a probability distribution.
  * The entries of {@code distances} are overwritten with their rescaled
  * values as a side effect.
  *
  * @param neighbours the list of nearest neighboring instances
  * @param distances the distances of the neighbors
  * @return the probability distribution
  * @throws Exception if computation goes wrong or has no class attribute
  */
 protected double [] makeDistribution(Instances neighbours, double[] distances)
   throws Exception {

   final double [] distribution = new double [m_NumClasses];
   double total = 0;

   // Set up a correction to the estimator
   if (m_ClassType == Attribute.NOMINAL) {
     for (int c = 0; c < m_NumClasses; c++) {
       distribution[c] = 1.0 / Math.max(1, m_Train.numInstances());
     }
     total = (double) m_NumClasses / Math.max(1, m_Train.numInstances());
   }

   for (int n = 0; n < neighbours.numInstances(); n++) {
     // Collect class counts
     final Instance current = neighbours.instance(n);

     // Rescale the distance by the number of attributes used
     final double squared = distances[n] * distances[n];
     distances[n] = squared;
     distances[n] = Math.sqrt(distances[n] / m_NumAttributesUsed);

     double weight;
     switch (m_DistanceWeighting) {
       case WEIGHT_INVERSE:
         weight = 1.0 / (distances[n] + 0.001); // to avoid div by zero
         break;
       case WEIGHT_SIMILARITY:
         weight = 1.0 - distances[n];
         break;
       default:                                 // WEIGHT_NONE:
         weight = 1.0;
         break;
     }
     weight *= current.weight();

     try {
       if (m_ClassType == Attribute.NOMINAL) {
         distribution[(int) current.classValue()] += weight;
       } else if (m_ClassType == Attribute.NUMERIC) {
         distribution[0] += current.classValue() * weight;
       }
     } catch (Exception ex) {
       throw new Error("Data has no class attribute!");
     }
     total += weight;
   }

   // Normalise distribution
   if (total > 0) {
     Utils.normalize(distribution, total);
   }
   return distribution;
 }
 
Example 15
Source File: SimpleKMeansWithSilhouette.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize using the k-means++ method. The first center is drawn uniformly
 * at random; each further center is drawn with probability proportional to
 * the instance's current distance to its closest already-chosen center.
 * Chosen centers are appended to {@code m_ClusterCentroids}.
 * 
 * @param data
 *            the training data
 * @throws Exception
 *             if a problem occurs
 */
protected void kMeansPlusPlusInit(Instances data) throws Exception {
	Random randomO = new Random(getSeed());
	// keys of the instances already chosen as centers (used as a set; values are always null)
	HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();

	// choose initial center uniformly at random
	int index = randomO.nextInt(data.numInstances());
	m_ClusterCentroids.add(data.instance(index));
	DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true);
	initC.put(hk, null);

	// index into m_ClusterCentroids of the most recently handled center
	int iteration = 0;
	int remainingInstances = data.numInstances() - 1;
	if (m_NumClusters > 1) {
		// proceed with selecting the rest

		// distances to the initial randomly chose center
		double[] distances = new double[data.numInstances()];
		double[] cumProbs = new double[data.numInstances()];
		for (int i = 0; i < data.numInstances(); i++) {
			distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration));
		}

		// now choose the remaining cluster centers
		for (int i = 1; i < m_NumClusters; i++) {

			// distances converted to probabilities
			double[] weights = new double[data.numInstances()];
			System.arraycopy(distances, 0, weights, 0, distances.length);
			Utils.normalize(weights);

			// running sum of the weights, so a single uniform draw can select an instance
			double sumOfProbs = 0;
			for (int k = 0; k < data.numInstances(); k++) {
				sumOfProbs += weights[k];
				cumProbs[k] = sumOfProbs;
			}

			cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no
														// rounding issues

			// choose a random instance
			double prob = randomO.nextDouble();
			for (int k = 0; k < cumProbs.length; k++) {
				// first instance whose cumulative probability exceeds the draw
				if (prob < cumProbs[k]) {
					Instance candidateCenter = data.instance(k);
					hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true);
					if (!initC.containsKey(hk)) {
						initC.put(hk, null);
						m_ClusterCentroids.add(candidateCenter);
					} else {
						// we shouldn't get here because any instance that is a duplicate
						// of
						// an already chosen cluster center should have zero distance (and
						// hence
						// zero probability of getting chosen) to that center.
						System.err.println("We shouldn't get here....");
					}
					// counted down whether or not the candidate was actually added
					remainingInstances--;
					break;
				}
			}
			iteration++;

			// stop early once the counter of remaining instances reaches zero
			if (remainingInstances == 0) {
				break;
			}

			// prepare to choose the next cluster center.
			// check distances against the new cluster center to see if it is closer
			// NOTE(review): this reads m_ClusterCentroids.instance(iteration) and so assumes
			// a center was added above; the duplicate branch adds none - confirm it is unreachable
			for (int k = 0; k < data.numInstances(); k++) {
				// instances at distance zero from a chosen center are left untouched
				if (distances[k] > 0) {
					double newDist = m_DistanceFunction.distance(data.instance(k),
							m_ClusterCentroids.instance(iteration));
					if (newDist < distances[k]) {
						distances[k] = newDist;
					}
				}
			}
		}
	}
}
 
Example 16
Source File: NBTreeSplit.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Creates split on numeric attribute. Builds a C4.5 split on the attribute,
  * distributes the training instances over the resulting subsets and
  * accumulates an error estimate for the split in {@code m_errors}.
  * {@code m_numSubsets} is only set when more than one subset holds more
  * than 5 instances.
  *
  * @param trainInstances the training data to split
  * @exception Exception if something goes wrong
  */
 private void handleNumericAttribute(Instances trainInstances)
      throws Exception {

   m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights, true);
   m_c45S.buildClassifier(trainInstances);
   // no usable split was found for this attribute
   if (m_c45S.numSubsets() == 0) {
     return;
   }
   m_errors = 0;

   // NOTE(review): the array has m_complexityIndex slots but only slots 0 and 1 are
   // initialised - presumably m_complexityIndex is 2 for a numeric split; confirm
   Instances [] trainingSets = new Instances [m_complexityIndex];
   trainingSets[0] = new Instances(trainInstances, 0);
   trainingSets[1] = new Instances(trainInstances, 0);
   int subset = -1;
   
   // populate the subsets
   for (int i = 0; i < trainInstances.numInstances(); i++) {
     Instance instance = trainInstances.instance(i);
     subset = m_c45S.whichSubset(instance);
     if (subset != -1) {
trainingSets[subset].add((Instance)instance.copy());
     } else {
// no single subset was assigned: add a re-weighted copy of the instance to every subset
double [] weights = m_c45S.weights(instance);
for (int j = 0; j < m_complexityIndex; j++) {
  Instance temp = (Instance)instance.copy();
  if (weights.length == m_complexityIndex) {
    temp.setWeight(temp.weight() * weights[j]);
  } else {
    // weights do not line up with the subsets: share the weight equally
    temp.setWeight(temp.weight() / m_complexityIndex);
  }
  trainingSets[j].add(temp); 
}
     }
   }
   
   /*    // compute weights (weights of instances per subset
   m_weights = new double [m_complexityIndex];
   for (int i = 0; i < m_complexityIndex; i++) {
     m_weights[i] = trainingSets[i].sumOfWeights();
   }
   Utils.normalize(m_weights); */

   // fixed seed; the same generator is shared by randomize() and crossValidate() below
   Random r = new Random(1);
   // number of subsets holding more than 5 instances
   int minNumCount = 0;
   for (int i = 0; i < m_complexityIndex; i++) {
     if (trainingSets[i].numInstances() > 5) {
minNumCount++;
// Discretize the sets
	Discretize disc = new Discretize();
disc.setInputFormat(trainingSets[i]);
trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

trainingSets[i].randomize(r);
trainingSets[i].stratify(5);
NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
fullModel.buildClassifier(trainingSets[i]);

// add the errors for this branch of the split
m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
     } else {
// small subset: no model is evaluated, the weight of every instance is added to the errors
for (int j = 0; j < trainingSets[i].numInstances(); j++) {
  m_errors += trainingSets[i].instance(j).weight();
}
     }
   }
   
   // Check if minimum number of Instances in at least two
   // subsets.
   if (minNumCount > 1) {
     m_numSubsets = m_complexityIndex;
   }
 }
 
Example 17
Source File: NaiveDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label,
 * allocates the working buffers sized by the longest sequence, and then
 * searches for the best warping window.
 *
 * @param data the training instances
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	final Attribute classAttribute = data.classAttribute();

	// One empty bucket of sequences and of indices per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		final String label = data.classAttribute().value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int idx = 0; idx < train.length; idx++) {
		final Instance sample = data.instance(idx);
		final MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);

		// Skip the leading attribute when the class is stored first
		final int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}

		final SymbolicSequence series = new SymbolicSequence(sequence);
		final String clas = sample.stringValue(classAttribute);
		train[idx] = series;
		classMap[idx] = clas;
		classedData.get(clas).add(series);
		classedDataIndices.get(clas).add(idx);
	}

	// Buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	// One slot per candidate window, from 0 up to maxWindow inclusive
	maxWindow = Math.round(1 * maxLength);
	final int numWindows = maxWindow+1;
	searchResults = new String[numWindows];
	nns = new int[numWindows][train.length];
	dist = new double[numWindows][train.length];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 18
Source File: SimpleCart.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Split data into two subsets and store sorted indices and weights for two
  * successor nodes. Instances whose split-attribute value is missing are
  * placed in every subset with a positive proportion in {@code m_Props},
  * with their weight scaled by that proportion.
  * 
  * @param subsetIndices 	sorted indices of instances for each attribute 
  * 				for two successor nodes (filled by this method)
  * @param subsetWeights 	weights of instances for each attribute for 
  * 				two successor nodes (filled by this method)
  * @param att 		attribute the split is based on
  * @param splitPoint 		split point the split is based on if att is numeric
  * @param splitStr 		split subset the split is based on if att is nominal
  * @param sortedIndices 	sorted indices of the instances to be split
  * @param weights 		weights of the instances to be split
  * @param data 		training data
  * @throws Exception 		if something goes wrong  
  */
 protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
     Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
     double[][] weights, Instances data) throws Exception {

   int j;
   // For each attribute
   for (int i = 0; i < data.numAttributes(); i++) {
     if (i==data.classIndex()) continue;
     // num[k] = number of entries written so far into subset k for attribute i
     int[] num = new int[2];
     // allocate at full length; the arrays are trimmed to num[k] at the end
     for (int k = 0; k < 2; k++) {
subsetIndices[k][i] = new int[sortedIndices[i].length];
subsetWeights[k][i] = new double[weights[i].length];
     }

     // walk the instances in attribute i's sorted order so each subset keeps that order
     for (j = 0; j < sortedIndices[i].length; j++) {
Instance inst = data.instance(sortedIndices[i][j]);
if (inst.isMissing(att)) {
  // Split instance up
  for (int k = 0; k < 2; k++) {
    if (m_Props[k] > 0) {
      subsetIndices[k][i][num[k]] = sortedIndices[i][j];
      subsetWeights[k][i][num[k]] = m_Props[k] * weights[i][j];
      num[k]++;
    }
  }
} else {
  int subset;
  if (att.isNumeric())  {
    // values below the split point go to subset 0, all others to subset 1
    subset = (inst.value(att) < splitPoint) ? 0 : 1;
  } else { // nominal attribute
    // subset 0 when splitStr contains this value wrapped in parentheses
    if (splitStr.indexOf
	("(" + att.value((int)inst.value(att.index()))+")")!=-1) {
      subset = 0;
    } else subset = 1;
  }
  subsetIndices[subset][i][num[subset]] = sortedIndices[i][j];
  subsetWeights[subset][i][num[subset]] = weights[i][j];
  num[subset]++;
}
     }

     // Trim arrays
     for (int k = 0; k < 2; k++) {
int[] copy = new int[num[k]];
System.arraycopy(subsetIndices[k][i], 0, copy, 0, num[k]);
subsetIndices[k][i] = copy;
double[] copyWeights = new double[num[k]];
System.arraycopy(subsetWeights[k][i], 0 ,copyWeights, 0, num[k]);
subsetWeights[k][i] = copyWeights;
     }
   }
 }
 
Example 19
Source File: UCRSuite.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label,
 * allocates the working buffers, the sequence statistics cache and one lazy
 * assessor per pair of training sequences, and then searches for the best
 * warping window.
 *
 * @param data the training instances
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	final Attribute classAttribute = data.classAttribute();

	// One empty bucket of sequences and of indices per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		final String label = data.classAttribute().value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int idx = 0; idx < train.length; idx++) {
		final Instance sample = data.instance(idx);
		final MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);

		// Skip the leading attribute when the class is stored first
		final int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}

		final SymbolicSequence series = new SymbolicSequence(sequence);
		final String clas = sample.stringValue(classAttribute);
		train[idx] = series;
		classMap[idx] = clas;
		classedData.get(clas).add(series);
		classedDataIndices.get(clas).add(idx);
	}

	// Buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	U1 = new double[maxLength];
	L1 = new double[maxLength];

	// One slot per candidate window, from 0 up to maxWindow inclusive
	maxWindow = Math.round(1 * maxLength);
	final int numWindows = maxWindow+1;
	searchResults = new String[numWindows];
	nns = new int[numWindows][train.length];
	dist = new double[numWindows][train.length];

	cache = new SequenceStatsCache(train, maxWindow);

	// A separate assessor for every ordered pair of training sequences
	lazyUCR = new LazyAssessNNEarlyAbandon[train.length][train.length];
	for (LazyAssessNNEarlyAbandon[] row : lazyUCR) {
		for (int col = 0; col < row.length; col++) {
			row[col] = new LazyAssessNNEarlyAbandon(cache);
		}
	}

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 20
Source File: LbKeoghPrunedDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label,
 * allocates the working buffers, and then searches for the best warping
 * window. Here {@code dist} is a square matrix over the training sequences.
 *
 * @param data the training instances
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	final Attribute classAttribute = data.classAttribute();

	// One empty bucket of sequences and of indices per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		final String label = data.classAttribute().value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int idx = 0; idx < train.length; idx++) {
		final Instance sample = data.instance(idx);
		final MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);

		// Skip the leading attribute when the class is stored first
		final int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}

		final SymbolicSequence series = new SymbolicSequence(sequence);
		final String clas = sample.stringValue(classAttribute);
		train[idx] = series;
		classMap[idx] = clas;
		classedData.get(clas).add(series);
		classedDataIndices.get(clas).add(idx);
	}

	// Buffers sized by the longest sequence
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	// One slot per candidate window, from 0 up to maxWindow inclusive
	maxWindow = Math.round(1 * maxLength);
	final int numWindows = maxWindow+1;
	searchResults = new String[numWindows];
	nns = new int[numWindows][train.length];
	dist = new double[train.length][train.length];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}