Java Code Examples for weka.core.Instances#classAttribute()

The following examples show how to use weka.core.Instances#classAttribute(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Returns true if there is at least one nominal attribute in the given dataset that has more than 2 values.
 *
 * <p>The class attribute is only inspected when {@code ignoreClassAttribute} is false and the dataset
 * actually has a class index set. A dataset without a class index is treated as having no class
 * attribute to check, instead of failing on the class-attribute lookup.
 *
 * @param wekaInstances
 *            dataset that is checked
 * @param ignoreClassAttribute
 *            if true class attribute is ignored.
 * @return true if any considered nominal attribute has at least 3 values, false otherwise
 */
public static boolean needsBinarization(final Instances wekaInstances, final boolean ignoreClassAttribute) {
	// Only look the class attribute up when a class index is set; the lookup fails otherwise.
	final Attribute classAttribute = wekaInstances.classIndex() >= 0 ? wekaInstances.classAttribute() : null;
	if (!ignoreClassAttribute && classAttribute != null && classAttribute.isNominal() && classAttribute.numValues() >= 3) {
		return true;
	}
	// iterate over every remaining attribute and check.
	for (Enumeration<Attribute> attributeEnum = wekaInstances.enumerateAttributes(); attributeEnum.hasMoreElements();) {
		Attribute currentAttr = attributeEnum.nextElement();
		if (currentAttr == classAttribute) {
			// the class attribute was either checked above (ignoreClassAttribute==false) or must be ignored.
			continue;
		}
		if (currentAttr.isNominal() && currentAttr.numValues() >= 3) {
			return true;
		}
	}
	return false;
}
 
Example 2
Source File: CaRuleGeneration.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts the header info of the given set of instances into a set
 * of item sets (singletons). The ordering of values in the header file
 * determines the lexicographic order.
 *
 * One item set is created for every value of every non-class attribute;
 * in its item array only the position of that attribute holds the value
 * index, all other positions are -1.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if the class index is not set or a numeric
 * attribute is encountered
 */
public static FastVector singletons(Instances instances) throws Exception {

  if (instances.classIndex() == -1) {
    throw new UnassignedClassException("Class index is negative (not set)!");
  }

  FastVector setOfItemSets = new FastVector();
  final int numAttributes = instances.numAttributes();
  final int classIndex = instances.classIndex();

  for (int i = 0; i < numAttributes; i++) {
    Attribute attribute = instances.attribute(i);
    // Note: this check runs for every attribute, the class attribute included.
    if (attribute.isNumeric()) {
      throw new Exception("Can't handle numeric attributes!");
    }
    if (i == classIndex) {
      continue; // no singletons are generated for the class attribute
    }
    for (int j = 0; j < attribute.numValues(); j++) {
      ItemSet current = new ItemSet(instances.numInstances());
      int[] currentItems = new int[numAttributes];
      for (int k = 0; k < numAttributes; k++) {
        currentItems[k] = -1;
      }
      currentItems[i] = j;
      current.setItem(currentItems);
      setOfItemSets.addElement(current);
    }
  }
  return setOfItemSets;
}
 
Example 3
Source File: PerformanceKnowledgeBase.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates an instance for the dataset stored under the given benchmark and the component of the given
 * component instance. Every attribute except the last one is filled from the parameter whose name follows
 * the first "::" in the attribute name; the class attribute receives the score.
 *
 * @param benchmarkName
 *            key of the benchmark whose per-component datasets are used
 * @param ci
 *            component instance providing the parameter values
 * @param score
 *            value written into the class attribute
 * @return the populated instance (it is not added to the dataset here)
 */
public Instance getInstanceForIndividualCI(final String benchmarkName, final ComponentInstance ci, final double score) {
	Instances dataset = this.performanceInstancesIndividualComponents.get(benchmarkName).get(ci.getComponent().getName());
	DenseInstance result = new DenseInstance(dataset.numAttributes());
	for (int attrIndex = 0; attrIndex < dataset.numAttributes() - 1; attrIndex++) {
		Attribute attr = dataset.attribute(attrIndex);
		String qualifiedName = attr.name();
		// strip everything up to and including the first "::" to obtain the parameter name
		Parameter param = ci.getComponent().getParameterWithName(qualifiedName.substring(qualifiedName.indexOf("::") + 2));

		// explicitly set parameters use their configured value, all others fall back to the default
		String value = ci.getParametersThatHaveBeenSetExplicitly().contains(param)
				? ci.getParameterValues().get(param.getName())
				: param.getDefaultValue().toString();
		if (value == null) {
			continue;
		}

		if (param.isCategorical()) {
			// only write the value if the attribute declares it
			boolean known = false;
			for (Enumeration<Object> declared = attr.enumerateValues(); declared.hasMoreElements() && !known;) {
				known = declared.nextElement().equals(value);
			}
			if (known) {
				result.setValue(attr, value);
			}
		} else if (param.isNumeric()) {
			result.setValue(attr, Double.parseDouble(value));
		}
	}
	result.setValue(dataset.classAttribute(), score);
	return result;
}
 
Example 4
Source File: PLSNominalClassifier.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies the training data into a dataset whose class attribute is replaced by a freshly created
 * attribute of the same name, then delegates the model fitting to the superclass.
 *
 * @param data the training data; a copy is made before anything is changed
 * @throws Exception if the superclass fails to build the model
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    Instances train = new Instances(data);

    // remember the original class information in the fields
    numClasses = train.numClasses();
    classind = train.classIndex();
    classAttribute = train.classAttribute();

    // same attributes as the input, except that the class slot gets a new attribute built from its name only
    final int attributeCount = train.numAttributes();
    FastVector<Attribute> atts = new FastVector<>(attributeCount);
    for (int a = 0; a < attributeCount; a++) {
        if (a == classind) {
            atts.add(new Attribute(train.attribute(a).name()));
        } else {
            atts.add(train.attribute(a));
        }
    }

    final int instanceCount = train.numInstances();
    Instances converted = new Instances(train.relationName(), atts, instanceCount);
    converted.setClassIndex(classind);

    // copy every instance value-by-value (with weight 1.0) and carry over the class value
    for (int n = 0; n < instanceCount; n++) {
        converted.add(new DenseInstance(1.0, train.instance(n).toDoubleArray()));
        converted.instance(n).setClassValue(train.instance(n).classValue());
    }

    // dataset is in the proper format, now do the model fitting as normal
    super.buildClassifier(converted);
}
 
Example 5
Source File: ClassOrder.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the format of the input instances and resets the filter state
 * (class attribute, random generator, converter and per-class counters).
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return always false, i.e. the output format cannot be collected immediately
 * @throws Exception if the superclass rejects the format or the class
 * attribute cannot be obtained
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  // only the header is handed to the superclass
  Instances header = new Instances(instanceInfo, 0);
  super.setInputFormat(header);

  m_ClassAttribute = instanceInfo.classAttribute();
  m_Converter = null;
  m_Random = new Random(m_Seed);
  m_ClassCounts = new double[instanceInfo.numClasses()];

  return false;
}
 
Example 6
Source File: OneR.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructor for numeric attribute.
 *
 * Allocates {@code nBreaks} classification slots and {@code nBreaks - 1}
 * breakpoints; the last interval has no explicit breakpoint.
 *
 * @param data the data to work with
 * @param attribute the attribute to use
 * @param nBreaks the number of intervals the attribute is split into
 * @throws Exception if something goes wrong
 */
public OneRRule(Instances data, Attribute attribute, int nBreaks) throws Exception {

  m_attr = attribute;
  m_correct = 0;
  m_class = data.classAttribute();
  m_numInst = data.numInstances();

  m_classifications = new int[nBreaks];
  // last breakpoint is infinity, so one slot less is needed
  m_breakpoints = new double[nBreaks - 1];
}
 
Example 7
Source File: OneR.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructor for nominal attribute.
 * 
 * @param data the data to work with
 * @param attribute the attribute to use
 * @throws Exception if something goes wrong
 */
public OneRRule(Instances data, Attribute attribute) throws Exception {

  m_class = data.classAttribute();
  m_numInst = data.numInstances();
  m_attr = attribute;
  m_correct = 0;
  m_classifications = new int[m_attr.numValues()];
}
 
Example 8
Source File: SimpleCart.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Turns this node into a leaf: clears the split attribute and predicts
 * the class with the highest entry in m_ClassProbs.
 *
 * @param data 	training data; only its class attribute is used
 */
protected void makeLeaf(Instances data) {
  m_isLeaf = true;
  m_Attribute = null;

  final int mostProbableClass = Utils.maxIndex(m_ClassProbs);
  m_ClassValue = mostProbableClass;

  m_ClassAttribute = data.classAttribute();
}
 
Example 9
Source File: Ridor.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a single rule learner with REP dealing with 2 classes.
 * This rule learner always tries to predict the class with label
 * m_Class.
 *
 * The data is copied, instances with a missing class are removed, and the
 * remainder is shuffled, stratified and split into a grow set and a prune
 * set (the last of m_Folds folds).
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  m_ClassAttribute = instances.classAttribute();
  if (!m_ClassAttribute.isNominal()) {
    throw new UnsupportedClassTypeException(" Only nominal class, please.");
  }
  if (instances.numClasses() != 2) {
    throw new Exception(" Only 2 classes, please.");
  }

  Instances data = new Instances(instances);
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" No training data.");
  }

  data.deleteWithMissingClass();
  if (Utils.eq(data.sumOfWeights(), 0)) {
    throw new Exception(" The class labels of all the training data are missing.");
  }

  if (data.numInstances() < m_Folds) {
    throw new Exception(" Not enough data for REP.");
  }

  m_Antds = new FastVector();

  // Shuffle and stratify, then use the last fold for pruning and the rest for growing
  m_Random = new Random(m_Seed);
  data.randomize(m_Random);
  data.stratify(m_Folds);
  final int pruneFold = m_Folds - 1;
  Instances growData = data.trainCV(m_Folds, pruneFold, m_Random);
  Instances pruneData = data.testCV(m_Folds, pruneFold);

  grow(growData);   // Build this rule
  prune(pruneData); // Prune this rule
}
 
Example 10
Source File: BFTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts this node into a leaf node. The split attribute is dropped and
 * the predicted class becomes the index of the largest value in m_ClassProbs.
 *
 * @param data 	training data; only its class attribute is used
 */
protected void makeLeaf(Instances data) {
  m_isLeaf = true;
  m_Attribute = null;

  final int bestClassIndex = Utils.maxIndex(m_ClassProbs);
  m_ClassValue = bestClassIndex;

  m_ClassAttribute = data.classAttribute();
}
 
Example 11
Source File: Id3.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Recursively builds an Id3 tree for the given data.
 *
 * An empty dataset yields a node with a missing class value. Otherwise the
 * attribute with the highest information gain is selected; if that gain is
 * zero the node becomes a leaf holding the normalised class distribution,
 * else one successor is built per value of the selected attribute.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

  // Nothing reached this node: predict "missing".
  if (data.numInstances() == 0) {
    m_Attribute = null;
    m_ClassValue = Utils.missingValue();
    m_Distribution = new double[data.numClasses()];
    return;
  }

  // Information gain per attribute, stored at the attribute's own index.
  double[] gains = new double[data.numAttributes()];
  for (Enumeration attEnum = data.enumerateAttributes(); attEnum.hasMoreElements();) {
    Attribute candidate = (Attribute) attEnum.nextElement();
    gains[candidate.index()] = computeInfoGain(data, candidate);
  }
  Attribute best = data.attribute(Utils.maxIndex(gains));

  // Positive gain: split on the best attribute and recurse.
  if (!Utils.eq(gains[best.index()], 0)) {
    m_Attribute = best;
    Instances[] partitions = splitData(data, m_Attribute);
    m_Successors = new Id3[m_Attribute.numValues()];
    for (int v = 0; v < m_Attribute.numValues(); v++) {
      m_Successors[v] = new Id3();
      m_Successors[v].makeTree(partitions[v]);
    }
    return;
  }

  // Zero gain: make a leaf from the class distribution of the data.
  m_Attribute = null;
  m_Distribution = new double[data.numClasses()];
  for (int n = 0; n < data.numInstances(); n++) {
    Instance inst = data.instance(n);
    m_Distribution[(int) inst.classValue()]++;
  }
  Utils.normalize(m_Distribution);
  m_ClassValue = Utils.maxIndex(m_Distribution);
  m_ClassAttribute = data.classAttribute();
}
 
Example 12
Source File: ConjunctiveRule.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a single rule learner with REP dealing with nominal classes or
 * numeric classes.
 * For nominal classes, this rule learner predicts a distribution on
 * the classes.
 * For numeric classes, this learner predicts a single value.
 *
 * When m_NumAntds is -1 the data is shuffled, stratified and split into a
 * grow set and a prune set; otherwise the rule is grown on all data and
 * not pruned.
 *
 * @param instances the training data
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(instances);

  // work on a copy without instances whose class is missing
  Instances data = new Instances(instances);
  data.deleteWithMissingClass();

  if (data.numInstances() < m_Folds) {
    throw new Exception("Not enough data for REP.");
  }

  m_ClassAttribute = data.classAttribute();
  m_NumClasses = m_ClassAttribute.isNominal() ? m_ClassAttribute.numValues() : 1;

  m_Antds = new FastVector();
  m_DefDstr = new double[m_NumClasses];
  m_Cnsqt = new double[m_NumClasses];
  m_Targets = new FastVector();
  m_Random = new Random(m_Seed);

  if (m_NumAntds == -1) {
    data.randomize(m_Random);

    // Split data into Grow and Prune
    data.stratify(m_Folds);
    final int pruneFold = m_Folds - 1;
    Instances growData = data.trainCV(m_Folds, pruneFold, m_Random);
    Instances pruneData = data.testCV(m_Folds, pruneFold);

    grow(growData);   // Build this rule
    prune(pruneData); // Prune this rule
  } else {
    grow(data);
  }

  if (m_ClassAttribute.isNominal()) {
    Utils.normalize(m_Cnsqt);
    if (Utils.gr(Utils.sum(m_DefDstr), 0)) {
      Utils.normalize(m_DefDstr);
    }
  }
}
 
Example 13
Source File: LbKeoghPrunedDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label, allocates the
 * working buffers and runs the warping-window search.
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = classAttribute.value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	final int numSeries = data.numInstances();
	train = new SymbolicSequence[numSeries];
	classMap = new String[numSeries];
	maxLength = 0;
	for (int i = 0; i < numSeries; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] points = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, points.length);
		// skip the class attribute when it is stored first
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < points.length; t++) {
			points[t] = new MonoDoubleItemSet(sample.value(t + offset));
		}
		train[i] = new SymbolicSequence(points);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest series
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	maxWindow = Math.round(1 * maxLength);
	final int windowSlots = maxWindow + 1;
	searchResults = new String[windowSlots];
	nns = new int[windowSlots][numSeries];
	dist = new double[numSeries][numSeries];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 14
Source File: DataStore.java    From AIDR with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds a dataset with the structure of {@code headerSet} from tokenised documents and their labels.
 *
 * <p>The last attribute of the header is used as the class attribute. For every document, each word that
 * matches an attribute name is set to 1, the class attribute is set to the document's label, and all
 * remaining (missing) values are replaced by 0. Documents whose label is not a value of the class
 * attribute are logged and skipped.
 *
 * @param headerSet
 *            dataset providing the attribute structure
 * @param wordVectors
 *            one word array per document
 * @param labels
 *            one class label per document, in the same order as {@code wordVectors}
 * @return the populated dataset (may hold fewer instances than documents were given)
 * @throws Exception
 *             if the number of documents and labels differ
 */
static Instances createFormattedInstances(Instances headerSet,
		ArrayList<String[]> wordVectors, ArrayList<String> labels)
				throws Exception {

	if (wordVectors.size() != labels.size()) {
		throw new IllegalArgumentException("Number of word vectors (" + wordVectors.size()
				+ ") does not match number of labels (" + labels.size() + ")");
	}

	// Create the dataset
	Instances instances = new Instances(headerSet, wordVectors.size());
	// all-zero replacement values for attributes left unset on an instance
	double[] missingVal = new double[headerSet.numAttributes()];

	// Set class index
	instances.setClassIndex(headerSet.numAttributes() - 1);
	Attribute classAttribute = instances.classAttribute();

	// Get valid class labels
	HashSet<String> classValues = new HashSet<String>();
	Enumeration<?> classEnum = classAttribute.enumerateValues();
	while (classEnum.hasMoreElements()) {
		classValues.add((String) classEnum.nextElement());
	}

	// Add each document as an instance
	for (int i = 0; i < wordVectors.size(); i++) {
		String label = labels.get(i);

		if (!classValues.contains(label)) {
			/*
			 * TODO: Handle unseen labels in a better way, as this will
			 * over-estimate classification performance. Adding new values
			 * to class attributes requires recreation of the header and
			 * copying of all data to a new Instances. See:
			 * http://comments.gmane.org/gmane.comp.ai.weka/7806
			 */
			logger.error("New class label found in evaluation set. Discarding value.");
			continue;
		}

		Instance item = new DenseInstance(instances.numAttributes());
		item.setDataset(instances);
		// Words: only words that are attributes of the header are represented
		for (String word : wordVectors.get(i)) {
			Attribute attribute = instances.attribute(word);
			if (attribute != null) {
				item.setValue(attribute, 1);
			}
		}

		item.setValue(classAttribute, label);
		item.replaceMissingValues(missingVal);
		instances.add(item);
	}

	return instances;
}
 
Example 15
Source File: NaiveDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label, allocates the
 * working buffers and runs the warping-window search.
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = classAttribute.value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	final int numSeries = data.numInstances();
	train = new SymbolicSequence[numSeries];
	classMap = new String[numSeries];
	maxLength = 0;
	for (int i = 0; i < numSeries; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] points = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, points.length);
		// skip the class attribute when it is stored first
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < points.length; t++) {
			points[t] = new MonoDoubleItemSet(sample.value(t + offset));
		}
		train[i] = new SymbolicSequence(points);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest series
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];

	// per-window result tables: one row for each window size 0..maxWindow
	maxWindow = Math.round(1 * maxLength);
	final int windowSlots = maxWindow + 1;
	searchResults = new String[windowSlots];
	nns = new int[windowSlots][numSeries];
	dist = new double[windowSlots][numSeries];

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 16
Source File: WindowSearcher.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * This is similar to buildClassifier but it is an estimate.
 * This is used for large dataset where it takes very long to run.
 * The main purpose of this is to get the run time and not actually search for the best window.
 * We use this to draw Figure 1 of our SDM18 paper
 *
 * Only the first {@code estimate} training sequences are used as queries; each query is still
 * compared against the whole training set for every window size.
 *
 * @param data     the training data; the class attribute is expected at index 0 or at the end
 * @param estimate number of leading training sequences to evaluate; values larger than the
 *                 training set are capped at its size
 * @throws Exception if the training fails
 */
public void buildClassifierEstimate(Instances data, int estimate) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    // one (initially empty) bucket per class label
    classedData = new HashMap <>();
    classedDataIndices = new HashMap <>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(classAttribute.value(c), new ArrayList <SymbolicSequence>());
        classedDataIndices.put(classAttribute.value(c), new ArrayList <Integer>());
    }

    train = new SymbolicSequence[data.numInstances()];
    classMap = new String[train.length];
    maxLength = 0;
    for (int i = 0; i < train.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, sequence.length);
        // skip the class attribute when it is stored first
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        train[i] = new SymbolicSequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(train[i]);
        classedDataIndices.get(clas).add(i);
    }

    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    maxWindow = Math.round(1 * maxLength);
    searchResults = new String[maxWindow + 1];
    nns = new int[maxWindow + 1][train.length];
    dist = new double[maxWindow + 1][train.length];

    int[] nErrors = new int[maxWindow + 1];
    double[] score = new double[maxWindow + 1];
    // NOTE(review): this is a local variable; a bestScore field, if one exists, is not updated here.
    double bestScore = Double.MAX_VALUE;
    double minD;
    bestWarpingWindow = -1;

    // Never use more queries than there are training sequences; otherwise train[i] below
    // would run out of bounds after the fields above have already been re-initialised.
    final int nQueries = Math.min(estimate, train.length);

    // Start searching for the best window.
    // Only loop through a given size of the dataset, but still search for NN from the whole train
    // for every sequence in train, we find NN for all window
    // then in the end, update the best score
    for (int i = 0; i < nQueries; i++) {
        SymbolicSequence testSeq = train[i];

        for (int w = 0; w <= maxWindow; w++) {
            testSeq.LB_KeoghFillUL(w, U, L);

            minD = Double.MAX_VALUE;
            String classValue = null;
            for (int j = 0; j < train.length; j++) {
                if (i == j)
                    continue;
                SymbolicSequence trainSeq = train[j];
                // the full DTW is only computed when the lower bound does not already rule the candidate out
                if (SymbolicSequence.LB_KeoghPreFilled(trainSeq, U, L) < minD) {
                    double tmpD = testSeq.DTW(trainSeq, w, warpingMatrix);
                    if (tmpD < minD) {
                        minD = tmpD;
                        classValue = classMap[j];
                        nns[w][i] = j;
                    }
                    dist[w][j] = tmpD * tmpD;
                }
            }
            if (classValue == null || !classValue.equals(classMap[i])) {
                nErrors[w]++;
            }
            // NOTE(review): errors are divided by the full training size, not by the number of
            // queries evaluated - confirm this is intended for the estimate.
            score[w] = 1.0 * nErrors[w] / train.length;
        }
    }

    // NOTE(review): the scan stops before w == maxWindow although score[maxWindow] is filled above.
    for (int w = 0; w < maxWindow; w++) {
        if (score[w] < bestScore) {
            bestScore = score[w];
            bestWarpingWindow = w;
        }
    }

    // Saving best windows found
    System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1 - bestScore));
}
 
Example 17
Source File: WindowSearcher.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label, allocates the
 * working buffers, runs the warping-window search and, when no window percentage has been
 * set (negative value), derives it from the best window found.
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    // one (initially empty) bucket per class label
    classedData = new HashMap <>();
    classedDataIndices = new HashMap <>();
    for (int c = 0; c < data.numClasses(); c++) {
        String label = classAttribute.value(c);
        classedData.put(label, new ArrayList <SymbolicSequence>());
        classedDataIndices.put(label, new ArrayList <Integer>());
    }

    final int numSeries = data.numInstances();
    train = new SymbolicSequence[numSeries];
    classMap = new String[numSeries];
    maxLength = 0;
    for (int i = 0; i < numSeries; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] points = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, points.length);
        // skip the class attribute when it is stored first
        int offset = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < points.length; t++) {
            points[t] = new MonoDoubleItemSet(sample.value(t + offset));
        }
        train[i] = new SymbolicSequence(points);

        String label = sample.stringValue(classAttribute);
        classMap[i] = label;
        classedData.get(label).add(train[i]);
        classedDataIndices.get(label).add(i);
    }

    // working buffers sized by the longest series
    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    // per-window result tables: one row for each window size 0..maxWindow
    maxWindow = Math.round(1 * maxLength);
    final int windowSlots = maxWindow + 1;
    nns = new int[windowSlots][numSeries];
    dist = new double[windowSlots][numSeries];

    // Start searching for the best window
    searchBestWarpingWindow();

    // if we are doing length, find the best window in percentage
    if (bestWindowPercent < 0) {
        bestWindowPercent = lengthToPercent(bestWarpingWindow);
    }

    // Saving best windows found
    System.out.println("Windows found=" + bestWarpingWindow +
            "(" + bestWindowPercent + ") Best Acc=" + (1 - bestScore));
}
 
Example 18
Source File: UCRSuitePrunedDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label, allocates the
 * working buffers, the sequence statistics cache and one lazy nearest-neighbour assessor per
 * pair of training sequences, then runs the warping-window search.
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = classAttribute.value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	final int numSeries = data.numInstances();
	train = new SymbolicSequence[numSeries];
	classMap = new String[numSeries];
	maxLength = 0;
	for (int i = 0; i < numSeries; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] points = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, points.length);
		// skip the class attribute when it is stored first
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < points.length; t++) {
			points[t] = new MonoDoubleItemSet(sample.value(t + offset));
		}
		train[i] = new SymbolicSequence(points);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest series
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	U1 = new double[maxLength];
	L1 = new double[maxLength];

	maxWindow = Math.round(1 * maxLength);
	final int windowSlots = maxWindow + 1;
	searchResults = new String[windowSlots];
	nns = new int[windowSlots][numSeries];
	dist = new double[numSeries][numSeries];

	cache = new SequenceStatsCache(train, maxWindow);

	// one assessor for every ordered pair of training sequences, all sharing the cache
	lazyUCR = new LazyAssessNNEarlyAbandon[numSeries][numSeries];
	for (LazyAssessNNEarlyAbandon[] row : lazyUCR) {
		for (int j = 0; j < row.length; j++) {
			row[j] = new LazyAssessNNEarlyAbandon(cache);
		}
	}

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 19
Source File: Trillion.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label and then searches
 * for the warping window with the lowest leave-one-out nearest-neighbour error.
 *
 * Every window size from 0 to maxWindow is tried. For each training sequence (the query) all
 * other training sequences are candidates; a candidate only reaches the full DTW computation
 * if the checks done through doLBKim, doLBKeoghQR and doLBKeoghRQ leave the field minDist below
 * the best distance found so far (presumably these helpers compute lower bounds into minDist -
 * they are defined outside this method, verify there).
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
	}

	// turn every instance into a sequence of its non-class values and file it under its label
	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		// skip the class attribute when it is stored first
		int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}
		train[i] = new SymbolicSequence(sequence);
		String clas = sample.stringValue(classAttribute);
		classMap[i] = clas;
		classedData.get(clas).add(train[i]);
		classedDataIndices.get(clas).add(i);
	}

	U = new double[maxLength];
	L = new double[maxLength];
	maxWindow = Math.round(1 * maxLength);
	cache = new SequenceStatsCache(train, maxWindow);

	int nbErrors = 0;
	double score;
	bestScore = Double.MAX_VALUE;
	bestWarpingWindow=-1;

	// Start searching for the best window
	for (int w = 0; w <= maxWindow; w++) {
		currentW = w;
		nbErrors = 0;
		for (int i = 0; i < train.length; i++) {
			// query, indexQuery, reference and indexReference are fields, presumably read by the doLB* helpers
			query = train[i];
			indexQuery = i;
			bestMinDist = Double.MAX_VALUE;
			String classValue = null;
			for (int j = 0; j < train.length; j++) {
				// leave-one-out: a sequence is never its own neighbour
				if (i==j)
					continue;
				reference = train[j];
				indexReference = j;

				// LB Kim
				doLBKim();
				if (minDist < bestMinDist) {
					// reset the shared state before the next check
					minDist = 0;
					indexStoppedLB = 0;
					// LB Keogh(Q,R)
					doLBKeoghQR(bestMinDist);
					if (minDist < bestMinDist) {
						minDist = 0;
						indexStoppedLB = 0;
						// LB Keogh(R,Q)
						doLBKeoghRQ(bestMinDist);
						if (minDist < bestMinDist) {
							// DTW
							double res = query.DTW(reference, currentW);
							// distances are compared in squared form
							minDist = res * res;
							if(minDist < bestMinDist){
								bestMinDist = minDist;
								classValue = classMap[j];
							}
						}
					}
				}
			}
			// no neighbour found or wrong label counts as an error
			if (classValue == null || !classValue.equals(classMap[i])) {
				nbErrors++;
			}
		}
		// error rate for this window; the first window with the strictly lowest rate wins
		score = 1.0 * nbErrors / train.length;
		if (score < bestScore) {
			bestScore = score;
			bestWarpingWindow = w;
		}
	}

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 20
Source File: UCRSuite.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the training data into symbolic sequences grouped by class label, allocates the
 * working buffers, the sequence statistics cache and one lazy nearest-neighbour assessor per
 * pair of training sequences, then runs the warping-window search.
 *
 * @param data the training data; the class attribute is expected at index 0 or at the end
 * @throws Exception if the training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	// one (initially empty) bucket per class label
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		String label = classAttribute.value(c);
		classedData.put(label, new ArrayList<SymbolicSequence>());
		classedDataIndices.put(label, new ArrayList<Integer>());
	}

	final int numSeries = data.numInstances();
	train = new SymbolicSequence[numSeries];
	classMap = new String[numSeries];
	maxLength = 0;
	for (int i = 0; i < numSeries; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] points = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, points.length);
		// skip the class attribute when it is stored first
		int offset = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < points.length; t++) {
			points[t] = new MonoDoubleItemSet(sample.value(t + offset));
		}
		train[i] = new SymbolicSequence(points);

		String label = sample.stringValue(classAttribute);
		classMap[i] = label;
		classedData.get(label).add(train[i]);
		classedDataIndices.get(label).add(i);
	}

	// working buffers sized by the longest series
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	U1 = new double[maxLength];
	L1 = new double[maxLength];

	// per-window result tables: one row for each window size 0..maxWindow
	maxWindow = Math.round(1 * maxLength);
	final int windowSlots = maxWindow + 1;
	searchResults = new String[windowSlots];
	nns = new int[windowSlots][numSeries];
	dist = new double[windowSlots][numSeries];

	cache = new SequenceStatsCache(train, maxWindow);

	// one assessor for every ordered pair of training sequences, all sharing the cache
	lazyUCR = new LazyAssessNNEarlyAbandon[numSeries][numSeries];
	for (LazyAssessNNEarlyAbandon[] row : lazyUCR) {
		for (int j = 0; j < row.length; j++) {
			row[j] = new LazyAssessNNEarlyAbandon(cache);
		}
	}

	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}