Java Code Examples for weka.core.Instances#addAll()

The following examples show how to use weka.core.Instances#addAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: UnsupervisedShapelets.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args) throws Exception{
    String dataset = "Trace";
    Instances inst = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\"+dataset+"\\"+dataset+"_TRAIN.arff");
    Instances inst2 = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\"+dataset+"\\"+dataset+"_TEST.arff");
    inst.setClassIndex(inst.numAttributes()-1);
    inst.addAll(inst2);

    UnsupervisedShapelets us = new UnsupervisedShapelets();
    us.seed = 0;
    us.k = inst.numClasses();
    us.buildClusterer(inst);

    System.out.println(us.clusters.length);
    System.out.println(Arrays.toString(us.assignments));
    System.out.println(Arrays.toString(us.clusters));
    System.out.println(randIndex(us.assignments, inst));
}
 
Example 2
Source File: DictClusterer.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args) throws Exception{
        String dataset = "Trace";
        Instances inst = DatasetLoading.loadDataNullable("D:\\CMP Machine Learning\\Datasets\\TSC Archive\\" + dataset + "/" + dataset + "_TRAIN.arff");
        Instances inst2 = DatasetLoading.loadDataNullable("D:\\CMP Machine Learning\\Datasets\\TSC Archive\\" + dataset + "/" + dataset + "_TEST.arff");
//        Instances inst = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
//        Instances inst2 = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
        inst.setClassIndex(inst.numAttributes()-1);
        inst.addAll(inst2);

        DictClusterer k = new DictClusterer();
        k.seed = 0;
        k.k = inst.numClasses();
        k.buildClusterer(inst);

        System.out.println(k.clusters.length);
        System.out.println(Arrays.toString(k.clusters));
        System.out.println(randIndex(k.assignments, inst));
    }
 
Example 3
Source File: TTC.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args) throws Exception{
        String dataset = "Trace";
        Instances inst = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
        Instances inst2 = DatasetLoading.loadDataNullable("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
//        Instances inst = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
//        Instances inst2 = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
        inst.setClassIndex(inst.numAttributes()-1);
        inst.addAll(inst2);

        TTC k = new TTC();
        k.seed = 0;
        k.k = inst.numClasses();
        k.buildClusterer(inst);

        System.out.println(k.clusters.length);
        System.out.println(Arrays.toString(k.clusters));
        System.out.println(randIndex(k.assignments, inst));
    }
 
Example 4
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
* 
* @param all full data set
* @param seed random seed so that the split can be exactly duplicated
* @param propInTrain proportion of data for training
* @return 
*/
   public static Instances[] resampleInstances(Instances all, long seed, double propInTrain){
       ClassCounts classDist = new TreeSetClassCounts(all);
       Map<Double, Instances> classBins = createClassInstancesMap(all);
      
       Random r = new Random(seed);
       //empty instances.
       Instances outputTrain = new Instances(all, 0);
       Instances outputTest = new Instances(all, 0);

       Iterator<Double> keys = classBins.keySet().iterator();
       while(keys.hasNext()){  //For each class value
           double classVal = keys.next();
           //Get the number of this class to put in train and test
           int classCount = classDist.get(classVal);
           int occurences=(int)(classCount*propInTrain);
           Instances bin = classBins.get(classVal);
           bin.randomize(r); //randomise the instances in this class.

           outputTrain.addAll(bin.subList(0,occurences));//copy the first portion of the bin into the train set
           outputTest.addAll(bin.subList(occurences, bin.size()));//copy the remaining portion of the bin into the test set.
       }

       return new Instances[]{outputTrain,outputTest};
   }
 
Example 5
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
public static Instances subSampleFixedProportion(Instances data, double proportion, long seed){
   Map<Double, Instances> classBins = createClassInstancesMap(data);
   ClassCounts trainDistribution = new TreeSetClassCounts(data);
   
   Random r = new Random(seed);

   //empty instances.
   Instances output = new Instances(data, 0);

   Iterator<Double> keys = trainDistribution.keySet().iterator();
   while(keys.hasNext()){
       double classVal = keys.next();
       int occurences = trainDistribution.get(classVal);
       int numInstances = (int) (proportion * occurences);
       Instances bin = classBins.get(classVal);
       bin.randomize(r); //randomise the bin.

       output.addAll(bin.subList(0,numInstances));//copy the first portion of the bin into the train set
   }
   return output; 
}
 
Example 6
Source File: AllPairsTable.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
public AllPairsTable(final Instances training, final Instances validation, final Classifier c) throws Exception {
	Collection<String> classes = WekaUtil.getClassesActuallyContainedInDataset(training);
	for (Collection<String> set : SetUtil.getAllPossibleSubsetsWithSize(classes, 2)) {
		List<String> pair = set.stream().sorted().collect(Collectors.toList());
		String a = pair.get(0);
		String b = pair.get(1);
		Instances trainingData = WekaUtil.getInstancesOfClass(training, a);
		trainingData.addAll(WekaUtil.getInstancesOfClass(training, b));

		c.buildClassifier(trainingData);

		Instances validationData = WekaUtil.getInstancesOfClass(validation, a);
		validationData.addAll(WekaUtil.getInstancesOfClass(validation, b));
		Evaluation eval = new Evaluation(trainingData);
		eval.evaluateModel(c, validationData);


		if (!this.separabilities.containsKey(a)) {
			this.separabilities.put(a, new HashMap<>());
		}
		this.separabilities.get(a).put(b, eval.pctCorrect() / 100);

	}
	this.classCount = WekaUtil.getNumberOfInstancesPerClass(training);
	this.sum = training.size();
}
 
Example 7
Source File: ExtendedRandomTreeTest.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
public Instances getTrainingData() {
	List<Instance> instances = new ArrayList<>();
	for (double i = lowerBound; i < upperBound; i += stepSize) {
		Instance instance = new DenseInstance(2);
		instance.setValue(0, i);
		instance.setValue(1, this.fun.apply(i));
		instances.add(instance);
	}
	ArrayList<Attribute> attributes = new ArrayList<>();
	attributes.add(0, new Attribute("xVal"));
	attributes.add(1, new Attribute("yVal"));
	Instances inst = new Instances("test", attributes, instances.size());
	inst.addAll(instances);
	inst.setClassIndex(1);
	return inst;
}
 
Example 8
Source File: ExtendedM5TreeTest.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
public Instances getTrainingData() {
	List<Instance> instances = new ArrayList<>();
	for (double i = lowerBound; i < upperBound; i += stepSize) {
		Instance instance = new DenseInstance(2);
		instance.setValue(0, i);
		instance.setValue(1, this.fun.apply(i));
		instances.add(instance);
	}
	ArrayList<Attribute> attributes = new ArrayList<>();
	attributes.add(0, new Attribute("xVal"));
	attributes.add(1, new Attribute("yVal"));
	Instances inst = new Instances("test", attributes, instances.size());
	inst.addAll(instances);
	inst.setClassIndex(1);
	return inst;
}
 
Example 9
Source File: KShape.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
public static void main(String[] args) throws Exception{
//        double[] d = {1,2,3,4,5,6,7,8,9,10};
//        DenseInstance inst1 = new DenseInstance(1, d);
//        
//        double[] d2 = {-1,-1,-1,1,1,1,2,2,2,2,3,3,3};
//        DenseInstance inst2 = new DenseInstance(1, d2);
//        
//        ArrayList<Attribute> atts = new ArrayList();
//        for (int i = 0; i < d2.length; i++){
//            atts.add(new Attribute("att" + i));
//        }
//        Instances data = new Instances("test", atts, 0);
//        inst1.setDataset(data);
//        inst2.setDataset(data);
//        
//        SBD sbd = new SBD(inst1, inst2);
//        
//        System.out.println(sbd.dist);
//        System.out.println(sbd.yShift);

        String dataset = "Trace";
        Instances inst = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\" + dataset + "/" + dataset + "_TRAIN.arff");
        Instances inst2 = DatasetLoading.loadDataNullable("Z:\\ArchiveData\\Univariate_arff\\" + dataset + "/" + dataset + "_TEST.arff");
//        Instances inst = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TRAIN.arff");
//        Instances inst2 = ClassifierTools.loadData("Z:\\Data\\TSCProblems2018\\" + dataset + "/" + dataset + "_TEST.arff");
        inst.setClassIndex(inst.numAttributes()-1);
        inst.addAll(inst2);

        KShape k = new KShape();
        k.seed = 0;
        k.k = inst.numClasses();
        k.buildClusterer(inst);

        System.out.println(k.clusters.length);
        System.out.println(Arrays.toString(k.assignments));
        System.out.println(Arrays.toString(k.clusters));
        System.out.println(randIndex(k.assignments, inst));
    }
 
Example 10
Source File: DatasetLoading.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * If the dataset loaded has a first attribute whose name _contains_ the string "experimentsSplitAttribute".toLowerCase()
 * then it will be assumed that we want to perform a leave out one X cross validation. Instances are sampled such that fold N is comprised of
 * a test set with all instances with first-attribute equal to the Nth unique value in a sorted list of first-attributes. The train
 * set would be all other instances. The first attribute would then be removed from all instances, so that they are not given
 * to the classifier to potentially learn from. It is up to the user to ensure the the foldID requested is within the range of possible
 * values 1 to numUniqueFirstAttValues
 *
 * @return new Instances[] { trainSet, testSet };
 */
public static Instances[] splitDatasetByFirstAttribute(Instances all, int foldId) {
    TreeMap<Double, Integer> splitVariables = new TreeMap<>();
    for (int i = 0; i < all.numInstances(); i++) {
        //even if it's a string attribute, this val corresponds to the index into the array of possible strings for this att
        double key= all.instance(i).value(0);
        Integer val = splitVariables.get(key);
        if (val == null)
            val = 0;
        splitVariables.put(key, ++val);
    }

    //find the split attribute value to keep for testing this fold
    double idToReserveForTestSet = -1;
    int testSize = -1;
    int c = 0;
    for (Map.Entry<Double, Integer> splitVariable : splitVariables.entrySet()) {
        if (c++ == foldId) {
            idToReserveForTestSet = splitVariable.getKey();
            testSize = splitVariable.getValue();
        }
    }

    //make the split
    Instances train = new Instances(all, all.size() - testSize);
    Instances test  = new Instances(all, testSize);
    for (int i = 0; i < all.numInstances(); i++)
        if (all.instance(i).value(0) == idToReserveForTestSet)
            test.add(all.instance(i));
    train.addAll(all);

    //delete the split attribute
    train.deleteAttributeAt(0);
    test.deleteAttributeAt(0);

    return new Instances[] { train, test };
}
 
Example 11
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/** 
 * Modified from Aaron's shapelet resampling code in development.ReasamplingExperiments. Used to resample
 * train and test instances while maintaining original train/test class distributions
 * 
 * @param train Input training instances
 * @param test Input test instances
 * @param seed Used to create reproducible folds by using a consistent seed value
 * @return Instances[] with two elements; [0] is the output training instances, [1] output test instances
 */
public static Instances[] resampleTrainAndTestInstances(Instances train, Instances test, long seed){
    if(seed==0){    //For consistency, I have made this clone the data. Its not necessary generally, but not doing it introduced a bug indiagnostics elsewhere
        Instances newTrain = new Instances(train);
        Instances newTest = new Instances(test);
        return new Instances[]{newTrain,newTest};
    }
    Instances all = new Instances(train);
    all.addAll(test);
    ClassCounts trainDistribution = new TreeSetClassCounts(train);
    
    Map<Double, Instances> classBins = createClassInstancesMap(all);
   
    Random r = new Random(seed);

    //empty instances.
    Instances outputTrain = new Instances(all, 0);
    Instances outputTest = new Instances(all, 0);

    Iterator<Double> keys = classBins.keySet().iterator();
    while(keys.hasNext()){
        double classVal = keys.next();
        int occurences = trainDistribution.get(classVal);
        Instances bin = classBins.get(classVal);
        bin.randomize(r); //randomise the bin.

        outputTrain.addAll(bin.subList(0,occurences));//copy the first portion of the bin into the train set
        outputTest.addAll(bin.subList(occurences, bin.size()));//copy the remaining portion of the bin into the test set.
    }

    return new Instances[]{outputTrain,outputTest};
}
 
Example 12
Source File: RISE.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
public static void main(String[] arg) throws Exception{

        Instances dataTrain = loadDataNullable("Z:/ArchiveData/Univariate_arff" + "/" + DatasetLists.newProblems27[2] + "/" + DatasetLists.newProblems27[2] + "_TRAIN");
        Instances dataTest = loadDataNullable("Z:/ArchiveData/Univariate_arff" + "/" + DatasetLists.newProblems27[2] + "/" + DatasetLists.newProblems27[2] + "_TEST");
        Instances data = dataTrain;
        data.addAll(dataTest);

        ClassifierResults cr = null;
        SingleSampleEvaluator sse = new SingleSampleEvaluator();
        sse.setPropInstancesInTrain(0.5);
        sse.setSeed(0);

        RISE RISE = null;
        System.out.println("Dataset name: " + data.relationName());
        System.out.println("Numer of cases: " + data.size());
        System.out.println("Number of attributes: " + (data.numAttributes() - 1));
        System.out.println("Number of classes: " + data.classAttribute().numValues());
        System.out.println("\n");
        try {
            RISE = new RISE();
            RISE.setTransforms("PS");
            cr = sse.evaluate(RISE, data);
            System.out.println("PS");
            System.out.println("Accuracy: " + cr.getAcc());
            System.out.println("Build time (ns): " + cr.getBuildTimeInNanos());

            /*RISE = new RISE();
            cr = sse.evaluate(RISE, data);
            System.out.println("ACF_FFT");
            RISE.setTransforms("ACF", "FFT");
            System.out.println("Accuracy: " + cr.getAcc());
            System.out.println("Build time (ns): " + cr.getBuildTimeInNanos());*/
        } catch (Exception e) {
            e.printStackTrace();
        }

        /*Instances train=DatasetLoading.loadDataNullable("C:\\Users\\ajb\\Dropbox\\TSC Problems\\ItalyPowerDemand\\ItalyPowerDemand_TRAIN");
        Instances test=DatasetLoading.loadDataNullable("C:\\Users\\ajb\\Dropbox\\TSC Problems\\ItalyPowerDemand\\ItalyPowerDemand_TEST");
        RISE rif = new RISE();
        rif.setTransforms("ACF","AR","AFC");
        for(Filter f: rif.filters)
            System.out.println(f);
        String[] temp={"PS","Autocorellation","BOB","PACF"};
        rif.setTransforms(temp);
        for(Filter f: rif.filters)
            System.out.println(f);
        System.exit(0);

        rif.buildClassifier(train);
        System.out.println("build ok:");
        double a=ClassifierTools.accuracy(test, rif);
        System.out.println(" Accuracy ="+a);*/
/*
        //Get the class values as a fast vector
        Attribute target =data.attribute(data.classIndex());

        FastVector vals=new FastVector(target.numValues());
        for(int j=0;j<target.numValues();j++)
                vals.addElement(target.value(j));
        atts.addElement(new Attribute(data.attribute(data.classIndex()).name(),vals));
//Does this create the actual instances?
        Instances result = new Instances("Tree",atts,data.numInstances());
        for(int i=0;i<data.numInstances();i++){
            DenseInstance in=new DenseInstance(result.numAttributes());
            result.add(in);
        }
        result.setClassIndex(result.numAttributes()-1);
        Instances testHolder =new Instances(result,10);
//For each tree
        System.out.println("Train size "+result.numInstances());
        System.out.println("Test size "+testHolder.numInstances());
*/
    }
 
Example 13
Source File: Ex06_Clusterers.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
public static void main(String[] args) throws Exception {
    
    // We'll use this data throughout, see Ex01_Datahandling
    int seed = 0;
    Instances[] trainTest = DatasetLoading.sampleItalyPowerDemand(seed);
    Instances inst = trainTest[0];
    Instances inst2 = trainTest[1];
    inst.addAll(inst2);

    // Create an object from one of the time series or vector clusters implemented.
    // Call the buildClusterer method with your data. Most clusters will need the number of clusters k to be set.
    UnsupervisedShapelets us = new UnsupervisedShapelets();
    us.setNumberOfClusters(inst.numClasses());
    us.buildClusterer(inst);

    // You can find the cluster assignments for each data instance by calling getAssignments().
    // The index of assignments array will match the Instances object, i.e. index 0 with value 1 == first instance
    // of data assigned to cluster 1.
    int[] tsAssignments = us.getAssignments();
    System.out.println("UnsupervisedShapelets cluster assignments:");
    System.out.println(Arrays.toString(tsAssignments));

    // A popular metric for cluster evaluation is the Rand index. A utility method is available for calculating
    // this.
    double tsRandIndex = ClusteringUtilities.randIndex(tsAssignments, inst);
    System.out.println("UnsupervisedShapelets Rand index:");
    System.out.println(tsRandIndex);

    // weka also implements a range of clustering algorithms. Any class value must be removed prior to use.
    Instances copy = new Instances(inst);
    deleteClassAttribute(copy);
    SimpleKMeans km = new SimpleKMeans();
    km.setNumClusters(inst.numClasses());
    km.setPreserveInstancesOrder(true);
    km.buildClusterer(copy);

    int[] wekaAssignments = km.getAssignments();
    System.out.println("SimpleKMeans cluster assignments:");
    System.out.println(Arrays.toString(wekaAssignments));

    double wekaRandIndex = ClusteringUtilities.randIndex(wekaAssignments, inst);
    System.out.println("SimpleKMeans Rand index:");
    System.out.println(wekaRandIndex);
}
 
Example 14
Source File: ArrayUtilities.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
public static Instances toInstances(Instance... instances) {
    Instances collection = new Instances(instances[0].dataset(), 0);
    collection.addAll(Arrays.asList(instances));
    return collection;
}
 
Example 15
Source File: Utilities.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
public static Instances toInstances(Instance... instances) {
    Instances result = new Instances(instances[0].dataset(), 0);
    result.addAll(Utilities.asList(instances));
    return result;
}