Java Code Examples for weka.core.Instances#size()

The following examples show how to use weka.core.Instances#size(). They are taken from open source projects; the source file and project license are listed above each example.
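For orientation, Instances#size() returns the number of instances currently in the dataset (the same value as numInstances()) and is typically paired with get(int) for index-based iteration, as most of the examples below do. Here is a minimal sketch of that pattern; the ARFF path is a hypothetical placeholder and the class is illustrative, not taken from any of the projects below.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesSizeSketch {

  public static void main(String[] args) throws Exception {
    // Hypothetical path, used for illustration only.
    Instances data = DataSource.read("data/iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // size() reports how many instances the dataset currently holds.
    System.out.println("Number of instances: " + data.size());

    // Typical index-based loop over the dataset, as used in the examples below.
    for (int i = 0; i < data.size(); i++) {
      Instance inst = data.get(i);
      System.out.println(i + ": class = " + inst.classValue());
    }
  }
}
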
Example 1
Source File: CnnTextEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Test getDataSetIterator
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = makeData();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  Set<Double> labels = new HashSet<>();
  for (int i = 0; i < data.size(); i++) {
    Instance inst = data.get(i);
    double label = inst.value(data.classIndex());
    final DataSet next = Utils.getNext(it);
    double itLabel = next.getLabels().getDouble(0);
    Assert.assertEquals(label, itLabel, 1e-5);
    labels.add(label);
  }
}
 
Example 2
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Test getDataSetIterator
 */
@Test
public void testGetIteratorNumericClass() throws Exception {
  final Instances data = DatasetLoader.loadAngerMeta();
  final int batchSize = 1;
  final DataSetIterator it = this.cteii.getDataSetIterator(data, SEED, batchSize);

  Set<Double> labels = new HashSet<>();
  for (int i = 0; i < data.size(); i++) {
    Instance inst = data.get(i);
    double label = inst.value(data.classIndex());
    final DataSet next = Utils.getNext(it);
    double itLabel = next.getLabels().getDouble(0);
    Assert.assertEquals(label, itLabel, 1e-5);
    labels.add(label);
  }
}
 
Example 3
Source File: Dl4JMlpFilterTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
protected void checkLayer(Dl4jMlpClassifier clf, Instances instances, String[] transformationLayerNames,
    String clfPath, boolean useZooModel) throws Exception {
  Instances activationsExpected = clf.getActivationsAtLayers(transformationLayerNames, instances);
  Dl4jMlpFilter filter = new Dl4jMlpFilter();
  // Load the MNIST III if we're being called on the MNIST dataset (dataset is in meta format (String, class))
  if (ImageInstanceIterator.isMetaArff(instances))
    filter.setInstanceIterator(DatasetLoader.loadMiniMnistImageIterator());
  filter.setSerializedModelFile(new File(clfPath));
  filter.setTransformationLayerNames(transformationLayerNames);
  filter.setInputFormat(instances);
  filter.setPoolingType(PoolingType.NONE);

  Instances activationsActual = Filter.useFilter(instances, filter);

  for (int i = 0; i < activationsActual.size(); i++) {
    Instance expected = activationsExpected.get(i);
    Instance actual = activationsActual.get(i);
    for (int j = 0; j < expected.numAttributes(); j++) {
      assertEquals(expected.value(j), actual.value(j), 1e-6);
    }
  }
}
 
Example 4
Source File: DataSetUtils.java    From AILibs with GNU Affero General Public License v3.0
public static Instance matrixToInstance(final INDArray instance, final Instances refInstances) {
	if (instance == null || refInstances == null) {
		throw new IllegalArgumentException("Parameter 'instance' and 'refInstances' must not be null!");
	}

	// Create attributes
	final ArrayList<Attribute> attributes = new ArrayList<>();
	for (int i = 0; i < instance.length(); i++) {
		final Attribute newAtt = new Attribute("val" + i);
		attributes.add(newAtt);
	}

	final List<String> classValues = IntStream.range(0, refInstances.classAttribute().numValues()).asDoubleStream().mapToObj(String::valueOf).collect(Collectors.toList());
	final Attribute classAtt = new Attribute(CLASS_ATT_NAME, classValues);
	attributes.add(classAtt);

	final Instances result = new Instances(INSTANCES_DS_NAME, attributes, refInstances.size());
	result.setClassIndex(result.numAttributes() - 1);

	// Initialize instance
	final Instance inst = new DenseInstance(1, ArrayUtils.addAll(Nd4j.toFlattened(instance).toDoubleVector(), 0));
	inst.setDataset(result);

	return inst;
}
 
Example 5
Source File: ExactIntervalAugSpaceSampler.java    From AILibs with GNU Affero General Public License v3.0
@Override
public Instance augSpaceSample() {
	Instances preciseInsts = this.getPreciseInsts();
	int numInsts = preciseInsts.size();
	ArrayList<Instance> sampledPoints = new ArrayList<>();

	Instance x1 = preciseInsts.get(this.getRng().nextInt(numInsts));
	Instance x2 = preciseInsts.get(this.getRng().nextInt(numInsts));

	// Assume last attribute is the class
	int numFeatures = preciseInsts.numAttributes() - 1;

	for (Instance inst : preciseInsts) {
		boolean inInterval = true;
		for (int att = 0; att < numFeatures && inInterval; att++) {
			if (inst.value(att) < Math.min(x1.value(att), x2.value(att)) || inst.value(att) > Math.max(x1.value(att), x2.value(att))) {
				inInterval = false;
			}
		}
		if (inInterval) {
			sampledPoints.add(inst);
		}
	}

	return generateAugPoint(sampledPoints);
}
 
Example 6
Source File: WekaInstancesTester.java    From AILibs with GNU Affero General Public License v3.0
@Test
public void testEqualnessOfTwoCopiesOfSameDataset() throws Exception {
	Instances ds1 = new Instances(new FileReader(this.dataset));
	ds1.setClassIndex(ds1.numAttributes() - 1);
	WekaInstances wrapped1 = new WekaInstances(ds1);

	Instances ds2 = new Instances(new FileReader(this.dataset));
	ds2.setClassIndex(ds2.numAttributes() - 1);
	WekaInstances wrapped2 = new WekaInstances(ds2);

	/* first conduct an instance-wise comparison and a mutual containment check */
	int n = ds1.size();
	assertEquals("Copy of dataset has different length than the original.", n, ds2.size());
	for (int i = 0; i < n; i++) {
		IWekaInstance i1 = wrapped1.get(i);
		IWekaInstance i2 = wrapped2.get(i);
		assertEquals("Hash codes of single instance don't match!", i1.hashCode(), i2.hashCode());
		assertEquals("Comparing the instances with equals yields false.", i1, i2);
		assertTrue("The second dataset does not contain " + i1 + ", which is contained in the first.", wrapped2.contains(i1));
		assertTrue("The first dataset does not contain " + i2 + ", which is contained in the second.", wrapped1.contains(i2));
	}

	/* now compare the entire dataset */
	assertEquals("Hash codes of entire dataset don't match!", wrapped1.hashCode(), wrapped2.hashCode());
	assertEquals("Comparing the datasets with equals yields false.", wrapped1, wrapped2);
}
 
Example 7
Source File: RLTunedKNNSetup.java    From tsml with GNU General Public License v3.0
@Override
public void accept(Instances trainData) {
    neighbourCount = new Box<>(1); // must start at 1 otherwise the loocv produces no train estimate
    paramCount = new Box<>(0);
    longestExploreTimeNanos = 0;
    id = 0;
    longestExploitTimeNanos = 0;
    nextImproveableBenchmarks = new HashSet<>();
    improveableBenchmarks = new HashSet<>();
    unimproveableBenchmarks = new HashSet<>();
    switchImproveableBenchmarks();
    finalBenchmarks = PrunedMultimap.desc(ArrayList::new);
    finalBenchmarks.setSoftLimit(1);
    final int seed = rlTunedClassifier.getSeed();
    paramSpace = paramSpaceBuilder.apply(trainData);
    paramSetIterator = new RandomListIterator<>(this.paramSpace, seed).setRemovedOnNext(true);
    fullParamSpaceSize = this.paramSpace.size();
    fullNeighbourhoodSize = trainData.size(); // todo check all seeds set
    maxNeighbourhoodSize = findLimit(fullNeighbourhoodSize, neighbourhoodSizeLimit, neighbourhoodSizeLimitPercentage);
    maxParamSpaceSize = findLimit(fullParamSpaceSize, paramSpaceSizeLimit, paramSpaceSizeLimitPercentage);
    if(!incrementalMode) {
        neighbourCount.set(maxNeighbourhoodSize);
    }
    // transform classifiers into benchmarks
    explorer = new ParamExplorer();
    // setup an iterator to improve benchmarks
    exploiter = new NeighbourExploiter();
    stategy = new LeeStategy();
    agent = new KnnAgent();
    // set corresponding iterators in the incremental tuned classifier
    rlTunedClassifier.setAgent(agent);
    rlTunedClassifier.setEnsembler(Ensembler.single());
    // todo make sure the seeds are set for everything
}
 
Example 8
Source File: PartitionedMultiFilter.java    From tsml with GNU General Public License v3.0
/**
 * Tests whether the filter can actually handle the data.
 *
 * @param instanceInfo the data to test
 * @throws Exception   if the test fails
 */
protected void testInputFormat(Instances instanceInfo) throws Exception {
  for (int i = 0; i < getRanges().length; i++) {
    Instances newi = new Instances(instanceInfo, 0);
    if (instanceInfo.size() > 0) {
      newi.add((Instance) instanceInfo.get(0).copy());
    }
    Range range = getRanges()[i];
    range.setUpper(instanceInfo.numAttributes() - 1);
    Instances subset = generateSubset(newi, range);
    getFilters()[i].setInputFormat(subset);
  }
}
 
Example 9
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0
public static double[] getClassesAsArray(final Instances inst) {
	int n = inst.size();
	double[] vec = new double[n];
	for (int i = 0; i < n; i++) {
		vec[i] = inst.get(i).classValue();
	}
	return vec;
}
 
Example 10
Source File: RankingByPairwiseComparison.java    From AILibs with GNU Affero General Public License v3.0
public void fit(final Instances dataset, final int labels) throws Exception {
	this.labelIndices = getLabelIndices(labels, dataset);
	this.labelIndices.stream().map(x -> dataset.attribute(x).name()).forEach(this.labelSet::add);
	Instances plainPWDataset = this.applyFiltersToDataset(dataset);

	try {
		for (int i = 0; i < this.labelIndices.size() - 1; i++) {
			for (int j = i + 1; j < this.labelIndices.size(); j++) {

				PairWiseClassifier pwc = new PairWiseClassifier();
				pwc.a = dataset.attribute(this.labelIndices.get(i)).name();
				pwc.b = dataset.attribute(this.labelIndices.get(j)).name();

				pwc.c = AbstractClassifier.forName(this.config.getBaseLearner(), null);

				Instances pwDataset = new Instances(plainPWDataset);

				for (int k = 0; k < pwDataset.size(); k++) {
					String value;
					if (dataset.get(k).value(this.labelIndices.get(i)) > dataset.get(k).value(this.labelIndices.get(j))) {
						value = "true";
					} else {
						value = "false";
					}
					pwDataset.get(k).setValue(pwDataset.numAttributes() - 1, value);
				}
				pwDataset.setClassIndex(pwDataset.numAttributes() - 1);

				pwc.c.buildClassifier(pwDataset);
				this.pwClassifiers.add(pwc);
			}
		}
	} catch (Exception e) {
		throw new TrainingException("Could not build ranker", e);
	}
}
 
Example 11
Source File: RandomStratifiedIndexSampler.java    From tsml with GNU General Public License v3.0
public void setInstances(Instances instances) {
    instancesByClass = indexByClass(instances);
    classDistribution = classDistribution(instances);
    classSamplingProbabilities = classDistribution(instances);
    count = 0;
    maxCount = instances.size();
}
 
Example 12
Source File: RandomStratifiedSampler.java    From tsml with GNU General Public License v3.0
public void setInstances(Instances instances) {
    instancesByClass = instancesByClass(instances);
    classDistribution = classDistribution(instances);
    classSamplingProbabilities = classDistribution(instances);
    count = 0;
    maxCount = instances.size();
}
 
Example 13
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0
public static List<List<Integer>> indexByClass(Instances instances) {
    List<List<Integer>> instancesByClass = new ArrayList<>();
    int numClasses = instances.get(0).numClasses();
    for(int i = 0; i < numClasses; i++) {
        instancesByClass.add(new ArrayList());
    }
    for(int i = 0; i < instances.size(); i++) {
        instancesByClass.get((int) instances.get(i).classValue()).add(i);
    }
    return instancesByClass;
}
 
Example 14
Source File: MLPipeline.java    From AILibs with GNU Affero General Public License v3.0
public double[] classifyInstances(final Instances arg0) throws Exception {
	int n = arg0.size();
	double[] answers = new double[n];
	for (int i = 0; i < n; i++) {
		answers[i] = this.classifyInstance(arg0.get(i));
	}
	return answers;
}
 
Example 15
Source File: TableMaker.java    From NLIWOD with GNU Affero General Public License v3.0
public static void main(String[] args) throws Exception {				 
	Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff");
	BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()));
	ArffReader arff = new ArffReader(reader);
	Instances data = arff.getData();
	data.setClassIndex(6);
	
	//Change To Classifier of Choice
	PSt Classifier = new PSt();
	Classifier.buildClassifier(data);

	
	JSONObject qald6test = Utils.loadTestQuestions();
	JSONArray questions = (JSONArray) qald6test.get("questions");
	ArrayList<String> testQuestions = Lists.newArrayList();
	for(int i = 0; i < questions.size(); i++){
		JSONObject questionData = (JSONObject) questions.get(i);
		JSONArray questionStrings = (JSONArray) questionData.get("question");
		JSONObject questionEnglish = (JSONObject) questionStrings.get(0);
		testQuestions.add((String) questionEnglish.get("string"));
	}
	ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" );
	double avef = 0;
	double[] systemavef = {0,0,0,0,0,0,0};
	for(int i=0; i<data.size(); i++){
		String tmp = "";
		tmp += i +"\t &" + testQuestions.get(i);
		double bestf = 0;
		for(String system: systems){
			double p = Float.parseFloat(Utils.loadSystemP(system).get(i));				
			double r = Float.parseFloat(Utils.loadSystemR(system).get(i));
			double f = 0;
			if(!(p==0&&r==0)){
				f = 2*p*r/(p+r);
			}
			if(f > bestf){
				bestf = f;
			}
			tmp += "\t &" + Math.floor(f * 100) / 100;
			systemavef[systems.indexOf(system)] += f/data.size();
		}
		systemavef[6] += bestf/data.size();
		tmp += "\t &" + Math.floor(bestf * 100) / 100;
		double[] confidences = Classifier.distributionForInstance(data.get(i));
		System.out.println(Arrays.toString(confidences));
		int argmax = -1;
		double max = -1;
		for(int j = 0; j < 6; j++){
			if(confidences[j]>max){
				max = confidences[j];
				argmax = j;
			}
		}

		String sys2ask = systems.get(systems.size() - argmax -1);
		double systemp = Float.parseFloat(Utils.loadSystemP(sys2ask).get(i));				
		double systemr = Float.parseFloat(Utils.loadSystemR(sys2ask).get(i));
		double systemf = 0;
		if(!(systemp==0&&systemr==0)){
			systemf = 2*systemp*systemr/(systemp+systemr);
		}
		avef += systemf;
		tmp += "\t &" + Math.floor(systemf * 100) / 100;

		tmp += "\\\\";
		System.out.println(tmp);
	}
	System.out.println(Arrays.toString(systemavef));
	System.out.println(avef/data.size());
}
 
Example 16
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0
public static double getRelativeNumberOfInstancesFromClass(final Instances data, final Collection<String> cs) {
	return getNumberOfInstancesFromClass(data, cs) / (1f * data.size());
}
 
Example 17
Source File: SequenceStatsCache.java    From tsml with GNU General Public License v3.0
public SequenceStatsCache(final Instances train, final int startingWindow) {
    this.train = train;
    int nSequences = train.size();
    int length = train.numAttributes() - 1;
    this.LEs = new ArrayList<>(nSequences);
    this.UEs = new ArrayList<>(nSequences);
    this.lastWindowComputed = new double[nSequences];
    this.lastERPWindowComputed = new double[nSequences];
    this.lastLCSSWindowComputed = new double[nSequences];
    Arrays.fill(this.lastWindowComputed, -1);
    Arrays.fill(this.lastERPWindowComputed, -1);
    Arrays.fill(this.lastLCSSWindowComputed, -1);
    this.currentWindow = startingWindow;
    this.mins = new double[nSequences];
    this.maxs = new double[nSequences];
    this.indexMins = new int[nSequences];
    this.indexMaxs = new int[nSequences];
    this.isMinFirst = new boolean[nSequences];
    this.isMinLast = new boolean[nSequences];
    this.isMaxFirst = new boolean[nSequences];
    this.isMaxLast = new boolean[nSequences];
    this.indicesSortedByAbsoluteValue = new IndexedDouble[nSequences][length];
    for (int i = 0; i < train.size(); i++) {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        int indexMin = -1, indexMax = -1;
        for (int j = 0; j < train.numAttributes() - 1; j++) {
            double val = train.get(i).value(j);
            if (val > max) {
                max = val;
                indexMax = j;
            }
            if (val < min) {
                min = val;
                indexMin = j;
            }
            indicesSortedByAbsoluteValue[i][j] = new IndexedDouble(j, Math.abs(val));
        }
        indexMaxs[i] = indexMax;
        indexMins[i] = indexMin;
        mins[i] = min;
        maxs[i] = max;
        isMinFirst[i] = (indexMin == 0);
        isMinLast[i] = (indexMin == (train.numAttributes() - 2));
        isMaxFirst[i] = (indexMax == 0);
        isMaxLast[i] = (indexMax == (train.numAttributes() - 2));
        Arrays.sort(indicesSortedByAbsoluteValue[i], (v1, v2) -> -Double.compare(v1.value, v2.value));
        this.LEs.add(new double[length]);
        this.UEs.add(new double[length]);
    }
}
 
Example 18
Source File: UnsupervisedShapelets.java    From tsml with GNU General Public License v3.0
private void extractUShapelets(Instances data){
    int[] shapeletLengths = {25, 50};

    if (data.numAttributes() < 50){
        shapeletLengths = new int[]{data.numAttributes()/2};
    }

    shapelets = new ArrayList();
    numInstances = data.size();
    Instance inst = data.firstInstance();
    boolean finished = false;

    while (!finished){
        ArrayList<UShapelet> shapeletCandidates = new ArrayList();

        //Finds all candidate shapelets on the selected instance
        for (int i = 0; i < shapeletLengths.length; i++){
            for (int n = 0; n < inst.numAttributes() - shapeletLengths[i]; n++){
                UShapelet candidate = new UShapelet(n, shapeletLengths[i], inst);
                candidate.computeGap(data);
                shapeletCandidates.add(candidate);
            }
        }

        double maxGap = -1;
        int maxGapIndex = -1;

        //Finds the shapelet with the highest gap value
        for (int i = 0; i < shapeletCandidates.size(); i++){
            if (shapeletCandidates.get(i).gap > maxGap){
                maxGap = shapeletCandidates.get(i).gap;
                maxGapIndex = i;
            }
        }

        //Adds the shapelet with the best gap value to the pool of shapelets
        UShapelet best = shapeletCandidates.get(maxGapIndex);
        shapelets.add(best);

        double[] distances = best.computeDistances(data);
        ArrayList<Double> lesserDists = new ArrayList();
        double maxDist = -1;
        int maxDistIndex = -1;

        //Finds the instance with the max dist to the shapelet and all with a dist lower than the distance used
        //to generate the gap value.
        for (int i = 0; i < distances.length; i++){
            if (distances[i] < best.dt){
                lesserDists.add(distances[i]);
            }
            else if (distances[i] > maxDist){
                maxDist = distances[i];
                maxDistIndex = i;
            }
        }

        //Use max dist instance to generate new shapelet and remove low distance instances
        if (lesserDists.size() == 1){
            finished = true;
        }
        else{
            inst = data.get(maxDistIndex);

            double mean = mean(lesserDists);
            double cutoff = mean + standardDeviation(lesserDists, mean);

            Instances newData = new Instances(data, 0);

            for (int i = 0; i < data.numInstances(); i++){
                if (distances[i] >= cutoff){
                    newData.add(data.get(i));
                }
            }

            data = newData;

            if (data.size() == 1){
                finished = true;
            }
        }
    }
}
 
Example 19
Source File: CrossValidationExperiments.java    From NLIWOD with GNU Affero General Public License v3.0
public static void main(String[] args) throws Exception {		

		Path datapath= Paths.get("./src/main/resources/old/Qald6Logs.arff");
		BufferedReader reader = new BufferedReader(new FileReader(datapath.toString()));
		ArffReader arff = new ArffReader(reader);
		Instances data = arff.getData();
		data.setClassIndex(6);
		
		ArrayList<String> systems = Lists.newArrayList("KWGAnswer", "NbFramework", "PersianQA", "SemGraphQA", "UIQA_withoutManualEntries", "UTQA_English" );


		int seed = 133;
		// Change to 100 for leave-one-out CV
		int folds = 10;
		
		Random rand = new Random(seed);
		Instances randData = new Instances(data);
		randData.randomize(rand);

		float cv_ave_f = 0;
		
		for(int n=0; n < folds; n++){
		    Instances train = randData.trainCV(folds,  n);
		    Instances test = randData.testCV(folds,  n);
		    
		    //Change to the Classifier of your choice
			CDN Classifier = new CDN();
			Classifier.buildClassifier(train);
			

			float ave_p = 0;
			float ave_r = 0;
	
			for(int j = 0; j < test.size(); j++){
				Instance ins = test.get(j);
				int k = 0; 
				for(int l=0; l < data.size(); l++){
					Instance tmp = data.get(l);
					if(tmp.toString().equals(ins.toString())){
						k = l;
					}
				}		
				double[] confidences = Classifier.distributionForInstance(ins);
				int argmax = -1;
				double max = -1;
				for(int i = 0; i < 6; i++){
					if(confidences[i]>max){
						max = confidences[i];
						argmax = i;
					}
				}
				String sys2ask = systems.get(systems.size() - argmax -1);
				ave_p += Float.parseFloat(Utils.loadSystemP(sys2ask).get(k));				
				ave_r += Float.parseFloat(Utils.loadSystemR(sys2ask).get(k));
			}
			
			double p = ave_p/test.size();
			double r = ave_r/test.size();
			double fmeasure = 0;
			if(p>0&&r>0){fmeasure = 2*p*r/(p + r);}
			System.out.println("macro F on fold " + n + ": " + fmeasure);
			
			cv_ave_f += fmeasure/folds;
						
		}
		System.out.println("macro F average: " + cv_ave_f);
		System.out.println('\n');
	}