Java Code Examples for weka.core.Instances#setClassIndex()

The following examples show how to use weka.core.Instances#setClassIndex(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: MultiResponseModelTrees.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Trains one M5P regressor per class value. Each regressor is built on a copy
 * of the data whose class attribute has been replaced by a numeric attribute
 * holding 1 for instances of that class and 0 for all others.
 *
 * @param data the training instances; the last attribute is the one that gets
 *             replaced, so the class is presumably expected to be last (TODO confirm)
 * @throws Exception if building any of the regressors fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Working copy that will carry the numeric 0/1 class.
    numericClassInsts = new Instances(data);

    // Temporary class index, presumably so that the last attribute is not the
    // class attribute at the moment it is deleted.
    numericClassInsts.setClassIndex(0);
    numericClassInsts.deleteAttributeAt(numericClassInsts.numAttributes() - 1);

    // Append the numeric replacement attribute and make it the class.
    Attribute indicatorAtt = new Attribute("newClassVal");
    numericClassInsts.insertAttributeAt(indicatorAtt, numericClassInsts.numAttributes());
    numericClassInsts.setClassIndex(numericClassInsts.numAttributes() - 1);

    // One regressor per original class value.
    final int classCount = data.numClasses();
    regressors = new M5P[classCount];
    final double[] originalLabels = data.attributeToDoubleArray(data.classIndex());
    final int rowCount = numericClassInsts.numInstances();

    for (int label = 0; label < classCount; label++) {
        // Relabel: 1 for members of the class currently handled, 0 otherwise.
        for (int row = 0; row < rowCount; row++) {
            double indicator = (originalLabels[row] == label) ? 1 : 0;
            numericClassInsts.instance(row).setClassValue(indicator);
        }

        regressors[label] = new M5P();
        regressors[label].buildClassifier(numericClassInsts);
    }
}
 
Example 2
Source File: DatasetLoader.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads the anger meta dataset and turns it into a two-class problem.
 *
 * <p>The result keeps attribute 0 of the source data and adds a nominal class
 * attribute {@code cls}, set to "1" when the source class value is greater
 * than 0.5 and to "0" otherwise.
 *
 * @return the converted dataset with its class index set to 1
 * @throws Exception if the ARFF file cannot be loaded
 */
public static Instances loadAngerMetaClassification() throws Exception {
  final Instances source =
      DatasetLoader.loadArff("src/test/resources/numeric/anger.meta.arff");

  // Schema: first source attribute plus the new binary class.
  ArrayList<Attribute> schema = new ArrayList<>();
  schema.add(source.attribute(0));
  schema.add(new Attribute("cls", Arrays.asList("0", "1")));

  Instances binarized = new Instances("anger-classification", schema, source.numInstances());
  binarized.setClassIndex(1);

  for (Instance original : source) {
    Instance row = (Instance) original.copy();
    row.setDataset(binarized);
    row.setValue(0, original.stringValue(0));

    String label;
    if (original.classValue() > 0.5) {
      label = "1";
    } else {
      label = "0";
    }
    row.setValue(1, label);

    binarized.add(row);
  }
  return binarized;
}
 
Example 3
Source File: DatasetLoader.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads the anger meta dataset and turns it into a two-class problem.
 *
 * <p>The result keeps attribute 0 of the source data and adds a nominal class
 * attribute {@code cls}, set to "1" when the source class value is greater
 * than 0.5 and to "0" otherwise.
 *
 * @return the converted dataset with its class index set to 1
 * @throws Exception if the ARFF file cannot be loaded
 */
public static Instances loadAngerMetaClassification() throws Exception {
  final Instances source =
      DatasetLoader.loadArff("src/test/resources/numeric/anger.meta.arff");

  // Schema: first source attribute plus the new binary class.
  ArrayList<Attribute> schema = new ArrayList<>();
  schema.add(source.attribute(0));
  schema.add(new Attribute("cls", Arrays.asList("0", "1")));

  Instances binarized = new Instances("anger-classification", schema, source.numInstances());
  binarized.setClassIndex(1);

  for (Instance original : source) {
    Instance row = (Instance) original.copy();
    row.setDataset(binarized);
    row.setValue(0, original.stringValue(0));

    String label;
    if (original.classValue() > 0.5) {
      label = "1";
    } else {
      label = "0";
    }
    row.setValue(1, label);

    binarized.add(row);
  }
  return binarized;
}
 
Example 4
Source File: Reorder.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Sets the format of the input instances and immediately derives the
 * reordered output format from the selected attribute indices.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true, since the output format is set before this method returns
 * @throws Exception if a problem occurs setting the input format
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  FastVector reordered = new FastVector();
  int newClassIndex = -1;
  m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());

  for (int selected : m_SelectedAttributes) {
    // The class attribute ends up at the position it is about to be appended to.
    if (selected == instanceInfo.classIndex()) {
      newClassIndex = reordered.size();
    }
    reordered.addElement((Attribute) instanceInfo.attribute(selected).copy());
  }

  initInputLocators(instanceInfo, m_SelectedAttributes);

  Instances outputFormat = new Instances(instanceInfo.relationName(), reordered, 0);
  outputFormat.setClassIndex(newClassIndex);
  setOutputFormat(outputFormat);

  return true;
}
 
Example 5
Source File: WekaUtilTester.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * For every classifier in the portfolio: stratifies the vowel dataset into 10
 * folds, checks that train and test fold 0 together cover all instances,
 * cross-validates the classifier, then builds it on the train fold and
 * evaluates it on the test fold.
 *
 * @throws Exception if loading the data, training or evaluation fails
 */
@Test
public void checkSplit() throws Exception {

	Instances inst;
	// The Instances constructor parses the complete file, so the reader can be
	// released as soon as it returns.
	try (BufferedReader reader = new BufferedReader(new FileReader(VOWEL_ARFF))) {
		inst = new Instances(reader);
	}
	inst.setClassIndex(inst.numAttributes() - 1);
	for (Classifier c : this.portfolio) {

		/* eval for CV */
		inst.stratify(10);
		Instances train = inst.trainCV(10, 0);
		Instances test = inst.testCV(10, 0);
		// expected value first, so a failure message reads correctly
		Assert.assertEquals(inst.size(), train.size() + test.size());
		Evaluation eval = new Evaluation(train);
		eval.crossValidateModel(c, inst, 10, new Random(0));

		c.buildClassifier(train);
		eval.evaluateModel(c, test);
		System.out.println(eval.pctCorrect());
	}
}
 
Example 6
Source File: DatasetLoader.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads a CSV file and marks its last attribute as the class attribute.
 *
 * @param path location of the CSV file to read
 * @return the loaded data with the class index set to the last attribute
 * @throws Exception if the file cannot be read or parsed
 */
public static Instances loadCSV(String path) throws Exception {
  final CSVLoader loader = new CSVLoader();
  loader.setSource(new File(path));

  final Instances loaded = loader.getDataSet();
  final int lastAttribute = loaded.numAttributes() - 1;
  loaded.setClassIndex(lastAttribute);
  return loaded;
}
 
Example 7
Source File: WekaDeeplearning4jExamples.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Trains a Dl4jMlpClassifier that uses the KerasEfficientNet zoo model
 * (variation EFFICIENTNET_B1) on the small plant-seedlings image set and
 * prints the evaluation summary and confusion matrix for a held-out fold.
 *
 * @throws Exception if loading, training or evaluation fails
 */
private static void dl4jResnet50() throws Exception {
    final String imageFolder = "src/test/resources/nominal/plant-seedlings-small";

    // Load the image listing; attribute 1 is used as the class.
    ImageDirectoryLoader dirLoader = new ImageDirectoryLoader();
    dirLoader.setInputDirectory(new File(imageFolder));
    Instances images = dirLoader.getDataSet();
    images.setClassIndex(1);

    // Configure the network.
    Dl4jMlpClassifier network = new Dl4jMlpClassifier();
    network.setNumEpochs(3);

    KerasEfficientNet zooModel = new KerasEfficientNet();
    zooModel.setVariation(EfficientNet.VARIATION.EFFICIENTNET_B1);
    network.setZooModel(zooModel);

    ImageInstanceIterator imageIterator = new ImageInstanceIterator();
    imageIterator.setImagesLocation(new File(imageFolder));
    network.setInstanceIterator(imageIterator);

    // Shuffle, stratify into 5 folds and use fold 0 as the test split.
    Random shuffleSeed = new Random(0);
    images.randomize(shuffleSeed);
    images.stratify(5);
    Instances trainFold = images.trainCV(5, 0);
    Instances testFold = images.testCV(5, 0);

    // Fit on the training fold.
    network.buildClassifier(trainFold);

    // Score on the test fold.
    Evaluation evaluation = new Evaluation(testFold);
    evaluation.evaluateModel(network, testFold);

    // Report.
    System.out.println(evaluation.toSummaryString());
    System.out.println(evaluation.toMatrixString());
}
 
Example 8
Source File: SentimentAnalyser.java    From sentiment-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Decides upon a "disagreed" document by applying the previously built model.
 *
 * @param tweet the text of the document
 * @return "light positive" if the first entry of the predicted distribution is
 *         greater than 0.5, "light negative" otherwise, or an empty string if
 *         an exception occurred while classifying
 */
private String clarifyOnModel(String tweet){
	String polarity = "";

	// Wrap the text in an instance and append it to the test set.
	double[] values = new double[2];
	values[0] = test.attribute(0).addStringValue(tweet);
	test.add(new SparseInstance(1.0, values));

	try {
		stwv.setInputFormat(test);
		Instances vectorized = Filter.useFilter(test, stwv);

		// Re-order attributes so that they are compatible with the training set's ones.
		Instances aligned = reformatText(vectorized);
		aligned.setClassIndex(0);

		// Polarity comes from the model's class distribution for the first instance.
		double[] distribution = multiNB.distributionForInstance(aligned.get(0));
		polarity = (distribution[0] > 0.5) ? "light positive" : "light negative";
	} catch (Exception e) {
		e.printStackTrace();
	}

	// Remove the first instance of the test set again (the one added above,
	// assuming the set was empty on entry).
	test.remove(0);
	return polarity;
}
 
Example 9
Source File: PartitionMembership.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Signals that the current batch of input is complete. On the first batch
 * (no output format yet) the partition generator is built, the output format
 * is derived from it and all buffered input instances are converted.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 * @throws Exception if building the partition or converting instances fails
 */
public boolean batchFinished() throws Exception {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  if (outputFormatPeek() == null) {
    final Instances input = getInputFormat();

    // Build the partition generator
    m_partitionGenerator.generatePartition(input);
    final boolean hasClass = input.classIndex() >= 0;

    // One numeric attribute per partition element, optionally followed by the class
    FastVector outputAtts = new FastVector();
    for (int element = 0; element < m_partitionGenerator.numElements(); element++) {
      outputAtts.addElement(new Attribute("partition_" + element));
    }
    if (hasClass) {
      outputAtts.addElement(input.classAttribute().copy());
    }
    outputAtts.trimToSize();

    final String outputName = input.relationName() + "_partitionMembership";
    Instances outputHeader = new Instances(outputName, outputAtts, 0);
    if (hasClass) {
      outputHeader.setClassIndex(outputHeader.numAttributes() - 1);
    }
    setOutputFormat(outputHeader);

    // Convert everything that was buffered so far
    final int buffered = input.numInstances();
    for (int row = 0; row < buffered; row++) {
      convertInstance(input.instance(row));
    }
  }
  flushInput();

  m_NewBatch = true;
  return numPendingOutput() != 0;
}
 
Example 10
Source File: Main-SVG.java    From Java-for-Data-Science with MIT License 5 votes vote down vote up
/**
 * Loads camping.txt, trains an SMO classifier on the first 14 rows, evaluates
 * it on the following 5 rows, and finally classifies one hand-built instance.
 * Any exception is printed to standard error instead of being propagated.
 */
public Main() {
    try {
        Instances data;
        // The Instances constructor parses the complete file, so the reader
        // can be closed as soon as it returns.
        try (BufferedReader datafile = readDataFile("camping.txt")) {
            data = new Instances(datafile);
        }
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        // Fit on the training rows only. Fitting on all of data would let the
        // model see the test rows and make the evaluation below meaningless.
        smo.buildClassifier(trainingData);

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance
        Instance instance = new DenseInstance(3);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);
        instance.setDataset(data);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
 
Example 11
Source File: COEDEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Downloads the segment dataset from OpenML, takes a stratified split and
 * computes the COED value for the first part of the split. The test currently
 * always fails on purpose, as a reminder that it does not verify anything yet.
 *
 * @throws Exception if the dataset cannot be fetched or read
 */
@Test
public void coedEvalTest() throws Exception {
	logger.info("Starting COED evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = ds.getDataset(DataSetUtils.API_KEY);
	Instances data;
	// The Instances constructor parses the complete file, so the reader can be
	// released as soon as it returns.
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .01f);

	fail("This is a reminder to say that this test yet does not check anything. Result of the computation is: " + EvaluationUtils.calculateCOEDForBatch(split.get(0)));
}
 
Example 12
Source File: LearnPatternSimilarityClassifier.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Predicts the class by generated segment and segment difference features based
 * on <code>segments</code> and <code>segmentsDifference</code>. The induced
 * instances are propagated to the forest of {@link RandomRegressionTree}s
 * <code>trees</code>. The predicted leaf nodes are used within a 1NN search on
 * the training leaf nodes to find the nearest instance and taking its class as
 * prediction value.
 *
 * @param univInstance
 *            Univariate instance to be predicted; must be neither null nor empty
 * @return the training target of the nearest training instance
 * @throws PredictionException
 *             if the model has not been trained yet
 * @throws IllegalArgumentException
 *             if <code>univInstance</code> is null or has length 0
 */
@Override
public Integer predict(final double[] univInstance) throws PredictionException {
	if (!this.isTrained()) {
		throw new PredictionException("Model has not been built before!");
	}

	// The message has always promised to reject empty input as well, so
	// enforce it here instead of failing later during feature generation.
	if (univInstance == null || univInstance.length == 0) {
		throw new IllegalArgumentException("Instance to be predicted must not be null or empty!");
	}

	// One leaf-count histogram per tree.
	int[][] leafNodeCounts = new int[this.trees.length][];

	for (int i = 0; i < this.trees.length; i++) {

		// Generate subseries features
		Instances seqInstances = new Instances("SeqFeatures", new ArrayList<>(this.attributes), this.lengthPerTree[i]);

		for (int len = 0; len < this.lengthPerTree[i]; len++) {
			Instance instance = LearnPatternSimilarityLearningAlgorithm.generateSubseriesFeatureInstance(univInstance, this.segments[i], this.segmentsDifference[i], len);
			seqInstances.add(instance);
		}

		seqInstances.setClassIndex(this.classAttIndexPerTree[i]);
		leafNodeCounts[i] = new int[this.trees[i].getNosLeafNodes()];

		// Let every generated instance contribute to the leaf counts of tree i.
		for (int inst = 0; inst < seqInstances.numInstances(); inst++) {
			LearnPatternSimilarityLearningAlgorithm.collectLeafCounts(leafNodeCounts[i], seqInstances.get(inst), this.trees[i]);
		}
	}
	return this.trainTargets[this.findNearestInstanceIndex(leafNodeCounts)];
}
 
Example 13
Source File: MLPlan4BigFileInputTester.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs ML-Plan on the train part of a stratified 70/30 split of the dataset
 * and evaluates the returned classifier on the test part. The split files are
 * written next to the original dataset if they do not exist yet.
 *
 * @throws Exception if reading or writing data, the search, or the evaluation fails
 */
@Test
public void test() throws Exception {
	String origDataSrcName = "testrsc/openml/1240.arff";
	File trainFile = new File(origDataSrcName + ".train");
	File testFile = new File(origDataSrcName + ".test");

	/* Create the split files on demand. This used to be an "if (true)" block
	 * ending in System.exit(0), which terminated the JVM and left the actual
	 * test below unreachable. */
	if (!trainFile.exists() || !testFile.exists()) {
		Instances data;
		try (FileReader reader = new FileReader(new File(origDataSrcName))) {
			data = new Instances(reader);
		}
		data.setClassIndex(data.numAttributes() - 1);
		List<Instances> split = WekaUtil.getStratifiedSplit(data, 0, .7f);
		ArffSaver saver = new ArffSaver();
		saver.setInstances(split.get(0));
		saver.setFile(trainFile);
		saver.writeBatch();
		saver.setInstances(split.get(1));
		saver.setFile(testFile);
		saver.writeBatch();
	}

	MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(trainFile);
	mlplan.setTimeout(new Timeout(5, TimeUnit.MINUTES));
	mlplan.setLoggerName("testedalgorithm");
	long start = System.currentTimeMillis();
	Classifier c = mlplan.call();
	// Check the result before it is used, so a null yields a clear assertion failure.
	assertNotNull(c);
	System.out.println("Observed output: " + c + " after " + (System.currentTimeMillis() - start) + "ms. Now validating the model");

	/* check quality */
	Instances testData;
	try (FileReader reader = new FileReader(testFile)) {
		testData = new Instances(reader);
	}
	testData.setClassIndex(testData.numAttributes() - 1);
	Evaluation eval = new Evaluation(testData);
	eval.evaluateModel(c, testData);
	System.out.println(eval.toSummaryString());
}
 
Example 14
Source File: DataSetUtils.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Fetches a dataset from OpenML and pairs it with one matrix per instance.
 *
 * @param datasetID the OpenML id of the dataset to fetch
 * @return a DataSet holding the loaded instances (class index set to the last
 *         attribute) and the matrices produced by instanceToMatrixByDataSet
 * @throws Exception if the dataset cannot be downloaded, read or converted
 */
public static DataSet getDataSetByID(final int datasetID) throws Exception {
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(datasetID);
	File file = connector.datasetGet(ds);

	Instances data;
	// The Instances constructor parses the complete file, so the reader can be
	// released as soon as it returns.
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);

	List<INDArray> indArrayList = new LinkedList<>();
	for (Instance i : data) {
		indArrayList.add(instanceToMatrixByDataSet(i, datasetID));
	}

	return new DataSet(data, indArrayList);
}
 
Example 15
Source File: CnnTextEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Verifies that iterating the data yields ceil(numInstances / batchSize)
 * batches for several batch sizes.
 *
 * @throws Exception IO error.
 */
@Test
public void testBatches() throws Exception {

  // Data, with the last attribute as class
  final Instances data = makeData();
  final int lastAttribute = data.numAttributes() - 1;
  data.setClassIndex(lastAttribute);

  final int seed = 1;
  final int[] batchSizes = {1, 2, 5, 10};
  for (int size : batchSizes) {
    final int observed = countIterations(data, cteii, seed, size);
    final double exactBatches = data.numInstances() / ((double) size);
    final int wanted = (int) Math.ceil(exactBatches);
    Assert.assertEquals(wanted, observed);
  }
}
 
Example 16
Source File: MLPlanGraphGeneratorTest.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds an ML-Plan instance for the car dataset and exposes its graph
 * generator as the single test input, paired with the value 10000.
 *
 * @return a one-element list containing the graph generator and 10000
 * @throws IOException if the dataset cannot be read
 */
@Override
public List<Pair<IGraphGenerator<TFDNode, String>, Integer>> getGraphGenerators() throws IOException {

	/* extract graph generator from mlplan */
	Instances data;
	// The Instances constructor parses the complete file, so the reader can be
	// released as soon as it returns.
	try (FileReader reader = new FileReader("testrsc/car.arff")) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	MLPlan<IWekaClassifier> mlplan = new MLPlanWekaBuilder().withDataset(new WekaInstances(data)).build();
	IGraphGenerator<TFDNode, String> graphGenerator = mlplan.getSearchProblemInputGenerator().getGraphGenerator();

	/* generate the actual input for the test */
	List<Pair<IGraphGenerator<TFDNode, String>, Integer>> gg = new ArrayList<>();
	gg.add(new Pair<>(graphGenerator, 10000));
	return gg;
}
 
Example 17
Source File: MLCBMaD.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the transformed version of a single instance: the first row of
 * <code>compressedMatrix</code>, with every value below its class index set
 * to missing, merged with the feature part extracted from <code>x</code>.
 *
 * @param x the instance to transform
 * @return the single instance of the merged dataset, whose class index is
 *         set to <code>size</code>
 * @throws Exception if extracting the feature part fails
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
    // Dataset with the structure of x's dataset that contains only x.
    Instances single = new Instances(x.dataset());
    single.delete();
    single.add(x);

    Instances featurePart = this.extractPart(single, false);

    // Reduce a copy of the compressed matrix to its first row.
    Instances labelPart = new Instances(this.compressedMatrix);
    Instance firstRow = labelPart.instance(0);
    labelPart.delete();
    labelPart.add(firstRow);

    // Blank out everything in front of the class index.
    Instance placeholder = labelPart.instance(0);
    for (int att = 0; att < labelPart.classIndex(); att++) {
        placeholder.setMissing(att);
    }

    Instances merged = Instances.mergeInstances(labelPart, featurePart);
    merged.setClassIndex(this.size);

    return merged.instance(0);
}
 
Example 18
Source File: RacedIncrementalLogitBoost.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/** 
    * Performs a boosting iteration, returning a new model for the committee.
    * <p>
    * The class attribute of a copy of the data is replaced by a numeric
    * pseudo class. For each class j, every instance gets a pseudo class value
    * z and a weight derived from its current probability p, and one copy of
    * the base classifier is trained on the result.
    * 
    * @param data the data to boost on
    * @return the new model: m_NumClasses classifiers, one built per class
    * @throws Exception if anything goes wrong
    */
   protected Classifier[] boost(Instances data) throws Exception {
     
     // One fresh copy of the base classifier per class
     Classifier[] newModel = AbstractClassifier.makeCopies(m_Classifier, m_NumClasses);
     
     // Create a copy of the data with the class transformed into numeric
     Instances boostData = new Instances(data);
     boostData.deleteWithMissingClass();
     int numInstances = boostData.numInstances();
     
     // Temporarily unset the class index, then swap the class attribute for
     // the 'pseudo class' attribute at the same position and restore the index
     int classIndex = data.classIndex();
     boostData.setClassIndex(-1);
     boostData.deleteAttributeAt(classIndex);
     boostData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
     boostData.setClassIndex(classIndex);
     double [][] trainFs = new double [numInstances][m_NumClasses];
     double [][] trainYs = new double [numInstances][m_NumClasses];
     // trainYs[i][j] is 1 if the i-th instance with a non-missing class has
     // class j, and 0 otherwise. k walks the original data and skips the
     // instances with a missing class, which were removed from boostData.
     for (int j = 0; j < m_NumClasses; j++) {
for (int i = 0, k = 0; i < numInstances; i++, k++) {
  while (data.instance(k).classIsMissing()) k++;
  trainYs[i][j] = (data.instance(k).classValue() == j) ? 1 : 0;
}
     }
     
     // Evaluate / increment trainFs from the classifiers: for every model in
     // m_models add each class prediction minus the mean prediction, scaled
     // by (m_NumClasses-1) / m_NumClasses
     for (int x = 0; x < m_models.size(); x++) {
for (int i = 0; i < numInstances; i++) {
  double [] pred = new double [m_NumClasses];
  double predSum = 0;
  Classifier[] model = (Classifier[]) m_models.elementAt(x);
  for (int j = 0; j < m_NumClasses; j++) {
    pred[j] = model[j].classifyInstance(boostData.instance(i));
    predSum += pred[j];
  }
  // predSum now holds the mean prediction over all classes
  predSum /= m_NumClasses;
  for (int j = 0; j < m_NumClasses; j++) {
    trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses-1) 
      / m_NumClasses;
  }
}
     }

     // Train one classifier per class on its own pseudo class values
     for (int j = 0; j < m_NumClasses; j++) {

// Set instance pseudoclass and weights
for (int i = 0; i < numInstances; i++) {
  // presumably the current probability estimate for class j - TODO confirm against RtoP
  double p = RtoP(trainFs[i], j);
  Instance current = boostData.instance(i);
  double z, actual = trainYs[i][j];
  if (actual == 1) {
    z = 1.0 / p;
    if (z > Z_MAX) { // threshold
      z = Z_MAX;
    }
  } else if (actual == 0) {
    z = -1.0 / (1.0 - p);
    if (z < -Z_MAX) { // threshold
      z = -Z_MAX;
    }
  } else {
    // general form; trainYs only ever holds 0 or 1 (see above)
    z = (actual - p) / (p * (1 - p));
  }

  // the pseudo class value goes into the class attribute, the weight is
  // scaled by the number of instances
  double w = (actual - p) / z;
  current.setValue(classIndex, z);
  current.setWeight(numInstances * w);
}

// With resampling enabled, train on a sample drawn according to the
// instance weights instead of on the weighted data itself
Instances trainData = boostData;
if (m_UseResampling) {
  double[] weights = new double[boostData.numInstances()];
  for (int kk = 0; kk < weights.length; kk++) {
    weights[kk] = boostData.instance(kk).weight();
  }
  trainData = boostData.resampleWithWeights(m_RandomInstance, 
					    weights);
}

// Build the classifier
newModel[j].buildClassifier(trainData);
     }      
     
     return newModel;
   }
 
Example 19
Source File: CNode.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Main - run some tests.
 *
 * Loads a dataset, holds out its last instance, and for each node index in a
 * fixed order builds a CNode with SMO, prints its training data and the
 * transformed held-out instance.
 *
 * @param args args[0] is the path of the ARFF file, args[1] the value used
 *             as class index and as length of the label vector
 * @throws Exception if the data cannot be read or a node cannot be built
 */
public static void main(String args[]) throws Exception {
	Instances D;
	// The Instances constructor parses the complete file, so the reader can be
	// released as soon as it returns.
	try (FileReader reader = new FileReader(args[0])) {
		D = new Instances(reader);
	}
	// Hold out the last instance as the test instance.
	Instance x = D.lastInstance();
	D.remove(D.numInstances()-1);
	int L = Integer.parseInt(args[1]);
	D.setClassIndex(L);
	double y[] = new double[L];
	// Fixed node order and the parent indices of each node.
	int s[] = new int[]{1,0,2};
	int PA_J[][] = new int[][]{
		{},{},{0,1},
	};

	// MUST GO IN TREE ORDER !!
	for(int j : s) {
		int pa_j[] = PA_J[j];
		System.out.println("PARENTS = "+Arrays.toString(pa_j));
		System.out.println("**** TRAINING ***");
		CNode n = new CNode(j,null,pa_j);
		n.build(D,new SMO());
		System.out.println("============== D_"+j+" / class = "+n.T.classIndex()+" =");
		System.out.println(""+n.T);
		System.out.println("**** TESTING ****");
		Instance x_ = n.transform(x,y);
		System.out.println(""+x_);
		// Mark label j as set for the nodes that follow.
		y[j] = 1;
	}
}
 
Example 20
Source File: LabelWordVectors.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Derives the output structure: all attributes of the input format followed
 * by one attribute per numeric or nominal attribute of every lexicon
 * labeller, named "lexiconName-attributeName". Lexicon attributes of any
 * other type are skipped.
 *
 * @param inputFormat the structure of the incoming data
 * @return an empty dataset with the extended attribute list and the same
 *         relation name and class index as the input format
 * @throws Exception if initializing the lexicon dictionaries fails
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	ArrayList<Attribute> outputAtts = new ArrayList<Attribute>();

	// Start with every attribute of the input format
	final int inputCount = inputFormat.numAttributes();
	for (int idx = 0; idx < inputCount; idx++) {
		outputAtts.add(inputFormat.attribute(idx));
	}

	// The dictionaries of the lexicons are initialized only in the first batch
	if (!this.isFirstBatchDone()) {
		this.initializeDicts();
	}

	// Append the lexicon attributes, prefixed with the lexicon name
	for (ArffLexiconWordLabeller labeller : this.lexiconLabs) {
		for (Attribute lexAtt : labeller.getAttributes()) {
			if (lexAtt.isNumeric()) {
				outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name()));
				continue;
			}
			if (!lexAtt.isNominal()) {
				continue;
			}

			// Nominal attributes keep their list of values
			List<String> labels = new ArrayList<String>();
			final int labelCount = lexAtt.numValues();
			for (int v = 0; v < labelCount; v++) {
				labels.add(lexAtt.value(v));
			}
			outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name(), labels));
		}
	}

	Instances outputHeader = new Instances(inputFormat.relationName(), outputAtts, 0);

	// The class index carries over unchanged
	outputHeader.setClassIndex(inputFormat.classIndex());

	return outputHeader;
}