Java Code Examples for weka.core.converters.ConverterUtils.DataSource#getDataSet()
The following examples show how to use
weka.core.converters.ConverterUtils.DataSource#getDataSet().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example 1
Source File: TestWekaBayes.java From Java-Data-Analysis with MIT License | 8 votes |
public static void main(String[] args) throws Exception { // ConverterUtils.DataSource source = new ConverterUtils.DataSource("data/AnonFruit.arff"); DataSource source = new DataSource("data/AnonFruit.arff"); Instances train = source.getDataSet(); train.setClassIndex(3); // target attribute: (Sweet) //build model NaiveBayes model=new NaiveBayes(); model.buildClassifier(train); //use Instances test = train; Evaluation eval = new Evaluation(test); eval.evaluateModel(model,test); List <Prediction> predictions = eval.predictions(); int k = 0; for (Instance instance : test) { double actual = instance.classValue(); double prediction = eval.evaluateModelOnce(model, instance); System.out.printf("%2d.%4.0f%4.0f", ++k, actual, prediction); System.out.println(prediction != actual? " *": ""); } }
Example 2
Source File: SaveModel.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 7 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/SaveModel/segment-challenge.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes() - 1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 mytree = new J48(); mytree.setOptions(options); mytree.buildClassifier(dt); weka.core.SerializationHelper.write("/Users/admin/Documents/NetBeansProjects/SaveModel/myDT.model", mytree); } catch (Exception e) { System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 3
Source File: FilterAttribute.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 7 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff"); Instances dt = src.getDataSet(); String[] op = new String[]{"-R","2-4"}; Remove rmv = new Remove(); rmv.setOptions(op); rmv.setInputFormat(dt); Instances nd = Filter.useFilter(dt, rmv); ArffSaver s = new ArffSaver(); s.setInstances(nd); s.setFile(new File("fw.arff")); s.writeBatch(); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example 4
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/** Returns the Combined (text+POS) Representations. */
private Instances getComplex(String fileComplex) throws Exception {
    DataSource ds = new DataSource(fileComplex);
    Instances data = ds.getDataSet();
    data.setClassIndex(1);

    // Bag-of-bigrams vectorizer: keep essentially all terms, no TF/IDF weighting.
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(data);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(1);
    filter.setUseStoplist(false);
    filter.setTFTransform(false);
    filter.setIDFTransform(false);
    filter.setWordsToKeep(1000000000);

    // Tokenize the text into bigrams only.
    NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(2);
    tokenizer.setNGramMaxSize(2);
    filter.setTokenizer(tokenizer);

    return weka.filters.Filter.useFilter(data, filter);
}
Example 5
Source File: ExtrapolatedSaturationPointEvaluationTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
@Test public void testClassifierEvaluationAtSaturationPoint() throws Exception { // Load dataset from OpenML and create stratified split Instances dataset = null; OpenmlConnector client = new OpenmlConnector(); DataSetDescription description = client.dataGet(42); File file = client.datasetGet(description); DataSource source = new DataSource(file.getCanonicalPath()); dataset = source.getDataSet(); dataset.setClassIndex(dataset.numAttributes() - 1); Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute()); dataset.setClassIndex(targetAttribute.index()); this.createSplit(new WekaInstances(dataset), 0.8, 123l); // Test classifier evaluation at saturation point ExtrapolatedSaturationPointEvaluator evaluator = new ExtrapolatedSaturationPointEvaluator(new int[] { 8, 16, 64, 128 }, new SystematicSamplingFactory<>(), this.train, 0.7, new InversePowerLawExtrapolationMethod(), 123l, this.test, EClassificationPerformanceMeasure.ERRORRATE); evaluator.setEpsilon(0.0005d); double evaluationResult = evaluator.evaluate(new WekaClassifier(new SMO())); Assert.assertTrue(evaluationResult > 0 && evaluationResult <= 100); }
Example 6
Source File: LoadModel.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ J48 mytree = (J48) weka.core.SerializationHelper.read("/Users/admin/Documents/NetBeansProjects/LoadModel/myDT.model"); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/LoadModel/segment-test.arff"); Instances tdt = src1.getDataSet(); tdt.setClassIndex(tdt.numAttributes() - 1); System.out.println("ActualClass \t ActualValue \t PredictedValue \t PredictedClass"); for (int i = 0; i < tdt.numInstances(); i++) { String act = tdt.instance(i).stringValue(tdt.instance(i).numAttributes() - 1); double actual = tdt.instance(i).classValue(); Instance inst = tdt.instance(i); double predict = mytree.classifyInstance(inst); String pred = inst.toString(inst.numAttributes() - 1); System.out.println(act + " \t\t " + actual + " \t\t " + predict + " \t\t " + pred); } } catch(Exception e){ System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 7
Source File: AttribSelect.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff"); Instances dt = src.getDataSet(); AttributeSelection asel = new AttributeSelection(); CfsSubsetEval evl = new CfsSubsetEval(); GreedyStepwise sh = new GreedyStepwise(); asel.setEvaluator(evl); asel.setSearch(sh); asel.setInputFormat(dt); Instances nd = Filter.useFilter(dt, asel); ArffSaver as = new ArffSaver(); as.setInstances(nd); as.setFile(new File("weather-sel.arff")); as.writeBatch(); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example 8
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/** Returns the text-based Representations. */
private Instances getText(String fileText) throws Exception {
    DataSource textSource = new DataSource(fileText);
    Instances data = textSource.getDataSet();
    data.setClassIndex(1);

    // Bag-of-words vectorizer: keep all terms, no stoplist, no TF/IDF weighting.
    StringToWordVector vectorizer = new StringToWordVector();
    vectorizer.setInputFormat(data);
    vectorizer.setLowerCaseTokens(true);
    vectorizer.setMinTermFreq(1);
    vectorizer.setUseStoplist(false);
    vectorizer.setTFTransform(false);
    vectorizer.setIDFTransform(false);
    vectorizer.setWordsToKeep(1000000000);

    // Use bigram tokens only.
    NGramTokenizer bigrams = new NGramTokenizer();
    bigrams.setNGramMinSize(2);
    bigrams.setNGramMaxSize(2);
    vectorizer.setTokenizer(bigrams);

    return weka.filters.Filter.useFilter(data, vectorizer);
}
Example 9
Source File: ClusterEval.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff"); Instances dt = src.getDataSet(); SimpleKMeans model = new SimpleKMeans(); model.setNumClusters(3); model.buildClusterer(dt); System.out.println(model); ClusterEvaluation eval = new ClusterEvaluation(); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff"); Instances tdt = src1.getDataSet(); eval.setClusterer(model); eval.evaluateClusterer(tdt); System.out.println(eval.clusterResultsToString()); System.out.println("# of clusters: " + eval.getNumClusters()); } catch(Exception e) { System.out.println(e.getMessage()); } }
Example 10
Source File: InversePowerLawExtrapolationTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds a learning-curve extrapolator over OpenML data set 42 using an
 * inverse power law model and a J48 base learner.
 *
 * @param xValues the anchor point sizes at which the curve is sampled
 * @return the configured extrapolator
 * @throws Exception if the data set cannot be loaded from OpenML
 */
private LearningCurveExtrapolator createExtrapolationMethod(final int[] xValues) throws Exception {
    Instances dataset;
    OpenmlConnector client = new OpenmlConnector();
    try {
        DataSetDescription description = client.dataGet(42);
        File file = client.datasetGet(description);
        DataSource source = new DataSource(file.getCanonicalPath());
        dataset = source.getDataSet();
        // Use the data set's declared target attribute as the class.
        // (The original first set the last attribute as class and immediately
        // overwrote it — that redundant call has been removed.)
        Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute());
        dataset.setClassIndex(targetAttribute.index());
    } catch (Exception e) {
        throw new IOException("Could not load data set from OpenML!", e);
    }
    return new LearningCurveExtrapolator(new InversePowerLawExtrapolationMethod(), new WekaClassifier(new J48()),
            new WekaInstances(dataset), 0.7d, xValues, new SimpleRandomSamplingFactory<>(), 1L);
}
Example 11
Source File: FileUtils.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reads all data set files in the given directory whose names start with
 * the given prefix (optionally excluding a postfix) into Weka
 * {@code Instances}, treating the last attribute as the class.
 *
 * @param dirPath path of the directory to scan
 * @param dataSetPrefix file-name prefix a data set file must have
 * @param excludePostfix file-name postfix to exclude; may be null
 * @return the loaded data sets; an empty list if any file fails to load
 *         or the directory cannot be listed
 */
public static List<Instances> readInstances(final String dirPath, final String dataSetPrefix, final String excludePostfix) {
    List<Instances> results = new ArrayList<>();
    File dir = new File(dirPath);
    File[] dataSetFiles = dir.listFiles((dir1, name) -> {
        if (excludePostfix != null) {
            return name.startsWith(dataSetPrefix) && !name.endsWith(excludePostfix);
        } else {
            return name.startsWith(dataSetPrefix);
        }
    });
    // listFiles returns null when dirPath does not denote a readable
    // directory; the original would NPE in the for-loop below.
    if (dataSetFiles == null) {
        logger.error("Could not list data set files in directory {}", dirPath);
        return results;
    }
    for (File file : dataSetFiles) {
        try {
            DataSource source = new DataSource(new FileInputStream(file));
            Instances insts = source.getDataSet();
            insts.setClassIndex(insts.numAttributes() - 1);
            results.add(insts);
        } catch (Exception e) {
            logger.error("Could not import data set. Reason: {}", e.getMessage());
            return new ArrayList<>();
        }
    }
    return results;
}
Example 12
Source File: WekaNeurophSample.java From NeurophFramework with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // create weka dataset from file DataSource dataSource = new DataSource("datasets/iris.arff"); Instances wekaDataset = dataSource.getDataSet(); wekaDataset.setClassIndex(4); // normalize dataset Normalize filter = new Normalize(); filter.setInputFormat(wekaDataset); wekaDataset = Filter.useFilter(wekaDataset, filter); // convert weka dataset to neuroph dataset DataSet neurophDataset = WekaDataSetConverter.convertWekaToNeurophDataset(wekaDataset, 4, 3); // convert back neuroph dataset to weka dataset Instances testWekaDataset = WekaDataSetConverter.convertNeurophToWekaDataset(neurophDataset); // print out all to compare System.out.println("Weka data set from file"); printDataSet(wekaDataset); System.out.println("Neuroph data set converted from Weka data set"); printDataSet(neurophDataset); System.out.println("Weka data set reconverted from Neuroph data set"); printDataSet(testWekaDataset); System.out.println("Testing WekaNeurophClassifier"); testNeurophWekaClassifier(wekaDataset); }
Example 13
Source File: TestWekaJ48.java From Java-Data-Analysis with MIT License | 5 votes |
public static void main(String[] args) throws Exception { DataSource source = new DataSource("data/AnonFruit.arff"); Instances instances = source.getDataSet(); instances.setClassIndex(3); // target attribute: (Sweet) J48 j48 = new J48(); // an extension of ID3 j48.setOptions(new String[]{"-U"}); // use unpruned tree j48.buildClassifier(instances); for (Instance instance : instances) { double prediction = j48.classifyInstance(instance); System.out.printf("%4.0f%4.0f%n", instance.classValue(), prediction); } }
Example 14
Source File: TestDataSource.java From Java-Data-Analysis with MIT License | 5 votes |
/**
 * Demonstrates loading an ARFF file with DataSource and inspecting an
 * attribute definition and individual instance values.
 *
 * @param args unused command line arguments
 * @throws Exception if the ARFF file cannot be read
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/fruit.arff");
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);

    // Definition of the third attribute.
    System.out.println(data.attribute(2));

    // Fourth instance and two of its string-valued attributes.
    Instance row = data.get(3);
    System.out.println(row);
    System.out.println(row.stringValue(0));
    System.out.println(row.stringValue(2));
}
Example 15
Source File: APICallClustererUPMiner.java From api-mining with GNU General Public License v3.0 | 5 votes |
/** * Cluster API call sequences as described in UPMiner * * @return Multimap of cluster IDs to API call sequences */ public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold) throws Exception { // Clusterer settings final HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // link type clusterer.setDebug(true); clusterer.setNumClusters(1); clusterer.setDistanceFunction(SeqSimilarity); clusterer.setDistanceIsBranchLength(false); // Read in API call seqs final DataSource source = new DataSource(arffFile); final Instances data = source.getDataSet(); // Cluster API call seqs clusterer.buildClusterer(data); // Assign seqs to clusters based on dendrogram final String newick = clusterer.graph().replace("Newick:", "") + ":0"; if (newick.equals("(no,clusters):0")) // Handle no clusters return HashMultimap.create(); final Multimap<Integer, String> clusters = NewickTreeParser.getClusters(newick, threshold); System.out.println("No. clusters: " + clusters.keySet().size()); final Multimap<Integer, String> assignments = HashMultimap.create(); for (int i = 0; i < data.numInstances(); i++) { for (final int id : clusters.keySet()) { if (clusters.get(id).contains(data.instance(i).stringValue(0))) assignments.put(id, data.instance(i).stringValue(1)); } } // showDendrogram(clusterer); return assignments; }
Example 16
Source File: Evaluation.java From meka with GNU General Public License v3.0 | 5 votes |
/** * loadDataset - load a dataset, given command line options specifying an arff file. * @param options command line options, specifying dataset filename * @param T set to 'T' if we want to load a test file (default 't': load train or train-test file) * @return the dataset */ public static Instances loadDataset(String options[], char T) throws Exception { Instances D = null; String filename = Utils.getOption(T, options); // Check for filename if (filename == null || filename.isEmpty()) throw new Exception("[Error] You did not specify a dataset!"); // Check for existence of file File file = new File(filename); if (!file.exists()) throw new Exception("[Error] File does not exist: " + filename); if (file.isDirectory()) throw new Exception("[Error] "+filename+ " points to a directory!"); try { DataSource source = new DataSource(filename); D = source.getDataSet(); } catch(Exception e) { e.printStackTrace(); throw new Exception("[Error] Failed to load Instances from file '"+filename+"'."); } return D; }
Example 17
Source File: DevelopClassifier.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 5 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/DevelopClassifier/vote.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes()-1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 tree = new J48(); tree.setOptions(options); tree.buildClassifier(dt); System.out.println(tree.getCapabilities().toString()); System.out.println(tree.graph()); //uncomment the following three lines of code for Naive Bayes NaiveBayes nb = new NaiveBayes(); nb.buildClassifier(dt); System.out.println(nb.getCapabilities().toString()); } catch(Exception e){ System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 18
Source File: AnchorpointsCreationTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Test public void anchorpointsAreCreatedAndHaveTheValues() throws IOException, InvalidAnchorPointsException, AlgorithmException, InterruptedException, ClassNotFoundException, DatasetCreationException { int[] xValues = new int[] { 2, 4, 8, 16, 32, 64 }; Instances dataset = null; OpenmlConnector client = new OpenmlConnector(); try { DataSetDescription description = client.dataGet(42); File file = client.datasetGet(description); DataSource source = new DataSource(file.getCanonicalPath()); dataset = source.getDataSet(); dataset.setClassIndex(dataset.numAttributes() - 1); Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute()); dataset.setClassIndex(targetAttribute.index()); } catch (Exception e) { throw new IOException("Could not load data set from OpenML!", e); } // final LearningCurveExtrapolationMethod extrapolationMethod, final ISupervisedLearner<I, D> learner, final D dataset, final double trainsplit, final int[] anchorPoints, // final ISamplingAlgorithmFactory<?, D, ? extends ASamplingAlgorithm<D>> samplingAlgorithmFactory, final long seed WekaInstances simpleDataset = new WekaInstances(dataset); LearningCurveExtrapolator extrapolator = new LearningCurveExtrapolator((x, y, ds) -> { Assert.assertArrayEquals(x, xValues); for (int i = 0; i < y.length; i++) { Assert.assertTrue(y[i] > 0.0d); } return null; }, new WekaClassifier(new J48()), simpleDataset, 0.7d, xValues, new SystematicSamplingFactory<>(), 1l); extrapolator.extrapolateLearningCurve(); }
Example 19
Source File: ModelEvaluation.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 4 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-challenge.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes()- 1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 mytree = new J48(); mytree.setOptions(options); mytree.buildClassifier(dt); Evaluation eval = new Evaluation(dt); Random rand = new Random(1); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-test.arff"); Instances tdt = src1.getDataSet(); tdt.setClassIndex(tdt.numAttributes() - 1); eval.evaluateModel(mytree, tdt); System.out.println(eval.toSummaryString("Evaluation results:\n", false)); System.out.println("Correct % = " + eval.pctCorrect()); System.out.println("Incorrect % = " + eval.pctIncorrect()); System.out.println("kappa = " + eval.kappa()); System.out.println("MAE = " + eval.meanAbsoluteError()); System.out.println("RMSE = " + eval.rootMeanSquaredError()); System.out.println("RAE = " + eval.relativeAbsoluteError()); System.out.println("Precision = " + eval.precision(1)); System.out.println("Recall = " + eval.recall(1)); System.out.println("fMeasure = " + eval.fMeasure(1)); System.out.println(eval.toMatrixString("=== Overall Confusion Matrix ===")); } catch (Exception e) { System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 20
Source File: ModifiedISACInstanceCollector.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Loads the default meta data set bundled on the classpath.
 *
 * @return the instances parsed from metaData_smallDataSets_computed.arff
 * @throws Exception if the resource is missing or cannot be parsed
 */
private static Instances loadDefaultInstances() throws Exception {
    InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("metaData_smallDataSets_computed.arff");
    // getResourceAsStream returns null if the resource is absent; fail fast
    // with a clear message instead of an opaque NPE inside DataSource.
    if (inputStream == null) {
        throw new IllegalStateException("Classpath resource metaData_smallDataSets_computed.arff not found");
    }
    DataSource source = new DataSource(inputStream);
    return source.getDataSet();
}