Java Code Examples for weka.core.converters.ConverterUtils.DataSource#getDataSet()

The following examples show how to use weka.core.converters.ConverterUtils.DataSource#getDataSet(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestWekaBayes.java    From Java-Data-Analysis with MIT License 8 votes vote down vote up
/**
 * Trains a Naive Bayes classifier on the AnonFruit data set and prints, for
 * each training instance, the actual vs. predicted class value; a trailing
 * "*" marks a misclassification.
 *
 * NOTE(review): the model is evaluated on its own training data, which
 * overestimates accuracy — acceptable for a demo, not for real evaluation.
 */
public static void main(String[] args) throws Exception {
        DataSource source = new DataSource("data/AnonFruit.arff");
        Instances train = source.getDataSet();
        train.setClassIndex(3);  // target attribute: (Sweet)

        // build model
        NaiveBayes model = new NaiveBayes();
        model.buildClassifier(train);

        // use: score every instance of the (training) test set
        Instances test = train;
        Evaluation eval = new Evaluation(test);
        eval.evaluateModel(model, test);
        // (An unused List<Prediction> from eval.predictions() was removed.)
        int k = 0;
        for (Instance instance : test) {
            double actual = instance.classValue();
            double prediction = eval.evaluateModelOnce(model, instance);
            System.out.printf("%2d.%4.0f%4.0f", ++k, actual, prediction);
            System.out.println(prediction != actual? " *": "");
        }
    }
 
Example 2
Source File: SaveModel.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 7 votes vote down vote up
/**
 * Builds a J48 decision tree on the segment-challenge data and serializes
 * the trained model to disk for later reuse.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        // Load the training data; the last attribute is the class.
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/SaveModel/segment-challenge.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes() - 1);

        // J48 options: confidence factor 0.1, minimum 2 instances per leaf.
        String[] options = {"-C", "0.1", "-M", "2"};
        J48 mytree = new J48();
        mytree.setOptions(options);
        mytree.buildClassifier(dt);

        // Persist the trained tree.
        weka.core.SerializationHelper.write("/Users/admin/Documents/NetBeansProjects/SaveModel/myDT.model", mytree);
    } catch (Exception e) {
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
 
Example 3
Source File: FilterAttribute.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 7 votes vote down vote up
/**
 * Removes attributes 2-4 from the weather data set and writes the filtered
 * instances out to a new ARFF file.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        // Load the source data set.
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();

        // Configure the Remove filter: drop attribute range 2-4.
        Remove rmv = new Remove();
        rmv.setOptions(new String[]{"-R", "2-4"});
        rmv.setInputFormat(dt);
        Instances nd = Filter.useFilter(dt, rmv);

        // Save the reduced data set.
        ArffSaver s = new ArffSaver();
        s.setInstances(nd);
        s.setFile(new File("fw.arff"));
        s.writeBatch();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
 
Example 4
Source File: Trainer.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/** Returns the Combined (text+POS) Representations. */
private Instances getComplex(String fileComplex) throws Exception{
	// Load the combined (text + POS) ARFF file; attribute index 1 is the class.
	DataSource ds = new DataSource(fileComplex);
	Instances data = ds.getDataSet();
	data.setClassIndex(1);

	// Bag-of-words filter: lower-cased bigrams, no stoplist, raw counts
	// (no TF/IDF), keeping effectively all words.
	StringToWordVector filter = new StringToWordVector();
	filter.setInputFormat(data);
	filter.setLowerCaseTokens(true);
	filter.setMinTermFreq(1);
	filter.setUseStoplist(false);
	filter.setTFTransform(false);
	filter.setIDFTransform(false);
	filter.setWordsToKeep(1000000000);

	// Tokenize into bigrams only (min size == max size == 2).
	NGramTokenizer tokenizer = new NGramTokenizer();
	tokenizer.setNGramMinSize(2);
	tokenizer.setNGramMaxSize(2);
	filter.setTokenizer(tokenizer);

	return weka.filters.Filter.useFilter(data, filter);
}
 
Example 5
Source File: ExtrapolatedSaturationPointEvaluationTester.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
@Test
public void testClassifierEvaluationAtSaturationPoint() throws Exception {
	// Load dataset 42 from OpenML and create a stratified train/test split.
	OpenmlConnector client = new OpenmlConnector();
	DataSetDescription description = client.dataGet(42);
	File file = client.datasetGet(description);
	DataSource source = new DataSource(file.getCanonicalPath());
	Instances dataset = source.getDataSet();
	// Use the dataset's declared default target attribute as the class.
	// (A previous interim setClassIndex(numAttributes() - 1) call was
	// redundant — it was immediately overwritten here — and was removed.)
	Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute());
	dataset.setClassIndex(targetAttribute.index());
	this.createSplit(new WekaInstances(dataset), 0.8, 123L);

	// Test classifier evaluation at the extrapolated saturation point.
	ExtrapolatedSaturationPointEvaluator evaluator = new ExtrapolatedSaturationPointEvaluator(new int[] { 8, 16, 64, 128 }, new SystematicSamplingFactory<>(), this.train, 0.7,
			new InversePowerLawExtrapolationMethod(), 123L, this.test, EClassificationPerformanceMeasure.ERRORRATE);
	evaluator.setEpsilon(0.0005d);
	double evaluationResult = evaluator.evaluate(new WekaClassifier(new SMO()));
	Assert.assertTrue(evaluationResult > 0 && evaluationResult <= 100);
}
 
Example 6
Source File: LoadModel.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Loads a serialized J48 model and applies it to a test set, printing the
 * actual and predicted class for every instance (both as label and as
 * numeric class-value).
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try{
        J48 mytree = (J48) weka.core.SerializationHelper.read("/Users/admin/Documents/NetBeansProjects/LoadModel/myDT.model");
        
        DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/LoadModel/segment-test.arff");
        Instances tdt = src1.getDataSet();
        tdt.setClassIndex(tdt.numAttributes() - 1);
        
        System.out.println("ActualClass \t ActualValue \t PredictedValue \t PredictedClass");
        for (int i = 0; i < tdt.numInstances(); i++) {
            Instance inst = tdt.instance(i);
            String act = inst.stringValue(tdt.classIndex());
            double actual = inst.classValue();
            double predict = mytree.classifyInstance(inst);
            // BUG FIX: the original printed the instance's own (actual) class
            // label in this column; the predicted label must be looked up
            // from the predicted class index instead.
            String pred = tdt.classAttribute().value((int) predict);
            System.out.println(act + " \t\t " + actual + " \t\t " + predict + " \t\t " + pred);
        }
    }
    catch(Exception e){
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
 
Example 7
Source File: AttribSelect.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Performs CFS-subset attribute selection with a greedy stepwise search on
 * the weather data and saves the reduced data set as ARFF.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();

        // Attribute-selection filter: CFS evaluator + greedy stepwise search.
        AttributeSelection asel = new AttributeSelection();
        asel.setEvaluator(new CfsSubsetEval());
        asel.setSearch(new GreedyStepwise());
        asel.setInputFormat(dt);

        // Apply the filter and persist the result.
        Instances nd = Filter.useFilter(dt, asel);
        ArffSaver as = new ArffSaver();
        as.setInstances(nd);
        as.setFile(new File("weather-sel.arff"));
        as.writeBatch();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
 
Example 8
Source File: Trainer.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/** Returns the text-based Representations. */
private Instances getText(String fileText) throws Exception{
	// Load the raw-text ARFF; the class label is attribute index 1.
	DataSource ds = new DataSource(fileText);
	Instances data = ds.getDataSet();
	data.setClassIndex(1);

	// Bigram tokenizer (min size == max size == 2).
	NGramTokenizer bigrams = new NGramTokenizer();
	bigrams.setNGramMinSize(2);
	bigrams.setNGramMaxSize(2);

	// String-to-word-vector: lower-cased tokens, no stoplist, plain term
	// counts (no TF/IDF transform), effectively unlimited vocabulary.
	StringToWordVector filter = new StringToWordVector();
	filter.setInputFormat(data);
	filter.setLowerCaseTokens(true);
	filter.setMinTermFreq(1);
	filter.setUseStoplist(false);
	filter.setTFTransform(false);
	filter.setIDFTransform(false);
	filter.setWordsToKeep(1000000000);
	filter.setTokenizer(bigrams);

	Instances newData = weka.filters.Filter.useFilter(data, filter);
	return newData;
}
 
Example 9
Source File: ClusterEval.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Builds a 3-cluster SimpleKMeans model on the weather training data and
 * evaluates the clustering against a separate test file.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        // Train a k-means clusterer (k = 3) on the training data.
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff");
        Instances dt = src.getDataSet();
        SimpleKMeans model = new SimpleKMeans();
        model.setNumClusters(3);
        model.buildClusterer(dt);
        System.out.println(model);

        // Evaluate the trained model on the held-out test data.
        DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff");
        Instances tdt = src1.getDataSet();
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(model);
        eval.evaluateClusterer(tdt);

        System.out.println(eval.clusterResultsToString());
        System.out.println("# of clusters: " + eval.getNumClusters());
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
 
Example 10
Source File: InversePowerLawExtrapolationTester.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Loads OpenML dataset 42 and builds an inverse-power-law learning-curve
 * extrapolator (J48 learner, simple random sampling) over the given anchors.
 *
 * @param xValues sample-size anchor points for the learning curve
 * @return the configured extrapolator
 * @throws Exception if the data set cannot be loaded or prepared
 */
private LearningCurveExtrapolator createExtrapolationMethod(final int[] xValues) throws Exception {
	Instances dataset = null;
	OpenmlConnector client = new OpenmlConnector();
	try {
		DataSetDescription description = client.dataGet(42);
		File file = client.datasetGet(description);
		DataSource source = new DataSource(file.getCanonicalPath());
		dataset = source.getDataSet();
		// Use the declared default target attribute as the class. (The former
		// interim setClassIndex(numAttributes() - 1) was redundant — it was
		// immediately overwritten here — and has been removed.)
		Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute());
		dataset.setClassIndex(targetAttribute.index());
	} catch (Exception e) {
		throw new IOException("Could not load data set from OpenML!", e);
	}

	return new LearningCurveExtrapolator(new InversePowerLawExtrapolationMethod(), new WekaClassifier(new J48()), new WekaInstances(dataset), 0.7d, xValues, new SimpleRandomSamplingFactory<>(), 1L);
}
 
Example 11
Source File: FileUtils.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads all data sets in the given directory whose file names start with
 * {@code dataSetPrefix} and (if {@code excludePostfix} is non-null) do not
 * end with it. The last attribute of each loaded data set is used as the
 * class attribute.
 *
 * @param dirPath directory to scan for data-set files
 * @param dataSetPrefix required file-name prefix
 * @param excludePostfix optional file-name suffix to exclude (may be null)
 * @return the loaded data sets; an empty list if the directory cannot be
 *         listed or if any file fails to load
 */
public static List<Instances> readInstances(final String dirPath, final String dataSetPrefix, final String excludePostfix) {
	List<Instances> results = new ArrayList<>();

	File dir = new File(dirPath);
	File[] dataSetFiles = dir.listFiles((dir1, name) -> {
		if (excludePostfix != null) {
			return name.startsWith(dataSetPrefix) && !name.endsWith(excludePostfix);
		} else {
			return name.startsWith(dataSetPrefix);
		}
	});
	// BUG FIX: File.listFiles() returns null (not an empty array) when the
	// path does not exist or is not a directory; guard against the NPE the
	// for-each loop would otherwise throw.
	if (dataSetFiles == null) {
		logger.error("Could not list data set files in directory {}", dirPath);
		return results;
	}
	for (File file : dataSetFiles) {
		try {
			DataSource source = new DataSource(new FileInputStream(file));
			Instances insts = source.getDataSet();
			insts.setClassIndex(insts.numAttributes() - 1);

			results.add(insts);

		} catch (Exception e) {
			logger.error("Could not import data set. Reason: {}", e.getMessage());
			return new ArrayList<>();
		}
	}

	return results;
}
 
Example 12
Source File: WekaNeurophSample.java    From NeurophFramework with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates round-trip conversion between Weka and Neuroph data sets on
 * the iris data, prints all three views for comparison, then runs the
 * WekaNeurophClassifier test.
 */
public static void main(String[] args) throws Exception {

        // Load the iris data; attribute index 4 is the class.
        DataSource dataSource = new DataSource("datasets/iris.arff");
        Instances wekaDataset = dataSource.getDataSet();
        wekaDataset.setClassIndex(4);

        // Normalize the data set.
        Normalize filter = new Normalize();
        filter.setInputFormat(wekaDataset);
        wekaDataset = Filter.useFilter(wekaDataset, filter);

        // Weka -> Neuroph (4 inputs, 3 outputs) and back again.
        DataSet neurophDataset = WekaDataSetConverter.convertWekaToNeurophDataset(wekaDataset, 4, 3);
        Instances testWekaDataset = WekaDataSetConverter.convertNeurophToWekaDataset(neurophDataset);

        // Print all three views so they can be compared by eye.
        System.out.println("Weka data set from file");
        printDataSet(wekaDataset);

        System.out.println("Neuroph data set converted from Weka data set");
        printDataSet(neurophDataset);

        System.out.println("Weka data set reconverted from Neuroph data set");
        printDataSet(testWekaDataset);

        System.out.println("Testing WekaNeurophClassifier");
        testNeurophWekaClassifier(wekaDataset);
    }
 
Example 13
Source File: TestWekaJ48.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Trains an unpruned J48 tree on the AnonFruit data and prints actual vs.
 * predicted class values for every training instance.
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/AnonFruit.arff");
    Instances instances = source.getDataSet();
    instances.setClassIndex(3);  // target attribute: (Sweet)

    J48 j48 = new J48();  // an extension of ID3
    j48.setOptions(new String[]{"-U"});  // use unpruned tree
    j48.buildClassifier(instances);

    // Print "actual predicted" per instance, scored on the training data.
    for (Instance instance : instances) {
        System.out.printf("%4.0f%4.0f%n",
                instance.classValue(), j48.classifyInstance(instance));
    }
}
 
Example 14
Source File: TestDataSource.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Loads the fruit data set and prints one attribute definition plus
 * selected values of a single instance — a minimal DataSource demo.
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/fruit.arff");

    Instances instances = source.getDataSet();
    instances.setClassIndex(instances.numAttributes() - 1);
    System.out.println(instances.attribute(2));

    // Inspect the fourth instance (index 3).
    Instance instance = instances.get(3);
    System.out.println(instance);
    System.out.println(instance.stringValue(0));
    System.out.println(instance.stringValue(2));
}
 
Example 15
Source File: APICallClustererUPMiner.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Cluster API call sequences as described in UPMiner
 *
 * @param arffFile ARFF file containing the API call sequences to cluster
 * @param threshold dendrogram cut threshold used to form flat clusters
 * @return Multimap of cluster IDs to API call sequences
 */
public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold)
		throws Exception {

	// Clusterer settings: complete-linkage hierarchical clustering using a
	// custom sequence-similarity distance (SeqSimilarity, defined elsewhere).
	// numClusters = 1 builds a single dendrogram; we cut it ourselves below.
	final HierarchicalClusterer clusterer = new HierarchicalClusterer();
	clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // link type
	clusterer.setDebug(true);
	clusterer.setNumClusters(1);
	clusterer.setDistanceFunction(SeqSimilarity);
	clusterer.setDistanceIsBranchLength(false);

	// Read in API call seqs
	final DataSource source = new DataSource(arffFile);
	final Instances data = source.getDataSet();

	// Cluster API call seqs
	clusterer.buildClusterer(data);

	// Assign seqs to clusters based on dendrogram. Strip the "Newick:"
	// prefix and append ":0" so the graph string parses as a rooted Newick tree.
	final String newick = clusterer.graph().replace("Newick:", "") + ":0";
	if (newick.equals("(no,clusters):0")) // Handle no clusters
		return HashMultimap.create();
	// Cut the dendrogram at the given threshold to obtain flat clusters.
	final Multimap<Integer, String> clusters = NewickTreeParser.getClusters(newick, threshold);
	System.out.println("No. clusters: " + clusters.keySet().size());
	// For every instance whose attribute-0 string appears in a cluster's
	// contents, record that instance's attribute-1 string under the cluster id.
	final Multimap<Integer, String> assignments = HashMultimap.create();
	for (int i = 0; i < data.numInstances(); i++) {
		for (final int id : clusters.keySet()) {
			if (clusters.get(id).contains(data.instance(i).stringValue(0)))
				assignments.put(id, data.instance(i).stringValue(1));
		}
	}

	// showDendrogram(clusterer);

	return assignments;
}
 
Example 16
Source File: Evaluation.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * loadDataset - load a dataset, given command line options specifying an arff file.
 * @param	options	command line options, specifying dataset filename
 * @param	T		set to 'T' if we want to load a test file (default 't': load train or train-test file)
 * @return	the dataset
 * @throws	Exception if no filename was given, the file is missing or is a
 *			directory, or the instances cannot be parsed from the file
 */
public static Instances loadDataset(String options[], char T) throws Exception {

	Instances D = null;
	String filename = Utils.getOption(T, options);

	// Check for filename
	if (filename == null || filename.isEmpty())
		throw new Exception("[Error] You did not specify a dataset!");

	// Check for existence of file
	File file = new File(filename);
	if (!file.exists())
		throw new Exception("[Error] File does not exist: " + filename);
	if (file.isDirectory())
		throw new Exception("[Error] "+filename+ " points to a directory!");

	try {
		DataSource source = new DataSource(filename);
		D = source.getDataSet();
	} catch(Exception e) {
		// BUG FIX: chain the original exception as the cause instead of
		// discarding it (the old printStackTrace + bare rethrow lost the
		// root cause for callers).
		throw new Exception("[Error] Failed to load Instances from file '"+filename+"'.", e);
	}

	return D;
}
 
Example 17
Source File: DevelopClassifier.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 5 votes vote down vote up
/**
 * Trains a J48 decision tree and a Naive Bayes model on the vote data,
 * printing each classifier's capabilities (and the tree's graph).
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/DevelopClassifier/vote.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes() - 1);

        // J48 with confidence factor 0.1 and at least 2 instances per leaf.
        J48 tree = new J48();
        tree.setOptions(new String[]{"-C", "0.1", "-M", "2"});
        tree.buildClassifier(dt);
        System.out.println(tree.getCapabilities().toString());
        System.out.println(tree.graph());

        // Naive Bayes on the same data, for comparison.
        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(dt);
        System.out.println(nb.getCapabilities().toString());
    } catch (Exception e) {
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
 
Example 18
Source File: AnchorpointsCreationTest.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Verifies that learning-curve extrapolation is invoked with exactly the
 * requested anchor points and that every anchor yields a positive score.
 */
@Test
public void anchorpointsAreCreatedAndHaveTheValues() throws IOException, InvalidAnchorPointsException, AlgorithmException, InterruptedException, ClassNotFoundException, DatasetCreationException {
	int[] xValues = new int[] { 2, 4, 8, 16, 32, 64 };
	Instances dataset = null;
	OpenmlConnector client = new OpenmlConnector();
	try {
		DataSetDescription description = client.dataGet(42);
		File file = client.datasetGet(description);
		DataSource source = new DataSource(file.getCanonicalPath());
		dataset = source.getDataSet();
		// Use the declared default target attribute as the class. (The former
		// interim setClassIndex(numAttributes() - 1) was redundant — it was
		// immediately overwritten here — and has been removed.)
		Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute());
		dataset.setClassIndex(targetAttribute.index());
	} catch (Exception e) {
		throw new IOException("Could not load data set from OpenML!", e);
	}

	WekaInstances simpleDataset = new WekaInstances(dataset);
	LearningCurveExtrapolator extrapolator = new LearningCurveExtrapolator((x, y, ds) -> {
		// BUG FIX: JUnit's assertArrayEquals takes (expected, actual); the
		// original had the arguments swapped, which would produce a
		// misleading failure message.
		Assert.assertArrayEquals(xValues, x);
		for (int i = 0; i < y.length; i++) {
			Assert.assertTrue(y[i] > 0.0d);
		}
		return null;
	}, new WekaClassifier(new J48()), simpleDataset, 0.7d, xValues, new SystematicSamplingFactory<>(), 1L);
	extrapolator.extrapolateLearningCurve();
}
 
Example 19
Source File: ModelEvaluation.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 4 votes vote down vote up
/**
 * Trains a J48 tree on the segment-challenge data and reports a full set of
 * evaluation metrics (accuracy, kappa, error measures, precision/recall/F,
 * confusion matrix) against the separate segment-test set.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-challenge.arff");
        Instances dt = src.getDataSet();
        dt.setClassIndex(dt.numAttributes() - 1);

        // J48 with confidence factor 0.1 and at least 2 instances per leaf.
        String[] options = new String[4];
        options[0] = "-C";
        options[1] = "0.1";
        options[2] = "-M";
        options[3] = "2";
        J48 mytree = new J48();
        mytree.setOptions(options);
        mytree.buildClassifier(dt);

        Evaluation eval = new Evaluation(dt);
        // (An unused Random(1) local was removed; it was a leftover from a
        // cross-validation variant of this example.)

        DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-test.arff");
        Instances tdt = src1.getDataSet();
        tdt.setClassIndex(tdt.numAttributes() - 1);

        eval.evaluateModel(mytree, tdt);

        System.out.println(eval.toSummaryString("Evaluation results:\n", false));
        System.out.println("Correct % = " + eval.pctCorrect());
        System.out.println("Incorrect % = " + eval.pctIncorrect());
        System.out.println("kappa = " + eval.kappa());
        System.out.println("MAE = " + eval.meanAbsoluteError());
        System.out.println("RMSE = " + eval.rootMeanSquaredError());
        System.out.println("RAE = " + eval.relativeAbsoluteError());
        System.out.println("Precision = " + eval.precision(1));
        System.out.println("Recall = " + eval.recall(1));
        System.out.println("fMeasure = " + eval.fMeasure(1));
        System.out.println(eval.toMatrixString("=== Overall Confusion Matrix ==="));
    } catch (Exception e) {
        System.out.println("Error!!!!\n" + e.getMessage());
    }
}
 
Example 20
Source File: ModifiedISACInstanceCollector.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Loads the bundled default meta-data ARFF file
 * ("metaData_smallDataSets_computed.arff") from the classpath.
 *
 * @return the default instances
 * @throws Exception if the resource cannot be parsed
 * @throws IllegalStateException if the resource is missing from the classpath
 */
private static Instances loadDefaultInstances() throws Exception {
	InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("metaData_smallDataSets_computed.arff");
	// getResourceAsStream returns null when the resource is absent; fail with
	// a clear message instead of an opaque NPE from inside DataSource.
	if (inputStream == null) {
		throw new IllegalStateException("Classpath resource metaData_smallDataSets_computed.arff not found");
	}
	DataSource source = new DataSource(inputStream);
	return source.getDataSet();
}