Java Code Examples for weka.core.converters.ConverterUtils.DataSource#getDataSet()
The following examples show how to use
weka.core.converters.ConverterUtils.DataSource#getDataSet().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example 1
Source File: TestWekaBayes.java From Java-Data-Analysis with MIT License | 8 votes |
public static void main(String[] args) throws Exception { // ConverterUtils.DataSource source = new ConverterUtils.DataSource("data/AnonFruit.arff"); DataSource source = new DataSource("data/AnonFruit.arff"); Instances train = source.getDataSet(); train.setClassIndex(3); // target attribute: (Sweet) //build model NaiveBayes model=new NaiveBayes(); model.buildClassifier(train); //use Instances test = train; Evaluation eval = new Evaluation(test); eval.evaluateModel(model,test); List <Prediction> predictions = eval.predictions(); int k = 0; for (Instance instance : test) { double actual = instance.classValue(); double prediction = eval.evaluateModelOnce(model, instance); System.out.printf("%2d.%4.0f%4.0f", ++k, actual, prediction); System.out.println(prediction != actual? " *": ""); } }
Example 2
Source File: SaveModel.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 7 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/SaveModel/segment-challenge.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes() - 1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 mytree = new J48(); mytree.setOptions(options); mytree.buildClassifier(dt); weka.core.SerializationHelper.write("/Users/admin/Documents/NetBeansProjects/SaveModel/myDT.model", mytree); } catch (Exception e) { System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 3
Source File: FilterAttribute.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 7 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff"); Instances dt = src.getDataSet(); String[] op = new String[]{"-R","2-4"}; Remove rmv = new Remove(); rmv.setOptions(op); rmv.setInputFormat(dt); Instances nd = Filter.useFilter(dt, rmv); ArffSaver s = new ArffSaver(); s.setInstances(nd); s.setFile(new File("fw.arff")); s.writeBatch(); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example 4
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/** Returns the Combined (text+POS) Representations. */
private Instances getComplex(String fileComplex) throws Exception {
    DataSource ds = new DataSource(fileComplex);
    Instances data = ds.getDataSet();
    data.setClassIndex(1);

    // Bag-of-bigrams vectorizer: keep essentially all terms, no TF/IDF weighting.
    StringToWordVector filter = new StringToWordVector();
    filter.setInputFormat(data);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(1);
    filter.setUseStoplist(false);
    filter.setTFTransform(false);
    filter.setIDFTransform(false);
    filter.setWordsToKeep(1000000000);

    // Tokenize the text into bigrams only.
    NGramTokenizer tokenizer = new NGramTokenizer();
    tokenizer.setNGramMinSize(2);
    tokenizer.setNGramMaxSize(2);
    filter.setTokenizer(tokenizer);

    return weka.filters.Filter.useFilter(data, filter);
}
Example 5
Source File: ExtrapolatedSaturationPointEvaluationTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
@Test public void testClassifierEvaluationAtSaturationPoint() throws Exception { // Load dataset from OpenML and create stratified split Instances dataset = null; OpenmlConnector client = new OpenmlConnector(); DataSetDescription description = client.dataGet(42); File file = client.datasetGet(description); DataSource source = new DataSource(file.getCanonicalPath()); dataset = source.getDataSet(); dataset.setClassIndex(dataset.numAttributes() - 1); Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute()); dataset.setClassIndex(targetAttribute.index()); this.createSplit(new WekaInstances(dataset), 0.8, 123l); // Test classifier evaluation at saturation point ExtrapolatedSaturationPointEvaluator evaluator = new ExtrapolatedSaturationPointEvaluator(new int[] { 8, 16, 64, 128 }, new SystematicSamplingFactory<>(), this.train, 0.7, new InversePowerLawExtrapolationMethod(), 123l, this.test, EClassificationPerformanceMeasure.ERRORRATE); evaluator.setEpsilon(0.0005d); double evaluationResult = evaluator.evaluate(new WekaClassifier(new SMO())); Assert.assertTrue(evaluationResult > 0 && evaluationResult <= 100); }
Example 6
Source File: LoadModel.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ J48 mytree = (J48) weka.core.SerializationHelper.read("/Users/admin/Documents/NetBeansProjects/LoadModel/myDT.model"); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/LoadModel/segment-test.arff"); Instances tdt = src1.getDataSet(); tdt.setClassIndex(tdt.numAttributes() - 1); System.out.println("ActualClass \t ActualValue \t PredictedValue \t PredictedClass"); for (int i = 0; i < tdt.numInstances(); i++) { String act = tdt.instance(i).stringValue(tdt.instance(i).numAttributes() - 1); double actual = tdt.instance(i).classValue(); Instance inst = tdt.instance(i); double predict = mytree.classifyInstance(inst); String pred = inst.toString(inst.numAttributes() - 1); System.out.println(act + " \t\t " + actual + " \t\t " + predict + " \t\t " + pred); } } catch(Exception e){ System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 7
Source File: AttribSelect.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff"); Instances dt = src.getDataSet(); AttributeSelection asel = new AttributeSelection(); CfsSubsetEval evl = new CfsSubsetEval(); GreedyStepwise sh = new GreedyStepwise(); asel.setEvaluator(evl); asel.setSearch(sh); asel.setInputFormat(dt); Instances nd = Filter.useFilter(dt, asel); ArffSaver as = new ArffSaver(); as.setInstances(nd); as.setFile(new File("weather-sel.arff")); as.writeBatch(); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example 8
Source File: Trainer.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/** Returns the text-based Representations. */
private Instances getText(String fileText) throws Exception {
    DataSource textSource = new DataSource(fileText);
    Instances data = textSource.getDataSet();
    data.setClassIndex(1);

    // Bag-of-words vectorizer: keep all terms, no stoplist, no TF/IDF weighting.
    StringToWordVector vectorizer = new StringToWordVector();
    vectorizer.setInputFormat(data);
    vectorizer.setLowerCaseTokens(true);
    vectorizer.setMinTermFreq(1);
    vectorizer.setUseStoplist(false);
    vectorizer.setTFTransform(false);
    vectorizer.setIDFTransform(false);
    vectorizer.setWordsToKeep(1000000000);

    // Use bigram tokens only.
    NGramTokenizer bigrams = new NGramTokenizer();
    bigrams.setNGramMinSize(2);
    bigrams.setNGramMaxSize(2);
    vectorizer.setTokenizer(bigrams);

    return weka.filters.Filter.useFilter(data, vectorizer);
}
Example 9
Source File: ClusterEval.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff"); Instances dt = src.getDataSet(); SimpleKMeans model = new SimpleKMeans(); model.setNumClusters(3); model.buildClusterer(dt); System.out.println(model); ClusterEvaluation eval = new ClusterEvaluation(); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff"); Instances tdt = src1.getDataSet(); eval.setClusterer(model); eval.evaluateClusterer(tdt); System.out.println(eval.clusterResultsToString()); System.out.println("# of clusters: " + eval.getNumClusters()); } catch(Exception e) { System.out.println(e.getMessage()); } }
Example 10
Source File: InversePowerLawExtrapolationTester.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds a learning-curve extrapolator over OpenML data set 42 using an
 * inverse power law model and a J48 base learner.
 *
 * @param xValues the anchor point sizes at which the curve is sampled
 * @return the configured extrapolator
 * @throws Exception if the data set cannot be loaded from OpenML
 */
private LearningCurveExtrapolator createExtrapolationMethod(final int[] xValues) throws Exception {
    Instances dataset;
    OpenmlConnector client = new OpenmlConnector();
    try {
        DataSetDescription description = client.dataGet(42);
        File file = client.datasetGet(description);
        DataSource source = new DataSource(file.getCanonicalPath());
        dataset = source.getDataSet();
        // Use the data set's declared target attribute as the class.
        // (The original first set the last attribute as class and immediately
        // overwrote it — that redundant call has been removed.)
        Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute());
        dataset.setClassIndex(targetAttribute.index());
    } catch (Exception e) {
        throw new IOException("Could not load data set from OpenML!", e);
    }
    return new LearningCurveExtrapolator(new InversePowerLawExtrapolationMethod(), new WekaClassifier(new J48()),
            new WekaInstances(dataset), 0.7d, xValues, new SimpleRandomSamplingFactory<>(), 1L);
}
Example 11
Source File: FileUtils.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reads all data set files in the given directory whose names start with
 * the given prefix (optionally excluding a postfix) into Weka
 * {@code Instances}, treating the last attribute as the class.
 *
 * @param dirPath path of the directory to scan
 * @param dataSetPrefix file-name prefix a data set file must have
 * @param excludePostfix file-name postfix to exclude; may be null
 * @return the loaded data sets; an empty list if any file fails to load
 *         or the directory cannot be listed
 */
public static List<Instances> readInstances(final String dirPath, final String dataSetPrefix, final String excludePostfix) {
    List<Instances> results = new ArrayList<>();
    File dir = new File(dirPath);
    File[] dataSetFiles = dir.listFiles((dir1, name) -> {
        if (excludePostfix != null) {
            return name.startsWith(dataSetPrefix) && !name.endsWith(excludePostfix);
        } else {
            return name.startsWith(dataSetPrefix);
        }
    });
    // listFiles returns null when dirPath does not denote a readable
    // directory; the original would NPE in the for-loop below.
    if (dataSetFiles == null) {
        logger.error("Could not list data set files in directory {}", dirPath);
        return results;
    }
    for (File file : dataSetFiles) {
        try {
            DataSource source = new DataSource(new FileInputStream(file));
            Instances insts = source.getDataSet();
            insts.setClassIndex(insts.numAttributes() - 1);
            results.add(insts);
        } catch (Exception e) {
            logger.error("Could not import data set. Reason: {}", e.getMessage());
            return new ArrayList<>();
        }
    }
    return results;
}
Example 12
Source File: WekaNeurophSample.java From NeurophFramework with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // create weka dataset from file DataSource dataSource = new DataSource("datasets/iris.arff"); Instances wekaDataset = dataSource.getDataSet(); wekaDataset.setClassIndex(4); // normalize dataset Normalize filter = new Normalize(); filter.setInputFormat(wekaDataset); wekaDataset = Filter.useFilter(wekaDataset, filter); // convert weka dataset to neuroph dataset DataSet neurophDataset = WekaDataSetConverter.convertWekaToNeurophDataset(wekaDataset, 4, 3); // convert back neuroph dataset to weka dataset Instances testWekaDataset = WekaDataSetConverter.convertNeurophToWekaDataset(neurophDataset); // print out all to compare System.out.println("Weka data set from file"); printDataSet(wekaDataset); System.out.println("Neuroph data set converted from Weka data set"); printDataSet(neurophDataset); System.out.println("Weka data set reconverted from Neuroph data set"); printDataSet(testWekaDataset); System.out.println("Testing WekaNeurophClassifier"); testNeurophWekaClassifier(wekaDataset); }
Example 13
Source File: TestWekaJ48.java From Java-Data-Analysis with MIT License | 5 votes |
public static void main(String[] args) throws Exception { DataSource source = new DataSource("data/AnonFruit.arff"); Instances instances = source.getDataSet(); instances.setClassIndex(3); // target attribute: (Sweet) J48 j48 = new J48(); // an extension of ID3 j48.setOptions(new String[]{"-U"}); // use unpruned tree j48.buildClassifier(instances); for (Instance instance : instances) { double prediction = j48.classifyInstance(instance); System.out.printf("%4.0f%4.0f%n", instance.classValue(), prediction); } }
Example 14
Source File: TestDataSource.java From Java-Data-Analysis with MIT License | 5 votes |
/**
 * Demonstrates loading an ARFF file with DataSource and inspecting an
 * attribute definition and individual instance values.
 *
 * @param args unused command line arguments
 * @throws Exception if the ARFF file cannot be read
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/fruit.arff");
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);

    // Definition of the third attribute.
    System.out.println(data.attribute(2));

    // Fourth instance and two of its string-valued attributes.
    Instance row = data.get(3);
    System.out.println(row);
    System.out.println(row.stringValue(0));
    System.out.println(row.stringValue(2));
}
Example 15
Source File: APICallClustererUPMiner.java From api-mining with GNU General Public License v3.0 | 5 votes |
/** * Cluster API call sequences as described in UPMiner * * @return Multimap of cluster IDs to API call sequences */ public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold) throws Exception { // Clusterer settings final HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // link type clusterer.setDebug(true); clusterer.setNumClusters(1); clusterer.setDistanceFunction(SeqSimilarity); clusterer.setDistanceIsBranchLength(false); // Read in API call seqs final DataSource source = new DataSource(arffFile); final Instances data = source.getDataSet(); // Cluster API call seqs clusterer.buildClusterer(data); // Assign seqs to clusters based on dendrogram final String newick = clusterer.graph().replace("Newick:", "") + ":0"; if (newick.equals("(no,clusters):0")) // Handle no clusters return HashMultimap.create(); final Multimap<Integer, String> clusters = NewickTreeParser.getClusters(newick, threshold); System.out.println("No. clusters: " + clusters.keySet().size()); final Multimap<Integer, String> assignments = HashMultimap.create(); for (int i = 0; i < data.numInstances(); i++) { for (final int id : clusters.keySet()) { if (clusters.get(id).contains(data.instance(i).stringValue(0))) assignments.put(id, data.instance(i).stringValue(1)); } } // showDendrogram(clusterer); return assignments; }
Example 16
Source File: Evaluation.java From meka with GNU General Public License v3.0 | 5 votes |
/** * loadDataset - load a dataset, given command line options specifying an arff file. * @param options command line options, specifying dataset filename * @param T set to 'T' if we want to load a test file (default 't': load train or train-test file) * @return the dataset */ public static Instances loadDataset(String options[], char T) throws Exception { Instances D = null; String filename = Utils.getOption(T, options); // Check for filename if (filename == null || filename.isEmpty()) throw new Exception("[Error] You did not specify a dataset!"); // Check for existence of file File file = new File(filename); if (!file.exists()) throw new Exception("[Error] File does not exist: " + filename); if (file.isDirectory()) throw new Exception("[Error] "+filename+ " points to a directory!"); try { DataSource source = new DataSource(filename); D = source.getDataSet(); } catch(Exception e) { e.printStackTrace(); throw new Exception("[Error] Failed to load Instances from file '"+filename+"'."); } return D; }
Example 17
Source File: DevelopClassifier.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 5 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/DevelopClassifier/vote.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes()-1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 tree = new J48(); tree.setOptions(options); tree.buildClassifier(dt); System.out.println(tree.getCapabilities().toString()); System.out.println(tree.graph()); //uncomment the following three lines of code for Naive Bayes NaiveBayes nb = new NaiveBayes(); nb.buildClassifier(dt); System.out.println(nb.getCapabilities().toString()); } catch(Exception e){ System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 18
Source File: AnchorpointsCreationTest.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
@Test public void anchorpointsAreCreatedAndHaveTheValues() throws IOException, InvalidAnchorPointsException, AlgorithmException, InterruptedException, ClassNotFoundException, DatasetCreationException { int[] xValues = new int[] { 2, 4, 8, 16, 32, 64 }; Instances dataset = null; OpenmlConnector client = new OpenmlConnector(); try { DataSetDescription description = client.dataGet(42); File file = client.datasetGet(description); DataSource source = new DataSource(file.getCanonicalPath()); dataset = source.getDataSet(); dataset.setClassIndex(dataset.numAttributes() - 1); Attribute targetAttribute = dataset.attribute(description.getDefault_target_attribute()); dataset.setClassIndex(targetAttribute.index()); } catch (Exception e) { throw new IOException("Could not load data set from OpenML!", e); } // final LearningCurveExtrapolationMethod extrapolationMethod, final ISupervisedLearner<I, D> learner, final D dataset, final double trainsplit, final int[] anchorPoints, // final ISamplingAlgorithmFactory<?, D, ? extends ASamplingAlgorithm<D>> samplingAlgorithmFactory, final long seed WekaInstances simpleDataset = new WekaInstances(dataset); LearningCurveExtrapolator extrapolator = new LearningCurveExtrapolator((x, y, ds) -> { Assert.assertArrayEquals(x, xValues); for (int i = 0; i < y.length; i++) { Assert.assertTrue(y[i] > 0.0d); } return null; }, new WekaClassifier(new J48()), simpleDataset, 0.7d, xValues, new SystematicSamplingFactory<>(), 1l); extrapolator.extrapolateLearningCurve(); }
Example 19
Source File: ModelEvaluation.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 4 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try { DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-challenge.arff"); Instances dt = src.getDataSet(); dt.setClassIndex(dt.numAttributes()- 1); String[] options = new String[4]; options[0] = "-C"; options[1] = "0.1"; options[2] = "-M"; options[3] = "2"; J48 mytree = new J48(); mytree.setOptions(options); mytree.buildClassifier(dt); Evaluation eval = new Evaluation(dt); Random rand = new Random(1); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ModelEvaluation/segment-test.arff"); Instances tdt = src1.getDataSet(); tdt.setClassIndex(tdt.numAttributes() - 1); eval.evaluateModel(mytree, tdt); System.out.println(eval.toSummaryString("Evaluation results:\n", false)); System.out.println("Correct % = " + eval.pctCorrect()); System.out.println("Incorrect % = " + eval.pctIncorrect()); System.out.println("kappa = " + eval.kappa()); System.out.println("MAE = " + eval.meanAbsoluteError()); System.out.println("RMSE = " + eval.rootMeanSquaredError()); System.out.println("RAE = " + eval.relativeAbsoluteError()); System.out.println("Precision = " + eval.precision(1)); System.out.println("Recall = " + eval.recall(1)); System.out.println("fMeasure = " + eval.fMeasure(1)); System.out.println(eval.toMatrixString("=== Overall Confusion Matrix ===")); } catch (Exception e) { System.out.println("Error!!!!\n" + e.getMessage()); } }
Example 20
Source File: ModifiedISACInstanceCollector.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Loads the default meta data set bundled on the classpath.
 *
 * @return the instances parsed from metaData_smallDataSets_computed.arff
 * @throws Exception if the resource is missing or cannot be parsed
 */
private static Instances loadDefaultInstances() throws Exception {
    InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("metaData_smallDataSets_computed.arff");
    // getResourceAsStream returns null if the resource is absent; fail fast
    // with a clear message instead of an opaque NPE inside DataSource.
    if (inputStream == null) {
        throw new IllegalStateException("Classpath resource metaData_smallDataSets_computed.arff not found");
    }
    DataSource source = new DataSource(inputStream);
    return source.getDataSet();
}