weka.core.converters.ArffSaver Java Examples

The following examples show how to use weka.core.converters.ArffSaver. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FilterAttribute.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 7 votes vote down vote up
/**
 * Loads an ARFF dataset, removes attributes 2-4 with the Remove filter,
 * and writes the reduced dataset to fw.arff.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try{
        // NOTE(review): hard-coded input path — adjust for your machine.
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();

        // Drop attributes 2 through 4 (1-based index range).
        String[] op = new String[]{"-R","2-4"};
        Remove rmv = new Remove();
        rmv.setOptions(op);
        rmv.setInputFormat(dt);
        Instances nd = Filter.useFilter(dt, rmv);

        // Persist the filtered data as ARFF.
        ArffSaver s = new ArffSaver();
        s.setInstances(nd);
        s.setFile(new File("fw.arff"));
        s.writeBatch();
    }
    catch(Exception e){
        // Print the full stack trace: getMessage() alone can be null and
        // loses the failure location.
        e.printStackTrace();
    }
}
 
Example #2
Source File: DatasetLoading.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Simple util to save datasets out. Useful for shapelet transform.
 *
 * Appends the .arff extension when the caller leaves it off and rounds
 * numeric output to MAX_DECIMAL_PLACES.
 *
 * @param dataSet  the instances to write
 * @param fileName destination path, with or without the .arff extension
 */
public static void saveDataset(Instances dataSet, String fileName) {
    try {
        ArffSaver saver = new ArffSaver();
        saver.setMaxDecimalPlaces(MAX_DECIMAL_PLACES);
        saver.setInstances(dataSet);
        // Append the extension only when it is missing.
        String target = fileName.endsWith(".arff") ? fileName : fileName + ".arff";
        saver.setFile(new File(target));
        saver.writeBatch();
    } catch (IOException ex) {
        // Fixed message: the original concatenated the exception directly
        // onto the text with no separator.
        System.out.println("Error saving transformed dataset: " + ex);
    }
}
 
Example #3
Source File: Datasets.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Loads the numeric weather dataset, prints a summary of it, and saves a
 * copy to weather.arff in the working directory.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        // NOTE(review): hard-coded input path — adjust for your machine.
        DataSource src = new DataSource("/Users/admin/wekafiles/data/weather.numeric.arff");
        Instances dt = src.getDataSet();

        System.out.println(dt.toSummaryString());

        // Write the dataset back out as ARFF.
        ArffSaver as = new ArffSaver();
        as.setInstances(dt);
        as.setFile(new File("weather.arff"));
        as.writeBatch();
    }
    catch(Exception e)
    {
        // Print the full stack trace: getMessage() alone can be null and
        // loses the failure location.
        e.printStackTrace();
    }
}
 
Example #4
Source File: AttribSelect.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Runs CFS subset evaluation with greedy stepwise search as an attribute
 * selection filter over the weather dataset and saves the reduced data to
 * weather-sel.arff.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        // NOTE(review): hard-coded input path — adjust for your machine.
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();

        // Attribute selection = evaluator (CFS) + search strategy (greedy stepwise).
        AttributeSelection asel = new AttributeSelection();
        CfsSubsetEval evl = new CfsSubsetEval();
        GreedyStepwise sh = new GreedyStepwise();

        asel.setEvaluator(evl);
        asel.setSearch(sh);
        asel.setInputFormat(dt);

        // Apply the selection and persist the result.
        Instances nd = Filter.useFilter(dt, asel);
        ArffSaver as = new ArffSaver();
        as.setInstances(nd);
        as.setFile(new File("weather-sel.arff"));
        as.writeBatch();
    }
    catch(Exception e){
        // Print the full stack trace: getMessage() alone can be null and
        // loses the failure location.
        e.printStackTrace();
    }
}
 
Example #5
Source File: WekaTee.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * When the score schema changes, rewrite the ARFF log file from scratch.
 * This is really rare in practice, so don't bother optimizing it.
 *
 * @param names the answer score names; one attribute is created per name
 * @param start_time run start timestamp, embedded in the output file name
 * @throws IOException if the saver cannot write the file
 */
private static void dump_from_scratch(Collection<String> names, Timestamp start_time) throws IOException {
	// Record how many columns this file was written with, so a later size
	// change can be detected and trigger another full rewrite.
	saved_schema_version = names.size();
	
	// Build the ARFF structure: one attribute per score name.
	FastVector attributes = new FastVector();
	// Answer score names
	for (String name: names)
		attributes.addElement(new Attribute(name));
	Instances data = new Instances("Watsonsim captured question stream", attributes, 0);
	
	// Save the results to a file, streaming rows incrementally.
	saver = new ArffSaver();
	saver.setStructure(data);
	saver.setRetrieval(Saver.INCREMENTAL);
	saver.setFile(new File("data/weka-log." + start_time + ".arff"));
	// Re-emit every previously captured row under the new schema.
	for (Score row : dataset)
		saver.writeIncremental(new Instance(1.0, row.getEach(names)));
}
 
Example #6
Source File: UtilsDataset.java    From apogen with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a headerless CSV file to ARFF, writing the result next to the
 * input with the extension swapped from .csv to .arff.
 *
 * @param filename path of the CSV file to convert
 * @throws Exception if loading or saving fails
 */
private void convertCSVtoArff(String filename) throws Exception {

		// Read the CSV; -H tells the loader the file has no header row.
		CSVLoader csv = new CSVLoader();
		csv.setSource(new File(filename));
		csv.setOptions(new String[] { "-H" });

		Instances rows = csv.getDataSet();

		// Write the ARFF copy alongside the source file.
		ArffSaver arff = new ArffSaver();
		arff.setInstances(rows);
		arff.setFile(new File(filename.replace(".csv", ".arff")));
		arff.writeBatch();
	}
 
Example #7
Source File: PrepareClassAttributes.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point: loads a dataset, applies the
 * MekaClassAttributes filter with the given attribute indices, and saves
 * the filtered result as ARFF.
 *
 * @param args input file, attribute indices, output file
 * @throws Exception if loading, filtering or saving fails
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    throw new IllegalArgumentException("Required parameters: <input> <attribute_indices> <output>");
  }

  System.out.println("Loading input data: " + args[0]);
  Instances data = DataSource.read(args[0]);

  System.out.println("Applying filter using indices: " + args[1]);
  MekaClassAttributes classAttrs = new MekaClassAttributes();
  classAttrs.setAttributeIndices(args[1]);
  classAttrs.setInputFormat(data);
  Instances filtered = Filter.useFilter(data, classAttrs);

  System.out.println("Saving filtered data to: " + args[2]);
  ArffSaver sink = new ArffSaver();
  sink.setFile(new File(args[2]));
  DataSink.write(sink, filtered);
}
 
Example #8
Source File: SelectWords.java    From hlta with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Filters the instance attributes with a Remove filter and writes the
 * result to an ARFF file.
 *
 * @param output  path of the ARFF file to write the filtered data to
 * @param options raw Remove filter options; index 1 holds the attribute
 *                indices used when {@code inverse} is set
 * @param inverse when true, keep only the listed attributes instead of
 *                removing them
 * @throws Exception if filtering or saving fails
 */
private void removeWords(String output, String[] options, boolean inverse) throws Exception
{
    Remove filter = new Remove();

    if (inverse) {
        // Keep mode: select the listed indices and invert the removal.
        filter.setAttributeIndices(options[1]);
        filter.setInvertSelection(true);
    } else {
        // Remove mode: pass the raw options straight through.
        filter.setOptions(options);
    }

    filter.setInputFormat(m_instances);
    Instances filtered = Filter.useFilter(m_instances, filter);

    ArffSaver writer = new ArffSaver();
    writer.setInstances(filtered);
    writer.setFile(new File(output));
    writer.writeBatch();
}
 
Example #9
Source File: SemEvalToArff.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Main method for testing this class.
 *
 * @param args args[0] is the path of the input dataset, args[1] the name
 *             of the target ARFF file
 */
static public void main(String args[]) {

	// Silently do nothing on a wrong argument count, as before.
	if (args.length != 2) {
		return;
	}

	TweetCollectionToArff converter = new SemEvalToArff();

	try {
		Instances tweets = converter.createDataset(args[0]);

		ArffSaver writer = new ArffSaver();
		writer.setInstances(tweets);
		writer.setFile(new File(args[1]));
		writer.writeBatch();
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #10
Source File: HumanCodedToArff.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Main method for testing this class.
 *
 * @param args args[0] is the path of the input dataset, args[1] the name
 *             of the target ARFF file
 */
static public void main(String args[]) {

	// Silently do nothing on a wrong argument count, as before.
	if (args.length != 2) {
		return;
	}

	TweetCollectionToArff converter = new HumanCodedToArff();

	try {
		Instances tweets = converter.createDataset(args[0]);

		ArffSaver writer = new ArffSaver();
		writer.setInstances(tweets);
		writer.setFile(new File(args[1]));
		writer.writeBatch();
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #11
Source File: SpecPragmaticCreateDataset_posteriori_10.java    From TableDisentangler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a WEKA dataset from the database tables whose ids are listed in
 * {@code table_array} and writes it to spptest10.arff.
 *
 * @param table_array ids of the ArtTable rows to process
 */
public void ProcessTables(int[] table_array)
{
	DataBase();
	int execCount = 0;
	try {
		// Turn "[1, 2, 3]" from Arrays.toString into "(1, 2, 3)" for the
		// SQL IN clause. The values are ints, so no injection risk here.
		String SQL = "SELECT * from ArtTable where HasXML='yes' and idTable in "+Arrays.toString(table_array);
		SQL = SQL.replace("[", "(").replace("]", ")");
		Statement st = conn.createStatement();
		Instances instances = CreateInstances();
		rs = st.executeQuery(SQL);
		while (rs.next()) {
			Instance iExample = processTable(rs.getInt(1));
			instances.add(iExample);

			// Cycle the connection every 10000 rows.
			// NOTE(review): conn.close() invalidates the open ResultSet on
			// most JDBC drivers — confirm this branch is actually exercised.
			execCount++;
			if (execCount > 10000) {
				conn.close();
				DataBase();
				execCount = 0;
			}
		}
		System.out.println(instances.toString());
		// Persist the assembled dataset.
		ArffSaver saver = new ArffSaver();
		saver.setInstances(instances);
		saver.setFile(new File("spptest10.arff"));
		saver.writeBatch();
	} catch (Exception ex) {
		ex.printStackTrace();
	}
}
 
Example #12
Source File: SpecPragmaticCreateDataset_posteriori.java    From TableDisentangler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a WEKA dataset from up to 200 random database tables of the given
 * pragmatic type and writes it to spptest.arff.
 *
 * @param tableType the specPragmatic value to select tables by
 */
public void ProcessTables(String tableType)
{
	DataBase();
	int execCount = 0;
	try {
		// Bind tableType as a parameter instead of concatenating it into
		// the SQL string (avoids injection and quoting issues).
		String SQL = "SELECT * from ArtTable where HasXML='yes' and specPragmatic=? order by RAND() limit 200";
		java.sql.PreparedStatement st = conn.prepareStatement(SQL);
		st.setString(1, tableType);
		Instances instances = CreateInstances();
		rs = st.executeQuery();
		while (rs.next()) {
			Instance iExample = processTable(rs.getInt(1));
			instances.add(iExample);

			// Cycle the connection every 10000 rows.
			// NOTE(review): conn.close() invalidates the open ResultSet on
			// most JDBC drivers — confirm this branch is actually exercised.
			execCount++;
			if (execCount > 10000) {
				conn.close();
				DataBase();
				execCount = 0;
			}
		}
		System.out.println(instances.toString());
		// Persist the assembled dataset.
		ArffSaver saver = new ArffSaver();
		saver.setInstances(instances);
		saver.setFile(new File("spptest.arff"));
		saver.writeBatch();
	} catch (Exception ex) {
		ex.printStackTrace();
	}
}
 
Example #13
Source File: Trainer.java    From sentiment-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the dataset to {@code <folder>/train/<type>.arff}; I/O failures
 * are printed rather than propagated.
 *
 * @param dataset the instances to persist
 * @param type    base name of the output file
 */
public void saveFile(Instances dataset, String type){
	ArffSaver writer = new ArffSaver();
	writer.setInstances(dataset);
	try {
		File target = new File(folder+"train/"+type+".arff");
		writer.setFile(target);
		writer.writeBatch();
	} catch (IOException ioe) {
		ioe.printStackTrace();
	}
}
 
Example #14
Source File: DataSetShuffler.java    From anthelion with Apache License 2.0 5 votes vote down vote up
/**
 * Reads an ARFF stream, shuffles all instances in memory, and writes the
 * shuffled data to a new ARFF file.
 *
 * @param file       input ARFF file
 * @param classindex index of the class attribute for the stream reader
 * @param outputFile destination ARFF file
 * @throws IOException if writing the output fails
 */
public static void shuffle(String file, int classindex, String outputFile)
		throws IOException {

	// Stream the input so we can report progress while loading.
	ArffFileStream stream = new ArffFileStream(file, classindex);
	InstancesHeader header = stream.getHeader();

	// Pull every instance into memory — shuffling needs the full list.
	ArrayList<Instance> buffer = new ArrayList<Instance>();
	System.out.println("Loading data ...");
	int count = 0;
	while (stream.hasMoreInstances()) {
		count++;
		if (count % 10000 == 0) {
			System.out.println("Read " + count + " items.");
		}
		buffer.add(stream.nextInstance());
	}
	System.out.println("Read all items ... shuffling.");
	Collections.shuffle(buffer);

	// Rebuild an Instances container with the same attributes as the input.
	ArrayList<Attribute> attrs = new ArrayList<Attribute>();
	for (int i = 0; i < header.numAttributes(); i++) {
		attrs.add(header.attribute(i));
	}
	Instances shuffled = new Instances("reduced", attrs, 2);
	for (Instance inst : buffer) {
		shuffled.add(inst);
		inst.setDataset(shuffled);
	}

	System.out.println("Writing output ...");
	ArffSaver writer = new ArffSaver();
	writer.setInstances(shuffled);
	writer.setFile(new File(outputFile));
	writer.writeBatch();
	System.out.println("Done.");
}
 
Example #15
Source File: MLPlan4BigFileInputTester.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
@Test
public void test() throws Exception {
	// MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File("testrsc/openml/41103.arff"));

	String origDataSrcName = "testrsc/openml/1240.arff";

	// NOTE(review): developer toggle — while this stays `if (true)`, the
	// method only generates the .train/.test split files and then exits the
	// JVM, so everything below the block is unreachable. Flip to false to
	// run the actual ML-Plan test against previously generated splits.
	if (true) {
		// Load the full dataset; the last attribute is treated as the class.
		Instances data = new Instances(new FileReader(new File(origDataSrcName)));
		data.setClassIndex(data.numAttributes() - 1);
		// Split via WekaUtil.getStratifiedSplit(data, 0, .7f) — presumably
		// seed 0 and a 70% train fraction; confirm against WekaUtil.
		List<Instances> split = WekaUtil.getStratifiedSplit(data, 0, .7f);
		// Write both portions next to the source file.
		ArffSaver saver = new ArffSaver();
		saver.setInstances(split.get(0));
		saver.setFile(new File(origDataSrcName + ".train"));
		saver.writeBatch();
		saver.setInstances(split.get(1));
		saver.setFile(new File(origDataSrcName + ".test"));
		saver.writeBatch();
		System.exit(0);
	}

	// Train ML-Plan on the .train split with a 5-minute timeout.
	MLPlan4BigFileInput mlplan = new MLPlan4BigFileInput(new File(origDataSrcName + ".train"));
	mlplan.setTimeout(new Timeout(5, TimeUnit.MINUTES));
	mlplan.setLoggerName("testedalgorithm");
	long start = System.currentTimeMillis();
	Classifier c = mlplan.call();
	System.out.println("Observed output: " + c + " after " + (System.currentTimeMillis() - start) + "ms. Now validating the model");

	/* check quality: evaluate the returned classifier on the .test split */
	Instances testData = new Instances(new FileReader(new File(origDataSrcName + ".test")));
	testData.setClassIndex(testData.numAttributes() - 1);
	Evaluation eval = new Evaluation(testData);
	eval.evaluateModel(c, testData);
	System.out.println(eval.toSummaryString());

	assertNotNull(c);
}
 
Example #16
Source File: FileUtils.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Writes the given dataset to an ARFF file at the given path.
 *
 * @param dataSet  the instances to persist
 * @param filePath destination file path
 * @throws IOException if the file cannot be written
 */
public static void saveSingleInstances(final Instances dataSet, final String filePath) throws IOException {
	ArffSaver writer = new ArffSaver();
	writer.setInstances(dataSet);
	writer.setFile(new File(filePath));
	writer.writeBatch();
}
 
Example #17
Source File: GenerateArff.java    From MonitorClient with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given instances to an ARFF file at the given path; I/O
 * failures are printed rather than propagated.
 *
 * @param instances the dataset to save
 * @param path      destination file path
 */
public void generateArffFile(Instances instances, String path) {
    ArffSaver writer = new ArffSaver();
    writer.setInstances(instances);
    try {
        File target = new File(path);
        writer.setFile(target);
        writer.writeBatch();
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
}
 
Example #18
Source File: DataIOFile.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Saves {@code data} to the ARFF file at {@code path}, logging the
 * destination first.
 *
 * @param path destination file path
 * @param data the dataset to write
 * @throws IOException if the file cannot be written
 */
public static void saveDataToArffFile(String path, Instances data) throws IOException{
	System.out.println("\nSaving to file " + path + "...");
	ArffSaver writer = new ArffSaver();
	writer.setInstances(data);
	writer.setFile(new File(path));
	writer.writeBatch();
}
 
Example #19
Source File: Csv2arff.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 5 votes vote down vote up
/**
 * Converts weather.csv to weather.arff.
 *
 * @param args the command line arguments (unused)
 * @throws Exception if loading or saving fails
 */
public static void main(String[] args) throws Exception {
    // Load the CSV input.
    CSVLoader csv = new CSVLoader();
    csv.setSource(new File("/Users/admin/Documents/NetBeansProjects/Arff2CSV/weather.csv"));
    Instances rows = csv.getDataSet();

    // Write it back out in ARFF format.
    ArffSaver arff = new ArffSaver();
    arff.setInstances(rows);
    arff.setFile(new File("weather.arff"));
    arff.writeBatch();
}
 
Example #20
Source File: TransformExperiments.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Transforms the train/test splits with the given filter (under the
 * experiment's contract settings) and writes both transformed splits as
 * ARFF files, then emits any additional transform data.
 *
 * @param expSettings            experiment configuration (dataset name etc.)
 * @param train                  training split
 * @param test                   test split
 * @param transformer            the filter to apply
 * @param fullWriteLocation      directory prefix for the output ARFF files
 * @param additionalDataFilePath where extra transform data is written
 * @throws Exception if transforming or writing fails
 */
public static void runExperiment(ExperimentalArguments expSettings, Instances train, Instances test, SimpleBatchFilter transformer, String fullWriteLocation, String additionalDataFilePath) throws Exception{

        // Apply the transform to both splits in one call.
        Instances[] transforms = setContractDataAndProcess(expSettings, train, test, transformer);

        String trainPath = fullWriteLocation + expSettings.datasetName +"_TRAIN.arff";
        String testPath = fullWriteLocation + expSettings.datasetName +"_TEST.arff";

        // Reuse one saver for both writes, retargeting it between batches.
        ArffSaver writer = new ArffSaver();

        writer.setInstances(transforms[0]);
        writer.setFile(new File(trainPath));
        writer.writeBatch();

        writer.setInstances(transforms[1]);
        writer.setFile(new File(testPath));
        writer.writeBatch();

        writeAdditionalTransformData(expSettings, transformer, additionalDataFilePath);
}