weka.clusterers.ClusterEvaluation Java Examples

The following examples show how to use weka.clusterers.ClusterEvaluation. You can vote up the examples you like and vote down the ones you don't, and you can reach the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: ClusterEval.java — from Hands-On-Artificial-Intelligence-with-Java-for-Beginners (MIT License), 6 votes
/**
 * Builds a three-cluster SimpleKMeans model on a training ARFF file, evaluates
 * it on a separate test ARFF file, and prints the model, the evaluation
 * summary and the number of clusters to standard output.
 *
 * @param args optional command line arguments: {@code args[0]} overrides the
 *             training ARFF path and {@code args[1]} overrides the test ARFF
 *             path; when omitted, the built-in default paths are used
 */
public static void main(String[] args) {
    int argCount = (args == null) ? 0 : args.length;
    String trainPath = argCount > 0
            ? args[0]
            : "/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff";
    String testPath = argCount > 1
            ? args[1]
            : "/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff";

    try {
        // Train the clusterer.
        Instances trainData = new DataSource(trainPath).getDataSet();
        SimpleKMeans model = new SimpleKMeans();
        model.setNumClusters(3);
        model.buildClusterer(trainData);
        System.out.println(model);

        // Evaluate the trained clusterer on the held-out data.
        Instances testData = new DataSource(testPath).getDataSet();
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(model);
        eval.evaluateClusterer(testData);

        System.out.println(eval.clusterResultsToString());
        System.out.println("# of clusters: " + eval.getNumClusters());
    } catch (Exception e) {
        // e.getMessage() can be null and hides the exception type, so report
        // the whole exception on stderr rather than a bare message on stdout.
        System.err.println("Cluster evaluation failed: " + e);
        e.printStackTrace();
    }
}
 
Example #2
Source File: WekaClassesToClusterTest.java — from Java-Data-Science-Cookbook (MIT License), 6 votes
/**
 * Builds an EM clusterer on a copy of the {@code weather} data set from which
 * the class attribute has been removed, then evaluates that clusterer against
 * the full {@code weather} data set and prints the evaluation results.
 *
 * <p>Failures are reported on standard error; they are not rethrown, so the
 * method keeps its exception-free signature.
 */
public void generateClassToCluster(){
	Remove filter = new Remove();
	// Remove takes 1-based attribute indices, classIndex() is 0-based.
	filter.setAttributeIndices("" + (weather.classIndex() + 1));
	try {
		filter.setInputFormat(weather);
		Instances dataClusterer = Filter.useFilter(weather, filter);
		clusterer = new EM();
		clusterer.buildClusterer(dataClusterer);
		ClusterEvaluation eval = new ClusterEvaluation();
		eval.setClusterer(clusterer);
		eval.evaluateClusterer(weather);

		System.out.println(eval.clusterResultsToString());
	} catch (Exception e) {
		// Previously swallowed silently, which made a failed run look like an
		// empty result. Surface the problem while still not propagating it.
		System.err.println("Classes-to-clusters evaluation failed: " + e);
	}
}
 
Example #3
Source File: WekaClusterers.java — from apogen (Apache License 2.0), 6 votes
/**
 * Runs WEKA SimpleKMeans (or KMeans++, depending on {@code init}) on an ARFF
 * file through the option-driven {@code ClusterEvaluation.evaluateClusterer}
 * entry point and parses the textual result.
 *
 * @param filename    path of the ARFF file, passed as the {@code -t} option
 * @param numClusters number of clusters to search for, passed as the
 *                    {@code -N} option and forwarded to the output parser
 * @param init        initialization method, passed as the {@code -init} option
 * @return the structure produced by {@code parseKMeansOutput} from the
 *         evaluation output
 * @throws Exception if the evaluation or the parsing fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKmeans(String filename, String numClusters, String init)
		throws Exception {
	// Option/value pairs; -I caps the iterations at 100 and -c selects the
	// first attribute as the class attribute.
	String[] options = {
		"-t", filename,
		"-init", init,
		"-N", numClusters,
		"-I", "100",
		"-c", "first"
	};

	SimpleKMeans kMeans = new SimpleKMeans();
	String evaluationOutput = ClusterEvaluation.evaluateClusterer(kMeans, options);
	return parseKMeansOutput(evaluationOutput, numClusters);
}
 
Example #4
Source File: WekaClusterers.java — from apogen (Apache License 2.0), 6 votes
/**
 * Runs K-medoids on an ARFF file through the option-driven
 * {@code ClusterEvaluation.evaluateClusterer} entry point and parses the
 * textual result.
 *
 * @param filename    path of the ARFF file, passed as the {@code -t} option
 * @param numClusters number of clusters to search for, passed as the
 *                    {@code -N} option and forwarded to the output parser
 * @param distance    flag handed unchanged to the {@code KMedoids}
 *                    constructor, which defines its meaning
 * @return the structure produced by {@code parseKMeansOutput} from the
 *         evaluation output
 * @throws Exception if the evaluation or the parsing fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKMedoids(String filename, String numClusters,
		boolean distance) throws Exception {

	// Option/value pairs; -c selects the first attribute as the class attribute.
	String[] options = {
		"-t", filename,
		"-c", "first",
		"-N", numClusters
	};

	String s = ClusterEvaluation.evaluateClusterer(new KMedoids(distance), options);

	// The K-medoids output is parsed with the same parser as the k-means output.
	return parseKMeansOutput(s, numClusters);
}
 
Example #5
Source File: EvaluationUtils.java — from AILibs (GNU Affero General Public License v3.0), 6 votes
/**
 * Clusters the given instances with the default clusterer of a
 * {@code FilteredClusterer} (cast to SimpleKMeans), using as many clusters as
 * the class attribute has values, and scores the result with
 * {@code predictAccuracy}.
 *
 * @param insts labelled instances; the class attribute is removed before the
 *              clusterer is built, while the evaluation runs on the labelled
 *              data
 * @return the value computed by {@code predictAccuracy} from the
 *         classes-to-clusters mapping and the cluster assignments
 * @throws Exception if filtering, clustering or evaluation fails
 */
private static double performClustering(final Instances insts) throws Exception {
	logger.debug("Starting cluster evaluation...");

	FilteredClusterer filteredClusterer = new FilteredClusterer();

	// Build an unlabelled copy: Remove expects 1-based indices.
	Remove classRemover = new Remove();
	classRemover.setAttributeIndices(String.valueOf(insts.classIndex() + 1));
	classRemover.setInputFormat(insts);
	Instances unlabelled = Filter.useFilter(insts, classRemover);

	// One cluster per class value.
	SimpleKMeans kMeans = (SimpleKMeans) filteredClusterer.getClusterer();
	String clusterCount = String.valueOf(insts.classAttribute().numValues());
	kMeans.setOptions(new String[] { "-N", clusterCount });

	filteredClusterer.buildClusterer(unlabelled);

	ClusterEvaluation evaluation = new ClusterEvaluation();
	evaluation.setClusterer(filteredClusterer);
	evaluation.evaluateClusterer(insts);

	int[] classesToClusters = evaluation.getClassesToClusters();
	double[] assignments = evaluation.getClusterAssignments();
	return predictAccuracy(insts, classesToClusters, assignments);
}
 
Example #6
Source File: TMAPoints.java — from orbit-image-analysis (GNU General Public License v3.0), 6 votes
/**
 * Tries every cluster count in {@code [start, end]} and returns the one whose
 * EM model yields the lowest BIC score.
 *
 * @param clusterer EM clusterer that is reconfigured and rebuilt for each
 *                  candidate count
 * @param instances data used both to build and to evaluate each model
 * @param start     first cluster count to try; also the result when no
 *                  candidate scores below positive infinity
 * @param end       last cluster count to try (inclusive)
 * @return the cluster count with the smallest BIC; ties keep the smaller count
 * @throws Exception if building or evaluating a model fails
 */
private int guessNumClusters(EM clusterer, Instances instances, int start, int end) throws Exception {
    ClusterEvaluation evaluation = new ClusterEvaluation();
    double lowestScore = Double.POSITIVE_INFINITY;
    int bestCount = start;

    for (int candidate = start; candidate <= end; candidate++) {
        clusterer.setNumClusters(candidate);
        clusterer.buildClusterer(instances);

        evaluation.setClusterer(clusterer);
        evaluation.evaluateClusterer(instances);

        double score = bic(evaluation.getLogLikelihood(), candidate, instances.numInstances());
        logger.trace("numCluster " + candidate + " -> BIC: " + score);

        // Strict comparison: an equal score does not replace the earlier count.
        if (score < lowestScore) {
            lowestScore = score;
            bestCount = candidate;
            logger.trace("bestNum: " + bestCount);
        }
    }

    return bestCount;
}
 
Example #7
Source File: EvaluationUtils.java — from AILibs (GNU Affero General Public License v3.0), 4 votes
/**
 * Evaluates clustering quality on a stratified sample of the given instances,
 * once without a kernel and once per kernel returned by
 * {@code getKernelsWithInstances}, running the evaluations on a fixed-size
 * thread pool.
 *
 * <p>The thread pool is always shut down before this method returns or throws,
 * so its worker threads are not leaked.
 *
 * @param instances  labelled data; only the first part of a stratified split
 *                   (seed 42, portion {@code kernelSplitPortion}) is clustered
 * @param numThreads size of the thread pool used for the evaluations
 * @return the value computed by {@code evaluateFutures} from all submitted
 *         evaluations
 * @throws InterruptedException if the calling thread is interrupted while
 *                              tasks are being submitted
 * @throws Exception            if splitting or evaluating the futures fails
 */
public static double performKernelClustering(final Instances instances, final int numThreads) throws Exception {
	logger.debug("Starting kernelized cluster evaluation...");

	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, kernelSplitPortion);

	ExecutorService execService = Executors.newFixedThreadPool(numThreads);
	try {
		List<Future<Double>> futures = new ArrayList<>();

		// Baseline: plain clustering without a kernel approximation.
		Future<Double> result0 = execService.submit(() ->
		performClustering(new Instances(split.get(0)))
				);
		futures.add(result0);

		for (Map.Entry<Kernel, Instances> entry : getKernelsWithInstances(split.get(0))) {
			if (Thread.currentThread().isInterrupted()) {
				throw new InterruptedException(EVALUATION_STOPPED_MESSAGE);
			}

			Future<Double> result = execService.submit(() -> {
				Kernel kernel = entry.getKey();
				Instances insts = entry.getValue();

				FilteredClusterer clusterer = new FilteredClusterer();

				// Unlabelled copy for building the clusterer (1-based index).
				Remove filter = new Remove();
				filter.setAttributeIndices("" + (insts.classIndex() + 1));
				filter.setInputFormat(insts);

				Instances removedClassInstances = Filter.useFilter(insts, filter);
				Nystroem kernelFilter = new Nystroem();

				kernelFilter.setKernel(kernel);
				clusterer.setFilter(kernelFilter);
				// One cluster per class value.
				((SimpleKMeans) clusterer.getClusterer())
				.setOptions(new String[]{"-N", String.valueOf(insts.classAttribute().numValues())});

				clusterer.buildClusterer(removedClassInstances);

				ClusterEvaluation clusterEval = new ClusterEvaluation();
				clusterEval.setClusterer(clusterer);
				clusterEval.evaluateClusterer(insts);

				return predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
			});
			futures.add(result);
		}

		return evaluateFutures(futures);
	} finally {
		// Nothing consumes task results past this point, so stop the pool and
		// cancel whatever is still pending. Without this the pool's non-daemon
		// worker threads would outlive every call.
		execService.shutdownNow();
	}
}
 
Example #8
Source File: ClusterEvaluationTest.java — from AILibs (GNU Affero General Public License v3.0), 4 votes
/**
 * Downloads the segment data set from OpenML, clusters a stratified sample of
 * it with SimpleKMeans (one cluster per class value, Euclidean distance) and
 * asserts that the accuracy derived from the classes-to-clusters mapping is
 * positive. Build and evaluation times are logged.
 *
 * @throws Exception if the download, clustering or evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// Close the file handle as soon as the data set has been read.
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	/* unlabelled copy used to build the clusterer (Remove uses 1-based indices) */
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	/* one execution slot per available processor, one cluster per class value */
	((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	/* timeTaken covers filtering, building and evaluation; timeTakenEval only the evaluation */
	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example #9
Source File: ClusteringTask.java — from Machine-Learning-in-Java (MIT License), 4 votes
/**
 * Loads {@code data/bank-data.arff}, builds an EM clusterer on it, prints the
 * model, and then prints the log-likelihood obtained from a 10-fold
 * cross-validation seeded with 1.
 *
 * @param args command line arguments (unused)
 * @throws Exception if the data cannot be read or clustering fails
 */
public static void main(String[] args) throws Exception{

	// load data, closing the file as soon as it has been read
	Instances data;
	try (BufferedReader reader = new BufferedReader(new FileReader("data/bank-data.arff"))) {
		data = new Instances(reader);
	}

	// new instance of clusterer
	EM model = new EM();
	// build the clusterer
	model.buildClusterer(data);
	System.out.println(model);

	// 10-fold cross-validation with a fixed seed for reproducible folds
	double logLikelihood = ClusterEvaluation.crossValidateModel(model, data, 10, new Random(1));
	System.out.println(logLikelihood);

}