weka.clusterers.SimpleKMeans Java Examples

The following examples show how to use weka.clusterers.SimpleKMeans. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Clusters the given instances with the {@link SimpleKMeans} held by a
 * {@link FilteredClusterer} (after stripping the class attribute) and scores
 * the clustering against the class labels.
 *
 * @param insts the labelled instances to cluster and evaluate
 * @return the value computed by {@code predictAccuracy} from the
 *         classes-to-clusters mapping and the cluster assignments
 * @throws Exception if filtering, clustering or evaluation fails
 */
private static double performClustering(final Instances insts) throws Exception {
	logger.debug("Starting cluster evaluation...");

	final FilteredClusterer filteredClusterer = new FilteredClusterer();

	// Drop the class attribute; the index string is classIndex() + 1.
	final Remove classRemover = new Remove();
	classRemover.setAttributeIndices(String.valueOf(insts.classIndex() + 1));
	classRemover.setInputFormat(insts);
	final Instances unlabelled = Filter.useFilter(insts, classRemover);

	// Ask k-means for as many clusters as there are class values.
	final SimpleKMeans kMeans = (SimpleKMeans) filteredClusterer.getClusterer();
	final String[] kMeansOptions = {"-N", String.valueOf(insts.classAttribute().numValues())};
	kMeans.setOptions(kMeansOptions);

	filteredClusterer.buildClusterer(unlabelled);

	final ClusterEvaluation evaluation = new ClusterEvaluation();
	evaluation.setClusterer(filteredClusterer);
	evaluation.evaluateClusterer(insts);

	final int[] classesToClusters = evaluation.getClassesToClusters();
	final double[] assignments = evaluation.getClusterAssignments();
	return predictAccuracy(insts, classesToClusters, assignments);
}
 
Example #2
Source File: WekaClusterers.java    From apogen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs WEKA's SimpleKMeans on an ARFF file through
 * {@code ClusterEvaluation.evaluateClusterer} and parses the textual report.
 *
 * @param filename    path of the ARFF file, passed as the {@code -t} option
 * @param numClusters number of clusters to search for, passed as {@code -N}
 * @param init        initialization method, passed as {@code -init}
 * @return the result of {@code parseKMeansOutput} applied to the evaluation report
 * @throws Exception if the evaluation fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKmeans(String filename, String numClusters, String init)
		throws Exception {

	// Command-line style options, one flag/value pair per row.
	final String[] wekaArgs = {
			"-t", filename,
			"-init", init,
			"-N", numClusters,
			"-I", "100",
			"-c", "first"
	};

	final String report = ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), wekaArgs);
	return parseKMeansOutput(report, numClusters);
}
 
Example #3
Source File: KMeans.java    From Java-Data-Analysis with MIT License 6 votes vote down vote up
/**
 * Clusters the data set loaded from {@code DATA} into {@code K} groups and
 * prints, for every instance, its first two attribute values and the cluster
 * it was assigned to.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final Instances points = load(DATA);
    final SimpleKMeans clusterer = new SimpleKMeans();
    System.out.printf("%d clusters:%n", K);
    try {
        clusterer.setNumClusters(K);
        clusterer.buildClusterer(points);
        for (Instance point : points) {
            final double x = point.value(0);
            final double y = point.value(1);
            final int cluster = clusterer.clusterInstance(point);
            System.out.printf("(%.0f,%.0f): %s%n", x, y, cluster);
        }
    } catch (Exception failure) {
        // any clustering problem is reported on standard error
        System.err.println(failure);
    }
}
 
Example #4
Source File: WekaClusterTest.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Clusters the {@code cpu} data set into ten groups with SimpleKMeans (seed 10)
 * and prints the cluster assigned to each instance, in data-set order.
 *
 * Failures during clustering are reported on standard error rather than
 * being silently discarded.
 */
public void clusterData(){
	kmeans = new SimpleKMeans();
	kmeans.setSeed(10);
	try {
		// order must be preserved so that getAssignments() lines up with the instances
		kmeans.setPreserveInstancesOrder(true);
		kmeans.setNumClusters(10);
		kmeans.buildClusterer(cpu);
		int[] assignments = kmeans.getAssignments();
		for (int i = 0; i < assignments.length; i++) {
			System.out.printf("Instance %d -> Cluster %d\n", i, assignments[i]);
		}
	} catch (Exception e1) {
		// previously swallowed: make the failure visible without changing the method's contract
		System.err.println("Clustering failed: " + e1);
	}
}
 
Example #5
Source File: ClusterEval.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Builds a three-cluster SimpleKMeans model on the training ARFF file, prints
 * it, then evaluates the model on the test ARFF file and prints the cluster
 * results together with the number of clusters.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    try {
        final DataSource trainingSource =
                new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff");
        final Instances training = trainingSource.getDataSet();

        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(training);
        System.out.println(kMeans);

        final ClusterEvaluation evaluation = new ClusterEvaluation();
        final DataSource testSource =
                new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff");
        final Instances testing = testSource.getDataSet();
        evaluation.setClusterer(kMeans);
        evaluation.evaluateClusterer(testing);

        System.out.println(evaluation.clusterResultsToString());
        System.out.println("# of clusters: " + evaluation.getNumClusters());
    } catch (Exception problem) {
        // only the message is printed, on standard output
        System.out.println(problem.getMessage());
    }
}
 
Example #6
Source File: Clustering.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 6 votes vote down vote up
/**
 * Loads the weather ARFF file, builds a three-cluster SimpleKMeans model on it
 * and prints the model.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    try {
        final DataSource source =
                new DataSource("/Users/admin/Documents/NetBeansProjects/Clustering/weather.arff");
        final Instances weather = source.getDataSet();

        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(weather);

        System.out.println(kMeans);
    } catch (Exception problem) {
        // only the message is printed, on standard output
        System.out.println(problem.getMessage());
    }
}
 
Example #7
Source File: SimpleKMeansClusterer.java    From mzmine2 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Clusters the data set with WEKA's SimpleKMeans and records the cluster index
 * of every instance.
 *
 * @param dataset instances to cluster
 * @param parameters parameter set supplying the number of groups and the
 *        visualization value
 * @return the clustering result, or {@code null} when clustering throws (the
 *         exception is logged at SEVERE level)
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  final List<Integer> assignments = new ArrayList<Integer>();
  final SimpleKMeans kMeans = new SimpleKMeans();

  final int groupCount =
      parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
  final String[] wekaOptions = {"-N", String.valueOf(groupCount)};

  try {
    kMeans.setOptions(wekaOptions);
    kMeans.buildClusterer(dataset);

    // look up the cluster of each instance, in data-set order
    for (Enumeration<?> it = dataset.enumerateInstances(); it.hasMoreElements();) {
      final Instance current = (Instance) it.nextElement();
      assignments.add(kMeans.clusterInstance(current));
    }

    return new ClusteringResult(assignments, null, kMeans.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example #8
Source File: SimpleKMeansClusterer.java    From mzmine3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs WEKA's SimpleKMeans over the data set and gathers, per instance, the
 * index of the cluster it falls into.
 *
 * @param dataset instances to cluster
 * @param parameters parameter set providing the number of groups and the
 *        visualization value
 * @return the clustering result; {@code null} if any step throws, in which
 *         case the exception is logged at SEVERE level
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  final List<Integer> clusterPerInstance = new ArrayList<Integer>();
  final SimpleKMeans simpleKMeans = new SimpleKMeans();

  final int requestedGroups =
      parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
  final String[] kMeansOptions = new String[] {"-N", String.valueOf(requestedGroups)};

  try {
    simpleKMeans.setOptions(kMeansOptions);
    simpleKMeans.buildClusterer(dataset);

    final Enumeration<?> instances = dataset.enumerateInstances();
    while (instances.hasMoreElements()) {
      final Instance next = (Instance) instances.nextElement();
      final int clusterIndex = simpleKMeans.clusterInstance(next);
      clusterPerInstance.add(clusterIndex);
    }

    return new ClusteringResult(clusterPerInstance, null, simpleKMeans.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());
  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example #9
Source File: Clustering.java    From java-ml-projects with Apache License 2.0 5 votes vote down vote up
/**
 * Clusters {@code data} (with attribute 3 removed) into three groups using
 * SimpleKMeans and returns one chart series per cluster, each holding the
 * chart points of the instances assigned to that cluster.
 *
 * @return three series named "0", "1" and "2", indexed by cluster number
 * @throws Exception if filtering or clustering fails
 */
private List<Series<Number, Number>> buildClusteredSeries() throws Exception {
	List<XYChart.Series<Number, Number>> seriesPerCluster = new ArrayList<>();

	// the class information must not take part in the clustering
	Remove classStripper = new Remove();
	classStripper.setAttributeIndices("3");
	classStripper.setInputFormat(data);
	Instances unlabelled = Filter.useFilter(data, classStripper);

	SimpleKMeans clusterer = new SimpleKMeans();
	clusterer.setSeed(10);
	clusterer.setPreserveInstancesOrder(true);
	clusterer.setNumClusters(3);
	clusterer.buildClusterer(unlabelled);

	// one empty series per cluster, named after the cluster index
	for (int cluster = 0; cluster < 3; cluster++) {
		Series<Number, Number> series = new XYChart.Series<>();
		series.setName(String.valueOf(cluster));
		seriesPerCluster.add(series);
	}

	// assignments follow instance order, so position == row in data
	int row = 0;
	for (int cluster : clusterer.getAssignments()) {
		seriesPerCluster.get(cluster).getData().add(instancetoChartData(data.get(row)));
		row++;
	}

	return seriesPerCluster;
}
 
Example #10
Source File: RBFNetwork.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the default capabilities of the classifier: the "or" of the
 * Logistic and LinearRegression capabilities, "and"-ed with the
 * SimpleKMeans capabilities, after which the previously captured class
 * capabilities are "or"-ed back in.
 *
 * @return      the capabilities of this classifier
 * @see         Logistic
 * @see         LinearRegression
 */
public Capabilities getCapabilities() {
  Capabilities combined = new Logistic().getCapabilities();
  combined.or(new LinearRegression().getCapabilities());

  // capture the class capabilities before combining with the clusterer's
  Capabilities classCapabilities = combined.getClassCapabilities();

  combined.and(new SimpleKMeans().getCapabilities());
  combined.or(classCapabilities);

  return combined;
}
 
Example #11
Source File: LearnShapelets.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the shapelets of every scale {@code r} with the centroids of a
 * k-means clustering ({@code K} clusters, at most 100 iterations) over all
 * normalized segments of length {@code L[r]} taken from the training series.
 *
 * @throws Exception if the clustering fails
 */
public void initializeShapeletsKMeans() throws Exception {
    // one pass per scale r, i.e. per set of K shapelets
    for (int r = 0; r < R; r++) {
        final int segmentsPerSeries = numberOfSegments[r];
        final int segmentLength = L[r];
        final double[][] segments = new double[train.length * segmentsPerSeries][segmentLength];

        // copy each sliding window of the training series into its own row
        for (int i = 0; i < train.length; i++) {
            final int rowOffset = i * segmentsPerSeries;
            for (int j = 0; j < segmentsPerSeries; j++) {
                for (int l = 0; l < segmentLength; l++) {
                    segments[rowOffset + j][l] = train[i][j + l];
                }
            }
        }

        // normalize every segment row
        for (int s = 0; s < segments.length; s++) {
            segments[s] = StatisticalUtilities.normalize(segments[s]);
        }

        final Instances segmentInstances = InstanceTools.toWekaInstances(segments);

        final SimpleKMeans clusterer = new SimpleKMeans();
        clusterer.setNumClusters(K);
        clusterer.setMaxIterations(100);
        clusterer.setSeed((int) (rand.nextDouble() * 1000));
        clusterer.buildClusterer(segmentInstances);

        // the cluster centroids become the initial shapelets of this scale
        final Instances centroids = clusterer.getClusterCentroids();
        shapelets[r] = InstanceTools.fromWekaInstancesArray(centroids, false);

        if (shapelets[r] == null) {
            print("P not set");
        }
    }
}
 
Example #12
Source File: LearnShapelets.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the shapelets of every scale {@code r} from the centroids of a
 * k-means clustering ({@code K} clusters, at most 100 iterations) over the
 * normalized training segments of length {@code L[r]}.
 *
 * NOTE(review): despite the method name, nothing in this body reads a file;
 * confirm whether file-based initialization was intended.
 *
 * @throws Exception if the clustering fails
 */
public void initializeShapeletsFromFile() throws Exception {
    // handle each scale r (each set of K shapelets) in turn
    for (int r = 0; r < R; r++) {
        final int perSeries = numberOfSegments[r];
        final int length = L[r];
        final double[][] windowRows = new double[train.length * perSeries][length];

        // fill the rows with the sliding windows of every training series
        for (int i = 0; i < train.length; i++) {
            for (int j = 0; j < perSeries; j++) {
                final double[] row = windowRows[i * perSeries + j];
                for (int l = 0; l < length; l++) {
                    row[l] = train[i][j + l];
                }
            }
        }

        // replace each row by its normalized version
        for (int rowIndex = 0; rowIndex < windowRows.length; rowIndex++) {
            windowRows[rowIndex] = StatisticalUtilities.normalize(windowRows[rowIndex]);
        }

        final Instances wekaSegments = InstanceTools.toWekaInstances(windowRows);

        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(K);
        kMeans.setMaxIterations(100);
        kMeans.setSeed((int) (rand.nextDouble() * 1000));
        kMeans.buildClusterer(wekaSegments);

        // centroids of the clustering are used as this scale's shapelets
        final Instances centroidInstances = kMeans.getClusterCentroids();
        shapelets[r] = InstanceTools.fromWekaInstancesArray(centroidInstances, false);

        if (shapelets[r] == null) {
            print("P not set");
        }
    }
}
 
Example #13
Source File: BoTSWEnsemble.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sums, over all assigned instances and all attributes, the squared difference
 * between an instance's value and the value of its cluster centroid.
 *
 * @param kmeans a built clusterer providing centroids and assignments
 * @param input  the instances, indexed consistently with the assignments
 * @return the total squared distance of the instances to their centroids
 * @throws Exception if the clusterer cannot provide its assignments
 */
public static double compactnessOfClustering(SimpleKMeans kmeans, Instances input) throws Exception {
    final Instances centers = kmeans.getClusterCentroids();
    final int[] clusterOf = kmeans.getAssignments();

    double sumOfSquares = 0.0;
    for (int row = 0; row < clusterOf.length; row++) {
        final Instance point = input.get(row);
        final Instance center = centers.get(clusterOf[row]);

        for (int attr = 0; attr < point.numAttributes(); attr++) {
            final double delta = point.value(attr) - center.value(attr);
            sumOfSquares += delta * delta;
        }
    }
    return sumOfSquares;
}
 
Example #14
Source File: ClassificationViaClustering.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Default constructor; installs a new {@link SimpleKMeans} as the clusterer.
 */
public ClassificationViaClustering() {
  // the superclass no-argument constructor is invoked implicitly
  this.m_Clusterer = new SimpleKMeans();
}
 
Example #15
Source File: Ex06_Clusterers.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Clusters the merged ItalyPowerDemand train and test data twice, once with
 * UnsupervisedShapelets and once with WEKA's SimpleKMeans, and prints the
 * cluster assignments and the Rand index of each clustering.
 *
 * @param args not used
 * @throws Exception if loading or clustering fails
 */
public static void main(String[] args) throws Exception {

    // Load the sample split and merge the test part into the train part.
    final int seed = 0;
    final Instances[] split = DatasetLoading.sampleItalyPowerDemand(seed);
    final Instances all = split[0];
    final Instances testPart = split[1];
    all.addAll(testPart);

    // Time series clusterer: set the number of clusters k, then build on the data.
    final UnsupervisedShapelets shapeletClusterer = new UnsupervisedShapelets();
    shapeletClusterer.setNumberOfClusters(all.numClasses());
    shapeletClusterer.buildClusterer(all);

    // getAssignments() is indexed like the Instances object: entry i holds the
    // cluster of instance i.
    final int[] shapeletAssignments = shapeletClusterer.getAssignments();
    System.out.println("UnsupervisedShapelets cluster assignments:");
    System.out.println(Arrays.toString(shapeletAssignments));

    // Rand index of the clustering, computed by the utility method.
    final double shapeletRandIndex = ClusteringUtilities.randIndex(shapeletAssignments, all);
    System.out.println("UnsupervisedShapelets Rand index:");
    System.out.println(shapeletRandIndex);

    // WEKA clusterers need the class value removed, so work on a stripped copy.
    final Instances unlabelled = new Instances(all);
    deleteClassAttribute(unlabelled);
    final SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(all.numClasses());
    kMeans.setPreserveInstancesOrder(true);
    kMeans.buildClusterer(unlabelled);

    final int[] kMeansAssignments = kMeans.getAssignments();
    System.out.println("SimpleKMeans cluster assignments:");
    System.out.println(Arrays.toString(kMeansAssignments));

    final double kMeansRandIndex = ClusteringUtilities.randIndex(kMeansAssignments, all);
    System.out.println("SimpleKMeans Rand index:");
    System.out.println(kMeansRandIndex);
}
 
Example #16
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates clusterings of the first part of a stratified split of the given
 * instances: once without a kernel (via {@code performClustering}) and once
 * per kernel returned by {@code getKernelsWithInstances}, each as a task on a
 * fixed thread pool. The per-task results are combined by
 * {@code evaluateFutures}.
 *
 * The thread pool is always shut down before this method returns, so its
 * worker threads are released once the submitted tasks have finished.
 *
 * @param instances  the labelled instances to evaluate
 * @param numThreads size of the thread pool running the clustering tasks
 * @return the value computed by {@code evaluateFutures} over all task results
 * @throws InterruptedException if the calling thread is interrupted while submitting tasks
 * @throws Exception if splitting, clustering or result evaluation fails
 */
public static double performKernelClustering(final Instances instances, final int numThreads) throws Exception {
	logger.debug("Starting kernelized cluster evaluation...");

	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, kernelSplitPortion);

	ExecutorService execService = Executors.newFixedThreadPool(numThreads);
	try {
		List<Future<Double>> futures = new ArrayList<>();

		// baseline: plain clustering on a copy of the first split part
		Future<Double> result0 = execService.submit(() ->
		performClustering(new Instances(split.get(0)))
				);
		futures.add(result0);

		for (Map.Entry<Kernel, Instances> entry : getKernelsWithInstances(split.get(0))) {
			if (Thread.currentThread().isInterrupted()) {
				throw new InterruptedException(EVALUATION_STOPPED_MESSAGE);
			}

			Future<Double> result = execService.submit(() -> {
				Kernel kernel = entry.getKey();
				Instances insts = entry.getValue();

				FilteredClusterer clusterer = new FilteredClusterer();

				// strip the class attribute; the index string is classIndex() + 1
				Remove filter = new Remove();
				filter.setAttributeIndices("" + (insts.classIndex() + 1));
				filter.setInputFormat(insts);

				Instances removedClassInstances = Filter.useFilter(insts, filter);
				Nystroem kernelFilter = new Nystroem();

				kernelFilter.setKernel(kernel);
				clusterer.setFilter(kernelFilter);
				((SimpleKMeans) clusterer.getClusterer())
				.setOptions(new String[]{"-N", String.valueOf(insts.classAttribute().numValues())});

				clusterer.buildClusterer(removedClassInstances);

				ClusterEvaluation clusterEval = new ClusterEvaluation();
				clusterEval.setClusterer(clusterer);
				clusterEval.evaluateClusterer(insts);

				return predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
			});
			futures.add(result);
		}

		return evaluateFutures(futures);
	} finally {
		// shutdown() lets already submitted tasks complete but stops the pool
		// from keeping its worker threads alive afterwards
		execService.shutdown();
	}
}
 
Example #17
Source File: ClusterEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Downloads the segment dataset from OpenML, clusters one part of a stratified
 * split with SimpleKMeans (class attribute removed, Euclidean distance), and
 * checks that the accuracy derived from the classes-to-clusters mapping is
 * positive. Timing of clustering and evaluation is logged.
 *
 * @throws Exception if download, parsing, clustering or evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// close the file handle once the instances have been constructed from it
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	// strip the class attribute; the index string is classIndex() + 1
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	// one execution slot per available processor, one cluster per class value
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	// timeTaken covers filtering, clustering and evaluation; timeTakenEval only the evaluation
	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example #18
Source File: LearnShapeletsLearningAlgorithm.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Initializes the tensor <code>S</code> storing the shapelets for each scale.
 * The initialization is done by deriving initial shapelets from all normalized
 * segments: for every scale <code>r</code>, all segments of length
 * <code>(r + 1) * minShapeletLength</code> are z-normalized and clustered with
 * k-means, and the cluster centroids are used as the shapelets of that scale.
 * If the clustering fails for a scale, a zero matrix with the same shapelet
 * length is used for that scale instead and a warning is logged.
 *
 * @param trainingMatrix
 *            The training matrix used for the initialization of <code>S</code>.
 * @return Return the initialized tensor storing an initial guess for the
 *         shapelets based on the clustering
 * @throws TrainingException
 *             if the number of segments for some scale is lower than 1
 */
public double[][][] initializeS(final double[][] trainingMatrix) throws TrainingException {
	LOGGER.debug("Initializing S...");

	/* read config locally */
	final int scaleR = this.getConfig().scaleR();
	final long seed = this.getConfig().seed();
	final int minShapeLength = this.getConfig().minShapeletLength();

	final double[][][] result = new double[scaleR][][];

	for (int r = 0; r < scaleR; r++) {
		final int numberOfSegments = getNumberOfSegments(this.q, minShapeLength, r);
		if (numberOfSegments < 1) {
			throw new TrainingException("The number of segments is lower than 1. Can not train the LearnShapelets model.");
		}

		// shapelet length at scale r
		final int L = (r + 1) * minShapeLength;

		final double[][] tmpSegments = new double[trainingMatrix.length * numberOfSegments][L];

		// Prepare training data for finding the centroids
		for (int i = 0; i < trainingMatrix.length; i++) {
			for (int j = 0; j < numberOfSegments; j++) {
				for (int l = 0; l < L; l++) {
					tmpSegments[i * numberOfSegments + j][l] = trainingMatrix[i][j + l];
				}
				tmpSegments[i * numberOfSegments + j] = TimeSeriesUtil.zNormalize(tmpSegments[i * numberOfSegments + j], USE_BIAS_CORRECTION);
			}
		}

		// Transform instances
		Instances wekaInstances = WekaTimeseriesUtil.matrixToWekaInstances(tmpSegments);

		// Cluster using k-Means
		SimpleKMeans kMeans = new SimpleKMeans();
		try {
			kMeans.setNumClusters(this.getConfig().numShapelets());
			kMeans.setSeed((int) seed);
			kMeans.setMaxIterations(100);
			kMeans.buildClusterer(wekaInstances);
		} catch (Exception e) {
			LOGGER.warn("Could not initialize matrix S using kMeans clustering for r={} due to the following problem: {}. " + "Using zero matrix instead (possibly leading to a poor training performance).", r, e.getMessage());
			// The fallback must have the shapelet length of this scale, L = (r + 1) * minShapeLength;
			// r * minShapeLength would yield zero-length shapelets for r = 0.
			result[r] = new double[this.getConfig().numShapelets()][L];
			continue;
		}
		Instances clusterCentroids = kMeans.getClusterCentroids();

		// copy the centroid values into a plain matrix, one row per centroid
		double[][] tmpResult = new double[clusterCentroids.numInstances()][clusterCentroids.numAttributes()];
		for (int j = 0; j < tmpResult.length; j++) {
			double[] instValues = clusterCentroids.get(j).toDoubleArray();
			tmpResult[j] = Arrays.copyOf(instValues, tmpResult[j].length);
		}
		result[r] = tmpResult;
	}

	LOGGER.debug("Initialized S.");

	return result;
}
 
Example #19
Source File: ClassificationViaClustering.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Returns the fully qualified class name of the default clusterer,
 * {@link SimpleKMeans}.
 *
 * @return		the classname
 */
protected String defaultClustererString() {
  final String defaultClassName = SimpleKMeans.class.getName();
  return defaultClassName;
}