org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer Java Examples

The following examples show how to use org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: MyTest2.java From ACManager with GNU General Public License v3.0

6 votes

/**
 * Clusters seven one-dimensional sample points with a k-means++ clusterer
 * configured for three clusters and prints the input list, a marker line,
 * the number of clusters and the members of every cluster to standard output.
 */
@Test
public void test6() throws Exception {
    // Coordinates of the one-dimensional sample points, in insertion order.
    final double[] coordinates = {1, 1.5, 1.8, 3.5, 3.6, 4, 4.2};

    List<DoublePoint> list = new ArrayList<>();
    for (double coordinate : coordinates) {
        list.add(new DoublePoint(new double[]{coordinate}));
    }
    System.out.println(list);

    Clusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<DoublePoint>(3);
    List<? extends Cluster<DoublePoint>> clusters = clusterer.cluster(list);

    System.out.println("!!!");
    System.out.println(clusters.size());
    for (Cluster<DoublePoint> cluster : clusters) {
        System.out.println(cluster.getPoints());
    }
}

Example #2

Source File: KmeansSampling.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Performs the next step of the algorithm depending on its current state.
 * <p>
 * In state CREATED an empty copy of the input is stored as the sample and, unless cluster
 * results are already present, the input is clustered with k-means++; afterwards the
 * algorithm is activated. In state ACTIVE the call is delegated to {@code doAlgorithmStep()}.
 *
 * @return the event returned by {@code activate()} or by {@code doAlgorithmStep()}
 * @throws AlgorithmException if the empty copy of the input dataset cannot be created
 * @throws IllegalStateException if the algorithm is neither in state CREATED nor ACTIVE
 */
@SuppressWarnings("unchecked")
@Override
public IAlgorithmEvent nextWithException() throws AlgorithmException, InterruptedException, AlgorithmTimeoutedException, AlgorithmExecutionCanceledException {
	switch (this.getState()) {
	case ACTIVE:
		return this.doAlgorithmStep();
	case CREATED:
		// The sample starts out as an empty copy of the input dataset.
		try {
			this.sample = (D) this.getInput().createEmptyCopy();
		} catch (DatasetCreationException e) {
			throw new AlgorithmException("Could not create a copy of the dataset.", e);
		}

		// Seeded random source handed to the clusterer.
		JDKRandomGenerator seededRandom = new JDKRandomGenerator();
		seededRandom.setSeed(this.seed);

		// A k of -1 is replaced by the sample size.
		if (this.k == -1) {
			this.k = this.sampleSize;
		}

		// Cluster only when no results are available yet.
		if (this.clusterResults == null) {
			KMeansPlusPlusClusterer<I> clusterer = new KMeansPlusPlusClusterer<>(this.k, -1, this.distanceMeassure, seededRandom);
			// this is not interruptible!!
			this.clusterResults = clusterer.cluster(this.getInput());
		}
		return this.activate();
	default:
		throw new IllegalStateException("Unknown algorithm state " + this.getState());
	}
}

Example #3

Source File: GMeansStratiAmountSelectorAndAssigner.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Stores the dataset and, if this object has no clusterer or no clusters yet, clusters the
 * dataset with k-means++ using {@code stratiAmount} as the number of clusters.
 *
 * @param dataset the dataset to work on
 * @param stratiAmount the number of clusters to compute
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Nothing to do if both a clusterer and clusters are already present.
	if (this.clusterer != null && this.getClusters() != null) {
		return;
	}

	// This object was not used for strati amount selection, so k-means clustering
	// has to be performed here to obtain the clusters for the requested strati amount.
	final JDKRandomGenerator seededRandom = new JDKRandomGenerator();
	seededRandom.setSeed(this.randomSeed);
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, seededRandom);
	this.setClusters(kMeans.cluster(new ListView<Clusterable>(dataset)));
}

Example #4

Source File: KMeansStratiAssigner.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Stores the dataset and clusters it with k-means++ using {@code stratiAmount} as the
 * number of clusters; the resulting clusters are stored via {@code setClusters}.
 *
 * @param dataset the dataset to cluster; it is cast to a list of {@code Clusterable}
 * @param stratiAmount the number of clusters to compute
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Seeded random source so that the clustering depends on the configured random seed.
	final JDKRandomGenerator seededRandom = new JDKRandomGenerator();
	seededRandom.setSeed(this.randomSeed);

	// NOTE(review): unchecked cast; presumably the dataset implementation is a List of Clusterable - confirm.
	final List<Clusterable> points = (List<Clusterable>)dataset;
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, seededRandom);

	this.logger.info("Clustering dataset with {} instances.", dataset.size());
	this.setClusters(kMeans.cluster(points));
	this.logger.info("Finished clustering");
}

Example #5

Source File: KmeansEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Clusters the rows of an observation matrix with k-means++.
 *
 * @param value1 the observation matrix; every row becomes one point to cluster
 * @param value2 the number of clusters (k)
 * @return a {@code ClusterTuple} built from the settings used, the computed clusters and
 *         the column labels of the matrix
 * @throws IOException if {@code value1} is not a {@code Matrix} or {@code value2} is not a {@code Number}
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object value1, Object value2) throws IOException {

  // Validate both parameters up front, in parameter order.
  if(!(value1 instanceof Matrix)) {
    throw new IOException("The first parameter for kmeans should be the observation matrix.");
  }
  if(!(value2 instanceof Number)) {
    throw new IOException("The second parameter for kmeans should be k.");
  }

  final Matrix observations = (Matrix)value1;
  final int k = ((Number)value2).intValue();

  @SuppressWarnings({"rawtypes"})
  KMeansPlusPlusClusterer<ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);

  final double[][] rows = observations.getData();
  final List<String> rowLabels = observations.getRowLabels();

  // Each row becomes a point; rows of an unlabeled matrix are identified by their index.
  final List<ClusterPoint> points = new ArrayList<>();
  for(int row=0; row<rows.length; row++) {
    final String id = rowLabels != null ? rowLabels.get(row) : Integer.toString(row);
    points.add(new ClusterPoint(id, rows[row]));
  }

  // Settings reported alongside the clusters.
  @SuppressWarnings({"rawtypes"})
  Map fields = new HashMap();
  fields.put("k", k);
  fields.put("distance", "euclidean");
  fields.put("maxIterations", maxIterations);

  return new ClusterTuple(fields, kmeans.cluster(points), observations.getColumnLabels());
}

Example #6

Source File: MultiKmeansEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Clusters the rows of an observation matrix by running k-means++ several times through a
 * {@code MultiKMeansPlusPlusClusterer}.
 *
 * @param values exactly three parameters: the observation matrix, the number of clusters (k)
 *               and the number of trials
 * @return a {@code KmeansEvaluator.ClusterTuple} built from the settings used, the computed
 *         clusters and the column labels of the matrix
 * @throws IOException if the number of parameters is not three, or a parameter has the wrong type
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object... values) throws IOException {

  if(values.length != 3) {
    throw new IOException("The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
  }

  Object value1 = values[0];
  Object value2 = values[1];
  Object value3 = values[2];

  // Validate the parameters in order so the first offending one is reported.
  if(!(value1 instanceof Matrix)) {
    throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
  }
  Matrix matrix = (Matrix)value1;

  if(!(value2 instanceof Number)) {
    throw new IOException("The second parameter for multiKmeans should be k.");
  }
  int k = ((Number)value2).intValue();

  if(!(value3 instanceof Number)) {
    throw new IOException("The third parameter for multiKmeans should be trials.");
  }
  int trials = ((Number)value3).intValue();

  @SuppressWarnings({"rawtypes"})
  KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);
  @SuppressWarnings({"rawtypes"})
  MultiKMeansPlusPlusClusterer multiKmeans = new MultiKMeansPlusPlusClusterer(kmeans, trials);

  List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>();
  double[][] data = matrix.getData();

  List<String> ids = matrix.getRowLabels();

  for(int i=0; i<data.length; i++) {
    double[] vec = data[i];
    // A matrix without row labels yields null here; use the row index as the
    // point id in that case instead of failing with a NullPointerException.
    String id = ids != null ? ids.get(i) : Integer.toString(i);
    points.add(new KmeansEvaluator.ClusterPoint(id, vec));
  }

  // Settings reported alongside the clusters.
  @SuppressWarnings({"rawtypes"})
  Map fields = new HashMap();

  fields.put("k", k);
  fields.put("trials", trials);
  fields.put("distance", "euclidean");
  fields.put("maxIterations", maxIterations);

  return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}

Example #7

Source File: Stats.java From gama with GNU General Public License v3.0

4 votes

/**
 * Clusters the rows of {@code data} with the k-means++ implementation of Apache Commons Math
 * using the Euclidean distance, and returns for every cluster the list of indices of the rows
 * assigned to it.
 *
 * @param scope the current scope; supplies the random seed and is used for value conversion
 * @param data list of lists; every inner list is converted to one point
 * @param k the number of clusters
 * @param maxIt the maximum number of iterations
 * @return one list of row indices per computed cluster
 * @throws GamaRuntimeException declared by the operator; not thrown directly by this method
 */
@operator (
		value = "kmeans",
		can_be_const = false,
		type = IType.LIST,
		category = { IOperatorCategory.STATISTICAL },
		concept = { IConcept.STATISTIC, IConcept.CLUSTERING })
@doc (
		value = "returns the list of clusters (list of instance indices) computed with the kmeans++ "
				+ "algorithm from the first operand data according to the number of clusters to split"
				+ " the data into (k) and the maximum number of iterations to run the algorithm for "
				+ "(If negative, no maximum will be used) (maxIt). Usage: kmeans(data,k,maxit)",
		special_cases = "if the lengths of two vectors in the right-hand aren't equal, returns 0",
		examples = { @example (
				value = "kmeans ([[2,4,5], [3,8,2], [1,1,3], [4,3,4]],2,10)",
				equals = "[[0,2,3],[1]]") })
public static IList<IList> KMeansPlusplusApache(final IScope scope, final IList data, final Integer k,
		final Integer maxIt) throws GamaRuntimeException {
	// Random generator seeded from the scope's random source.
	final MersenneTwister generator = new MersenneTwister(scope.getRandom().getSeed().longValue());

	// Convert every row into an Instance that remembers the index of the row it came from.
	final List<DoublePoint> points = new ArrayList<>();
	for (int row = 0; row < data.size(); row++) {
		final IList values = (IList) data.get(row);
		final double[] coordinates = new double[values.size()];
		for (int col = 0; col < values.size(); col++) {
			coordinates[col] = Cast.asFloat(scope, values.get(col));
		}
		points.add(new Instance(row, coordinates));
	}

	final KMeansPlusPlusClusterer<DoublePoint> clusterer =
			new KMeansPlusPlusClusterer<>(k, maxIt, new EuclideanDistance(), generator);
	final List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

	// Translate every cluster back into the list of row indices of its members.
	try (final Collector.AsList collected = Collector.getList()) {
		for (final Cluster<DoublePoint> cluster : clusters) {
			final IList memberIds = GamaListFactory.create();
			for (final DoublePoint member : cluster.getPoints()) {
				memberIds.addValue(scope, ((Instance) member).getId());
			}
			collected.add(memberIds);
		}
		return collected.items();
	}
}