org.apache.commons.math3.ml.clustering.Clusterable Java Examples

The following examples show how to use org.apache.commons.math3.ml.clustering.Clusterable. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ClusterEvaluator.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Determines the centroid of the given cluster.
 * <p>
 * A {@code CentroidCluster} already carries its center, which is returned
 * directly; for any other cluster the centroid is the component-wise
 * arithmetic mean of the member points.
 *
 * @param cluster the cluster whose centroid is requested
 * @return the centroid of the cluster, or {@code null} when the cluster
 * holds no points
 */
protected Clusterable centroidOf(final Cluster<T> cluster) {
    final List<T> members = cluster.getPoints();
    if (members.isEmpty()) {
        return null;
    }

    // a CentroidCluster stores its own center: reuse it instead of recomputing
    if (cluster instanceof CentroidCluster) {
        return ((CentroidCluster<T>) cluster).getCenter();
    }

    // the first member fixes the dimension of the accumulator
    final double[] mean = new double[members.get(0).getPoint().length];
    for (final T member : members) {
        final double[] coordinates = member.getPoint();
        for (int d = 0; d < mean.length; d++) {
            mean[d] += coordinates[d];
        }
    }

    // turn the per-dimension sums into averages
    final int count = members.size();
    for (int d = 0; d < mean.length; d++) {
        mean[d] /= count;
    }
    return new DoublePoint(mean);
}
 
Example #2
Source File: SumOfClusterVariances.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores a clustering as the sum, over all non-empty clusters, of the
 * variance of the distances between each member point and the cluster
 * centroid.
 *
 * @param clusters the clusters to evaluate
 * @return the accumulated per-cluster distance variance
 */
@Override
public double score(final List<? extends Cluster<T>> clusters) {
    double total = 0.0;
    for (final Cluster<T> cluster : clusters) {
        // empty clusters contribute nothing to the score
        if (cluster.getPoints().isEmpty()) {
            continue;
        }

        final Clusterable centroid = centroidOf(cluster);

        // accumulate the variance of the member-to-centroid distances
        final Variance distanceVariance = new Variance();
        for (final T member : cluster.getPoints()) {
            distanceVariance.increment(distance(member, centroid));
        }
        total += distanceVariance.getResult();
    }
    return total;
}
 
Example #3
Source File: ClusterEvaluator.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Determines the centroid of the given cluster.
 * <p>
 * A {@code CentroidCluster} already carries its center, which is returned
 * directly; for any other cluster the centroid is the component-wise
 * arithmetic mean of the member points.
 *
 * @param cluster the cluster whose centroid is requested
 * @return the centroid of the cluster, or {@code null} when the cluster
 * holds no points
 */
protected Clusterable centroidOf(final Cluster<T> cluster) {
    final List<T> members = cluster.getPoints();
    if (members.isEmpty()) {
        return null;
    }

    // a CentroidCluster stores its own center: reuse it instead of recomputing
    if (cluster instanceof CentroidCluster) {
        return ((CentroidCluster<T>) cluster).getCenter();
    }

    // the first member fixes the dimension of the accumulator
    final double[] mean = new double[members.get(0).getPoint().length];
    for (final T member : members) {
        final double[] coordinates = member.getPoint();
        for (int d = 0; d < mean.length; d++) {
            mean[d] += coordinates[d];
        }
    }

    // turn the per-dimension sums into averages
    final int count = members.size();
    for (int d = 0; d < mean.length; d++) {
        mean[d] /= count;
    }
    return new DoublePoint(mean);
}
 
Example #4
Source File: ClusterStratiAssigner.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Looks up the index of the cluster that contains the given datapoint.
 * A cluster member matches when its point array equals the datapoint's
 * point array element by element.
 *
 * @param datapoint the instance to locate; must be part of the dataset set on this assigner
 * @return the index of the first cluster holding a point equal to the datapoint
 * @throws IllegalStateException if no dataset has been set, or if no cluster contains the datapoint
 * @throws IllegalArgumentException if the datapoint is not contained in the dataset
 */
@Override
public int assignToStrati(final IInstance datapoint) {
	if (this.dataset == null) {
		throw new IllegalStateException("ClusterStratiAssigner has not been initialized!");
	}
	if (!this.dataset.contains(datapoint)) {
		throw new IllegalArgumentException("Given datapoint " + datapoint + " is not in the original dataset with " + this.dataset.size() + " entries.");
	}

	// scan the clusters in order; the first one with a coordinate-wise match wins
	for (int clusterIndex = 0; clusterIndex < this.clusters.size(); clusterIndex++) {
		final List<Clusterable> members = this.clusters.get(clusterIndex).getPoints();
		for (final Clusterable member : members) {
			if (Arrays.equals(datapoint.getPoint(), member.getPoint())) {
				return clusterIndex;
			}
		}
	}
	throw new IllegalStateException("Datapoint was not found in any cluster. This should not happen.");
}
 
Example #5
Source File: SumOfClusterVariances.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores a clustering as the sum, over all non-empty clusters, of the
 * variance of the distances between each member point and the cluster
 * centroid.
 *
 * @param clusters the clusters to evaluate
 * @return the accumulated per-cluster distance variance
 */
@Override
public double score(final List<? extends Cluster<T>> clusters) {
    double total = 0.0;
    for (final Cluster<T> cluster : clusters) {
        // empty clusters contribute nothing to the score
        if (cluster.getPoints().isEmpty()) {
            continue;
        }

        final Clusterable centroid = centroidOf(cluster);

        // accumulate the variance of the member-to-centroid distances
        final Variance distanceVariance = new Variance();
        for (final T member : cluster.getPoints()) {
            distanceVariance.increment(distance(member, centroid));
        }
        total += distanceVariance.getResult();
    }
    return total;
}
 
Example #6
Source File: GetCentroidsEvaluator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the center of every cluster in a clustering result into a
 * {@code Matrix}: entry {@code i} of the backing array is the point of the
 * center of cluster {@code i}. The column labels of the cluster tuple are
 * copied onto the resulting matrix.
 *
 * @param value the evaluated operand; expected to be a {@code KmeansEvaluator.ClusterTuple}
 * @return a {@code Matrix} holding one centroid per cluster, in cluster order
 * @throws IOException if {@code value} is {@code null} or not a {@code KmeansEvaluator.ClusterTuple}
 */
@Override
public Object doWork(Object value) throws IOException {
  if(!(value instanceof KmeansEvaluator.ClusterTuple)){
    // instanceof is false for null, so guard getClass() to report the bad
    // operand through the IOException instead of a NullPointerException
    String foundType = value == null ? "null" : value.getClass().getSimpleName();
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a clustering result",toExpression(constructingFactory), foundType));
  }

  KmeansEvaluator.ClusterTuple clusterTuple = (KmeansEvaluator.ClusterTuple)value;
  List<CentroidCluster<KmeansEvaluator.ClusterPoint>> clusters = clusterTuple.getClusters();
  double[][] data = new double[clusters.size()][];
  for(int i=0; i<clusters.size(); i++) {
    // each entry is the coordinate array of the i-th cluster's center
    data[i] = clusters.get(i).getCenter().getPoint();
  }
  Matrix centroids = new Matrix(data);
  centroids.setColumnLabels(clusterTuple.getColumnLabels());
  return centroids;
}
 
Example #7
Source File: GMeansStratiAmountSelectorAndAssigner.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Stores the dataset and, unless clusters are already available from a
 * previous run of the clusterer, partitions the dataset into
 * {@code stratiAmount} clusters with k-means++.
 *
 * @param dataset the dataset to stratify
 * @param stratiAmount the number of clusters to create
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Clusters already exist when this object was used for strati amount selection.
	if (this.clusterer != null && this.getClusters() != null) {
		return;
	}

	// Otherwise perform k-means clustering to get the correct strati amounts.
	final JDKRandomGenerator generator = new JDKRandomGenerator();
	generator.setSeed(this.randomSeed);
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, generator);
	this.setClusters(kMeans.cluster(new ListView<Clusterable>(dataset)));
}
 
Example #8
Source File: DBSCANClusterer.java    From egads with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Grows a cluster from a starting point by walking its neighborhood.
 * <p>
 * The starting point is added first. Every seed that has no recorded status
 * is expanded: if it has at least {@code minPts} neighbors, those neighbors
 * are merged into the seed list. Every seed not yet marked
 * {@code PART_OF_CLUSTER} is then marked as such and added to the cluster.
 *
 * @param cluster Cluster to expand
 * @param point Point to add to cluster
 * @param neighbors List of neighbors
 * @param points the data set
 * @param visited the set of already visited points
 * @return the expanded cluster
 */
private Cluster<T> expandCluster(final Cluster<T> cluster,
                                 final T point,
                                 final List<T> neighbors,
                                 final Collection<T> points,
                                 final Map<Clusterable, PointStatus> visited) {
    cluster.addPoint(point);
    visited.put(point, PointStatus.PART_OF_CLUSTER);

    // the seed list may be replaced by a merged list while it is being walked,
    // so its size is re-read on every pass
    List<T> frontier = new ArrayList<T>(neighbors);
    for (int cursor = 0; cursor < frontier.size(); cursor++) {
        final T candidate = frontier.get(cursor);
        final PointStatus status = visited.get(candidate);

        // only points without any recorded status get their neighborhood explored
        if (status == null) {
            final List<T> reachable = getNeighbors(candidate, points);
            if (reachable.size() >= minPts) {
                frontier = merge(frontier, reachable);
            }
        }

        // points already in a cluster are left untouched
        if (status == PointStatus.PART_OF_CLUSTER) {
            continue;
        }
        visited.put(candidate, PointStatus.PART_OF_CLUSTER);
        cluster.addPoint(candidate);
    }
    return cluster;
}
 
Example #9
Source File: ClusterStratiAssigner.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Sets the dataset to be stratified after checking that it is non-null,
 * non-empty and made up of {@code Clusterable} elements.
 * <p>
 * The element check first consults the dataset's declared instance class and
 * only inspects the individual elements when that class is not assignable to
 * {@code Clusterable}.
 *
 * @param dataset the dataset to use
 * @throws NullPointerException if {@code dataset} is {@code null}
 * @throws IllegalArgumentException if the dataset is empty or contains an element that is not {@code Clusterable}
 */
public void setDataset(final IDataset<?> dataset) {
	Objects.requireNonNull(dataset);
	if (dataset.isEmpty()) {
		throw new IllegalArgumentException("Cannot compute strati for empty dataset.");
	}

	// scan the elements only when the declared instance class does not settle the question
	final boolean declaredClusterable = Clusterable.class.isAssignableFrom(dataset.getClassOfInstances());
	if (!declaredClusterable && !dataset.stream().allMatch(Clusterable.class::isInstance)) {
		throw new IllegalArgumentException("Dataset does contain elements that are not clusterable elements, but only elements of class " + dataset.getClassOfInstances() + ".");
	}
	this.dataset = dataset;
}
 
Example #10
Source File: KMeansStratiAssigner.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Stores the dataset and partitions it into {@code stratiAmount} clusters
 * with k-means++, seeded from this assigner's random seed.
 *
 * @param dataset the dataset to stratify; it is cast to a list of {@code Clusterable}
 * @param stratiAmount the number of clusters to create
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Seed the generator so that the clustering uses the configured random seed.
	final JDKRandomGenerator generator = new JDKRandomGenerator();
	generator.setSeed(this.randomSeed);

	// Perform the initial clustering of the dataset.
	final List<Clusterable> clusterables = (List<Clusterable>)dataset;
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, generator);
	this.logger.info("Clustering dataset with {} instances.", dataset.size());
	this.setClusters(kMeans.cluster(clusterables));
	this.logger.info("Finished clustering");
}
 
Example #11
Source File: GMeansStratiAmountSelectorAndAssigner.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs g-means on the dataset and reports how many clusters it produced.
 * The clusterer and the resulting clusters are kept on this object.
 *
 * @param dataset the dataset to cluster
 * @return the number of clusters found
 */
@Override
public int selectStratiAmount(final IDataset<?> dataset) {
	// Wrap the dataset so that g-means can treat it as a list of clusterables.
	final List<Clusterable> clusterables = new ListView<>(dataset);

	// Perform g-means to get a fitting k and the corresponding clusters.
	this.clusterer = new GMeans<>(clusterables, this.distanceMeasure, this.randomSeed);
	this.setClusters(this.clusterer.cluster());

	final int stratiAmount = this.getClusters().size();
	return stratiAmount;
}
 
Example #12
Source File: ClusterStratiAssigner.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Replaces the clusters held by this assigner. The given list is stored by
 * reference; no copy is made.
 *
 * @param clusters the clusters to store
 */
protected void setClusters(final List<CentroidCluster<Clusterable>> clusters) {
	this.clusters = clusters;
}
 
Example #13
Source File: ClusterStratiAssigner.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns the clusters currently held by this assigner. The internal list is
 * returned directly, not a copy.
 *
 * @return the stored clusters
 */
public List<CentroidCluster<Clusterable>> getClusters() {
	return this.clusters;
}
 
Example #14
Source File: ClusterAlgorithmComparison.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Maps the first two coordinates of a point onto an area of the given size,
 * keeping a margin of {@code PAD} on every side. Coordinates are shifted by
 * one and halved before scaling, and the second coordinate is flipped so
 * that larger values end up closer to the top.
 *
 * @param point the point to map; its first two coordinates are used
 * @param width the width of the target area
 * @param height the height of the target area
 * @return a new two-dimensional point holding the mapped coordinates
 */
private Clusterable transform(Clusterable point, int width, int height) {
    final double[] coords = point.getPoint();
    final double x = PAD + (coords[0] + 1) / 2.0 * (width - 2 * PAD);
    // the vertical axis is inverted: it is measured down from the top edge
    final double y = height - PAD - (coords[1] + 1) / 2.0 * (height - 2 * PAD);
    return new DoublePoint(new double[] { x, y });
}
 
Example #15
Source File: ClusterAlgorithmComparison.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Maps the first two coordinates of a point onto an area of the given size,
 * keeping a margin of {@code PAD} on every side. Coordinates are shifted by
 * one and halved before scaling, and the second coordinate is flipped so
 * that larger values end up closer to the top.
 *
 * @param point the point to map; its first two coordinates are used
 * @param width the width of the target area
 * @param height the height of the target area
 * @return a new two-dimensional point holding the mapped coordinates
 */
private Clusterable transform(Clusterable point, int width, int height) {
    final double[] coords = point.getPoint();
    final double x = PAD + (coords[0] + 1) / 2.0 * (width - 2 * PAD);
    // the vertical axis is inverted: it is measured down from the top edge
    final double y = height - PAD - (coords[1] + 1) / 2.0 * (height - 2 * PAD);
    return new DoublePoint(new double[] { x, y });
}
 
Example #16
Source File: ClusterAlgorithmComparison.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Maps the first two coordinates of a point onto an area of the given size,
 * keeping a margin of {@code PAD} on every side. Coordinates are shifted by
 * one and halved before scaling, and the second coordinate is flipped so
 * that larger values end up closer to the top.
 *
 * @param point the point to map; its first two coordinates are used
 * @param width the width of the target area
 * @param height the height of the target area
 * @return a new two-dimensional point holding the mapped coordinates
 */
private Clusterable transform(Clusterable point, int width, int height) {
    final double[] coords = point.getPoint();
    final double x = PAD + (coords[0] + 1) / 2.0 * (width - 2 * PAD);
    // the vertical axis is inverted: it is measured down from the top edge
    final double y = height - PAD - (coords[1] + 1) / 2.0 * (height - 2 * PAD);
    return new DoublePoint(new double[] { x, y });
}
 
Example #17
Source File: ClusterAlgorithmComparison.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Maps the first two coordinates of a point onto an area of the given size,
 * keeping a margin of {@code PAD} on every side. Coordinates are shifted by
 * one and halved before scaling, and the second coordinate is flipped so
 * that larger values end up closer to the top.
 *
 * @param point the point to map; its first two coordinates are used
 * @param width the width of the target area
 * @param height the height of the target area
 * @return a new two-dimensional point holding the mapped coordinates
 */
private Clusterable transform(Clusterable point, int width, int height) {
    final double[] coords = point.getPoint();
    final double x = PAD + (coords[0] + 1) / 2.0 * (width - 2 * PAD);
    // the vertical axis is inverted: it is measured down from the top edge
    final double y = height - PAD - (coords[1] + 1) / 2.0 * (height - 2 * PAD);
    return new DoublePoint(new double[] { x, y });
}
 
Example #18
Source File: ClusterEvaluator.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Measures how far apart two {@link Clusterable} instances are, using the
 * {@link DistanceMeasure} configured on this evaluator.
 *
 * @param p1 the first clusterable
 * @param p2 the second clusterable
 * @return the distance between the points of {@code p1} and {@code p2}
 */
protected double distance(final Clusterable p1, final Clusterable p2) {
    final double[] from = p1.getPoint();
    final double[] to = p2.getPoint();
    return measure.compute(from, to);
}
 
Example #19
Source File: ClusterEvaluator.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Measures how far apart two {@link Clusterable} instances are, using the
 * {@link DistanceMeasure} configured on this evaluator.
 *
 * @param p1 the first clusterable
 * @param p2 the second clusterable
 * @return the distance between the points of {@code p1} and {@code p2}
 */
protected double distance(final Clusterable p1, final Clusterable p2) {
    final double[] from = p1.getPoint();
    final double[] to = p2.getPoint();
    return measure.compute(from, to);
}