org.apache.flink.examples.java.clustering.KMeans Java Examples

The following examples show how to use org.apache.flink.examples.java.clustering.KMeans. Each example is taken from an open-source project; the source file and license are noted above the example code.
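Several of the tests below invoke the example through its KMeans.main method, passing the point, centroid, output, and iteration settings as command-line style arguments. A minimal sketch of that calling convention follows; the file paths and iteration count here are hypothetical placeholders, not values taken from the original tests.

import org.apache.flink.examples.java.clustering.KMeans;

public class KMeansInvocationSketch {
	public static void main(String[] args) throws Exception {
		// Sketch only: the paths below are hypothetical; point them at real
		// point/centroid input files before running.
		KMeans.main(new String[] {
			"--points", "/tmp/kmeans/points.txt",
			"--centroids", "/tmp/kmeans/centroids.txt",
			"--output", "/tmp/kmeans/result",
			"--iterations", "10"});
	}
}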
Example #1
Source File: PreviewPlanDumpTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void dumpIterativeKMeans() {
	// prepare the test environment
	PreviewPlanEnvironment env = new PreviewPlanEnvironment();
	env.setAsContext();
	try {
		KMeans.main(new String[] {
			"--points ", IN_FILE,
			"--centroids ", IN_FILE,
			"--output ", OUT_FILE,
			"--iterations", "123"});
	} catch (OptimizerPlanEnvironment.ProgramAbortException pae) {
		// all good.
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail("KMeans failed with an exception");
	}
	dump(env.getPlan());
}
 
Example #2
Source File: DumpCompiledPlanTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void dumpIterativeKMeans() {
	// prepare the test environment
	PreviewPlanEnvironment env = new PreviewPlanEnvironment();
	env.setAsContext();
	try {
		KMeans.main(new String[] {
			"--points ", IN_FILE,
			"--centroids ", IN_FILE,
			"--output ", OUT_FILE,
			"--iterations", "123"});
	} catch (OptimizerPlanEnvironment.ProgramAbortException pae) {
		// all good.
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail("KMeans failed with an exception");
	}
	dump(env.getPlan());
}
 
Example #3
Source File: PreviewPlanDumpTest.java    From flink with Apache License 2.0
@Test
public void dumpIterativeKMeans() {
	// prepare the test environment
	PreviewPlanEnvironment env = new PreviewPlanEnvironment();
	env.setAsContext();
	try {
		KMeans.main(new String[] {
			"--points ", IN_FILE,
			"--centroids ", IN_FILE,
			"--output ", OUT_FILE,
			"--iterations", "123"});
	} catch (OptimizerPlanEnvironment.ProgramAbortException pae) {
		// all good.
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail("KMeans failed with an exception");
	}
	dump(env.getPlan());
}
 
Example #4
Source File: DumpCompiledPlanTest.java    From flink with Apache License 2.0
@Test
public void dumpIterativeKMeans() {
	// prepare the test environment
	PreviewPlanEnvironment env = new PreviewPlanEnvironment();
	env.setAsContext();
	try {
		KMeans.main(new String[] {
			"--points ", IN_FILE,
			"--centroids ", IN_FILE,
			"--output ", OUT_FILE,
			"--iterations", "123"});
	} catch (OptimizerPlanEnvironment.ProgramAbortException pae) {
		// all good.
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail("KMeans failed with an exception");
	}
	dump(env.getPlan());
}
 
Example #5
Source File: SuccessAfterNetworkBuffersFailureITCase.java    From Flink-CEPplus with Apache License 2.0
private static void runKMeans(ExecutionEnvironment env) throws Exception {

	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	// get input data
	DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance();
	DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance();

	// set number of bulk iterations for KMeans algorithm
	IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20);

	// add some re-partitions to increase network buffer use
	DataSet<KMeans.Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			.rebalance()
			// count and sum point coordinates for each centroid
			.map(new KMeans.CountAppender())
			.groupBy(0).reduce(new KMeans.CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.rebalance()
			.map(new KMeans.CentroidAverager());

	// feed new centroids back into next iteration
	DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids);

	DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

	clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>());

	env.execute("KMeans Example");
}
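The runKMeans helper above relies on an ExecutionEnvironment and a PARALLELISM constant supplied by the surrounding integration test. As a minimal sketch of driving the same pipeline locally, assuming the helper sits in the same class and using an assumed parallelism of 4 (not a value from the test):

import org.apache.flink.api.java.ExecutionEnvironment;

public class RunKMeansLocallySketch {
	// Assumed value for illustration; the ITCase defines its own PARALLELISM.
	private static final int PARALLELISM = 4;

	public static void main(String[] args) throws Exception {
		// Local environment; the ITCase instead runs against a test cluster.
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		runKMeans(env);
	}

	// The runKMeans(ExecutionEnvironment) method shown in the example above
	// would be placed here for this sketch to compile.
}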
 
Example #6
Source File: SuccessAfterNetworkBuffersFailureITCase.java    From flink with Apache License 2.0
private static void runKMeans(ExecutionEnvironment env) throws Exception {

	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	// get input data
	DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance();
	DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance();

	// set number of bulk iterations for KMeans algorithm
	IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20);

	// add some re-partitions to increase network buffer use
	DataSet<KMeans.Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			.rebalance()
			// count and sum point coordinates for each centroid
			.map(new KMeans.CountAppender())
			.groupBy(0).reduce(new KMeans.CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.rebalance()
			.map(new KMeans.CentroidAverager());

	// feed new centroids back into next iteration
	DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids);

	DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

	clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>());

	env.execute("KMeans Example");
}
 
Example #7
Source File: SuccessAfterNetworkBuffersFailureITCase.java    From flink with Apache License 2.0
private static void runKMeans(ExecutionEnvironment env) throws Exception {

	env.setParallelism(PARALLELISM);

	// get input data
	DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance();
	DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance();

	// set number of bulk iterations for KMeans algorithm
	IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20);

	// add some re-partitions to increase network buffer use
	DataSet<KMeans.Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			.rebalance()
			// count and sum point coordinates for each centroid
			.map(new KMeans.CountAppender())
			.groupBy(0).reduce(new KMeans.CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.rebalance()
			.map(new KMeans.CentroidAverager());

	// feed new centroids back into next iteration
	DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids);

	DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

	clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>());

	env.execute("KMeans Example");
}
 
Example #8
Source File: PreviewPlanDumpTest.java    From flink with Apache License 2.0
@Test
public void dumpIterativeKMeans() throws Exception {
	verifyPlanDump(KMeans.class,
		"--points ", IN_FILE,
		"--centroids ", IN_FILE,
		"--output ", OUT_FILE,
		"--iterations", "123");
}
 
Example #9
Source File: DumpCompiledPlanTest.java    From flink with Apache License 2.0
@Test
public void dumpIterativeKMeans() throws Exception {
	verifyOptimizedPlan(KMeans.class,
		"--points ", IN_FILE,
		"--centroids ", IN_FILE,
		"--output ", OUT_FILE,
		"--iterations", "123");
}