#!/usr/bin/env python

# Copyright (C) 2014  Open Data ("Open Data" refers to
# one or more of the following companies: Open Data Partners LLC,
# Open Data Research LLC, or Open Data Capital LLC.)
#
# This file is part of Hadrian.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import unittest

import numpy

from titus.genpy import PFAEngine
from titus.producer.tools import look
from titus.producer.kmeans import *
from titus.datatype import AvroArray
from titus.datatype import AvroDouble


class TestProducerKMeans(unittest.TestCase):
    """Tests for the k-means producer and the PFA documents it emits."""

    # Points around which the synthetic 3-D samples are generated.
    _TRUE_CENTERS = ([1, 1, 1], [3, 2, 5], [8, 2, 7], [5, 8, 5], [1, 1, 9])

    # Cluster centers the tests expect, in the order they are compared
    # against kmeans.centers() and the "init" list of the PFA document.
    _EXPECTED_CENTERS = (
        [1.00, 1.01, 1.00],
        [1.01, 1.00, 9.01],
        [3.01, 2.01, 5.00],
        [4.99, 8.00, 4.99],
        [8.02, 2.00, 7.01],
    )

    # Labels handed to pfaDocument, one per expected center.
    _CLUSTER_NAMES = ("one", "two", "three", "four", "five")

    # Per-coordinate factors applied to the raw data in testKMeansTransform;
    # they mirror the "x/10.0", "y/20.0", "z/30.0" transformation strings.
    _SCALES = (10.0, 20.0, 30.0)

    def testSuperSimple(self):
        """Run a one-cluster optimization on two 1-D points and print the result.

        This is a smoke test: it makes no assertions and only fails if
        constructing or optimizing the model raises.
        """
        dataset = numpy.array([[50.0], [30.0]])
        kmeans = KMeans(1, dataset)
        # Parenthesized single-argument form prints the same value under
        # Python 2 and is also valid Python 3 syntax.
        print(kmeans.optimize(whileall(printValue("g"), moving(), maxIterations(10))))

    @staticmethod
    def data(*centers):
        """Yield an endless stream of random 3-D points.

        Each point is produced by picking one of ``centers`` at random and
        drawing every coordinate from a Gaussian with that center's
        coordinate as mean and a standard deviation of 1.

        :param centers: sequences with at least three numeric coordinates
        :returns: generator of ``(x, y, z)`` tuples; it never terminates
        """
        while True:
            # random.sample is kept (rather than random.choice) so the seeded
            # random stream is consumed exactly as it was originally.
            center, = random.sample(centers, 1)
            x = random.gauss(center[0], 1)
            y = random.gauss(center[1], 1)
            z = random.gauss(center[2], 1)
            yield (x, y, z)

    def assertArrayAlmostEqual(self, xarray, yarray, *args, **kwds):
        """Assert that two sequences are element-wise almost equal.

        The lengths are compared first because ``zip`` stops at the shorter
        input, which would otherwise let a truncated array pass unnoticed.

        :param xarray: first sequence of numbers
        :param yarray: second sequence of numbers
        :param args: forwarded to ``assertAlmostEqual`` for every pair
        :param kwds: forwarded to ``assertAlmostEqual`` for every pair
        """
        self.assertEqual(len(xarray), len(yarray))
        for x, y in zip(xarray, yarray):
            self.assertAlmostEqual(x, y, *args, **kwds)

    def testKMeans(self):
        """Cluster 100000 synthetic points and check centers, PFA document and engine."""
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        # NOTE(review): the loop pulls one extra point from the generator
        # before breaking; it is left that way so the state of the seeded
        # random module after filling is unchanged -- presumably KMeans may
        # draw from it, confirm before simplifying.
        for i, x in enumerate(TestProducerKMeans.data(*self._TRUE_CENTERS)):
            if i >= dataset.shape[0]:
                break
            dataset[i, :] = x

        kmeans = KMeans(5, dataset)
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        for i, expected in enumerate(self._EXPECTED_CENTERS):
            self.assertArrayAlmostEqual(centers[i], expected, places=2)

        doc = kmeans.pfaDocument("Cluster", list(self._CLUSTER_NAMES))
        # look(doc, maxDepth=8)

        clusters = doc["cells"]["clusters"]["init"]
        for i, expected in enumerate(self._EXPECTED_CENTERS):
            self.assertArrayAlmostEqual(clusters[i]["center"], expected, places=2)

        engine, = PFAEngine.fromJson(doc)

        # Feeding each expected center to the engine must return its label.
        for name, point in zip(self._CLUSTER_NAMES, self._EXPECTED_CENTERS):
            self.assertEqual(engine.action(list(point)), name)

    def testKMeansTransform(self):
        """Same as testKMeans, but on scaled data that is unscaled by a Transformation."""
        random.seed(12345)
        numpy.seterr(divide="ignore", invalid="ignore")

        dataset = numpy.empty((100000, 3), dtype=numpy.dtype(float))
        # NOTE(review): loop shape kept as in testKMeans so the seeded random
        # stream is consumed identically.
        for i, (x, y, z) in enumerate(TestProducerKMeans.data(*self._TRUE_CENTERS)):
            if i >= dataset.shape[0]:
                break
            dataset[i, :] = [x * 10.0, y * 20.0, z * 30.0]

        # The transformation divides out the factors applied above.
        trans = Transformation("x/10.0", "y/20.0", "z/30.0")
        kmeans = KMeans(5, trans.transform(dataset, ["x", "y", "z"]))
        kmeans.optimize(whileall(moving(), maxIterations(1000)))

        centers = kmeans.centers()
        for i, expected in enumerate(self._EXPECTED_CENTERS):
            self.assertArrayAlmostEqual(centers[i], expected, places=1)

        doc = kmeans.pfaDocument(
            "Cluster",
            list(self._CLUSTER_NAMES),
            preprocess=trans.new(AvroArray(AvroDouble()),
                                 x="input[0]", y="input[1]", z="input[2]"))
        # look(doc, maxDepth=10)

        clusters = doc["cells"]["clusters"]["init"]
        for i, expected in enumerate(self._EXPECTED_CENTERS):
            self.assertArrayAlmostEqual(clusters[i]["center"], expected, places=2)

        engine, = PFAEngine.fromJson(doc)

        # The engine receives raw (scaled) input, so scale each expected
        # center back up before asking for its label.
        for name, point in zip(self._CLUSTER_NAMES, self._EXPECTED_CENTERS):
            scaled = [coord * scale for coord, scale in zip(point, self._SCALES)]
            self.assertEqual(engine.action(scaled), name)


if __name__ == "__main__":
    unittest.main()