weka.core.EuclideanDistance Java Examples

The following examples show how to use weka.core.EuclideanDistance. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ED1NN.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the distance between two instances over every attribute except the last one.
 *
 * When both instances have their class attribute in the last position the value is
 * accumulated directly as a sum of squared differences; no square root is taken and
 * {@code cutoff} is not consulted on this path. Otherwise the call is delegated to an
 * unnormalised {@link EuclideanDistance}, which does receive {@code cutoff}.
 *
 * @param first  the first instance
 * @param second the second instance
 * @param cutoff cutoff value handed to the fallback distance function
 * @return the sum of squared attribute differences, or the fallback's result
 */
public final double distance(Instance first, Instance second, double cutoff) {

        // The fast path below assumes the class value is the last attribute of both
        // instances; if that does not hold, fall back to Weka's EuclideanDistance.
        if (!(first.classIndex() == first.numAttributes() - 1 && second.classIndex() == second.numAttributes() - 1)) {
            EuclideanDistance fallback = new EuclideanDistance();
            fallback.setDontNormalize(true);
            return fallback.distance(first, second, cutoff);
        }

        final int numFeatures = first.numAttributes() - 1;
        double total = 0;
        for (int idx = 0; idx < numFeatures; idx++) {
            double diff = first.value(idx) - second.value(idx);
            total += diff * diff;
        }

        // deliberately the squared distance: the square root is not applied
        return total;
    }
 
Example #2
Source File: LLGC.java    From collective-classification-weka-package with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Resets every member of this classifier to its initial value, after letting
 * the superclass initialise its own members.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // configuration defaults
  m_Alpha                = 0.99;
  m_Sigma                = 1.0;
  m_Repeats              = 0;
  m_SequenceLimit        = SEQ_LIMIT_GRAPHKERNEL;
  m_filterType           = SMO.FILTER_NORMALIZE;
  m_IncludeNumAttributes = true;
  m_DistanceFunction     = new EuclideanDistance();

  // data references, cleared until data is supplied
  m_Data                 = null;
  m_TrainsetNew          = null;
  m_TestsetNew           = null;

  // matrices, cleared until they are computed
  m_MatrixY              = null;
  m_MatrixW              = null;
  m_MatrixD              = null;
  m_MatrixS              = null;
  m_MatrixFStar          = null;
}
 
Example #3
Source File: KNNAugSpaceSampler.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a sampler backed by a nearest neighbour search over the given instances.
 * The search is configured with a Euclidean distance whose attribute range is
 * restricted to {@code first-(numAttributes - 1)}, i.e. every attribute but the last.
 * If configuring the distance function or the search fails, the error is logged and
 * construction continues with the search as it was passed in.
 *
 * @param preciseInsts The instances the nearest neighbour search is built on.
 * @param rng The random source handed to the superclass.
 * @param k The number of neighbours stored in this sampler.
 * @param nearestNeighbour The nearest neighbour search algorithm to use.
 */
public KNNAugSpaceSampler(final Instances preciseInsts, final Random rng, final int k, final NearestNeighbourSearch nearestNeighbour) {
	super(preciseInsts, rng);
	this.k = k;
	DistanceFunction dist = new EuclideanDistance(preciseInsts);

	// Weka's option parser matches the flag ("-R") as its own array element and reads the
	// value from the following element; a single "-R first-N" element is never recognised.
	final int lastFeature = preciseInsts.numAttributes() - 1;
	// With fewer than two attributes there is no valid "first-N" range, so pass no options.
	String[] distOptions = lastFeature >= 1 ? new String[] { "-R", "first-" + lastFeature } : new String[0];

	try {
		dist.setOptions(distOptions);
		nearestNeighbour.setDistanceFunction(dist);
		nearestNeighbour.setInstances(preciseInsts);
	} catch (Exception e) {
		// Pass the exception as the throwable argument (no placeholder) so the stack trace is logged.
		logger.error("Could not configure distance function or setup nearest neighbour.", e);
	}
	nearestNeighbour.setMeasurePerformance(false);
	this.nearestNeighbour = nearestNeighbour;
}
 
Example #4
Source File: TopDownConstructor.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Constructs the ball tree from the root downwards.
 * The node and depth counters are reset, the splitter is wired up with the
 * current instances, index list and distance function, and a root node covering
 * the whole index list is created and then split recursively.
 *
 * @return The root node of the tree.
 * @throws Exception If there is a problem building the tree.
 */
public BallNode buildTree() throws Exception {
  // start counting from scratch
  m_MaxDepth = 0;
  m_NumNodes = 0;
  m_NumLeaves = 1;

  // hand the splitter everything it needs
  m_Splitter.setInstances(m_Instances);
  m_Splitter.setInstanceList(m_InstList);
  m_Splitter.setEuclideanDistanceFunction((EuclideanDistance) m_DistanceFunction);

  // the root spans the complete index list
  final int lastIdx = m_InstList.length - 1;
  BallNode root = new BallNode(0, lastIdx, 0);
  root.setPivot(BallNode.calcCentroidPivot(m_InstList, m_Instances));
  root.setRadius(BallNode.calcRadius(m_InstList, m_Instances, root.getPivot(), m_DistanceFunction));

  splitNodes(root, m_MaxDepth + 1, root.m_Radius);

  return root;
}
 
Example #5
Source File: NearestNeighbourSearch.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses a given list of options. Valid options are:
 *
 <!-- options-start -->
 <!-- options-end -->
 *
 * The value of {@code -A} is read as a distance function specification (class
 * name followed by its own options); when it is absent a default
 * {@link EuclideanDistance} is used. The {@code -P} flag switches performance
 * measurement on.
 *
 * @param options 	the list of options as an array of strings
 * @throws Exception 	if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String distanceSpec = Utils.getOption('A', options);
  if (distanceSpec.isEmpty()) {
    // nothing specified: use the default distance
    setDistanceFunction(new EuclideanDistance());
  } else {
    String[] specParts = Utils.splitOptions(distanceSpec);
    if (specParts.length == 0) {
      throw new Exception("Invalid DistanceFunction specification string.");
    }
    // first token is the class name, the rest are that class's options
    String className = specParts[0];
    specParts[0] = "";

    Object created = Utils.forName(DistanceFunction.class, className, specParts);
    setDistanceFunction((DistanceFunction) created);
  }

  setMeasurePerformance(Utils.getFlag('P', options));
}
 
Example #6
Source File: BallTree.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the BallTree on the instances referenced by the m_Instances field
 * (supplied through setInstances(Instances)). Outside classes should not call
 * this method directly; they should only use setInstances(Instances).
 *
 * @throws Exception If no instances are supplied
 * (m_Instances is null), or if some other error in the
 * supplied BallTreeConstructor occurs while building
 * the tree.
 */
protected void buildTree() throws Exception {
  if (m_Instances == null) {
    throw new Exception("No instances supplied yet. Have to call " +
                        "setInstances(instances) with a set of Instances " +
                        "first.");
  }

  // master index list: position p initially refers to instance p
  final int numInst = m_Instances.numInstances();
  m_InstList = new int[numInst];
  for (int pos = 0; pos < numInst; pos++) {
    m_InstList[pos] = pos;
  }

  // configure the distance function and the tree constructor
  m_DistanceFunction.setInstances(m_Instances);
  m_TreeConstructor.setInstances(m_Instances);
  m_TreeConstructor.setInstanceList(m_InstList);
  m_TreeConstructor.setEuclideanDistanceFunction(
                    (EuclideanDistance) m_DistanceFunction);

  m_Root = m_TreeConstructor.buildTree();
}
 
Example #7
Source File: SimpleKMeans.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the distance function used to compare instances. Only
 * EuclideanDistance and ManhattanDistance instances are accepted.
 * 
 * @param df the new distance function to use
 * @throws Exception if df is neither a Euclidean nor a Manhattan distance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  boolean supported = (df instanceof EuclideanDistance)
      || (df instanceof ManhattanDistance);
  if (!supported) {
    throw new Exception(
        "SimpleKMeans currently only supports the Euclidean and Manhattan distances.");
  }
  m_DistanceFunction = df;
}
 
Example #8
Source File: SimpleKMeans.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Assigns an already filtered instance to the cluster whose centroid is nearest.
 * The running minimum starts at Integer.MAX_VALUE and cluster 0 is the initial
 * choice, so a centroid is only selected if its distance is strictly below
 * the current minimum.
 * 
 * @param instance the instance to assign a cluster to
 * @param updateErrors if true, update the within clusters sum of errors
 * @param useFastDistCalc whether to use the distance calculation that takes
 *          the current minimum as cut-off, or the plain one
 * @return a cluster number
 */
private int clusterProcessedInstance(Instance instance, boolean updateErrors,
    boolean useFastDistCalc) {
  int bestCluster = 0;
  double minDist = Integer.MAX_VALUE;

  for (int c = 0; c < m_NumClusters; c++) {
    double dist = useFastDistCalc
        ? m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c), minDist)
        : m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c));
    if (dist < minDist) {
      bestCluster = c;
      minDist = dist;
    }
  }

  if (!updateErrors) {
    return bestCluster;
  }

  if (m_DistanceFunction instanceof EuclideanDistance) {
    // Euclidean distance to Squared Euclidean distance
    minDist *= minDist;
  }
  m_squaredErrors[bestCluster] += minDist;
  return bestCluster;
}
 
Example #9
Source File: EnsembleProvider.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates the CAWPE ensemble model: five Weka classifiers (SMO, IBk, J48,
 * Logistic and MultilayerPerceptron) combined by a
 * {@link MajorityConfidenceVote}. It refers to "Heterogeneous ensemble of
 * standard classification algorithms" (HESCA) as described in Lines, Jason &
 * Taylor, Sarah & Bagnall, Anthony. (2018). Time Series Classification with
 * HIVE-COTE: The Hierarchical Vote Collective of Transformation-Based
 * Ensembles. ACM Transactions on Knowledge Discovery from Data. 12. 1-35.
 * 10.1145/3182382.
 *
 * @param seed
 *            Seed passed to the SMO and J48 classifiers and to the majority
 *            confidence voting scheme
 * @param numFolds
 *            Number of folds passed to the {@link MajorityConfidenceVote}
 * @return Returns an initialized (but untrained) ensemble model.
 * @throws Exception
 *             Thrown when the initialization has failed
 */
public static Classifier provideCAWPEEnsembleModel(final int seed, final int numFolds) throws Exception {
	Vote voter = new MajorityConfidenceVote(numFolds, seed);

	// SMO with a polynomial kernel of exponent 1 and calibration models enabled
	PolyKernel linearKernel = new PolyKernel();
	linearKernel.setExponent(1);
	SMO smo = new SMO();
	smo.turnChecksOff();
	smo.setBuildCalibrationModels(true);
	smo.setKernel(linearKernel);
	smo.setRandomSeed(seed);

	// IBk constructed with 100, cross-validation on, unnormalised Euclidean distance
	EuclideanDistance ed = new EuclideanDistance();
	ed.setDontNormalize(true);
	IBk knn = new IBk(100);
	knn.setCrossValidate(true);
	knn.getNearestNeighbourSearchAlgorithm().setDistanceFunction(ed);

	J48 c45 = new J48();
	c45.setSeed(seed);

	// Logistic and MultilayerPerceptron are used with their default settings
	voter.setClassifiers(new Classifier[] { smo, knn, c45, new Logistic(), new MultilayerPerceptron() });
	return voter;
}
 
Example #10
Source File: CAWPE.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Configures the ensemble as "CAWPE-NoLogistic": the default set up without
 * the Logistic component.
 * Comps: SVML, NN, C4.5, MLP
 * Weight: TrainAcc(4)
 * Vote: MajorityConfidence
 * Train estimates come from a CrossValidationEvaluator with 10 folds.
 */
public final void setupDefaultSettings_NoLogistic() {
    this.ensembleName = "CAWPE-NoLogistic";

    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();

    CrossValidationEvaluator cv = new CrossValidationEvaluator(seed, false, false, false, false);
    cv.setNumFolds(10);
    this.trainEstimator = cv;

    // SVML: SMO with a polynomial kernel of exponent 1, logistic models built
    SMO smo = new SMO();
    smo.turnChecksOff();
    smo.setBuildLogisticModels(true);
    PolyKernel linearKernel = new PolyKernel();
    linearKernel.setExponent(1);
    smo.setKernel(linearKernel);
    smo.setRandomSeed(seed);

    // NN: kNN constructed with 100, cross-validation on, normalisation off
    kNN knn = new kNN(100);
    knn.setCrossValidate(true);
    knn.normalise(false);
    knn.setDistanceFunction(new EuclideanDistance());

    // classifiers and their names are kept in matching order
    Classifier[] classifiers = { smo, knn, new J48(), new MultilayerPerceptron() };
    String[] classifierNames = { "SVML", "NN", "C4.5", "MLP" };

    setClassifiers(classifiers, classifierNames, null);
}
 
Example #11
Source File: KDTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the distance function used for nearest neighbour search. The function
 * is stored both in the general distance field and in the typed
 * EuclideanDistance field.
 * 
 * @param df		the distance function to use
 * @throws Exception	if not EuclideanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  if (!(df instanceof EuclideanDistance)) {
    throw new Exception("KDTree currently only works with "
        + "EuclideanDistanceFunction.");
  }
  m_EuclideanDistance = (EuclideanDistance) df;
  m_DistanceFunction = m_EuclideanDistance;
}
 
Example #12
Source File: KDTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the KDTree on the supplied set of instances/points. It
 * is advisable to run the replace missing attributes filter
 * on the passed instances first.
 * NOTE: This method should not be called from outside this
 * class. Outside classes should call setInstances(Instances)
 * instead.
 * 
 * @param instances	The instances to build the tree on
 * @throws Exception	if something goes wrong
 */
protected void buildKDTree(Instances instances) throws Exception {

  checkMissing(instances);

  // reuse an existing distance function, otherwise create one on the data
  if (m_EuclideanDistance != null) {
    m_EuclideanDistance.setInstances(instances);
  } else {
    m_EuclideanDistance = new EuclideanDistance(instances);
    m_DistanceFunction = m_EuclideanDistance;
  }

  m_Instances = instances;
  final int numInst = m_Instances.numInstances();

  // global index list: position p initially refers to instance p
  m_InstList = new int[numInst];
  for (int pos = 0; pos < numInst; pos++) {
    m_InstList[pos] = pos;
  }

  // attribute ranges as reported by the distance function
  double[][] universe = m_EuclideanDistance.getRanges();

  // initialise the internal fields of the node splitter
  m_Splitter.setInstances(m_Instances);
  m_Splitter.setInstanceList(m_InstList);
  m_Splitter.setEuclideanDistanceFunction(m_EuclideanDistance);
  m_Splitter.setNodeWidthNormalization(m_NormalizeNodeWidth);

  // the tree starts as a single root node spanning all instances
  m_NumNodes = 1;
  m_NumLeaves = 1;
  m_MaxDepth = 0;
  m_Root = new KDTreeNode(m_NumNodes, 0, numInst - 1, universe);

  splitNodes(m_Root, universe, m_MaxDepth + 1);
}
 
Example #13
Source File: CoverTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the tree on the given set of instances.
 * P.S.: For internal use only. Outside classes
 * should call setInstances().
 * Instance 0 is used as the root point; for every other instance a
 * DistanceNode holding its distance to that point is pushed onto the point
 * set, which is then handed to batch_insert.
 *
 * @param insts The instances on which to build
 * the cover tree.
 * @throws Exception If the supplied set of
 * Instances is empty, or if checkMissing or the
 * distance function reports a problem.
 */
protected void buildCoverTree(Instances insts) throws Exception {
  if (insts.numInstances() == 0) {
    throw new Exception(
 "CoverTree: Empty set of instances. Cannot build tree.");
  }
  checkMissing(insts);
  if (m_EuclideanDistance == null) {
    m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts);
  } else {
    m_EuclideanDistance.setInstances(insts);
  }

  Stack<DistanceNode> point_set = new Stack<DistanceNode>();
  Stack<DistanceNode> consumed_set = new Stack<DistanceNode>();

  // the first instance serves as the root point
  int p_idx = 0;
  Instance point_p = insts.instance(p_idx);

  for (int i = 1; i < insts.numInstances(); i++) {
    DistanceNode temp = new DistanceNode();
    temp.dist = new Stack<Double>();
    // square root of the value returned by the cut-off variant of distance()
    temp.dist.push(Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY)));
    temp.idx = i;
    point_set.push(temp);
  }

  // The maximum is taken from the point set itself; the previous per-iteration
  // tracking of the maximum (and of the farthest instance) was never read.
  double max_dist = max_set(point_set);
  m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist),
                        point_set, consumed_set);
}
 
Example #14
Source File: SimpleKMeansWithSilhouette.java    From apogen with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns an already filtered instance to the cluster whose centroid is nearest.
 * When the fast distance calculation and the canopy speed-up are both active and
 * canopies are supplied, centroids whose canopies do not intersect the instance's
 * canopies are skipped. If the intersection check itself throws, the stack trace
 * is printed and the centroid is evaluated as usual.
 * 
 * @param instance
 *            the instance to assign a cluster to
 * @param updateErrors
 *            if true, update the within clusters sum of errors
 * @param useFastDistCalc
 *            whether to use the distance calculation that takes the current
 *            minimum as cut-off, or the plain one
 * @param instanceCanopies
 *            the canopies covering the instance to be clustered, or null if not
 *            using the option to reduce the number of distance computations via
 *            canopies
 * @return a cluster number
 */
private int clusterProcessedInstance(Instance instance, boolean updateErrors, boolean useFastDistCalc,
		long[] instanceCanopies) {
	int bestCluster = 0;
	double minDist = Integer.MAX_VALUE;

	for (int c = 0; c < m_NumClusters; c++) {
		// canopy pruning only applies in fast mode with canopies available
		if (useFastDistCalc && m_speedUpDistanceCompWithCanopies && instanceCanopies != null
				&& instanceCanopies.length > 0) {
			try {
				boolean overlaps = Canopy.nonEmptyCanopySetIntersection(m_centroidCanopyAssignments.get(c),
						instanceCanopies);
				if (!overlaps) {
					continue;
				}
			} catch (Exception ex) {
				ex.printStackTrace();
			}
		}

		double dist = useFastDistCalc
				? m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c), minDist)
				: m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c));
		if (dist < minDist) {
			bestCluster = c;
			minDist = dist;
		}
	}

	if (!updateErrors) {
		return bestCluster;
	}

	if (m_DistanceFunction instanceof EuclideanDistance) {
		// Euclidean distance to Squared Euclidean distance
		minDist *= minDist * instance.weight();
	}
	m_squaredErrors[bestCluster] += minDist;
	return bestCluster;
}
 
Example #15
Source File: BasicDTW.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Manual smoke test for BasicDTW: loads a dataset and prints the Euclidean and
 * DTW distances between its first two instances, with and without a cutoff.
 *
 * @param args optional; args[0] is the dataset path. When absent, the
 *             original hard-coded developer path is used.
 */
public static void main(String[] args){
        String path = (args != null && args.length > 0) ? args[0] : "C:\\Users\\ajb\\Dropbox\\test\\Beef";
        Instances test = DatasetLoading.loadDataNullable(path);
        // NOTE(review): the loader's name suggests it may return null on failure - guard before use
        if (test == null) {
            System.err.println("Could not load dataset: " + path);
            return;
        }

        // Test BasicDTW against an unnormalised Euclidean distance
        BasicDTW dtw=new BasicDTW(test);
        EuclideanDistance ed=new EuclideanDistance(test);
        ed.setDontNormalize(true);
        System.out.println(" DATA \n"+test.toString());
        System.out.println(" ED ="+ed.distance(test.instance(0),test.instance(1)));

        // same pairs again, this time passing a cutoff value
        System.out.println(" ED ="+ed.distance(test.instance(0),test.instance(1),2));
        System.out.println(" DTW ="+dtw.distance(test.instance(0),test.instance(1)));
        System.out.println(" DTW ="+dtw.distance(test.instance(0),test.instance(1),1));
    }
 
Example #16
Source File: CAWPE.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
     * Configures the ensemble as "HESCA".
     * Comps: NN, NB, C45, SVML, SVMQ, RandF, RotF
     * Weight: TrainAcc
     * Vote: MajorityVote
     * Train estimates come from a CrossValidationEvaluator with 10 folds.
     *
     * As used originally in ST_HESCA, COTE.
     * NOTE the original also contained Bayes Net (BN). We have removed it because the classifier crashes
     * unpredictably when discretising features (due to lack of variance in the feature, but not easily
     * detected and dealt with), so only seven components are set up here.
     *
     */
    public final void setupOriginalHESCASettings() {
        this.ensembleName = "HESCA";

        this.weightingScheme = new TrainAcc();
        this.votingScheme = new MajorityVote();

        CrossValidationEvaluator cv = new CrossValidationEvaluator(seed, false, false, false, false);
        cv.setNumFolds(10);
        this.trainEstimator = cv;

        // NN: kNN constructed with 100, cross-validation on, normalisation off
        kNN knn = new kNN(100);
        knn.setCrossValidate(true);
        knn.normalise(false);
        knn.setDistanceFunction(new EuclideanDistance());

        NaiveBayes nb = new NaiveBayes();
        J48 c45 = new J48();

        // SVML: SMO with a polynomial kernel of exponent 1
        SMO svml = new SMO();
        svml.turnChecksOff();
        PolyKernel linearKernel = new PolyKernel();
        linearKernel.setExponent(1);
        svml.setKernel(linearKernel);
        svml.setRandomSeed(seed);

        // SVMQ: SMO with a polynomial kernel of exponent 2.
        // Assumes no missing, all real valued and a discrete class variable
        SMO svmq = new SMO();
        svmq.turnChecksOff();
        PolyKernel quadraticKernel = new PolyKernel();
        quadraticKernel.setExponent(2);
        svmq.setKernel(quadraticKernel);
        svmq.setRandomSeed(seed);

        RandomForest randF = new RandomForest();
        randF.setNumTrees(500);
        randF.setSeed(seed);

        RotationForest rotF = new RotationForest();
        rotF.setNumIterations(50);
        rotF.setSeed(seed);

        // classifiers and their names are kept in matching order
        Classifier[] classifiers = { knn, nb, c45, svml, svmq, randF, rotF };
        String[] classifierNames = { "NN", "NB", "C45", "SVML", "SVMQ", "RandF", "RotF" };

        setClassifiers(classifiers, classifierNames, null);
    }
 
Example #17
Source File: CAWPE.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures the ensemble as "CAWPE", the 'basic UCI' set up:
 * Comps: SVML, NN, C4.5, Logistic, MLP
 * Weight: TrainAcc(4)
 * Vote: MajorityConfidence
 * No transform is set, and train estimates come from a
 * CrossValidationEvaluator with 10 folds.
 */
@Override //Abstract Ensemble 
public final void setupDefaultEnsembleSettings() {
    this.ensembleName = "CAWPE";

    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();
    this.transform = null;

    CrossValidationEvaluator cv = new CrossValidationEvaluator(seed, false, false, false, false);
    cv.setNumFolds(10);
    this.trainEstimator = cv;

    // SVML: SMO with a polynomial kernel of exponent 1, logistic models built
    SMO smo = new SMO();
    smo.turnChecksOff();
    smo.setBuildLogisticModels(true);
    PolyKernel linearKernel = new PolyKernel();
    linearKernel.setExponent(1);
    smo.setKernel(linearKernel);
    smo.setRandomSeed(seed);

    // NN: kNN constructed with 100, cross-validation on, normalisation off
    kNN knn = new kNN(100);
    knn.setCrossValidate(true);
    knn.normalise(false);
    knn.setDistanceFunction(new EuclideanDistance());

    // classifiers and their names are kept in matching order
    Classifier[] classifiers = { smo, knn, new J48(), new Logistic(), new MultilayerPerceptron() };
    String[] classifierNames = { "SVML", "NN", "C4.5", "Logistic", "MLP" };

    setClassifiers(classifiers, classifierNames, null);
}
 
Example #18
Source File: HierarchicalClusterer.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Parses a given list of options. <p/>
 *
  <!-- options-start -->
 * Valid options are: <p/>
 * 
  <!-- options-end -->
 *
 * Options read here: -P (print Newick), -N (number of clusters, 2 when
 * absent), -D (debug), -B (distance is branch length), -L (link type name)
 * and -A (distance function specification, EuclideanDistance when absent).
 * An unrecognised -L value leaves the link type unchanged.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  m_bPrintNewick = Utils.getFlag('P', options);

  String optionString = Utils.getOption('N', options); 
  if (optionString.length() != 0) {
    // parseInt avoids the deprecated Integer(String) constructor and the boxing
    setNumClusters(Integer.parseInt(optionString));
  }
  else {
    setNumClusters(2);
  }

  setDebug(Utils.getFlag('D', options));
  setDistanceIsBranchLength(Utils.getFlag('B', options));

  String sLinkType = Utils.getOption('L', options);

  // The names are mutually exclusive, so at most one branch applies.
  // "ADJCOMLPETE" is spelled as in the existing constant and option value.
  if (sLinkType.equals("SINGLE")) {
    setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("COMPLETE")) {
    setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("AVERAGE")) {
    setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("MEAN")) {
    setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("CENTROID")) {
    setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("WARD")) {
    setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("ADJCOMLPETE")) {
    setLinkType(new SelectedTag(ADJCOMLPETE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("NEIGHBOR_JOINING")) {
    setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE));
  }

  String nnSearchClass = Utils.getOption('A', options);
  if(nnSearchClass.length() != 0) {
    String nnSearchClassSpec[] = Utils.splitOptions(nnSearchClass);
    if(nnSearchClassSpec.length == 0) { 
      throw new Exception("Invalid DistanceFunction specification string."); 
    }
    // first token is the class name, the rest are that class's options
    String className = nnSearchClassSpec[0];
    nnSearchClassSpec[0] = "";

    setDistanceFunction( (DistanceFunction)
        Utils.forName( DistanceFunction.class, 
            className, nnSearchClassSpec) );
  }
  else {
    setDistanceFunction(new EuclideanDistance());
  }

  Utils.checkForRemainingOptions(options);
}
 
Example #19
Source File: KMedoids.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the given options: -N (number of clusters), -I (maximum iterations),
 * -J (repeat times), -A (distance function specification, EuclideanDistance
 * when absent) and -s (save the cluster result). The options are then
 * passed on to the superclass.
 * 
 * @param options
 *            a list of options as an array of strings
 * @throws Exception
 *             if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

	// number of clusters
	String numClustersOpt = Utils.getOption('N', options);
	if (!numClustersOpt.isEmpty()) {
		setNumClusters(Integer.parseInt(numClustersOpt));
	}

	// maximum number of iterations
	String maxIterationsOpt = Utils.getOption("I", options);
	if (!maxIterationsOpt.isEmpty()) {
		setMaxIterations(Integer.parseInt(maxIterationsOpt));
	}

	// repeat times
	String repeatTimesOpt = Utils.getOption("J", options);
	if (!repeatTimesOpt.isEmpty()) {
		setRepeatTimes(Integer.parseInt(repeatTimesOpt));
	}

	// distance function: class name followed by that class's own options
	String distanceSpec = Utils.getOption('A', options);
	if (distanceSpec.isEmpty()) {
		setDistanceFunction(new EuclideanDistance());
	} else {
		String[] specParts = Utils.splitOptions(distanceSpec);
		if (specParts.length == 0) {
			throw new Exception("Invalid DistanceFunction specification string.");
		}
		String className = specParts[0];
		specParts[0] = "";

		Object created = Utils.forName(DistanceFunction.class, className, specParts);
		setDistanceFunction((DistanceFunction) created);
	}

	// whether to output the cluster result
	m_SaveClusterResult = Utils.getFlag("s", options);

	// remaining options are handled by the superclass
	super.setOptions(options);
}
 
Example #20
Source File: ClusterEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Clusters a split of the OpenML dataset identified by DataSetUtils.SEGMENT_ID
 * with SimpleKMeans (class attribute removed), evaluates the clustering against
 * the class labels and asserts that the resulting accuracy is positive.
 *
 * @throws Exception if the dataset cannot be fetched or read, or clustering fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// close the reader once the instances have been constructed, so the file handle is not leaked
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	/* strip the class attribute (Remove uses 1-based indices) before clustering */
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	/* one cluster per class value, one slot per available processor */
	((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	/* timeTaken is measured from before clustering, so it includes the evaluation time */
	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example #21
Source File: ED1NN.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Compares Weka-style kNN using an unnormalised EuclideanDistance against
 * ED1NN on one train/test pair, printing the accuracy and the classification
 * time (in nanoseconds) of both, plus the ratio of new to old time.
 * The dataset location is a hard-coded developer path.
 *
 * @throws Exception if building a classifier or classifying an instance fails
 */
public static void runComparison() throws Exception {
        String tscProbDir = "C:/users/sjx07ngu/Dropbox/TSC Problems/";

        // alternatives used during development: ItalyPowerDemand, GunPoint, Beef, Coffee
        String datasetName = "SonyAiboRobotSurface1";

        Instances train = DatasetLoading.loadDataNullable(tscProbDir + datasetName + "/" + datasetName + "_TRAIN");
        Instances test = DatasetLoading.loadDataNullable(tscProbDir + datasetName + "/" + datasetName + "_TEST");

        // old version: kNN built with its no-arg constructor and an unnormalised Euclidean distance
        kNN knn = new kNN();
        EuclideanDistance oldED = new EuclideanDistance();
        oldED.setDontNormalize(true);
        knn.setDistanceFunction(oldED);
        knn.buildClassifier(train);

        // new version
        ED1NN edNew = new ED1NN();
        edNew.buildClassifier(train);

        long start, end, oldTime, newTime;
        double pred;

        // classification with EuclideanDistance and kNN
        start = System.nanoTime();
        int correctOld = 0;
        for (int i = 0; i < test.numInstances(); i++) {
            pred = knn.classifyInstance(test.instance(i));
            if (pred == test.instance(i).classValue()) {
                correctOld++;
            }
        }
        end = System.nanoTime();
        oldTime = end - start;

        // classification with ED1NN
        start = System.nanoTime();
        int correctNew = 0;
        for (int i = 0; i < test.numInstances(); i++) {
            pred = edNew.classifyInstance(test.instance(i));
            if (pred == test.instance(i).classValue()) {
                correctNew++;
            }
        }
        end = System.nanoTime();
        newTime = end - start;

        // label corrected: this compares Euclidean distance classifiers, not MSM
        System.out.println("Comparison of ED: " + datasetName);
        System.out.println("==========================================");
        System.out.println("Old acc:    " + ((double) correctOld / test.numInstances()));
        System.out.println("New acc:    " + ((double) correctNew / test.numInstances()));
        System.out.println("Old timing: " + oldTime);
        System.out.println("New timing: " + newTime);
        System.out.println("Relative Performance: " + ((double) newTime / oldTime));
    }
 
Example #22
Source File: CoverTree.java    From tsml with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Sets the distance function used for nearest neighbour search. Only
 * EuclideanDistance is currently supported; the function is stored both in
 * the general distance field and in the typed EuclideanDistance field.
 * 
 * @param df 		the distance function to use 
 * @throws Exception 	if not EuclideanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  if (!(df instanceof EuclideanDistance)) {
    throw new Exception("CoverTree currently only works with "
 + "EuclideanDistanceFunction.");
  }
  m_EuclideanDistance = (EuclideanDistance) df;
  m_DistanceFunction = m_EuclideanDistance;
}
 
Example #23
Source File: SimpleKMeansWithSilhouette.java    From apogen with Apache License 2.0 3 votes vote down vote up
/**
 * Sets the distance function used to compare instances. Only
 * EuclideanDistance and ManhattanDistance instances are accepted.
 * 
 * @param df
 *            the new distance function to use
 * @throws Exception
 *             if df is neither a Euclidean nor a Manhattan distance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
	boolean supported = (df instanceof EuclideanDistance) || (df instanceof ManhattanDistance);
	if (!supported) {
		throw new Exception("SimpleKMeans currently only supports the Euclidean and Manhattan distances.");
	}
	m_DistanceFunction = df;
}
 
Example #24
Source File: KMedoids.java    From apogen with Apache License 2.0 3 votes vote down vote up
/**
 * Sets the distance function used to compare instances. Anything other than
 * a EuclideanDistance or ManhattanDistance is rejected.
 * 
 * @param df
 *            the new distance function to use
 * @throws Exception
 *             if df is not EuclideanDistance or ManhattanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
	// guard clause: reject unsupported distance functions first
	if (!(df instanceof EuclideanDistance) && !(df instanceof ManhattanDistance)) {
		throw new Exception("MyPAM only support the Euclidean or Manhattan distance.");
	}
	m_DistanceFunction = df;
}
 
Example #25
Source File: KDTreeNodeSplitter.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Stores the EuclideanDistance object that this splitter uses when
 * splitting nodes.
 * @param func The EuclideanDistance object.
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_EuclideanDistance = func;
}
 
Example #26
Source File: KDTreeNodeSplitter.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Creates a new instance of KDTreeNodeSplitter holding references to the
 * given index array, instances and distance function (nothing is copied).
 * @param instList Reference of the master index array.
 * @param insts The set of training instances on which 
 * the tree is built.
 * @param e The EuclideanDistance object that is used
 * in tree construction.
 */
public KDTreeNodeSplitter(int[] instList, Instances insts, EuclideanDistance e) { 
  this.m_EuclideanDistance = e;
  this.m_Instances = insts;
  this.m_InstList = instList;
}
 
Example #27
Source File: BallTreeConstructor.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Stores the distance function that is used to build the tree.
 * @param func The distance function.
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_DistanceFunction = func;
}
 
Example #28
Source File: MedianOfWidestDimension.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Constructor. Passes all three arguments unchanged to the
 * superclass constructor and does nothing else.
 * @param instList The master index array.
 * @param insts The instances on which the tree
 * is (or is to be) built.
 * @param e The Euclidean distance function to 
 * use for splitting.
 */
public MedianOfWidestDimension(int[] instList, Instances insts, 
                               EuclideanDistance e) {
  super(instList, insts, e);
}
 
Example #29
Source File: BallSplitter.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Stores the distance function used (or to be used) by this
 * splitter when building the tree.
 * @param func The distance function. 
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_DistanceFunction = func;
}
 
Example #30
Source File: BallSplitter.java    From tsml with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Creates a new instance of BallSplitter holding references to the given
 * index array, instances and distance function (nothing is copied).
 * @param instList The master index array.
 * @param insts The instances on which the tree
 * is (or is to be) built.
 * @param e The Euclidean distance function to 
 * use for splitting.
 */
public BallSplitter(int[] instList, Instances insts, EuclideanDistance e) { 
  this.m_DistanceFunction = e;
  this.m_Instances = insts;
  this.m_Instlist = instList;
}