/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * FilteredClusterer.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.clusterers;

import java.util.Enumeration;
import java.util.Vector;

import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 <!-- globalinfo-start -->
 * Class for running an arbitrary clusterer on data that has been passed through an arbitrary filter. Like the clusterer, the structure of the filter is based exclusively on the training data and test instances will be processed by the filter without changing their structure.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -F &lt;filter specification&gt;
 *  Full class name of filter to use, followed
 *  by filter options.
 *  eg: "weka.filters.unsupervised.attribute.Remove -V -R 1,2"
 * (default: weka.filters.AllFilter)</pre>
 * 
 * <pre> -W
 *  Full name of base clusterer.
 *  (default: weka.clusterers.SimpleKMeans)</pre>
 * 
 * <pre> 
 * Options specific to clusterer weka.clusterers.SimpleKMeans:
 * </pre>
 * 
 * <pre> -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).</pre>
 * 
 * <pre> -V
 *  Display std. deviations for centroids.
 * </pre>
 * 
 * <pre> -M
 *  Replace missing values with mean/mode.
 * </pre>
 * 
 * <pre> -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)</pre>
 * 
 <!-- options-end -->
 *
 * Based on code from the FilteredClassifier by Len Trigg.
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 8034 $
 * @see weka.classifiers.meta.FilteredClassifier
 */
public class FilteredClusterer
  extends SingleClustererEnhancer {

  /** for serialization. */
  private static final long serialVersionUID = 1420005943163412943L;

  /** The filter applied to the data before it reaches the base clusterer. */
  protected Filter m_Filter;

  /**
   * The structure (header) of the filtered training data; <code>null</code>
   * until a model has been built successfully.
   */
  protected Instances m_FilteredInstances;

  /**
   * Default constructor: uses SimpleKMeans as base clusterer and AllFilter
   * as filter.
   */
  public FilteredClusterer() {
    m_Clusterer = new SimpleKMeans();
    m_Filter    = new weka.filters.AllFilter();
  }

  /**
   * Returns a string describing this clusterer.
   *
   * @return 		a description of the clusterer suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Class for running an arbitrary clusterer on data that has been passed "
      + "through an arbitrary filter. Like the clusterer, the structure of the filter "
      + "is based exclusively on the training data and test instances will be processed "
      + "by the filter without changing their structure.";
  }

  /**
   * String describing default filter.
   *
   * @return 		the default filter classname
   */
  protected String defaultFilterString() {
    return weka.filters.AllFilter.class.getName();
  }

  /**
   * Returns an enumeration describing the available options: the -F option
   * of this class followed by all options listed by the superclass.
   *
   * @return 		an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector<Option> result = new Vector<Option>();

    result.addElement(new Option(
        "\tFull class name of filter to use, followed\n"
        + "\tby filter options.\n"
        + "\teg: \"weka.filters.unsupervised.attribute.Remove -V -R 1,2\"\n"
        + "(default: " + defaultFilterString() + ")",
        "F", 1, "-F <filter specification>"));

    // the superclass enumeration is consumed through its raw type, hence
    // the explicit cast
    Enumeration enm = super.listOptions();
    while (enm.hasMoreElements())
      result.addElement((Option) enm.nextElement());

    return result.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -F &lt;filter specification&gt;
   *  Full class name of filter to use, followed
   *  by filter options.
   *  eg: "weka.filters.unsupervised.attribute.Remove -V -R 1,2"
   * (default: weka.filters.AllFilter)</pre>
   * 
   * <pre> -W
   *  Full name of base clusterer.
   *  (default: weka.clusterers.SimpleKMeans)</pre>
   * 
   * <pre> 
   * Options specific to clusterer weka.clusterers.SimpleKMeans:
   * </pre>
   * 
   * <pre> -N &lt;num&gt;
   *  number of clusters.
   *  (default 2).</pre>
   * 
   * <pre> -V
   *  Display std. deviations for centroids.
   * </pre>
   * 
   * <pre> -M
   *  Replace missing values with mean/mode.
   * </pre>
   * 
   * <pre> -S &lt;num&gt;
   *  Random number seed.
   *  (default 10)</pre>
   * 
   <!-- options-end -->
   *
   * @param options 	the list of options as an array of strings
   * @throws Exception 	if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String filterSpec = Utils.getOption('F', options);

    if (filterSpec.length() > 0) {
      String[] filterOptions = Utils.splitOptions(filterSpec);
      if (filterOptions.length == 0)
        throw new IllegalArgumentException("Invalid filter specification string");

      // the first token is the class name; it is blanked out so that only
      // the filter's own options are handed on
      String filterName = filterOptions[0];
      filterOptions[0] = "";
      setFilter((Filter) Utils.forName(Filter.class, filterName, filterOptions));
    }
    else {
      // no -F given: fall back to the default filter
      setFilter(new weka.filters.AllFilter());
    }

    super.setOptions(options);
  }

  /**
   * Gets the current settings of the clusterer: the -F option with the
   * filter specification, followed by the options of the superclass.
   *
   * @return 		an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector<String> result = new Vector<String>();

    result.add("-F");
    result.add(getFilterSpec());

    for (String option: super.getOptions())
      result.add(option);

    return result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String filterTipText() {
    return "The filter to be used.";
  }

  /**
   * Sets the filter. A warning is printed to stderr if the filter is a
   * supervised one.
   *
   * @param filter 	the filter with all options set.
   */
  public void setFilter(Filter filter) {
    m_Filter = filter;

    // warnings go to stderr so that they do not get mixed into regular
    // output written to stdout
    if (m_Filter instanceof SupervisedFilter)
      System.err.println(
          "WARNING: you are using a supervised filter, which will leak "
          + "information about the class attribute!");
  }

  /**
   * Gets the filter used.
   *
   * @return 		the filter, can be null if none has been set
   */
  public Filter getFilter() {
    return m_Filter;
  }

  /**
   * Gets the filter specification string, which contains the class name of
   * the filter and any options to the filter.
   *
   * @return 		the filter string, empty if no filter is set
   */
  protected String getFilterSpec() {
    Filter filter = getFilter();

    // a null filter is a state that getCapabilities() explicitly supports,
    // so it must not cause a NullPointerException here either
    if (filter == null)
      return "";

    String result = filter.getClass().getName();
    if (filter instanceof OptionHandler)
      result += " " + Utils.joinOptions(((OptionHandler) filter).getOptions());

    return result;
  }

  /**
   * Returns the capabilities of this clusterer. If a filter is set, these
   * are the capabilities of the filter; otherwise only
   * {@link Capability#NO_CLASS} is enabled. In both cases the dependency
   * flag is enabled for every capability.
   *
   * @return		the capabilities of this clusterer
   */
  public Capabilities getCapabilities() {
    Capabilities	result;

    if (getFilter() == null) {
      result = super.getCapabilities();
      result.disableAll();
      result.enable(Capability.NO_CLASS);
    } else {
      result = getFilter().getCapabilities();
    }

    // set dependencies
    for (Capability cap: Capability.values())
      result.enableDependency(cap);

    return result;
  }

  /**
   * Build the clusterer on the filtered data.
   *
   * @param data 	the training data
   * @throws Exception 	if no base clusterer or no filter is set, or if the
   * 			clusterer could not be built successfully
   */
  public void buildClusterer(Instances data) throws Exception {
    if (m_Clusterer == null)
      throw new Exception("No base clusterer has been set!");
    if (m_Filter == null)
      throw new Exception("No filter has been set!");

    // discard the header of any previous model, so that a failed (re)build
    // is not reported as a built model by toString()
    m_FilteredInstances = null;

    // remove instances with missing class (works on a copy of the data)
    if (data.classIndex() > -1) {
      data = new Instances(data);
      data.deleteWithMissingClass();
    }

    m_Filter.setInputFormat(data);  // filter capabilities are checked here
    data = Filter.useFilter(data, m_Filter);

    // can clusterer handle the data?
    getClusterer().getCapabilities().testWithFail(data);

    // take the header before the base clusterer sees the data, but only
    // publish it once the build has succeeded
    Instances filteredHeader = data.stringFreeStructure();
    m_Clusterer.buildClusterer(data);
    m_FilteredInstances = filteredHeader;
  }

  /**
   * Passes the given instance through the filter and returns the
   * distribution that the base clusterer computes for the filtered instance.
   *
   * @param instance 	the instance to be processed
   * @return 		the distribution returned by the base clusterer for
   * 			the filtered instance
   * @throws Exception 	if no filter is set, the filter still has pending
   * 			output, the filter does not make the instance
   * 			available immediately, or the base clusterer fails
   */
  public double[] distributionForInstance(Instance instance)
    throws Exception {

    if (m_Filter == null)
      throw new Exception("No filter has been set!");

    if (m_Filter.numPendingOutput() > 0)
      throw new Exception("Filter output queue not empty!");

    if (!m_Filter.input(instance))
      throw new Exception(
          "Filter didn't make the test instance immediately available!");

    m_Filter.batchFinished();
    Instance filtered = m_Filter.output();

    return m_Clusterer.distributionForInstance(filtered);
  }

  /**
   * Output a representation of this clusterer: the clusterer and filter
   * specifications, the header of the filtered data and the model of the
   * base clusterer, or a short notice if no model has been built.
   *
   * @return 		a representation of this clusterer
   */
  public String toString() {
    if (m_FilteredInstances == null)
      return "FilteredClusterer: No model built yet.";

    return "FilteredClusterer using "
      + getClustererSpec()
      + " on data filtered through "
      + getFilterSpec()
      + "\n\nFiltered Header\n"
      + m_FilteredInstances.toString()
      + "\n\nClusterer Model\n"
      + m_Clusterer.toString();
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param args 	the commandline options, use "-h" for help
   */
  public static void main(String [] args) {
    runClusterer(new FilteredClusterer(), args);
  }
}