Java Code Examples for weka.filters.unsupervised.attribute.Remove#setAttributeIndices()

The following examples show how to use weka.filters.unsupervised.attribute.Remove#setAttributeIndices(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: WekaClassesToClusterTest.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Builds an EM clusterer on {@code weather} with its class attribute filtered out, then
 * evaluates that clusterer against the original {@code weather} data and prints the
 * evaluation results to standard output.
 *
 * <p>The method never throws: any failure while filtering, clustering or evaluating is
 * reported on standard error instead of being silently discarded.
 */
public void generateClassToCluster(){
	Remove filter = new Remove();
	// Weka attribute ranges are 1-based, hence the class index is shifted by one.
	filter.setAttributeIndices("" + (weather.classIndex() + 1));
	try {
		filter.setInputFormat(weather);
		Instances dataClusterer = Filter.useFilter(weather, filter);
		clusterer = new EM();
		clusterer.buildClusterer(dataClusterer);
		ClusterEvaluation eval = new ClusterEvaluation();
		eval.setClusterer(clusterer);
		eval.evaluateClusterer(weather);

		System.out.println(eval.clusterResultsToString());
	} catch (Exception e) {
		// Keep the method non-throwing for callers, but make the failure visible.
		System.err.println("Classes-to-clusters evaluation failed: " + e);
	}
}
 
Example 2
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Clusters the given instances with the class attribute removed and scores the clustering
 * against the class labels.
 *
 * @param insts the instances to cluster; their class attribute supplies the cluster count
 * @return the value computed by {@code predictAccuracy} from the classes-to-clusters
 *         mapping and the cluster assignments
 * @throws Exception if filtering, clustering or evaluation fails
 */
private static double performClustering(final Instances insts) throws Exception {
	logger.debug("Starting cluster evaluation...");

	final FilteredClusterer filteredClusterer = new FilteredClusterer();

	// Build a copy of the data without the class attribute (filter ranges are 1-based).
	final Remove classRemover = new Remove();
	final int classPosition = insts.classIndex() + 1;
	classRemover.setAttributeIndices("" + classPosition);
	classRemover.setInputFormat(insts);
	final Instances unlabeled = Filter.useFilter(insts, classRemover);

	// Ask the wrapped k-means for one cluster per class value.
	final SimpleKMeans kMeans = (SimpleKMeans) filteredClusterer.getClusterer();
	final String clusterCount = String.valueOf(insts.classAttribute().numValues());
	kMeans.setOptions(new String[] {"-N", clusterCount});

	filteredClusterer.buildClusterer(unlabeled);

	// Evaluation runs on the original (still labelled) instances.
	final ClusterEvaluation evaluation = new ClusterEvaluation();
	evaluation.setClusterer(filteredClusterer);
	evaluation.evaluateClusterer(insts);

	final int[] classesToClusters = evaluation.getClassesToClusters();
	final double[] assignments = evaluation.getClusterAssignments();
	return predictAccuracy(insts, classesToClusters, assignments);
}
 
Example 3
Source File: SelectWords.java    From hlta with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Filters the attributes of {@code m_instances} and writes the result as an ARFF file.
 *
 * <p>When {@code inverse} is set, {@code options[1]} is used as the attribute range and the
 * selection is inverted, so only the listed attributes are kept. Otherwise the whole
 * {@code options} array is handed to the filter as its option list.
 *
 * @param output path of the file the filtered data is written to
 * @param options filter options; element 1 is read as the attribute range when
 *        {@code inverse} is {@code true}
 * @param inverse whether to keep (rather than remove) the selected attributes
 * @throws Exception if the filter cannot be configured or applied, or the file cannot be
 *         written
 */
private void removeWords(String output, String[] options, boolean inverse) throws Exception
{
    final Remove attributeFilter = new Remove();

    if (!inverse)
    {
        attributeFilter.setOptions(options);
    }
    else
    {
        attributeFilter.setAttributeIndices(options[1]);
        attributeFilter.setInvertSelection(true);
    }

    attributeFilter.setInputFormat(m_instances);
    final Instances filtered = Filter.useFilter(m_instances, attributeFilter);

    // Persist the filtered data set in one batch.
    final ArffSaver arffWriter = new ArffSaver();
    arffWriter.setInstances(filtered);
    arffWriter.setFile(new File(output));
    arffWriter.writeBatch();
}
 
Example 4
Source File: Cluster.java    From chuidiang-ejemplos with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Loads the test vessel data, drops its first attribute, prints the filtered data, builds
 * an EM clusterer on it and prints both the clusterer and the cluster assigned to the
 * first instance.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if loading, filtering or clustering fails
 */
public static void main(String[] args) throws Exception {
    Instances vessels = GenerateTestVessels.getData();
    vessels.setClassIndex(-1); // No class index.

    // Remove the first attribute before clustering.
    Remove firstAttributeRemover = new Remove();
    firstAttributeRemover.setAttributeIndices("1");
    firstAttributeRemover.setInputFormat(vessels);
    Instances filtered = Filter.useFilter(vessels, firstAttributeRemover);
    System.out.println(filtered);

    EM em = new EM();
    em.buildClusterer(filtered);
    System.out.println(em);

    // Show which cluster the first instance falls into.
    System.out.println(em.clusterInstance(filtered.firstInstance()));
}
 
Example 5
Source File: WekaFilteredClassifierTest.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
/**
 * Trains a random forest on {@code weather} through a {@link FilteredClassifier} that drops
 * the first attribute, then prints the actual and predicted class value for every instance
 * of {@code weather}.
 *
 * <p>The method never throws: any failure while training or classifying is reported on
 * standard error instead of being silently discarded.
 */
public void buildFilteredClassifier(){
	rf = new RandomForest();
	Remove rm = new Remove();
	rm.setAttributeIndices("1");
	// The filter is applied by the wrapper, both at training and at prediction time.
	FilteredClassifier fc = new FilteredClassifier();
	fc.setFilter(rm);
	fc.setClassifier(rf);
	try{
		fc.buildClassifier(weather);
		for (int i = 0; i < weather.numInstances(); i++){
			double pred = fc.classifyInstance(weather.instance(i));
			System.out.print("given value: " + weather.classAttribute().value((int) weather.instance(i).classValue()));
			System.out.println("---predicted value: " + weather.classAttribute().value((int) pred));
		}
	} catch (Exception e) {
		// Keep the method non-throwing for callers, but make the failure visible.
		System.err.println("Filtered classification failed: " + e);
	}
}
 
Example 6
Source File: Clustering.java    From java-ml-projects with Apache License 2.0 5 votes vote down vote up
/**
 * Clusters {@code data} into three groups with k-means (after removing attribute 3) and
 * returns one chart series per cluster, each holding the chart points of the instances
 * assigned to that cluster.
 *
 * @return three series named "0", "1" and "2", filled according to the cluster assignments
 * @throws Exception if filtering or clustering fails
 */
private List<Series<Number, Number>> buildClusteredSeries() throws Exception {
	// to build the cluster we remove the class information
	Remove classRemover = new Remove();
	classRemover.setAttributeIndices("3");
	classRemover.setInputFormat(data);
	Instances unlabeled = Filter.useFilter(data, classRemover);

	SimpleKMeans clusterer = new SimpleKMeans();
	clusterer.setSeed(10);
	// Instance order must be preserved so assignments line up with positions in data.
	clusterer.setPreserveInstancesOrder(true);
	clusterer.setNumClusters(3);
	clusterer.buildClusterer(unlabeled);

	// One empty, named series per cluster.
	List<XYChart.Series<Number, Number>> seriesPerCluster = new ArrayList<>();
	for (int cluster = 0; cluster < 3; cluster++) {
		Series<Number, Number> series = new XYChart.Series<>();
		series.setName(String.valueOf(cluster));
		seriesPerCluster.add(series);
	}

	// Route every original instance to the series of its assigned cluster.
	int row = 0;
	for (int cluster : clusterer.getAssignments()) {
		seriesPerCluster.get(cluster).getData().add(instancetoChartData(data.get(row)));
		row++;
	}

	return seriesPerCluster;
}
 
Example 7
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the data with the given attributes filtered out.
 *
 * @param data the instances to filter
 * @param attributes 0-based attribute indices to remove; each is shifted by one because
 *        the filter expects 1-based indices
 * @return the result of applying the removal filter to {@code data}
 * @throws Exception if the filter cannot be configured or applied
 */
public static Instances removeAttributes(final Instances data, final Collection<Integer> attributes) throws Exception {
	// Assemble a comma-separated list of 1-based indices.
	final StringBuilder indexList = new StringBuilder();
	String separator = "";
	for (int zeroBased : attributes) {
		indexList.append(separator).append(zeroBased + 1);
		separator = ",";
	}

	final Remove attributeFilter = new Remove();
	attributeFilter.setAttributeIndices(indexList.toString());
	attributeFilter.setInputFormat(data);
	return Filter.useFilter(data, attributeFilter);
}
 
Example 8
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the data with its class attribute filtered out.
 *
 * @param data the instances to filter; a class index must be set
 * @return the result of applying the removal filter to {@code data}
 * @throws IllegalArgumentException if the class index of {@code data} is negative
 * @throws Exception if the filter cannot be configured or applied
 */
public static Instances removeClassAttribute(final Instances data) throws Exception {
	final int classIndex = data.classIndex();
	if (classIndex < 0) {
		throw new IllegalArgumentException("Class index of data is not set!");
	}

	// The filter expects a 1-based index.
	final Remove classFilter = new Remove();
	classFilter.setAttributeIndices("" + (classIndex + 1));
	classFilter.setInputFormat(data);
	return Filter.useFilter(data, classFilter);
}
 
Example 9
Source File: LabelTransformationClassifier.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
    * Produces a new set of instances that contains either only the labels
    * (labels = true) or only the features (labels = false).
    *
    * The attribute range "first-" + classIndex is selected; it is removed when
    * labels is false and kept (inverted selection) when labels is true.
    * NOTE(review): presumably the labels occupy the leading attributes and the
    * class index equals the label count - confirm against the data layout.
    *
    * @param inst The input instances.
    * @param labels Return labels (true) or features (false)
    * @return the filtered instances
    * @throws Exception if the filter cannot be configured or applied
    */
   protected Instances extractPart(Instances inst, boolean labels) throws Exception{
      //TODO Maybe already exists somewhere in Meka?

      final String leadingRange = "first-" + (inst.classIndex());

      Remove partFilter = new Remove();
      partFilter.setAttributeIndices(leadingRange);
      partFilter.setInvertSelection(labels);
      partFilter.setInputFormat(inst);

      Instances part = Filter.useFilter(inst, partFilter);
      return part;
   }
 
Example 10
Source File: F.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Remove Labels - strips ALL labels from D, assuming they are the first L attributes.
 * @param	D		Dataset
 * @param	L 		number of labels
 * @return	New dataset with attributes 1 through L removed.
 * @throws	Exception	if the filter cannot be configured or applied
 */
public static Instances removeLabels(Instances D, int L) throws Exception {
	// 1-based range covering the first L attributes.
	final String labelRange = "1-" + L;

	final Remove labelFilter = new Remove();
	labelFilter.setAttributeIndices(labelRange);
	labelFilter.setInputFormat(D);

	final Instances withoutLabels = Filter.useFilter(D, labelFilter);
	return withoutLabels;
}
 
Example 11
Source File: Apriori.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Drops every attribute whose value is missing in all instances.
 *
 * <p>As a side effect, while the upper bound on minimum support is still 1.0 the method
 * tracks the highest single-value count seen across the attributes and, if that count is
 * below the number of instances, lowers {@code m_upperBoundMinSupport} to the matching
 * fraction.
 *
 * @param instances the instances
 * @return a new set of instances with all-missing columns removed, or {@code instances}
 *         itself when no column qualifies
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
    throws Exception {

  final int total = instances.numInstances();
  final StringBuilder indicesToDrop = new StringBuilder();
  int dropped = 0;
  int highestCount = 0;

  for (int col = 0; col < instances.numAttributes(); col++) {
    final AttributeStats stats = instances.attributeStats(col);

    if (m_upperBoundMinSupport == 1.0 && highestCount != total) {
      // see if we can decrease this by looking for the most frequent value
      final int[] counts = stats.nominalCounts;
      final int top = counts[Utils.maxIndex(counts)];
      if (top > highestCount) {
        highestCount = top;
      }
    }

    if (stats.missingCount != total) {
      continue;
    }

    // Column is entirely missing: record its 1-based index, comma-separated.
    if (indicesToDrop.length() > 0) {
      indicesToDrop.append(",");
    }
    indicesToDrop.append(col + 1);
    dropped++;
  }

  if (m_verbose) {
    System.err.println("Removed : " + dropped
        + " columns with all missing " + "values.");
  }

  if (m_upperBoundMinSupport == 1.0 && highestCount != total) {
    m_upperBoundMinSupport = (double) highestCount / (double) total;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
          + m_upperBoundMinSupport);
    }
  }

  final String dropSpec = indicesToDrop.toString();
  if (dropSpec.length() == 0) {
    return instances;
  }

  final Remove columnFilter = new Remove();
  columnFilter.setAttributeIndices(dropSpec);
  columnFilter.setInvertSelection(false);
  columnFilter.setInputFormat(instances);
  return Filter.useFilter(instances, columnFilter);
}
 
Example 12
Source File: ClusterEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Downloads the segment data set, clusters a stratified portion of it with k-means after
 * removing the class attribute, evaluates the clustering against the class labels and
 * asserts that the resulting accuracy is positive.
 *
 * @throws Exception if the data set cannot be fetched or read, or clustering/evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// Close the reader once the data set has been parsed; it was previously leaked.
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	// Cluster on a copy without the class attribute (filter indices are 1-based).
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	// One cluster per class value, using all available processors.
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	// Evaluation runs on the original (still labelled) instances.
	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example 13
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns the data with a single attribute filtered out.
 *
 * @param data the instances to filter
 * @param attribute 0-based index of the attribute to remove; shifted by one because the
 *        filter expects a 1-based index
 * @return the result of applying the removal filter to {@code data}
 * @throws Exception if the filter cannot be configured or applied
 */
public static Instances removeAttribute(final Instances data, final int attribute) throws Exception {
	final int oneBasedIndex = attribute + 1;

	final Remove attributeFilter = new Remove();
	attributeFilter.setAttributeIndices("" + oneBasedIndex);
	attributeFilter.setInputFormat(data);

	final Instances filtered = Filter.useFilter(data, attributeFilter);
	return filtered;
}
 
Example 14
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Applies a class-attribute removal filter to a single instance.
 *
 * <p>The filter is configured from the instance's class index (shifted to 1-based) and
 * initialised with the format of the instance's data set; the actual application is
 * delegated to {@code useFilterOnSingleInstance}.
 *
 * @param inst the instance to filter
 * @return whatever {@code useFilterOnSingleInstance} yields for {@code inst} and the filter
 * @throws Exception if the filter cannot be configured or applied
 */
public static Instance removeClassAttribute(final Instance inst) throws Exception {
	final int oneBasedClassIndex = inst.classIndex() + 1;

	final Remove classFilter = new Remove();
	classFilter.setAttributeIndices("" + oneBasedClassIndex);
	classFilter.setInputFormat(inst.dataset());

	final Instance filtered = useFilterOnSingleInstance(inst, classFilter);
	return filtered;
}