weka.filters.unsupervised.attribute.Remove Java Examples

The following examples show how to use weka.filters.unsupervised.attribute.Remove. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FilterAttribute.java    From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License 7 votes vote down vote up
/**
 * Loads the weather dataset, deletes attributes 2-4 with the Weka
 * {@code Remove} filter and writes the reduced dataset to {@code fw.arff}.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    try {
        DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Datasets/weather.arff");
        Instances dt = src.getDataSet();

        // "-R 2-4" is the attribute range handed to the Remove filter.
        String[] op = new String[]{"-R", "2-4"};
        Remove rmv = new Remove();
        rmv.setOptions(op);
        rmv.setInputFormat(dt);
        Instances nd = Filter.useFilter(dt, rmv);

        ArffSaver s = new ArffSaver();
        s.setInstances(nd);
        s.setFile(new File("fw.arff"));
        s.writeBatch();
    } catch (Exception e) {
        // getMessage() may be null and hides the exception type, so report
        // the exception itself, and on stderr rather than stdout.
        System.err.println("Filtering the dataset failed: " + e);
    }
}
 
Example #2
Source File: Cluster.java    From chuidiang-ejemplos with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Clusters the generated vessel data with EM after dropping its first
 * attribute, printing the filtered data, the fitted model and the cluster
 * assigned to the first instance.
 *
 * @param args command line arguments (not used)
 * @throws Exception if filtering or clustering fails
 */
public static void main(String[] args) throws Exception {
    Instances vessels = GenerateTestVessels.getData();
    vessels.setClassIndex(-1); // No class index.

    // Drop attribute "1" before handing the data to the clusterer.
    Remove dropFirst = new Remove();
    dropFirst.setAttributeIndices("1");
    dropFirst.setInputFormat(vessels);
    vessels = Filter.useFilter(vessels, dropFirst);
    System.out.println(vessels);

    EM em = new EM();
    em.buildClusterer(vessels);
    System.out.println(em);

    System.out.println(em.clusterInstance(vessels.firstInstance()));
}
 
Example #3
Source File: WekaClassesToClusterTest.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Builds an EM clusterer on the weather data with the class attribute
 * removed, then evaluates it against the full (class-carrying) data and
 * prints the evaluation result.
 *
 * Failures are reported on stderr; the method itself does not throw.
 */
public void generateClassToCluster(){
	Remove filter = new Remove();
	// Remove takes attribute positions offset by one from classIndex().
	filter.setAttributeIndices("" + (weather.classIndex() + 1));
	try {
		filter.setInputFormat(weather);
		Instances dataClusterer = Filter.useFilter(weather, filter);
		clusterer = new EM();
		clusterer.buildClusterer(dataClusterer);
		ClusterEvaluation eval = new ClusterEvaluation();
		eval.setClusterer(clusterer);
		eval.evaluateClusterer(weather);

		System.out.println(eval.clusterResultsToString());
	} catch (Exception e) {
		// Previously swallowed silently; surface the failure to the user.
		System.err.println("Classes-to-clusters evaluation failed: " + e);
	}
}
 
Example #4
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Clusters the given instances (class attribute removed) with the
 * clusterer wrapped by a {@code FilteredClusterer}, using as many clusters
 * as the class attribute has values, and scores the result with
 * {@code predictAccuracy}.
 *
 * @param insts instances with a class attribute set
 * @return the value computed by {@code predictAccuracy}
 * @throws Exception if filtering, clustering or evaluation fails
 */
private static double performClustering(final Instances insts) throws Exception {
	logger.debug("Starting cluster evaluation...");

	FilteredClusterer filteredClusterer = new FilteredClusterer();

	// Strip the class attribute so it cannot influence the clustering.
	Remove classRemover = new Remove();
	classRemover.setAttributeIndices("" + (insts.classIndex() + 1));
	classRemover.setInputFormat(insts);
	Instances unlabeled = Filter.useFilter(insts, classRemover);

	// One cluster per class value.
	String clusterCount = String.valueOf(insts.classAttribute().numValues());
	SimpleKMeans kMeans = (SimpleKMeans) filteredClusterer.getClusterer();
	kMeans.setOptions(new String[]{"-N", clusterCount});

	filteredClusterer.buildClusterer(unlabeled);

	ClusterEvaluation evaluation = new ClusterEvaluation();
	evaluation.setClusterer(filteredClusterer);
	evaluation.evaluateClusterer(insts);

	return predictAccuracy(insts, evaluation.getClassesToClusters(), evaluation.getClusterAssignments());
}
 
Example #5
Source File: SelectWords.java    From hlta with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Filters the attributes of {@code m_instances} with a {@code Remove}
 * filter and saves the result as an ARFF file.
 *
 * @param output  path of the ARFF file to write
 * @param options filter options; when {@code inverse} is false they are
 *                passed to the filter unchanged, otherwise
 *                {@code options[1]} is used as the attribute index list
 * @param inverse if true, the selection given by {@code options[1]} is
 *                inverted on the filter
 * @throws Exception if filtering or writing fails
 */
private void removeWords(String output, String[] options, boolean inverse) throws Exception
{
    Remove wordFilter = new Remove();

    if (!inverse)
    {
        wordFilter.setOptions(options);
    }
    else
    {
        wordFilter.setAttributeIndices(options[1]);
        wordFilter.setInvertSelection(true);
    }

    wordFilter.setInputFormat(m_instances);
    Instances filtered = Filter.useFilter(m_instances, wordFilter);

    ArffSaver arffWriter = new ArffSaver();
    arffWriter.setInstances(filtered);
    arffWriter.setFile(new File(output));
    arffWriter.writeBatch();
}
 
Example #6
Source File: RankingByPairwiseComparison.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Removes the attributes listed in {@code labelIndices} from the dataset
 * and appends a nominal attribute named "a&gt;b" with the labels
 * "true,false", which becomes the class attribute.
 *
 * @param dataset the dataset to transform
 * @return the transformed dataset with the added attribute as class
 * @throws Exception if one of the filters fails
 */
private Instances applyFiltersToDataset(final Instances dataset) throws Exception {
	// Step 1: drop the label columns.
	int[] labelColumns = this.labelIndices.stream().mapToInt(x -> x).toArray();
	Remove labelRemover = new Remove();
	labelRemover.setAttributeIndicesArray(labelColumns);
	labelRemover.setInvertSelection(false);
	labelRemover.setInputFormat(dataset);
	Instances withoutLabels = Filter.useFilter(dataset, labelRemover);

	// Step 2: append the binary comparison target as last attribute.
	Add targetAdder = new Add();
	targetAdder.setAttributeIndex("last");
	targetAdder.setNominalLabels("true,false");
	targetAdder.setAttributeName("a>b");
	targetAdder.setInputFormat(withoutLabels);
	Instances result = Filter.useFilter(withoutLabels, targetAdder);

	result.setClassIndex(result.numAttributes() - 1);
	return result;
}
 
Example #7
Source File: F.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Remove Labels - strips the attribute range "1-L" from D, i.e. all
 * labels under the assumption that they are the first L attributes.
 * @param	D		Dataset
 * @param	L 		number of labels
 * @return	New dataset with the label attributes removed.
 * @throws	Exception	if the filter cannot be applied
 */
public static Instances removeLabels(Instances D, int L) throws Exception {
	final String labelRange = "1-" + L;
	Remove labelStripper = new Remove();
	labelStripper.setAttributeIndices(labelRange);
	labelStripper.setInputFormat(D);
	Instances withoutLabels = Filter.useFilter(D, labelStripper);
	return withoutLabels;
}
 
Example #8
Source File: RuleNode.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Fits a linear regression on a copy of the training instances reduced
 * to the attributes given in {@code indices}, then stores the result as
 * this node's {@code PreConstructedLinearModel}.
 *
 * @param indices an array of attribute indices to include in the linear
 * model
 * @throws Exception if something goes wrong
 */
private void buildLinearModel(int [] indices) throws Exception {
  // Work on a copy and keep only the tested attributes
  // (inverted Remove = keep the listed indices).
  Instances reducedInst = new Instances(m_instances);
  Remove keepTested = new Remove();

  keepTested.setInvertSelection(true);
  keepTested.setAttributeIndicesArray(indices);
  keepTested.setInputFormat(reducedInst);

  reducedInst = Filter.useFilter(reducedInst, keepTested);

  // Regress on the reduced data.
  LinearRegression regression = new LinearRegression();
  regression.buildClassifier(reducedInst);

  double[] fitted = regression.coefficients();
  // The last fitted entry is handed to the model separately below.
  int lastPos = fitted.length - 1;
  // One slot per attribute of the full training data.
  double[] expanded = new double[m_instances.numAttributes()];

  for (int pos = 0; pos < lastPos; pos++) {
    int attribute = indices[pos];
    if (attribute == m_classIndex) {
      continue;
    }
    expanded[attribute] = fitted[pos];
  }

  m_nodeModel = new PreConstructedLinearModel(expanded, fitted[lastPos]);
  m_nodeModel.buildClassifier(m_instances);
}
 
Example #9
Source File: WekaFilteredClassifierTest.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
/**
 * Trains a random forest behind a {@code Remove} filter (attribute "1"
 * is dropped) on the weather data and prints, for every instance, the
 * given class value next to the predicted one.
 *
 * Failures are reported on stderr; the method itself does not throw.
 */
public void buildFilteredClassifier(){
	rf = new RandomForest();
	Remove rm = new Remove();
	rm.setAttributeIndices("1");
	// The FilteredClassifier applies the filter before the wrapped classifier.
	FilteredClassifier fc = new FilteredClassifier();
	fc.setFilter(rm);
	fc.setClassifier(rf);
	try{
		fc.buildClassifier(weather);
		for (int i = 0; i < weather.numInstances(); i++){
			double pred = fc.classifyInstance(weather.instance(i));
			System.out.print("given value: " + weather.classAttribute().value((int) weather.instance(i).classValue()));
			System.out.println("---predicted value: " + weather.classAttribute().value((int) pred));
		}
	} catch (Exception e) {
		// Previously swallowed silently; surface the failure to the user.
		System.err.println("Building or applying the filtered classifier failed: " + e);
	}
}
 
Example #10
Source File: Clustering.java    From java-ml-projects with Apache License 2.0 5 votes vote down vote up
/**
 * Runs k-means (3 clusters, seed 10) on {@code data} without attribute
 * "3" and groups the original instances into one chart series per
 * cluster.
 *
 * @return one series per cluster, named "0", "1" and "2"
 * @throws Exception if filtering or clustering fails
 */
private List<Series<Number, Number>> buildClusteredSeries() throws Exception {
	// to build the cluster we remove the class information
	Remove classStripper = new Remove();
	classStripper.setAttributeIndices("3");
	classStripper.setInputFormat(data);
	Instances unlabeled = Filter.useFilter(data, classStripper);

	SimpleKMeans kmeans = new SimpleKMeans();
	kmeans.setSeed(10);
	kmeans.setPreserveInstancesOrder(true);
	kmeans.setNumClusters(3);
	kmeans.buildClusterer(unlabeled);

	// One empty series per cluster, named after the cluster number.
	List<XYChart.Series<Number, Number>> clusteredSeries = new ArrayList<>();
	for (int cluster = 0; cluster < 3; cluster++) {
		Series<Number, Number> series = new XYChart.Series<>();
		series.setName(String.valueOf(cluster));
		clusteredSeries.add(series);
	}

	// assignments[row] is the cluster of the row-th instance.
	int row = 0;
	for (int clusterNum : kmeans.getAssignments()) {
		clusteredSeries.get(clusterNum).getData().add(instancetoChartData(data.get(row)));
		row++;
	}

	return clusteredSeries;
}
 
Example #11
Source File: LabelTransformationClassifier.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns a new set of instances containing either only the attribute
 * range "first-classIndex" (labels = true) or everything except that
 * range (labels = false).
 *
 * @param inst The input instances.
 * @param labels Return labels (true) or features (false)
 * @return the filtered copy of the input instances
 * @throws Exception if the filter cannot be applied
 */
protected Instances extractPart(Instances inst, boolean labels) throws Exception {
    // TODO Maybe this already exists somewhere in Meka?
    final String leadingRange = "first-" + (inst.classIndex());

    Remove partFilter = new Remove();
    partFilter.setAttributeIndices(leadingRange);
    // Inverting the selection keeps the range instead of removing it.
    partFilter.setInvertSelection(labels);
    partFilter.setInputFormat(inst);

    return Filter.useFilter(inst, partFilter);
}
 
Example #12
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes the given attributes from the data.
 *
 * @param data       the instances to filter
 * @param attributes attribute indices; each is incremented by one before
 *                   being passed to the {@code Remove} filter
 * @return the filtered instances
 * @throws Exception if the filter cannot be applied
 */
public static Instances removeAttributes(final Instances data, final Collection<Integer> attributes) throws Exception {
	// Build the comma separated index list expected by the filter.
	StringBuilder indexList = new StringBuilder();
	String separator = "";
	for (int attribute : attributes) {
		indexList.append(separator).append(attribute + 1);
		separator = ",";
	}

	Remove attributeRemover = new Remove();
	attributeRemover.setAttributeIndices(indexList.toString());
	attributeRemover.setInputFormat(data);
	return Filter.useFilter(data, attributeRemover);
}
 
Example #13
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the data without its class attribute.
 *
 * @param data instances whose class index is set
 * @return the instances with the class attribute removed
 * @throws IllegalArgumentException if the class index is negative
 * @throws Exception if the filter cannot be applied
 */
public static Instances removeClassAttribute(final Instances data) throws Exception {
	final int classIdx = data.classIndex();
	if (classIdx < 0) {
		throw new IllegalArgumentException("Class index of data is not set!");
	}

	Remove classRemover = new Remove();
	classRemover.setAttributeIndices(String.valueOf(classIdx + 1));
	classRemover.setInputFormat(data);
	return Filter.useFilter(data, classRemover);
}
 
Example #14
Source File: ClusterEvaluationTest.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Downloads the segment dataset, clusters one stratified split part with
 * k-means (class attribute removed, one cluster per class value) and
 * asserts that the resulting accuracy is positive.
 *
 * @throws Exception if loading, clustering or evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// Close the reader once the instances are loaded (it was leaked before).
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	// Cluster without the class attribute.
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example #15
Source File: BRq.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds one classifier per label. For every label index i below
 * data.classIndex() a filtered, down-sampled copy of the data is created
 * and used to train the i-th copy of the base classifier.
 *
 * @param data the training data
 * @throws Exception if filtering or training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	testCapabilities(data);

	int c = data.classIndex();

	if(getDebug()) System.out.print("-: Creating "+c+" models ("+m_Classifier.getClass().getName()+"): ");
	m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier,c);

	Instances sub_data = null;

	for(int i = 0; i < c; i++) {

		// Indices of all labels except i. A single row is enough here; the
		// former c x (c-1) matrix was re-allocated on every iteration
		// although only row i was ever used.
		int[] otherLabels = new int[c - 1];
		for(int j = 0, k = 0; j < c; j++) {
			if(j != i) {
				otherLabels[k++] = j;
			}
		}

		//Select only class attribute 'i'
		// NOTE(review): setInvertSelection(true) is called after
		// setInputFormat(data); the call order is kept exactly as it was -
		// confirm whether the inversion is meant to take effect here.
		Remove filterRemove = new Remove();
		filterRemove.setAttributeIndicesArray(otherLabels);
		filterRemove.setInputFormat(data);
		filterRemove.setInvertSelection(true);
		sub_data = Filter.useFilter(data, filterRemove);
		sub_data.setClassIndex(0);
		/* BEGIN downsample for this link */
		sub_data.randomize(m_Random);
		int numToRemove = sub_data.numInstances() - (int)Math.round(sub_data.numInstances() * m_DownSampleRatio);
		// Mark instances with class value <= 0.0 as missing until the quota
		// is reached; they are dropped by deleteWithMissingClass() below.
		for(int m = 0, removed = 0; m < sub_data.numInstances(); m++) {
			if (sub_data.instance(m).classValue() <= 0.0) {
				sub_data.instance(m).setClassMissing();
				if (++removed >= numToRemove)
					break;
			}
		}
		sub_data.deleteWithMissingClass();
		/* END downsample for this link */


		//Build the classifier for that class
		m_MultiClassifiers[i].buildClassifier(sub_data);
		if(getDebug()) System.out.print(" " + (i+1));

	}

	if(getDebug()) System.out.println(" :-");

	// Keep an empty copy of the last sub-dataset's header as template.
	m_InstancesTemplate = new Instances(sub_data, 0);

}
 
Example #16
Source File: RegressionTask.java    From Machine-Learning-in-Java with MIT License 4 votes vote down vote up
/**
 * Loads the ENB2012 data, removes the last attribute (Y2), and builds and
 * cross-validates a linear regression and an M5P model for Y1.
 *
 * @param args command line arguments (not used by the active code)
 * @throws Exception if loading, filtering, training or evaluation fails
 */
public static void main(String[] args) throws Exception {

	/*
	 * Load data
	 */
	CSVLoader loader = new CSVLoader();
	loader.setFieldSeparator(",");
	loader.setSource(new File("data/ENB2012_data.csv"));
	Instances data = loader.getDataSet();

	// System.out.println(data);

	/*
	 * Build regression models
	 */
	// set class index to Y1 (heating load)
	data.setClassIndex(data.numAttributes() - 2);
	// remove last attribute Y2
	Remove remove = new Remove();
	remove.setOptions(new String[] { "-R", data.numAttributes() + "" });
	remove.setInputFormat(data);
	data = Filter.useFilter(data, remove);

	// build a regression model
	LinearRegression model = new LinearRegression();
	model.buildClassifier(data);
	System.out.println(model);

	// 10-fold cross-validation
	Evaluation eval = new Evaluation(data);
	eval.crossValidateModel(model, data, 10, new Random(1), new String[] {});
	System.out.println(eval.toSummaryString());
	System.out.println();

	// build a regression tree model

	M5P md5 = new M5P();
	md5.setOptions(new String[] { "" });
	md5.buildClassifier(data);
	System.out.println(md5);

	// 10-fold cross-validation
	// Use a fresh Evaluation so that the M5P summary is not mixed with the
	// statistics already collected for the linear regression model.
	eval = new Evaluation(data);
	eval.crossValidateModel(md5, data, 10, new Random(1), new String[] {});
	System.out.println(eval.toSummaryString());
	System.out.println();

	/*
	 * Bonus: Build additional models
	 */

	// ZeroR modelZero = new ZeroR();
	//
	//
	//
	//
	//
	// REPTree modelTree = new REPTree();
	// modelTree.buildClassifier(data);
	// System.out.println(modelTree);
	// eval = new Evaluation(data);
	// eval.crossValidateModel(modelTree, data, 10, new Random(1), new
	// String[]{});
	// System.out.println(eval.toSummaryString());
	//
	// SMOreg modelSVM = new SMOreg();
	//
	// MultilayerPerceptron modelPerc = new MultilayerPerceptron();
	//
	// GaussianProcesses modelGP = new GaussianProcesses();
	// modelGP.buildClassifier(data);
	// System.out.println(modelGP);
	// eval = new Evaluation(data);
	// eval.crossValidateModel(modelGP, data, 10, new Random(1), new
	// String[]{});
	// System.out.println(eval.toSummaryString());

	/*
	 * Bonus: Save ARFF
	 */
	// ArffSaver saver = new ArffSaver();
	// saver.setInstances(data);
	// saver.setFile(new File(args[1]));
	// saver.setDestination(new File(args[1]));
	// saver.writeBatch();

}
 
Example #17
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns a copy of the instance without its class attribute.
 *
 * @param inst instance whose dataset has a class index set
 * @return the instance produced by applying the {@code Remove} filter
 * @throws IllegalArgumentException if the class index is negative
 * @throws Exception if the filter cannot be applied
 */
public static Instance removeClassAttribute(final Instance inst) throws Exception {
	// Same guard as the Instances overload: without a class index the
	// filter would be configured with the meaningless index "0".
	if (inst.classIndex() < 0) {
		throw new IllegalArgumentException("Class index of instance is not set!");
	}
	Remove remove = new Remove();
	remove.setAttributeIndices("" + (inst.classIndex() + 1));
	remove.setInputFormat(inst.dataset());
	return useFilterOnSingleInstance(inst, remove);
}
 
Example #18
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Removes a single attribute from the data.
 *
 * @param data      the instances to filter
 * @param attribute attribute index; it is incremented by one before being
 *                  passed to the {@code Remove} filter
 * @return the filtered instances
 * @throws Exception if the filter cannot be applied
 */
public static Instances removeAttribute(final Instances data, final int attribute) throws Exception {
	final String filterIndex = String.valueOf(attribute + 1);

	Remove attributeRemover = new Remove();
	attributeRemover.setAttributeIndices(filterIndex);
	attributeRemover.setInputFormat(data);

	Instances reduced = Filter.useFilter(data, attributeRemover);
	return reduced;
}
 
Example #19
Source File: Apriori.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Drops every column whose values are all missing. While scanning the
 * columns, the upper bound on the minimum support is lowered to the
 * relative frequency of the most frequent value seen, if it is still at
 * 1.0.
 *
 * @param instances the instances
 * @return a new set of instances without the all-missing columns, or the
 *         input itself if there was nothing to remove
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
    throws Exception {

  final int numInstances = instances.numInstances();
  StringBuilder deleteString = new StringBuilder();
  int removeCount = 0;
  int maxCount = 0;

  for (int col = 0; col < instances.numAttributes(); col++) {
    AttributeStats stats = instances.attributeStats(col);

    if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
      // see if we can decrease this by looking for the most frequent value
      int[] counts = stats.nominalCounts;
      int mostFrequent = counts[Utils.maxIndex(counts)];
      if (mostFrequent > maxCount) {
        maxCount = mostFrequent;
      }
    }

    if (stats.missingCount != numInstances) {
      continue;
    }

    // Collect the column position (offset by one) in a comma separated list.
    if (removeCount > 0) {
      deleteString.append(",");
    }
    deleteString.append(col + 1);
    removeCount++;
  }

  if (m_verbose) {
    System.err.println("Removed : " + removeCount
        + " columns with all missing " + "values.");
  }

  if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) {
    m_upperBoundMinSupport = (double) maxCount / (double) numInstances;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
          + m_upperBoundMinSupport);
    }
  }

  if (deleteString.length() == 0) {
    return instances;
  }

  Remove columnRemover = new Remove();
  columnRemover.setAttributeIndices(deleteString.toString());
  columnRemover.setInvertSelection(false);
  columnRemover.setInputFormat(instances);
  return Filter.useFilter(instances, columnRemover);
}
 
Example #20
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates plain clustering and one kernelized clustering per kernel on
 * the first part of a stratified split, running the evaluations on a
 * fixed-size thread pool, and combines the results via
 * {@code evaluateFutures}.
 *
 * @param instances  the data to split and cluster
 * @param numThreads size of the thread pool
 * @return the value computed by {@code evaluateFutures}
 * @throws InterruptedException if the calling thread is interrupted while
 *                              submitting tasks
 * @throws Exception if splitting or evaluation fails
 */
public static double performKernelClustering(final Instances instances, final int numThreads) throws Exception {
	logger.debug("Starting kernelized cluster evaluation...");

	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, kernelSplitPortion);

	ExecutorService execService = Executors.newFixedThreadPool(numThreads);
	try {
		List<Future<Double>> futures = new ArrayList<>();
		Future<Double> result0 = execService.submit(() ->
		performClustering(new Instances(split.get(0)))
				);
		futures.add(result0);

		for (Map.Entry<Kernel, Instances> entry : getKernelsWithInstances(split.get(0))) {
			if (Thread.currentThread().isInterrupted()) {
				throw new InterruptedException(EVALUATION_STOPPED_MESSAGE);
			}

			Future<Double> result = execService.submit(() -> {
				Kernel kernel = entry.getKey();
				Instances insts = entry.getValue();

				FilteredClusterer clusterer = new FilteredClusterer();

				// Cluster without the class attribute.
				Remove filter = new Remove();
				filter.setAttributeIndices("" + (insts.classIndex() + 1));
				filter.setInputFormat(insts);

				Instances removedClassInstances = Filter.useFilter(insts, filter);
				Nystroem kernelFilter = new Nystroem();

				kernelFilter.setKernel(kernel);
				clusterer.setFilter(kernelFilter);
				// One cluster per class value.
				((SimpleKMeans) clusterer.getClusterer())
				.setOptions(new String[]{"-N", String.valueOf(insts.classAttribute().numValues())});

				clusterer.buildClusterer(removedClassInstances);

				ClusterEvaluation clusterEval = new ClusterEvaluation();
				clusterEval.setClusterer(clusterer);
				clusterEval.evaluateClusterer(insts);

				return predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
			});
			futures.add(result);
		}

		return evaluateFutures(futures);
	} finally {
		// The pool was never shut down before, leaking its worker threads.
		// shutdown() still lets already submitted tasks run to completion.
		execService.shutdown();
	}
}
 
Example #21
Source File: WekaMatchingRule.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the trained model to a candidate record pair. A new feature
 * data set is created for the pair, converted to Weka instances and
 * classified.
 * 
 * @param record1
 *            the first record (must not be null)
 * @param record2
 *            the second record (must not be null)
 * @param schemaCorrespondences
 *            the schema correspondences between the first and the second
 *            records
 * @return A correspondence holding the input parameters plus the
 *         classifier's confidence for the class value "1", or null if no
 *         classifier is set or classification fails.
 */

@Override
public Correspondence<RecordType, SchemaElementType> apply(RecordType record1, RecordType record2,
		Processable<Correspondence<SchemaElementType, Matchable>> schemaCorrespondences) {

	// Nothing can be classified without a model.
	if (this.classifier == null) {
		logger.error("Please initialise a classifier!");
		return null;
	}

	FeatureVectorDataSet matchSet = this.initialiseFeatures(record1, record2, schemaCorrespondences);
	Record matchRecord = generateFeatures(record1, record2, schemaCorrespondences, matchSet);

	// transform entry for classification.
	matchSet.add(matchRecord);
	Instances matchInstances = this.transformToWeka(matchSet, this.matchSet);

	// reduce dimensions if feature subset selection was applied before.
	boolean subsetSelected = (this.backwardSelection || this.forwardSelection) && this.fs != null;
	if (subsetSelected) {
		try {
			// Inverted Remove = keep only the selected attributes.
			Remove keepSelected = new Remove();
			keepSelected.setAttributeIndicesArray(this.fs.selectedAttributes());
			keepSelected.setInvertSelection(true);
			keepSelected.setInputFormat(matchInstances);
			matchInstances = Filter.useFilter(matchInstances, keepSelected);
		} catch (Exception e1) {
			e1.printStackTrace();
		}
	}

	// Apply matching rule
	try {
		double[] distribution = this.classifier.distributionForInstance(matchInstances.firstInstance());
		int positiveClassIndex = matchInstances.attribute(matchInstances.classIndex()).indexOfValue("1");
		double matchConfidence = distribution[positiveClassIndex];

		if (this.isDebugReportActive()) {
			fillSimilarity(record1, record2, matchConfidence);
		}

		return new Correspondence<RecordType, SchemaElementType>(record1, record2, matchConfidence,
				schemaCorrespondences);
	} catch (Exception e) {
		e.printStackTrace();
		String recordText = matchRecord == null ? "null" : matchRecord.toString();
		logger.error(String.format("Classifier Exception for Record '%s': %s", recordText, e.getMessage()));
	}

	return null;
}
 
Example #22
Source File: RandomSubSpace.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the ensemble: every base classifier is wrapped in a
 * FilteredClassifier whose Remove filter is configured with a random
 * attribute subspace.
 *
 * @param data 	the training data to be used for generating the
 * 			classifier.
 * @throws Exception 	if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  m_data = new Instances(data);
  m_data.deleteWithMissingClass();

  // only class? -> build ZeroR model
  if (m_data.numAttributes() == 1) {
    System.err.println(
        "Cannot build model (only class attribute present in data!), "
        + "using ZeroR model instead!");
    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(m_data);
    return;
  }
  m_ZeroR = null;

  super.buildClassifier(data);

  // Collect all attribute positions except the class, offset by one.
  final int attributeCount = data.numAttributes();
  final int classIndex = data.classIndex();
  Integer[] indices = new Integer[attributeCount - 1];
  int filled = 0;
  for (int att = 0; att < attributeCount; att++) {
    if (att == classIndex) {
      continue;
    }
    indices[filled++] = att + 1;
  }

  int subSpaceSize = numberOfAttributes(indices.length, getSubSpaceSize());
  Random random = data.getRandomNumberGenerator(m_Seed);

  for (int j = 0; j < m_Classifiers.length; j++) {
    if (m_Classifier instanceof Randomizable) {
      ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
    }

    // Replace the base classifier by a filtered wrapper around it.
    FilteredClassifier wrapper = new FilteredClassifier();
    wrapper.setClassifier(m_Classifiers[j]);
    m_Classifiers[j] = wrapper;

    Remove subSpaceFilter = new Remove();
    String[] filterOptions = {"-V", "-R", randomSubSpace(indices, subSpaceSize, classIndex + 1, random)};
    subSpaceFilter.setOptions(filterOptions);
    wrapper.setFilter(subSpaceFilter);

    // the wrapped classifiers are built by buildClassifiers() below
  }

  buildClassifiers();

  // save memory
  m_data = null;
}
 
Example #23
Source File: StackingC.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the meta level: collects meta instances via cross-validation of
 * the base classifiers, then trains one meta classifier per class on an
 * indicator-coded, column-reduced view of the meta data.
 * 
 * @param newData the data to work with
 * @param random the random number generator to use for cross-validation
 * @throws Exception if generation fails
 */
protected void generateMetaLevel(Instances newData, Random random) 
  throws Exception {

  Instances metaData = metaFormat(newData);
  m_MetaFormat = new Instances(metaData, 0);

  for (int fold = 0; fold < m_NumFolds; fold++) {
    Instances train = newData.trainCV(m_NumFolds, fold, random);

    // Build base classifiers
    for (int c = 0; c < m_Classifiers.length; c++) {
      getClassifier(c).buildClassifier(train);
    }

    // Classify test instances and add to meta data
    Instances test = newData.testCV(m_NumFolds, fold);
    for (int t = 0; t < test.numInstances(); t++) {
      metaData.add(metaInstance(test.instance(t)));
    }
  }

  m_MetaClassifiers = AbstractClassifier.makeCopies(m_MetaClassifier,
      m_BaseFormat.numClasses());

  // Columns to keep: one per base classifier plus the last meta attribute.
  int[] keptColumns = new int[m_Classifiers.length + 1];
  keptColumns[m_Classifiers.length] = metaData.numAttributes() - 1;

  for (int cls = 0; cls < m_MetaClassifiers.length; cls++) {
    for (int base = 0; base < m_Classifiers.length; base++) {
      keptColumns[base] = m_BaseFormat.numClasses() * base + cls;
    }

    // Turn the class attribute into a numeric indicator for value cls.
    m_makeIndicatorFilter = new weka.filters.unsupervised.attribute.MakeIndicator();
    m_makeIndicatorFilter.setAttributeIndex("" + (metaData.classIndex() + 1));
    m_makeIndicatorFilter.setNumeric(true);
    m_makeIndicatorFilter.setValueIndex(cls);
    m_makeIndicatorFilter.setInputFormat(metaData);
    Instances indicatorData = Filter.useFilter(metaData, m_makeIndicatorFilter);

    // Inverted Remove = keep only the columns listed in keptColumns.
    m_attrFilter = new weka.filters.unsupervised.attribute.Remove();
    m_attrFilter.setInvertSelection(true);
    m_attrFilter.setAttributeIndicesArray(keptColumns);
    m_attrFilter.setInputFormat(m_makeIndicatorFilter.getOutputFormat());
    Instances classData = Filter.useFilter(indicatorData, m_attrFilter);

    classData.setClassIndex(classData.numAttributes() - 1);

    m_MetaClassifiers[cls].buildClassifier(classData);
  }
}