weka.attributeSelection.AttributeSelection Java Examples

The following examples show how to use weka.attributeSelection.AttributeSelection. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ModelFactory.java    From AIDR with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Configures an attribute selector that ranks attributes by information
 * gain and keeps at most the top 500 (never more than the number of
 * non-class attributes), then runs the selection on the training data.
 *
 * @param trainingData the data to rank attributes on
 * @return the selector, already run, ready for reduceDimensionality()
 * @throws Exception if the underlying Weka selection fails
 */
private static AttributeSelection getAttributeSelector(
		Instances trainingData) throws Exception {
	InfoGainAttributeEval infoGain = new InfoGainAttributeEval();
	Ranker topRanker = new Ranker();
	// Cap at 500 attributes; the class attribute is excluded from the count.
	int candidates = trainingData.numAttributes() - 1;
	topRanker.setNumToSelect(candidates < 500 ? candidates : 500);
	AttributeSelection attributeSelection = new AttributeSelection();
	attributeSelection.setEvaluator(infoGain);
	attributeSelection.setSearch(topRanker);
	attributeSelection.SelectAttributes(trainingData);
	return attributeSelection;
}
 
Example #2
Source File: WekaFeatureSelectionTest.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
/**
 * Runs CFS subset evaluation with best-first search over the iris data
 * and prints the indices of the selected attributes.
 */
public void selectFeatures(){
	AttributeSelection attSelection = new AttributeSelection();
    CfsSubsetEval eval = new CfsSubsetEval();
    BestFirst search = new BestFirst();
    attSelection.setEvaluator(eval);
    attSelection.setSearch(search);
    try {
		attSelection.SelectAttributes(iris);
		int[] attIndex = attSelection.selectedAttributes();
		System.out.println(Utils.arrayToString(attIndex));
	} catch (Exception e) {
		// BUG FIX: the exception was silently swallowed, hiding any
		// selection failure; surface it instead.
		e.printStackTrace();
	}
}
 
Example #3
Source File: WekaFeatureSelectionTest.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
/**
 * Performs the same CFS/best-first selection as a filter, producing a
 * new data set that contains only the selected attributes, and prints it.
 */
public void selectFeaturesWithFilter(){
	weka.filters.supervised.attribute.AttributeSelection filter = new weka.filters.supervised.attribute.AttributeSelection();
    CfsSubsetEval eval = new CfsSubsetEval();
    BestFirst search = new BestFirst();
    filter.setEvaluator(eval);
    filter.setSearch(search);
    try {
		filter.setInputFormat(iris);
		Instances newData = Filter.useFilter(iris, filter);
		System.out.println(newData);
	} catch (Exception e) {
		// BUG FIX: the exception was silently swallowed, hiding any
		// filtering failure; surface it instead.
		e.printStackTrace();
	}
}
 
Example #4
Source File: MLPipeline.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
public MLPipeline(final ASSearch searcher, final ASEvaluation evaluator, final Classifier baseClassifier) {
	super();
	if (baseClassifier == null) {
		throw new IllegalArgumentException("Base classifier must not be null!");
	}
	if (searcher != null && evaluator != null) {
		AttributeSelection selector = new AttributeSelection();
		selector.setSearch(searcher);
		selector.setEvaluator(evaluator);
		this.preprocessors.add(new SupervisedFilterSelector(searcher, evaluator, selector));
	}
	super.setClassifier(baseClassifier);
}
 
Example #5
Source File: SupervisedFilterSelector.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
public SupervisedFilterSelector(final ASSearch searcher, final ASEvaluation evaluator) {
	super();
	this.searcher = searcher;
	this.evaluator = evaluator;
	this.selector = new AttributeSelection();
	this.selector.setSearch(searcher);
	this.selector.setEvaluator(evaluator);
}
 
Example #6
Source File: SuvervisedFilterPreprocessor.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
public SuvervisedFilterPreprocessor(final ASSearch searcher, final ASEvaluation evaluator) {
	super();
	this.searcher = searcher;
	this.evaluator = evaluator;
	this.selector = new AttributeSelection();
	this.selector.setSearch(searcher);
	this.selector.setEvaluator(evaluator);
}
 
Example #7
Source File: AttributeSelectedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Build the classifier on the dimensionally reduced data.
 *
 * Flow: validate configuration, drop instances with a missing class,
 * optionally resample when instance weights are unequal and a component
 * cannot handle weights, run attribute selection, then train the base
 * classifier on the reduced data.
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // Fail fast if any of the three required components is unset.
  if (m_Classifier == null) {
    throw new Exception("No base classifier has been set!");
  }

  if (m_Evaluator == null) {
    throw new Exception("No attribute evaluator has been set!");
  }

  if (m_Search == null) {
    throw new Exception("No search method has been set!");
  }
 
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  Instances newData = new Instances(data);
  newData.deleteWithMissingClass();
  
  // Degenerate case: nothing left to select on; build on the empty set
  // and skip attribute selection entirely.
  if (newData.numInstances() == 0) {
    m_Classifier.buildClassifier(newData);
    return;
  }
  if (newData.classAttribute().isNominal()) {
    m_numClasses = newData.classAttribute().numValues();
  } else {
    // numeric class: treated as a single "class"
    m_numClasses = 1;
  }

  Instances resampledData = null;
  // check to see if training data has all equal weights
  double weight = newData.instance(0).weight();
  boolean ok = false;  // NOTE: "ok" == true means weights are NOT all equal
  for (int i = 1; i < newData.numInstances(); i++) {
    if (newData.instance(i).weight() != weight) {
      ok = true;
      break;
    }
  }
  
  if (ok) {
    // Unequal weights: if either component cannot use weights directly,
    // simulate them by resampling with probability proportional to weight.
    if (!(m_Evaluator instanceof WeightedInstancesHandler) || 
        !(m_Classifier instanceof WeightedInstancesHandler)) {
      // Fixed seed for reproducibility; a few values are discarded from
      // the stream before resampling.
      Random r = new Random(1);
      for (int i = 0; i < 10; i++) {
        r.nextDouble();
      }
      resampledData = newData.resampleWithWeights(r);
    }
    // If both components handle weights, resampledData stays null; in that
    // situation every later use below takes the newData branch instead.
  } else {
    // all equal weights in the training data so just use as is
    resampledData = newData;
  }

  m_AttributeSelection = new AttributeSelection();
  m_AttributeSelection.setEvaluator(m_Evaluator);
  m_AttributeSelection.setSearch(m_Search);
  long start = System.currentTimeMillis();
  // Weight-aware evaluators see the original (weighted) data; others see
  // the resampled stand-in.
  m_AttributeSelection.
    SelectAttributes((m_Evaluator instanceof WeightedInstancesHandler) 
                     ? newData
                     : resampledData);
  long end = System.currentTimeMillis();
  // Same weighted-vs-resampled choice, but keyed on the classifier.
  if (m_Classifier instanceof WeightedInstancesHandler) {
    newData = m_AttributeSelection.reduceDimensionality(newData);
    m_Classifier.buildClassifier(newData);
  } else {
    resampledData = m_AttributeSelection.reduceDimensionality(resampledData);
    m_Classifier.buildClassifier(resampledData);
  }

  long end2 = System.currentTimeMillis();
  m_numAttributesSelected = m_AttributeSelection.numberAttributesSelected();
  // Empty (header-only) copy of the reduced data — presumably used later
  // to map unseen instances into the reduced space; not shown here.
  m_ReducedHeader = 
    new Instances((m_Classifier instanceof WeightedInstancesHandler) ?
                  newData
                  : resampledData, 0);
  m_selectionTime = (double)(end - start);  // attribute-selection time (ms)
  m_totalTime = (double)(end2 - start);     // selection + training time (ms)
}
 
Example #8
Source File: SupervisedFilterSelector.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds a selector that uses the caller-provided, pre-configured
 * {@code AttributeSelection} instance instead of creating its own.
 *
 * @param searcher  the attribute search strategy
 * @param evaluator the attribute evaluation method
 * @param selector  the pre-configured selection engine to use
 */
public SupervisedFilterSelector(final ASSearch searcher, final ASEvaluation evaluator, final AttributeSelection selector) {
	super();
	this.selector = selector;
	this.evaluator = evaluator;
	this.searcher = searcher;
}
 
Example #9
Source File: SupervisedFilterSelector.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * @return the {@code AttributeSelection} engine backing this selector
 */
public AttributeSelection getSelector() {
	return selector;
}
 
Example #10
Source File: SuvervisedFilterPreprocessor.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds a preprocessor that uses the caller-provided, pre-configured
 * {@code AttributeSelection} instance instead of creating its own.
 *
 * @param searcher  the attribute search strategy
 * @param evaluator the attribute evaluation method
 * @param selector  the pre-configured selection engine to use
 */
public SuvervisedFilterPreprocessor(final ASSearch searcher, final ASEvaluation evaluator, final AttributeSelection selector) {
	super();
	this.selector = selector;
	this.evaluator = evaluator;
	this.searcher = searcher;
}
 
Example #11
Source File: SuvervisedFilterPreprocessor.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * @return the {@code AttributeSelection} engine backing this preprocessor
 */
public AttributeSelection getSelector() {
	return selector;
}
 
Example #12
Source File: KddCup.java    From Machine-Learning-in-Java with MIT License 4 votes vote down vote up
/**
 * Cleans and reduces the raw data set in four steps: drop near-constant
 * attributes, impute missing values, discretize numeric attributes, and
 * keep only attributes whose information gain exceeds a small threshold.
 *
 * @param data the raw instances
 * @return the transformed, dimensionally reduced instances
 * @throws Exception if any filter or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception{

	/* 
	 * Remove useless attributes
	 */
	RemoveUseless removeUseless = new RemoveUseless();
	removeUseless.setOptions(new String[] { "-M", "99" });	// threshold
	removeUseless.setInputFormat(data);
	data = Filter.useFilter(data, removeUseless);

	/* 
	 * Replace missing values
	 */
	ReplaceMissingValues fixMissing = new ReplaceMissingValues();
	fixMissing.setInputFormat(data);
	data = Filter.useFilter(data, fixMissing);

	/* 
	 * Discretize numeric attributes
	 */
	Discretize discretizeNumeric = new Discretize();
	discretizeNumeric.setOptions(new String[] {
			"-O",
			"-M",  "-1.0", 
			"-B",  "4",  // no of bins
			"-R",  "first-last"}); //range of attributes
	// BUG FIX: the original code re-applied the ReplaceMissingValues
	// filter here, so the configured Discretize filter was never used.
	discretizeNumeric.setInputFormat(data);
	data = Filter.useFilter(data, discretizeNumeric);

	/* 
	 * Select only informative attributes
	 */
	InfoGainAttributeEval eval = new InfoGainAttributeEval();
	Ranker search = new Ranker();
	search.setOptions(new String[] { "-T", "0.001" });	// information gain threshold
	AttributeSelection attSelect = new AttributeSelection();
	attSelect.setEvaluator(eval);
	attSelect.setSearch(search);

	// apply attribute selection
	attSelect.SelectAttributes(data);

	// remove the attributes not selected in the last run
	data = attSelect.reduceDimensionality(data);

	return data;
}
 
Example #13
Source File: ModelFactory.java    From AIDR with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Trains a classifier for the given crisis/attribute pair and returns
 * whichever of the new and old model performs better on the evaluation
 * set (with a margin favoring the new model, and a forced retrain once
 * enough additional training examples have accumulated).
 *
 * @param crisisID    identifier of the crisis whose data is used
 * @param attributeID identifier of the attribute being modeled
 * @param oldModel    the previously trained model, or null if none exists
 * @return the model to use going forward (new, old, or null if training
 *         was postponed and no old model exists)
 * @throws Exception if data retrieval, training, or evaluation fails
 */
public static Model buildModel(int crisisID, int attributeID, Model oldModel)
		throws Exception {

	// TODO: Improve model training to try different classifiers and
	// different mixes of old and new data

	// Get training and evaluation data
	Instances trainingSet = DataStore.getTrainingSet(crisisID, attributeID);
	Instances evaluationSet = DataStore.getEvaluationSet(crisisID,
			attributeID, trainingSet);

	// Postpone training when the data cannot support a meaningful model.
	if (trainingSet.attribute(trainingSet.numAttributes() - 1).numValues() < 2) {
		// BUG FIX: the concatenated message lacked a separator
		logger.info("ModelFactory: " + 
				"All training examples have the same label. Postponing training.");
		return oldModel;
	}
	if (evaluationSet.numInstances() < 2) {
		// BUG FIX: the concatenated message lacked a separator
		logger.info("ModelFactory: " +
				"The evaluation set is too small. Postponing training.");
		return oldModel;
	}

	// Do attribute selection
	AttributeSelection selector = getAttributeSelector(trainingSet);
	trainingSet = selector.reduceDimensionality(trainingSet);
	evaluationSet = selector.reduceDimensionality(evaluationSet);

	// Train classifier
	Classifier classifier = trainClassifier(trainingSet);

	// Create the model object
	Model model = new Model(attributeID, classifier, getTemplateSet(trainingSet));
	model.setTrainingSampleCount(trainingSet.size());

	// Evaluate classifier
	model.evaluate(evaluationSet);
	double newPerformance = model.getWeightedPerformance();
	double oldPerformance = 0;
	if (oldModel != null) {
		oldModel.evaluate(evaluationSet);
		oldPerformance = oldModel.getWeightedPerformance();
	}

	// Koushik: Changed as per ChaTo's suggestion
	if (newPerformance > oldPerformance - PERFORMANCE_IMPROVEMENT_MARGIN) {
		// New model is at least about as good as the old one: keep it.
		return model;
	} else if (oldModel == null
			|| model.getTrainingSampleCount() > oldModel.getTrainingSampleCount() + TRAINING_EXAMPLES_FORCE_RETRAIN) {
		// BUG FIX: guard against NullPointerException when there is no
		// previous model; also force a retrain once enough extra
		// training examples have accumulated.
		return model;
	} else {
		return oldModel;
	}
}