Java Code Examples for weka.filters.unsupervised.attribute.Remove#setInvertSelection()

The following examples show how to use weka.filters.unsupervised.attribute.Remove#setInvertSelection(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: RankingByPairwiseComparison.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Runs the dataset through a {@code Remove} filter configured with the indices held in
 * {@code labelIndices} (selection not inverted) and then appends a nominal attribute named
 * "a>b" with the labels "true,false", which is made the class attribute.
 *
 * @param dataset the instances to transform; the filters return a new set of instances
 * @return the filtered instances whose last attribute is the newly added class attribute
 * @throws Exception if configuring or applying one of the filters fails
 */
private Instances applyFiltersToDataset(final Instances dataset) throws Exception {
	// Step 1: apply the Remove filter to the configured label columns.
	final int[] labelColumns = this.labelIndices.stream().mapToInt(index -> index).toArray();
	final Remove labelRemover = new Remove();
	labelRemover.setAttributeIndicesArray(labelColumns);
	labelRemover.setInvertSelection(false);
	labelRemover.setInputFormat(dataset);
	final Instances withoutLabels = Filter.useFilter(dataset, labelRemover);

	// Step 2: append the binary target attribute at the end.
	final Add targetAppender = new Add();
	targetAppender.setAttributeIndex("last");
	targetAppender.setNominalLabels("true,false");
	targetAppender.setAttributeName("a>b");
	targetAppender.setInputFormat(withoutLabels);
	final Instances result = Filter.useFilter(withoutLabels, targetAppender);

	// The appended attribute is the last one; use it as the class.
	final int targetPosition = result.numAttributes() - 1;
	result.setClassIndex(targetPosition);
	return result;
}
 
Example 2
Source File: SelectWords.java    From hlta with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Applies a {@code Remove} filter to {@code m_instances} and saves the filtered data as an
 * ARFF file.
 *
 * @param output path of the file the filtered instances are written to
 * @param options filter options; when {@code inverse} is {@code true} only {@code options[1]}
 *        is read and used as the attribute index specification, otherwise the whole array is
 *        passed to the filter's {@code setOptions}
 * @param inverse whether to configure the filter from {@code options[1]} with the selection
 *        inverted instead of using the raw option array
 * @throws Exception if the filter cannot be configured or applied, or the file cannot be written
 */
private void removeWords(String output, String[] options, boolean inverse) throws Exception
{
    final Remove filter = new Remove();

    if (!inverse) {
        // Plain mode: the option array configures the filter directly.
        filter.setOptions(options);
    } else {
        // Inverse mode: options[1] holds the attribute indices and the selection is inverted.
        filter.setAttributeIndices(options[1]);
        filter.setInvertSelection(true);
    }
    filter.setInputFormat(m_instances);

    final Instances filtered = Filter.useFilter(m_instances, filter);

    // Persist the filtered data set.
    final ArffSaver writer = new ArffSaver();
    writer.setInstances(filtered);
    writer.setFile(new File(output));
    writer.writeBatch();
}
 
Example 3
Source File: RuleNode.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Fits a linear regression on a copy of the training instances that has
  * been passed through a Remove filter (inverted selection) configured
  * with the given attribute indices, then installs the result as this
  * node's model.
  *
  * @param indices an array of attribute indices to include in the linear
  * model
  * @throws Exception if something goes wrong
  */
 private void buildLinearModel(int [] indices) throws Exception {
   // work on a copy of the training data so m_instances stays untouched
   Instances trainingCopy = new Instances(m_instances);

   Remove keepTested = new Remove();
   keepTested.setInvertSelection(true);
   keepTested.setAttributeIndicesArray(indices);
   keepTested.setInputFormat(trainingCopy);
   Instances reduced = Filter.useFilter(trainingCopy, keepTested);

   // regression over the reduced data
   LinearRegression regression = new LinearRegression();
   regression.buildClassifier(reduced);
   double [] fitted = regression.coefficients();

   // the last fitted value is handed to the model constructor separately
   // (presumably the intercept - confirm against PreConstructedLinearModel)
   int lastPos = fitted.length - 1;

   // map the fitted coefficients back onto the full attribute space,
   // skipping the class attribute
   double [] fullCoeffs = new double [m_instances.numAttributes()];
   for (int pos = 0; pos < lastPos; pos++) {
     int attribute = indices[pos];
     if (attribute == m_classIndex) {
       continue;
     }
     fullCoeffs[attribute] = fitted[pos];
   }

   m_nodeModel = new PreConstructedLinearModel(fullCoeffs, fitted[lastPos]);
   m_nodeModel.buildClassifier(m_instances);
 }
 
Example 4
Source File: LabelTransformationClassifier.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
    * Filters the given instances with a Remove filter over the attribute range
    * "first-" + classIndex(). The selection is inverted when labels is true, so
    * the result is either only the labels (labels = true) or only the features
    * (labels = false).
    *
    * @param inst The input instances.
    * @param labels Return labels (true) or features (false)
    * @return The filtered copy of the input instances.
    * @throws Exception If the filter cannot be configured or applied.
    */
   protected Instances extractPart(Instances inst, boolean labels) throws Exception{
      // TODO Maybe this already exists somewhere in Meka?
      final Remove selector = new Remove();

      // Range covering the leading attributes up to classIndex() (1-based range syntax).
      final String leadingRange = "first-" + inst.classIndex();
      selector.setAttributeIndices(leadingRange);
      selector.setInvertSelection(labels);
      selector.setInputFormat(inst);

      return Filter.useFilter(inst, selector);
   }
 
Example 5
Source File: Apriori.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Removes every attribute whose value is missing in all instances. While
 * scanning the attributes it also tracks the highest nominal value count and,
 * if the upper bound for minimum support is still 1.0 and that count is below
 * the number of instances, lowers the bound to count / number of instances.
 * 
 * @param instances the instances
 * @return a new set of instances with all missing columns removed, or the
 *         given instances unchanged if no column qualified
 * @throws Exception if something goes wrong
 */
protected Instances removeMissingColumns(Instances instances)
    throws Exception {

  final int total = instances.numInstances();
  // comma separated, 1-based indices of the columns to drop
  final StringBuilder toDelete = new StringBuilder();
  int dropped = 0;
  int mostFrequent = 0;

  for (int col = 0; col < instances.numAttributes(); col++) {
    AttributeStats stats = instances.attributeStats(col);

    if (m_upperBoundMinSupport == 1.0 && mostFrequent != total) {
      // see if we can decrease this by looking for the most frequent value
      int[] counts = stats.nominalCounts;
      int best = counts[Utils.maxIndex(counts)];
      if (best > mostFrequent) {
        mostFrequent = best;
      }
    }

    if (stats.missingCount != total) {
      continue;
    }
    if (toDelete.length() > 0) {
      toDelete.append(',');
    }
    toDelete.append(col + 1);
    dropped++;
  }

  if (m_verbose) {
    System.err.println("Removed : " + dropped
        + " columns with all missing " + "values.");
  }

  if (m_upperBoundMinSupport == 1.0 && mostFrequent != total) {
    m_upperBoundMinSupport = (double) mostFrequent / (double) total;
    if (m_verbose) {
      System.err.println("Setting upper bound min support to : "
          + m_upperBoundMinSupport);
    }
  }

  if (toDelete.length() == 0) {
    return instances;
  }

  Remove dropFilter = new Remove();
  dropFilter.setAttributeIndices(toDelete.toString());
  dropFilter.setInvertSelection(false);
  dropFilter.setInputFormat(instances);
  return Filter.useFilter(instances, dropFilter);
}
 
Example 6
Source File: WekaMatchingRule.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Apply the trained model to a candidate record pair. A new
 * FeatureVectorDataSet is created for the pair, converted to Weka instances
 * and classified.
 * 
 * @param record1
 *            the first record (must not be null)
 * @param record2
 *            the second record (must not be null)
 * @param schemaCorrespondences
 *            the schema correspondences between the first and the second
 *            records
 * @return A correspondence holding the input parameters plus the
 *         classifier's confidence for the class value "1" (match), or
 *         {@code null} if no classifier is set, the feature subset
 *         reduction fails, or the classification fails.
 */
@Override
public Correspondence<RecordType, SchemaElementType> apply(RecordType record1, RecordType record2,
		Processable<Correspondence<SchemaElementType, Matchable>> schemaCorrespondences) {

	if (this.classifier == null) {
		logger.error("Please initialise a classifier!");
		return null;
	}

	FeatureVectorDataSet matchSet = this.initialiseFeatures(record1, record2, schemaCorrespondences);
	Record matchRecord = generateFeatures(record1, record2, schemaCorrespondences, matchSet);

	// transform entry for classification.
	matchSet.add(matchRecord);
	Instances matchInstances = this.transformToWeka(matchSet, this.matchSet);

	// reduce dimensions if feature subset selection was applied before.
	if ((this.backwardSelection || this.forwardSelection) && this.fs != null) {
		try {
			Remove removeFilter = new Remove();
			removeFilter.setAttributeIndicesArray(this.fs.selectedAttributes());
			removeFilter.setInvertSelection(true);
			removeFilter.setInputFormat(matchInstances);
			matchInstances = Filter.useFilter(matchInstances, removeFilter);
		} catch (Exception e) {
			// Do not classify unreduced instances with a model that was trained on the
			// reduced attribute set; report the failure like the other error paths do.
			logger.error(String.format("Feature subset reduction failed for Record '%s': %s",
					matchRecord == null ? "null" : matchRecord.toString(), e.getMessage()), e);
			return null;
		}
	}

	// Apply matching rule
	try {
		double[] distribution = this.classifier.distributionForInstance(matchInstances.firstInstance());
		// position of the class value "1" within the class attribute
		int positiveClassIndex = matchInstances.attribute(matchInstances.classIndex()).indexOfValue("1");
		double matchConfidence = distribution[positiveClassIndex];
		if (this.isDebugReportActive()) {
			fillSimilarity(record1, record2, matchConfidence);
		}
		return new Correspondence<RecordType, SchemaElementType>(record1, record2, matchConfidence,
				schemaCorrespondences);
	} catch (Exception e) {
		logger.error(String.format("Classifier Exception for Record '%s': %s",
				matchRecord == null ? "null" : matchRecord.toString(), e.getMessage()), e);
		return null;
	}
}
 
Example 7
Source File: BRq.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds one copy of the base classifier per index below {@code data.classIndex()}
 * (presumably one per label, following the Meka convention - confirm). For each index
 * {@code i} the data is filtered with the indices of all other leading attributes, the
 * first attribute of the result is made the class, non-positive instances are
 * down-sampled according to {@code m_DownSampleRatio}, and the i-th classifier is trained
 * on what remains.
 *
 * @param data the training instances
 * @throws Exception if the capability test, filtering or training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
	testCapabilities(data);

	int c = data.classIndex();

	if(getDebug()) System.out.print("-: Creating "+c+" models ("+m_Classifier.getClass().getName()+"): ");
	m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier,c);

	Instances sub_data = null;

	for(int i = 0; i < c; i++) {

		// Indices of all leading attributes except 'i'. Only a single row is ever needed,
		// so a one-dimensional array replaces the former c x (c - 1) allocation.
		int[] otherIndices = new int[c - 1];
		for(int j = 0, k = 0; j < c; j++) {
			if(j != i) {
				otherIndices[k++] = j;
			}
		}

		//Select only class attribute 'i'
		// NOTE(review): setInvertSelection is called AFTER setInputFormat, exactly as before.
		// Weka's Remove appears to resolve the selected attributes in setInputFormat, so
		// reordering these two calls may change which attributes survive - confirm first.
		Remove FilterRemove = new Remove();
		FilterRemove.setAttributeIndicesArray(otherIndices);
		FilterRemove.setInputFormat(data);
		FilterRemove.setInvertSelection(true);
		sub_data = Filter.useFilter(data, FilterRemove);
		sub_data.setClassIndex(0);
		/* BEGIN downsample for this link */
		sub_data.randomize(m_Random);
		int numToRemove = sub_data.numInstances() - (int)Math.round(sub_data.numInstances() * m_DownSampleRatio);
		// Mark up to numToRemove non-positive instances as class-missing so they get deleted
		// below. Checking the budget in the loop condition avoids dropping one instance when
		// numToRemove is zero or negative (e.g. a ratio of 1.0).
		for(int m = 0, removed = 0; m < sub_data.numInstances() && removed < numToRemove; m++) {
			if (sub_data.instance(m).classValue() <= 0.0) {
				sub_data.instance(m).setClassMissing();
				removed++;
			}
		}
		sub_data.deleteWithMissingClass();
		/* END downsample for this link */


		//Build the classifier for that class
		m_MultiClassifiers[i].buildClassifier(sub_data);
		if(getDebug()) System.out.print(" " + (i+1));

	}

	if(getDebug()) System.out.println(" :-");

	// keep an empty copy of the last filtered data set as header template
	m_InstancesTemplate = new Instances(sub_data, 0);

}