weka.core.Instances Java Examples

The following examples show how to use weka.core.Instances. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NormalizeCase.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Centres every instance (row) of {@code r} in place: the mean of the row's
 * non-class, non-nominal attribute values is subtracted from each of those values.
 * Nominal attributes and the class attribute are left untouched.
 *
 * <p>The mean is taken over exactly the attributes that are adjusted, so a class
 * attribute at index 0 or the presence of nominal attributes no longer skews the
 * divisor.
 *
 * @param r the data to centre; its instances are modified in place
 * @throws Exception declared for compatibility with existing callers
 */
public void standard(Instances r) throws Exception{
	int classIndex=r.classIndex();
	int numAtts=r.numAttributes();

	// Number of attributes that take part in the mean (same filter as the loops below).
	int size=0;
	for(int j=0;j<numAtts;j++){
		if(j!=classIndex&& !r.attribute(j).isNominal())
			size++;
	}
	if(size==0)
		return;	// nothing to centre, and avoids a 0/0 mean

	for(int i=0;i<r.numInstances();i++)
	{
		// First pass: row mean over the adjustable attributes.
		double sum=0;
		for(int j=0;j<numAtts;j++){
			if(j!=classIndex&& !r.attribute(j).isNominal())
				sum+=r.instance(i).value(j);
		}
		double mean=sum/size;

		// Second pass: subtract the mean from the same attributes.
		for(int j=0;j<numAtts;j++){
			if(j!=classIndex&& !r.attribute(j).isNominal()){
				double x=r.instance(i).value(j);
				r.instance(i).setValue(j,(x-mean));
			}
		}
	}
}
 
Example #2
Source File: BoTSWEnsemble.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the bag for a single instance: features are extracted from the instance
 * (without its class value), each feature vector is assigned to a cluster by
 * {@code kmeans}, and the per-cluster counts are normalised (SSR, then l2).
 *
 * @param instnc the instance to convert
 * @return a bag holding the normalised histogram and the instance's class value
 * @throws Exception if clustering an extracted feature vector fails
 */
public BoTSW_Bag buildTestBag(Instance instnc) throws Exception {
    final double[][] descriptors = extractFeatures(toArrayNoClass(instnc));
    final int numDescriptors = descriptors.length;

    // Wrap each descriptor in the cluster data's format and count cluster assignments.
    Instances wrapped = new Instances(clusterData, numDescriptors);
    double[] counts = new double[params.k];
    int idx = 0;
    while (idx < numDescriptors) {
        wrapped.add(new DenseInstance(1, descriptors[idx]));
        int assigned = kmeans.clusterInstance(wrapped.get(idx));
        counts[assigned] += 1;
        idx++;
    }

    double[] normalised = normaliseHistograml2(normaliseHistogramSSR(counts));
    return new BoTSW_Bag(normalised, instnc.classValue());
}
 
Example #3
Source File: MIOptimalBall.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the Euclidean distance from a data point to the closest instance of a bag.
 *
 * @param center the data point in instance space
 * @param bag the bag; its instances are read from the relational attribute at index 1
 * @return the smallest Euclidean distance between {@code center} and any instance
 *         in the bag
 */
public double minBagDistance (Instance center, Instance bag){
  Instances members = bag.relationalValue(1);
  double smallestSquared = Double.MAX_VALUE;

  for (int m = 0; m < members.numInstances(); m++) {
    Instance member = members.instance(m);

    // squared distance over all attributes of the centre point
    double squared = 0;
    for (int a = 0; a < center.numAttributes(); a++) {
      double diff = center.value(a) - member.value(a);
      squared += diff * diff;
    }

    if (squared < smallestSquared) {
      smallestSquared = squared;
    }
  }

  // the square root is only needed once, for the winning candidate
  return Math.sqrt(smallestSquared);
}
 
Example #4
Source File: RepeatedRuns.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Evaluates the classifier on the dataset, either sequentially or in parallel,
 * depending on the number of threads resolved from the configured thread count
 * and the number of runs.
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the collected statistics; emptied if the evaluation was stopped
 */
@Override
public List<EvaluationStatistics> evaluate(MultiLabelClassifier classifier, Instances dataset) {
	m_ActualNumThreads = ThreadUtils.getActualNumThreads(m_NumThreads, m_UpperRuns - m_LowerRuns + 1);
	log("Number of threads (" + ThreadUtils.SEQUENTIAL + " = sequential): " + m_ActualNumThreads);

	List<EvaluationStatistics> stats = (m_ActualNumThreads == ThreadUtils.SEQUENTIAL)
		? evaluateSequential(classifier, dataset)
		: evaluateParallel(classifier, dataset);

	// results gathered before a stop are discarded
	if (m_Stopped) {
		stats.clear();
	}

	return stats;
}
 
Example #5
Source File: Reorder.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Sets the format of the input instances and derives the output format, which
  * contains copies of the selected attributes in selection order.
  *
  * @param instanceInfo an Instances object containing the input instance
  * structure (any instances contained in the object are ignored - only the
  * structure is required).
  * @return true, as the output format may be collected immediately
  * @throws Exception if a problem occurs setting the input format
  */
 public boolean setInputFormat(Instances instanceInfo) throws Exception {
   super.setInputFormat(instanceInfo);

   m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());

   // Copy the selected attributes; remember where the class attribute ends up
   // (-1 if it is not part of the selection).
   FastVector kept = new FastVector();
   int newClassIndex = -1;
   for (int selected : m_SelectedAttributes) {
     if (selected == instanceInfo.classIndex()) {
       newClassIndex = kept.size();
     }
     kept.addElement((Attribute) instanceInfo.attribute(selected).copy());
   }

   initInputLocators(instanceInfo, m_SelectedAttributes);

   Instances reordered = new Instances(instanceInfo.relationName(), kept, 0);
   reordered.setClassIndex(newClassIndex);
   setOutputFormat(reordered);

   return true;
 }
 
Example #6
Source File: PKIDiscretize.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Sets the number of bins to the (truncated) square root of the number of
  * instances whose value for the attribute is not missing, and creates the cut
  * points if at least one bin results.
  *
  * @param index the attribute index
  */
 protected void findNumBins(int index) {
   Instances data = getInputFormat();

   // Count the instances that have no value for this attribute.
   int missing = 0;
   for (int row = 0; row < data.numInstances(); row++) {
     if (data.instance(row).isMissing(index)) {
       missing++;
     }
   }
   int present = data.numInstances() - missing;

   m_NumBins = (int) Math.sqrt(present);

   if (m_NumBins > 0) {
     calculateCutPointsByEqualFrequencyBinning(index);
   }
 }
 
Example #7
Source File: WekaMatchingRule.java    From winter with Apache License 2.0 6 votes vote down vote up
/**
 * Defines the structure of a Weka dataset for the given feature vectors.
 * One Weka attribute is created (from its identifier) for every schema attribute
 * other than the label; a nominal label attribute with the values "1" and "0" is
 * appended last and set as the class.
 *
 * @param features
 *            Holds all features including a label, if training data is
 *            created.
 * @param datasetName
 *            The name of the dataset
 * @return the newly created, empty dataset
 */

private Instances defineDataset(FeatureVectorDataSet features, String datasetName) {
	ArrayList<weka.core.Attribute> wekaAttributes = new ArrayList<weka.core.Attribute>();

	// one Weka attribute per schema attribute; the label is handled separately below
	for (Attribute schemaAttribute : features.getSchema().get()) {
		if (schemaAttribute.equals(FeatureVectorDataSet.ATTRIBUTE_LABEL)) {
			continue;
		}
		wekaAttributes.add(new weka.core.Attribute(schemaAttribute.getIdentifier()));
	}

	// the label always occupies the last position of the dataset
	ArrayList<String> classValues = new ArrayList<String>();
	classValues.add("1");
	classValues.add("0");
	wekaAttributes.add(
			new weka.core.Attribute(FeatureVectorDataSet.ATTRIBUTE_LABEL.getIdentifier(), classValues));

	Instances emptyDataset = new Instances(datasetName, wekaAttributes, 0);
	emptyDataset.setClassIndex(wekaAttributes.size() - 1);
	return emptyDataset;
}
 
Example #8
Source File: EvaluationUtils.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Trains an LDA classifier on the first part of a stratified split of the data and
 * returns its accuracy on the second part.
 *
 * <p>If the training part has more than 5,000 attributes, a {@link RemoveUseless}
 * filter is applied first. The same initialised filter is applied to the test part
 * as well, so that the classifier, the evaluation header and the test data all share
 * one attribute structure.
 *
 * @param instances the data to split and evaluate on
 * @return the fraction of correctly classified test instances, in [0, 1]
 * @throws Exception if splitting, filtering, training or evaluation fails
 */
public static double performLDA(final Instances instances) throws Exception {
	// presumably seed 42 and a 70% first portion; see WekaUtil.getStratifiedSplit
	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, .7f);
	Instances train = split.get(0);
	Instances test = split.get(1);

	if (train.numAttributes() > 5_000) {
		RemoveUseless rem = new RemoveUseless();
		rem.setMaximumVariancePercentageAllowed(0.9);
		rem.setInputFormat(train);
		train = Filter.useFilter(train, rem);
		// Re-use the filter initialised on the training data so the test data
		// loses exactly the same attributes.
		test = Filter.useFilter(test, rem);
	}

	LDA lda = new LDA();
	lda.buildClassifier(train);

	// The evaluation header must match the data the model was trained on.
	Evaluation eval = new Evaluation(train);
	eval.evaluateModel(lda, test);

	return eval.pctCorrect() / 100.0;
}
 
Example #9
Source File: ImageInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Tests getDataSetIterator: with a batch size of 1, the label produced by the
 * iterator must match the label stored in the meta data for every instance, and
 * the set of labels seen must equal the iterator's label set (10 labels).
 */
@Test
public void testGetIterator() throws Exception {
  final Instances metaData = DatasetLoader.loadMiniMnistMeta();
  this.idi.setImagesLocation(new File("datasets/nominal/mnist-minimal"));
  final int batchSize = 1;
  final DataSetIterator it = this.idi.getDataSetIterator(metaData, SEED, batchSize);

  Set<Integer> seenLabels = new HashSet<>();
  for (Instance inst : metaData) {
    final int expected = Integer.parseInt(inst.stringValue(1));
    final int actual = Utils.getNext(it).getLabels().argMax().getInt(0);
    Assert.assertEquals(expected, actual);
    seenLabels.add(expected);
  }

  final List<Integer> iteratorLabels =
      it.getLabels().stream().map(Integer::valueOf).collect(Collectors.toList());
  Assert.assertEquals(10, seenLabels.size());
  // set equality, checked in both directions
  Assert.assertTrue(seenLabels.containsAll(iteratorLabels));
  Assert.assertTrue(iteratorLabels.containsAll(seenLabels));
}
 
Example #10
Source File: BestConf.java    From bestconf with Apache License 2.0 6 votes vote down vote up
/**
 * Loads an ARFF file, takes its first instance as the best configuration, applies
 * it through {@code BestConf.setOptimal} and prints the resulting performance.
 * All attributes except the last one are passed on as configuration values
 * (presumably the last attribute is the recorded performance - confirm).
 * An {@link IOException} is printed, not propagated.
 *
 * @param path path of the ARFF file to load
 */
public static void getBestPerfFrom(String path){
	try {
		BestConf bestconf = new BestConf();
		Instances loaded = DataIOFile.loadDataFromArffFile(path);
		Instance best = loaded.firstInstance();

		// collect the configuration values of the best point
		Map<Attribute,Double> settings = new HashMap<Attribute,Double>();
		int numSettings = best.numAttributes()-1;
		for(int a=0;a<numSettings;a++){
			settings.put(best.attribute(a), best.value(a));
		}

		double bestPerf = bestconf.setOptimal(settings, "getBestPerfFrom");
		final String separator = "=========================================";
		System.out.println(separator);
		System.err.println("The actual performance for the best point is : "+bestPerf);
		System.out.println(separator);
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example #11
Source File: ResidualSplit.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Returns the condition satisfied by instances in a subset, as text.
  * For a nominal split attribute this is " = " followed by the attribute value at
  * {@code index}; otherwise it is " <= " (index 0) or " > " (any other index)
  * followed by the split point printed with 6 decimal places.
  *
  * @param index the index of the subset
  * @param data the data whose attribute information is used
  * @return the right-hand side of the split condition
  */
 public final String rightSide(int index,Instances data) {

   if (data.attribute(m_attIndex).isNominal()) {
     return " = " + data.attribute(m_attIndex).value(index);
   }

   String comparison = (index == 0) ? " <= " : " > ";
   return comparison + Utils.doubleToString(m_splitPoint,6);
 }
 
Example #12
Source File: SkewedRandomSearch.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Initialises the search and pre-generates {@code numShapeletsPerSeries} random
 * shapelet candidates. Each candidate gets a random series, a length sampled from
 * the cumulative length distribution (offset by {@code minShapeletLength}), a
 * random start position and a random dimension, and is stored under its series in
 * {@code shapeletsToFind}.
 *
 * @param input the data the candidates are drawn for
 */
@Override
public void init(Instances input){
    super.init(input);

    cumulativeDistribution = findCumulativeCounts(lengthDistribution);

    for(int n = 0; n < numShapeletsPerSeries; n++){
        // The order of the random draws is significant for reproducibility.
        int seriesId = random.nextInt(input.numInstances());
        // the sampled index is relative to the minimum length: 0 == minShapeletLength
        int shapeletLength = sampleCounts(cumulativeDistribution, random) + minShapeletLength;
        // start positions are limited by the candidate's length
        int start = random.nextInt(seriesLength - shapeletLength);
        int dim = random.nextInt(numDimensions);

        CandidateSearchData candidate = new CandidateSearchData(start, shapeletLength, dim);

        // append to this series' candidate list, creating the list on first use
        ArrayList<CandidateSearchData> forSeries = shapeletsToFind.get(seriesId);
        if(forSeries == null) {
            forSeries = new ArrayList<>();
        }
        forSeries.add(candidate);
        shapeletsToFind.put(seriesId, forSeries);
    }
}
 
Example #13
Source File: SimpleMI.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the class distribution for a given exemplar. The exemplar is placed in
 * a one-instance dataset, transformed, stripped of its first attribute and then
 * handed to the wrapped classifier.
 *
 * @param newBag the exemplar for which the distribution is computed
 * @return the distribution returned by the wrapped classifier
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance newBag)
  throws Exception {

  // a dataset with the exemplar's header that contains only the exemplar
  Instances singleton = new Instances(newBag.dataset(), 0);
  singleton.add(newBag);

  Instances transformed = transform(singleton);
  transformed.deleteAttributeAt(0);

  return m_Classifier.distributionForInstance(transformed.firstInstance());
}
 
Example #14
Source File: MLUtils.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * AddZtoD - Inserts the attribute space Z[N][H] (N rows of H columns) into
 * Instances D, which should also have N rows. The new attributes are named
 * "A0".."A(H-1)", occupy indices L..L+H-1, and the class index is set to L.
 * D is modified in place.
 * @param	D 	dataset (of N instances)
 * @param	Z	attribute space (of N rows, H columns); must have at least one row
 * @param	L	index in D at which the first column of Z is inserted
 * @return	D, after modification
 */
private static Instances addZtoD(Instances D, double Z[][], int L) {

	final int numNewColumns = Z[0].length;
	final int numRows = D.numInstances();

	// make room: one new attribute per column of Z
	for(int col = 0; col < numNewColumns; col++) {
		D.insertAttributeAt(new Attribute("A"+col),L+col);
	}

	// fill the new attributes, column by column
	for(int col = 0; col < numNewColumns; col++) {
		final int target = L+col;
		for(int row = 0; row < numRows; row++) {
			D.instance(row).setValue(target,Z[row][col]);
		}
	}

	D.setClassIndex(L);
	return D;
}
 
Example #15
Source File: BagOfPatterns.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Determines the output format: one attribute per word of the dictionary and, if
 * the input has a class attribute, a nominal class attribute with the same name
 * and values appended last.
 *
 * @param inputFormat the format of the input data
 * @return the format of the transformed data
 * @throws Exception if a non-class attribute of the input is not numeric
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
        throws Exception {

    final int classIdx = inputFormat.classIndex();

    // every attribute other than the class must be numeric
    for (int att = 0; att < inputFormat.numAttributes(); att++) {
        if (att != classIdx && !inputFormat.attribute(att).isNumeric()) {
            throw new Exception("Non numeric attribute not allowed for BoP conversion");
        }
    }

    ArrayList<Attribute> wordAttributes = new ArrayList<>();
    for (String word : dictionary) {
        wordAttributes.add(new Attribute(word));
    }

    Instances result = new Instances("BagOfPatterns_" + inputFormat.relationName(),
            wordAttributes, inputFormat.numInstances());

    if (classIdx < 0) {
        // no class attribute to carry over
        return result;
    }

    // copy the class attribute's name and values onto a new last attribute
    Attribute target = inputFormat.attribute(classIdx);
    ArrayList<String> classValues = new ArrayList<>(target.numValues());
    for (int v = 0; v < target.numValues(); v++) {
        classValues.add(target.value(v));
    }
    result.insertAttributeAt(new Attribute(target.name(), classValues), result.numAttributes());
    result.setClassIndex(result.numAttributes() - 1);

    return result;
}
 
Example #16
Source File: Dl4jMlpClassifier.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Extracts features for the given dataset from the named layers and concatenates
 * them along dimension 1, in the order the layers are given. A layer name that
 * occurs more than once is only used the first time; a warning is logged for each
 * repeat.
 *
 * @param layerNames names of the layers to take activations from
 * @param input data to featurize
 * @param poolingType pooling type to use
 * @return Instances built from the concatenated activations with the classes appended
 * @throws Exception if the activations cannot be computed or converted
 */
public Instances getActivationsAtLayers(String[] layerNames, Instances input, PoolingType poolingType)
    throws Exception {
  DataSetIterator iter = getDataSetIterator(input);
  // insertion-ordered: layer name -> size of dimension 1 of its activations
  Map<String, Long> attributesPerLayer = new LinkedHashMap<>();
  INDArray combined = null;

  log.info("Getting features from layers: " + Arrays.toString(layerNames));

  for (String layerName : layerNames) {
    if (attributesPerLayer.containsKey(layerName)) {
      log.warn("Concatenating two identical layers not supported");
      continue;
    }

    INDArray layerActivations = featurizeForLayer(layerName, iter, poolingType);
    attributesPerLayer.put(layerName, layerActivations.shape()[1]);

    // the first layer starts the result; later layers are appended along dimension 1
    combined = (combined == null)
        ? layerActivations
        : Nd4j.concat(1, combined, layerActivations);
  }

  INDArray withClasses = Utils.appendClasses(combined, input);
  return Utils.convertToInstances(withClasses, input, attributesPerLayer);
}
 
Example #17
Source File: UnsupervisedShapelets.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes, for every instance in the data, the smallest Euclidean distance
 * between this object's z-normalised series and any z-normalised window of
 * {@code length} values in that instance, divided by sqrt(length).
 *
 * @param data the series to measure against
 * @return one normalised minimum distance per instance, in instance order
 */
double[] computeDistances(Instances data){
    double[] result = new double[data.numInstances()];
    double[] reference = zNormalise();

    for (int s = 0; s < data.numInstances(); s++){
        Instance series = data.get(s);
        UShapelet window = new UShapelet(0, length, series);
        double best = Double.MAX_VALUE;

        // slide the window over the series, keeping the closest match
        for (int start = 0; start < series.numAttributes() - length; start++){
            window.startPoint = start;
            double candidate = euclideanDistance(reference, window.zNormalise());
            if (candidate < best){
                best = candidate;
            }
        }
        result[s] = best;
    }

    // length-normalise all distances
    double normaliser = Math.sqrt(length);
    for (int s = 0; s < result.length; s++){
        result[s] = result[s] / normaliser;
    }

    return result;
}
 
Example #18
Source File: RDG1.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the format for the dataset produced. Re-seeds the random number
 * generator from the configured seed, resets the decision list and sets the
 * actual number of examples to the number given per option.
 *
 * @return the output data format
 * @throws Exception data format could not be defined
 */
public Instances defineDataFormat() throws Exception {
  Random rng = new Random(getSeed());
  setRandom(rng);

  m_DecisionList = new FastVector();

  // the actual number of examples equals the requested number
  setNumExamplesAct(getNumExamples());

  return defineDataset(rng);
}
 
Example #19
Source File: SaveCSV.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the action listener to use in the menu. When triggered, the listener
 * asks the user for a target file and writes the selected result's
 * {@code RESULTS_SAMPLED_OVER_TIME} measurement to it as CSV. On failure the
 * error is printed and shown in a dialog together with the exception that
 * caused it.
 *
 * @param history   the current history
 * @param index     the selected history item
 * @return          the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
	final Result result = history.getResultAt(index);

	return new ActionListener() {
		@Override
		public void actionPerformed(ActionEvent e) {
			Instances performance = (Instances) result.getMeasurement(IncrementalPerformance.RESULTS_SAMPLED_OVER_TIME);

			int retVal = getFileChooser().showSaveDialog(null);
			if (retVal != MekaFileChooser.APPROVE_OPTION)
				return;
			File file = getFileChooser().getSelectedFile();

			try {
				CSVSaver saver = new CSVSaver();
				saver.setInstances(performance);
				// write to the same file that is named in the error message
				saver.setFile(file);
				saver.writeBatch();
			} catch (Exception ex) {
				String msg = "Failed to write to '" + file + "'!";
				System.err.println(msg);
				ex.printStackTrace();
				// show the exception (ex), not the triggering ActionEvent (e)
				JOptionPane.showMessageDialog( null, msg + "\n" + ex);
			}
		}
	};
}
 
Example #20
Source File: PruneableDecList.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a new pruneable decision list, configured with this object's model
 * selection and minimum number of objects, and builds it from the given data.
 *
 * @param train train data
 * @param test test data
 * @param leaf passed through to {@code buildDecList}
 * @return the newly built decision list
 * @throws Exception if something goes wrong
 */
protected ClassifierDecList getNewDecList(Instances train, Instances test, 
			    boolean leaf) throws Exception {

  PruneableDecList built = new PruneableDecList(m_toSelectModel, m_minNumObj);
  built.buildDecList(train, test, leaf);
  return built;
}
 
Example #21
Source File: RBSoDDSOptimization.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Saves the given training set to
 * {@code <resumeFolder>/training_<round>_<subround>_.arff} and then creates an
 * empty marker file with the same name plus the suffix {@code _OK}.
 * An {@link IOException} is printed, not propagated.
 *
 * @param trainingSet the training set to save
 * @param round the current round, used in the file name
 * @param subround the current sub-round, used in the file name
 */
private void saveTrainingSet(Instances trainingSet, int round, int subround){
	String arffPath = resumeFolder+"/training_"+round+"_"+subround+"_.arff";
	try {
		// write the set that was passed in, not the samplePoints field
		DataIOFile.saveDataToArffFile(arffPath, trainingSet);

		// the marker is only created after the data file was written
		File file = new File(arffPath+"_OK");
		file.createNewFile();
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example #22
Source File: TransformExamples.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Applies a {@code PowerSpectrum} filter to the data and then calls the filter's
 * {@code truncate} on the result with a quarter of the input's attribute count.
 * On any exception the error is printed and the JVM exits with status 0.
 *
 * @param data the data to transform
 * @return the transformed data
 */
public static Instances psTransform(Instances data){
    Instances spectrum=null;
    PowerSpectrum filter=new PowerSpectrum();
    try{
        filter.setInputFormat(data);
        spectrum=Filter.useFilter(data, filter);
        int keep=data.numAttributes()/4;
        filter.truncate(spectrum, keep);
    }catch(Exception e){
        System.out.println(" Exception in ACF harness="+e);
        e.printStackTrace();
        System.exit(0);
    }
    return spectrum;
}
 
Example #23
Source File: BOSSSpatialPyramids_BD.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the breakpoints per DFT coefficient: for each of the {@code wordLength}
 * coefficients, the (rounded) values of all windows of all instances are sorted
 * and split into {@code alphabetSize} bins of equal depth. The result is stored
 * in the {@code breakpoints} field and returned.
 *
 * @param data the instances to derive the breakpoints from; must not be empty
 * @return the breakpoints, indexed by [coefficient][bin]
 */
protected double[][] MCB(Instances data) {
    final int numInsts = data.numInstances();
    double[][][] dfts = new double[numInsts][][];

    int next = 0;
    for (Instance inst : data) {
        dfts[next] = performDFT(disjointWindows(toArrayNoClass(inst))); //approximation
        next++;
    }

    final int windowsPerInst = dfts[0].length;
    final int totalWindows = numInsts * windowsPerInst;
    final double targetBinDepth = (double) totalWindows / (double) alphabetSize;

    breakpoints = new double[wordLength][alphabetSize];

    for (int letter = 0; letter < wordLength; ++letter) { //for each dft coeff

        //gather this coefficient from every window of every instance
        double[] column = new double[totalWindows];
        int slot = 0;
        for (int inst = 0; inst < numInsts; ++inst) {
            for (int window = 0; window < windowsPerInst; ++window) {
                //rounded to two decimals to reduce noise
                column[slot] = Math.round(dfts[inst][window][letter] * 100.0) / 100.0;
                slot++;
            }
        }

        //walk the sorted values in steps of the target depth to get equi-depth bins
        Arrays.sort(column);
        double[] letterBreakpoints = breakpoints[letter];
        double binIndex = 0;
        for (int bp = 0; bp < alphabetSize - 1; ++bp) {
            binIndex += targetBinDepth;
            letterBreakpoints[bp] = column[(int) binIndex];
        }

        //the last bin is open-ended
        letterBreakpoints[alphabetSize - 1] = Double.MAX_VALUE;
    }

    return breakpoints;
}
 
Example #24
Source File: XMeans.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Splits a center into two new centers. An offset vector is taken from the debug
 * vectors file if that file exists, and generated randomly otherwise; it is
 * rescaled with {@code changeLength(variance^0.5)} and then added to and
 * subtracted from the center vector to obtain the two children.
 *
 * @param random random function
 * @param center the center that is split here
 * @param variance variance of the cluster
 * @param model data model valid
 * @return a pair of new centers
 * @throws Exception something in AlgVector goes wrong
 */
protected Instances splitCenter(Random random,
	        Instance center,
	        double variance,
	        Instances model) throws Exception {
  m_NumSplits++;
  Instances children = new Instances(model, 2);

  // choose the offset vector: replayed from file for debugging, random otherwise
  AlgVector offset = null;
  boolean replayFromFile = m_DebugVectorsFile.exists() && m_DebugVectorsFile.isFile();
  if (!replayFromFile) {
    offset = new AlgVector(model, random);
  }
  else {
    Instance nextVector = getNextDebugVectorsInstance(model);
    PFD(D_RANDOMVECTOR, "Random Vector from File " + nextVector);
    offset = new AlgVector(nextVector);
  }
  offset.changeLength(Math.pow(variance, 0.5));
  PFD(D_RANDOMVECTOR, "random vector *variance "+ offset);

  // two copies of the center, taken before either one is moved
  AlgVector plus = new AlgVector(center);
  AlgVector minus = (AlgVector) plus.clone();

  // first child: center + offset
  Instance firstChild = plus.add(offset).getAsInstance(model, random);
  children.add(firstChild);
  PFD(D_FOLLOWSPLIT, "first child "+ firstChild);

  // second child: center - offset
  Instance secondChild = minus.substract(offset).getAsInstance(model, random);
  children.add(secondChild);
  PFD(D_FOLLOWSPLIT, "second child "+ secondChild);

  return children;
}
 
Example #25
Source File: Reciprocal.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the value of the last attribute (treated as the response) of every
 * instance by its reciprocal, in place. If the smallest response is not greater
 * than {@code zeroOffset}, all responses are first shifted by
 * {@code -min + zeroOffset}; the shift used is stored in {@code offSet}.
 * The minimum, the offset and every value before and after the transform are
 * printed to standard output.
 *
 * @param data the data to transform; must contain at least one instance
 * @return the same {@code data} object, after modification
 */
public Instances transform(Instances data){
        //Not ideal, should call a method to get this
        final int responsePos=data.numAttributes()-1;

        //Find the smallest response value
        double[] response=data.attributeToDoubleArray(responsePos);
        double min=response[0];
        for(double value : response)
        {
                if(value<min)
                        min=value;
        }

        //Shift only when the minimum does not exceed the zero offset
        offSet=(min<=zeroOffset) ? -min+zeroOffset : 0;
        System.out.println(" Min value = "+min+" offset = "+offSet);

        for(int i=0;i<data.numInstances();i++)
        {
            Instance row = data.instance(i);
            double before=row.value(responsePos);
            System.out.print(i+" "+before);
            double after=1/(before+offSet);
            System.out.println(" "+after);
            row.setValue(responsePos,after);
        }
        return data;
}
 
Example #26
Source File: Prism.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Does E contain any examples in the class C?
  * The class value of each instance is truncated to an int before comparing.
  *
  * @param E the instances to be checked
  * @param C the class
  * @return true if there are any instances of class C
  * @throws Exception if something goes wrong
  */
 private static boolean contains(Instances E, int C) throws Exception {

   for (int i = 0; i < E.numInstances(); i++) {
     int classOfInstance = (int) E.instance(i).classValue();
     if (classOfInstance == C) {
       return true;
     }
   }
   return false;
 }
 
Example #27
Source File: LMT.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the classifier. Works on a copy of the data from which instances with a
 * missing class are removed; missing values are replaced and, if configured,
 * nominal attributes are converted to binary ones before the tree is built.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception{

  // fail early if the data cannot be handled
  getCapabilities().testWithFail(data);

  // work on a copy without missing-class instances
  Instances prepared = new Instances(data);
  prepared.deleteWithMissingClass();

  // replace missing values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(prepared);
  prepared = Filter.useFilter(prepared, m_replaceMissing);

  // optionally convert nominal attributes globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(prepared);
    prepared = Filter.useFilter(prepared, m_nominalToBinary);
  }

  final int minNumInstances = 2;

  // split either on the residuals or on the class value
  ModelSelection modSelection = m_splitOnResiduals
      ? new ResidualModelSelection(minNumInstances)
      : new C45ModelSelection(minNumInstances, prepared, true);

  // create the root and grow the tree from it
  m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression,
	 m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC);
  m_tree.buildClassifier(prepared);

  if (modSelection instanceof C45ModelSelection) {
    ((C45ModelSelection) modSelection).cleanup();
  }
}
 
Example #28
Source File: CollectiveRandomizableSingleClassifierEnhancer.java    From collective-classification-weka-package with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Method for building this classifier. Copies of both datasets are stored; if
 * either copy has no class index, the last attribute of the training set is used
 * as the class for both. The random number generator is created from the seed and
 * released again once the build has finished.
 * 
 * @param training	the training instances
 * @param test	the test instances
 * @throws Exception	if something goes wrong
 */
public void buildClassifier(Instances training, Instances test) throws Exception {
  m_ClassifierBuilt = true;
  m_Random          = new Random(m_Seed);
  m_Trainset        = new Instances(training);
  m_Testset         = new Instances(test);

  // default the class to the training set's last attribute if it is unset anywhere
  boolean classUnset = (m_Trainset.classIndex() == -1) || (m_Testset.classIndex() == -1);
  if (classUnset) {
    int lastTrainAttribute = m_Trainset.numAttributes() - 1;
    m_Trainset.setClassIndex(lastTrainAttribute);
    m_Testset.setClassIndex(lastTrainAttribute);
  }

  checkData();          // are datasets correct?
  checkRestrictions();  // any other data restrictions not met?
  generateSets();       // generate sets
  build();              // performs the restarts/iterations

  m_Random = null;
}
 
Example #29
Source File: Wavelet.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Processes the given data and returns the modified version: the data is first
  * passed through the internal filter (whose input format is set on the first
  * batch) and then padded and transformed by the selected algorithm.
  * This method is called in batchFinished().
  *
  * @param instances   the data to process
  * @return            the modified data
  * @throws Exception  in case the processing goes wrong
  * @throws IllegalStateException if the selected algorithm is not recognized
  * @see               #batchFinished()
  */
 public Instances process(Instances instances) throws Exception {
   if (!isFirstBatchDone()) {
     m_Filter.setInputFormat(instances);
   }
   Instances filtered = Filter.useFilter(instances, m_Filter);

   switch (m_Algorithm) {
     case ALGORITHM_HAAR:
       return processHAAR(pad(filtered));
     default:
       throw new IllegalStateException(
           "Algorithm type '" + m_Algorithm + "' is not recognized!");
   }
 }
 
Example #30
Source File: RegSMO.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Learns the SVM parameters from the data using Smola's SMO algorithm, in three
 * steps: initialisation from the data, optimisation, and wrap-up.
 * Subclasses should implement something more interesting.
 *
 * @param instances	the data to learn from
 * @throws Exception	if something goes wrong
 */
public void buildClassifier(Instances instances) throws Exception {
  init(instances);  // initialize variables
  optimize();       // solve optimization problem
  wrapUp();         // clean up
}