Java Code Examples for weka.core.Instances

The following examples show how to use weka.core.Instances. They are extracted from open source projects; each example notes its source project, source file, and license.
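
Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the file name iris.arff is only a placeholder) showing the most common way to obtain an Instances object and to query it:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesBasics {
    public static void main(String[] args) throws Exception {
        // load a dataset (ARFF, CSV, ...); DataSource picks the loader from the extension
        Instances data = DataSource.read("iris.arff");
        // Instances does not guess the class attribute; by convention it is the last one
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Relation:   " + data.relationName());
        System.out.println("Instances:  " + data.numInstances());
        System.out.println("Attributes: " + data.numAttributes());

        // iterate over the rows
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(inst.classValue() + " <- " + inst);
        }
    }
}
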
Example 1
Source Project: tsml   Source File: SimpleMI.java    License: GNU General Public License v3.0
/**
 * Computes the distribution for a given exemplar
 *
 * @param newBag the exemplar for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance newBag)
  throws Exception {

  double[] distribution = new double[2];
  Instances test = new Instances(newBag.dataset(), 0);
  test.add(newBag);

  test = transform(test);
  test.deleteAttributeAt(0);
  Instance newInst = test.firstInstance();

  distribution = m_Classifier.distributionForInstance(newInst);

  return distribution;
}
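
distributionForInstance is the standard WEKA hook for class-membership probabilities. As a point of reference, here is a minimal, hypothetical driver (the file names and the choice of J48 are placeholders, not part of the tsml example) exercising the same API on ordinary, non-multi-instance data:

import weka.classifiers.Classifier;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DistributionDemo {
    public static void main(String[] args) throws Exception {
        Instances train = DataSource.read("train.arff");   // placeholder file names
        Instances test  = DataSource.read("test.arff");
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        Classifier cls = new J48();          // any classifier could stand in here
        cls.buildClassifier(train);

        // probability distribution over the class values for the first test instance
        double[] dist = cls.distributionForInstance(test.instance(0));
        for (int c = 0; c < dist.length; c++) {
            System.out.println(test.classAttribute().value(c) + ": " + dist[c]);
        }
    }
}
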
 
Example 2
Source Project: bestconf   Source File: BestConf.java    License: Apache License 2.0
public static void getBestPerfFrom(String path){
	try {
		BestConf bestconf = new BestConf();
		Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
		Instance best = trainingSet.firstInstance();
		//set the best configuration to the cluster
		Map<Attribute,Double> attsmap = new HashMap<Attribute,Double>();
		for(int i=0;i<best.numAttributes()-1;i++){
			attsmap.put(best.attribute(i), best.value(i));
		}

		double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
		System.out.println("=========================================");
		System.err.println("The actual performance for the best point is : "+bestPerf);
		System.out.println("=========================================");
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example 3
Source Project: tsml   Source File: PKIDiscretize.java    License: GNU General Public License v3.0
/**
  * Finds the number of bins to use and creates the cut points.
  *
  * @param index the attribute index
  */
 protected void findNumBins(int index) {

   Instances toFilter = getInputFormat();

   // Find number of instances for attribute where not missing
   int numOfInstances = toFilter.numInstances();
   for (int i = 0; i < toFilter.numInstances(); i++) {
     if (toFilter.instance(i).isMissing(index))
       numOfInstances--;
   }

   m_NumBins = (int)(Math.sqrt(numOfInstances));

   if (m_NumBins > 0) {
     calculateCutPointsByEqualFrequencyBinning(index);
   }
 }
 
Example 4
Source Project: tsml   Source File: BoTSWEnsemble.java    License: GNU General Public License v3.0
public BoTSW_Bag buildTestBag(Instance instnc) throws Exception {
    double[][] features = extractFeatures(toArrayNoClass(instnc));

    //cluster/form histograms
    Instances testFeatures = new Instances(clusterData, features.length);
    double[] hist = new double[params.k];
    for (int i = 0; i < features.length; ++i) {
        testFeatures.add(new DenseInstance(1, features[i]));
        int cluster = kmeans.clusterInstance(testFeatures.get(i));
        ++hist[cluster];
    }

    hist = normaliseHistogramSSR(hist);
    hist = normaliseHistograml2(hist);

    return new BoTSW_Bag(hist, instnc.classValue());
}
 
Example 5
Source Project: tsml   Source File: Reorder.java    License: GNU General Public License v3.0
/**
  * Sets the format of the input instances.
  *
  * @param instanceInfo an Instances object containing the input instance
  * structure (any instances contained in the object are ignored - only the
  * structure is required).
  * @return true if the outputFormat may be collected immediately
  * @throws Exception if a problem occurs setting the input format
  */
 public boolean setInputFormat(Instances instanceInfo) throws Exception {
   super.setInputFormat(instanceInfo);
   
   FastVector attributes = new FastVector();
   int outputClass = -1;
   m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());
   for (int i = 0; i < m_SelectedAttributes.length; i++) {
     int current = m_SelectedAttributes[i];
     if (instanceInfo.classIndex() == current) {
       outputClass = attributes.size();
     }
     Attribute keep = (Attribute)instanceInfo.attribute(current).copy();
     attributes.addElement(keep);
   }
   
   initInputLocators(instanceInfo, m_SelectedAttributes);

   Instances outputFormat = new Instances(instanceInfo.relationName(), attributes, 0);
   outputFormat.setClassIndex(outputClass);
   setOutputFormat(outputFormat);
   
   return true;
 }
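
setInputFormat is rarely called in isolation; a filter is normally driven through Filter.useFilter. A minimal sketch of that calling pattern (the dataset name and the index range are placeholders, not part of the tsml example):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Reorder;

public class ReorderDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff");   // placeholder dataset

        Reorder reorder = new Reorder();
        reorder.setAttributeIndices("2-last,1");   // move the first attribute to the end
        reorder.setInputFormat(data);              // calls the setInputFormat shown above
        Instances reordered = Filter.useFilter(data, reorder);

        System.out.println(reordered.attribute(reordered.numAttributes() - 1).name());
    }
}
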
 
Example 6
public static double performLDA(final Instances instances) throws Exception {
	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, .7f);

	LDA lda = new LDA();

	Instances train = split.get(0);
	if (train.numAttributes() > 5_000) {
		RemoveUseless rem = new RemoveUseless();
		rem.setMaximumVariancePercentageAllowed(0.9);
		rem.setInputFormat(train);
		train = Filter.useFilter(train, rem);
	}
	lda.buildClassifier(train);

	Evaluation eval = new Evaluation(split.get(0));
	eval.evaluateModel(lda, split.get(1));

	return eval.pctCorrect() / 100.0;
}
 
Example 7
Source Project: tsml   Source File: ResidualSplit.java    License: GNU General Public License v3.0
/**
  * Prints the condition satisfied by instances in a subset.
  */
 public final String rightSide(int index,Instances data) {

   StringBuffer text;

   text = new StringBuffer();
   if (data.attribute(m_attIndex).isNominal())
     text.append(" = " + data.attribute(m_attIndex).value(index));
   else if (index == 0)
     text.append(" <= " + Utils.doubleToString(m_splitPoint, 6));
   else
     text.append(" > " + Utils.doubleToString(m_splitPoint, 6));
   return text.toString();
 }
 
Example 8
Source Project: meka   Source File: MLUtils.java    License: GNU General Public License v3.0
/**
 * AddZtoD - Add attribute space Z[N][H] (N rows of H columns) to Instances D, which should have N rows also.
 * @param	D 	dataset (of N instances)
 * @param	Z	attribute space (of N rows, H columns)
 * @param	L	the attribute index in D at which to insert the columns of Z
 */
private static Instances addZtoD(Instances D, double Z[][], int L) {

	int H = Z[0].length;
	int N = D.numInstances();

	// add attributes
	for(int a = 0; a < H; a++) {
		D.insertAttributeAt(new Attribute("A"+a),L+a);
	}

	// add values Z[0]...Z[N-1] to D
	for(int a = 0; a < H; a++) {
		for(int i = 0; i < N; i++) {
			D.instance(i).setValue(L+a,Z[i][a]);
		}
	}

	D.setClassIndex(L);
	return D;
}
 
Example 9
Source Project: tsml   Source File: SkewedRandomSearch.java    License: GNU General Public License v3.0
@Override
public void init(Instances input){
    super.init(input);

    cumulativeDistribution = findCumulativeCounts(lengthDistribution);
    //generate the random shapelets we're going to visit.
    for(int i = 0; i< numShapeletsPerSeries; i++){
        //randomly generate values.
        int series = random.nextInt(input.numInstances());
        
        //this gives an index; we assume the length distribution runs from min to max, so a value of 0 == minShapeletLength
        int length = sampleCounts(cumulativeDistribution, random) + minShapeletLength; //select the random length from the distribution of lengths.
        int position  = random.nextInt(seriesLength - length); // can only have valid start positions based on the length. (numAtts-1)-l+1
        int dimension = random.nextInt(numDimensions);
        
        //find the shapelets for that series.
        ArrayList<CandidateSearchData> shapeletList = shapeletsToFind.get(series);
        if(shapeletList == null)
            shapeletList = new ArrayList<>();
        
        //add the random shapelet to the length
        shapeletList.add(new CandidateSearchData(position,length,dimension));
        //put back the updated version.
        shapeletsToFind.put(series, shapeletList);
    }
}
 
Example 10
Source Project: meka   Source File: RepeatedRuns.java    License: GNU General Public License v3.0
/**
 * Returns the evaluation statistics generated for the dataset.
 *
 * @param classifier    the classifier to evaluate
 * @param dataset       the dataset to evaluate on
 * @return              the statistics
 */
@Override
public List<EvaluationStatistics> evaluate(MultiLabelClassifier classifier, Instances dataset) {
	List<EvaluationStatistics>  result;

	m_ActualNumThreads = ThreadUtils.getActualNumThreads(m_NumThreads, m_UpperRuns - m_LowerRuns + 1);

	log("Number of threads (" + ThreadUtils.SEQUENTIAL + " = sequential): " + m_ActualNumThreads);
	if (m_ActualNumThreads == ThreadUtils.SEQUENTIAL)
		result = evaluateSequential(classifier, dataset);
	else
		result = evaluateParallel(classifier, dataset);

	if (m_Stopped)
		result.clear();

	return result;
}
 
Example 11
Source Project: tsml   Source File: MIOptimalBall.java    License: GNU General Public License v3.0
/**
 * Calculate the distance from one data point to a bag
 *
 * @param center the data point in instance space
 * @param bag the bag 
 * @return the double value as the distance.
 */
public double minBagDistance (Instance center, Instance bag){
  double distance;
  double minDistance = Double.MAX_VALUE;
  Instances temp = bag.relationalValue(1);  
  //calculate the distance from the data point to each instance in the bag and return the minimum distance 
  for (int i=0; i<temp.numInstances(); i++){
    distance =0;
    for (int j=0; j<center.numAttributes(); j++)
      distance += (center.value(j)-temp.instance(i).value(j))*(center.value(j)-temp.instance(i).value(j));

    if (minDistance>distance)
      minDistance = distance;
  }
  return Math.sqrt(minDistance); 
}
 
Example 12
Source Project: winter   Source File: WekaMatchingRule.java    License: Apache License 2.0
/**
 * Defines the structure of a Weka table
 * 
 * @param features
 *            Holds all features including a label, if training data is
 *            created.
 * @param datasetName
 *            Holds the dataset's name
 * @return the created, still empty dataset
 */

private Instances defineDataset(FeatureVectorDataSet features, String datasetName) {
	ArrayList<weka.core.Attribute> attributes = new ArrayList<weka.core.Attribute>();
	// create features per attributes of the FeatureVectorDataSet
	for (Iterator<Attribute> attrIterator = features.getSchema().get().iterator(); attrIterator.hasNext();) {
		Attribute attr = attrIterator.next();
		if (!attr.equals(FeatureVectorDataSet.ATTRIBUTE_LABEL)) {
			weka.core.Attribute attribute = new weka.core.Attribute(attr.getIdentifier());
			attributes.add(attribute);
		}
	}

	// Treat the label as a special case, which is always at the last
	// position of the dataset.
	ArrayList<String> labels = new ArrayList<String>();
	labels.add("1");
	labels.add("0");
	weka.core.Attribute cls = new weka.core.Attribute(FeatureVectorDataSet.ATTRIBUTE_LABEL.getIdentifier(), labels);
	attributes.add(cls);

	Instances dataset = new Instances(datasetName, attributes, 0);
	dataset.setClassIndex(attributes.size() - 1);
	return dataset;
}
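
defineDataset only builds the (empty) header. As a rough sketch of how such a header is typically filled, relying only on the standard weka.core API (the attribute names and values here are made up for illustration):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class BuildDatasetDemo {
    public static void main(String[] args) {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("score"));                  // numeric feature
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("1");
        labels.add("0");
        attributes.add(new Attribute("label", labels));          // nominal class

        Instances dataset = new Instances("demo", attributes, 0);
        dataset.setClassIndex(dataset.numAttributes() - 1);

        // add one row
        Instance row = new DenseInstance(dataset.numAttributes());
        row.setDataset(dataset);   // the instance needs the header to resolve nominal values
        row.setValue(0, 0.42);
        row.setValue(1, "1");
        dataset.add(row);

        System.out.println(dataset);
    }
}
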
 
Example 13
Source Project: tsml   Source File: NormalizeCase.java    License: GNU General Public License v3.0
public void standard(Instances r) throws Exception {
	double mean, sum, x;
	int size = r.numAttributes();
	int classIndex = r.classIndex();
	if (classIndex > 0)
		size--;
	for (int i = 0; i < r.numInstances(); i++) {
		sum = 0;
		for (int j = 0; j < r.numAttributes(); j++) {
			if (j != classIndex && !r.attribute(j).isNominal()) { // ignore all nominal atts
				x = r.instance(i).value(j);
				sum += x;
			}
		}
		mean = sum / size;
		for (int j = 0; j < r.numAttributes(); j++) {
			if (j != classIndex && !r.attribute(j).isNominal()) { // ignore all nominal atts
				x = r.instance(i).value(j);
				r.instance(i).setValue(j, x - mean);
			}
		}
	}
}
 
Example 14
/**
 * Test getDataSetIterator
 */
@Test
public void testGetIterator() throws Exception {
  final Instances metaData = DatasetLoader.loadMiniMnistMeta();
  this.idi.setImagesLocation(new File("datasets/nominal/mnist-minimal"));
  final int batchSize = 1;
  final DataSetIterator it = this.idi.getDataSetIterator(metaData, SEED, batchSize);

  Set<Integer> labels = new HashSet<>();
  for (Instance inst : metaData) {
    int label = Integer.parseInt(inst.stringValue(1));
    final DataSet next = Utils.getNext(it);
    int itLabel = next.getLabels().argMax().getInt(0);
    Assert.assertEquals(label, itLabel);
    labels.add(label);
  }
  final List<Integer> collect =
      it.getLabels().stream().map(Integer::valueOf).collect(Collectors.toList());
  Assert.assertEquals(10, labels.size());
  Assert.assertTrue(labels.containsAll(collect));
  Assert.assertTrue(collect.containsAll(labels));
}
 
Example 15
Source Project: tsml   Source File: BIRCHCluster.java    License: GNU General Public License v3.0
/**
  * Generate all examples of the dataset. 
  * 
  * @param random the random number generator to use
  * @param format the dataset format
  * @return the instance generated
  * @throws Exception if format not defined
  */
 public Instances generateExamples(Random random,
			    Instances format) throws Exception {
   Instance example = null;
   
   if (format == null) 
     throw new Exception("Dataset format not defined.");

   // generate examples for one cluster after another
   int cNum = 0;
   for (Enumeration enm = m_ClusterList.elements(); enm.hasMoreElements(); cNum++) {
     Cluster cl = (Cluster) enm.nextElement();
     double stdDev = cl.getStdDev();
     int instNum = cl.getInstNum();
     double[] center = cl.getCenter();
     String cName = "c" + cNum;

     for (int i = 0; i < instNum; i++) {
       // generate example
       example = generateInstance(format, random, stdDev, center, cName);

       if (example != null)
         example.setDataset(format);
       format.add(example);
     }
   }

   return (format);
 }
 
Example 16
public void ProcessTables(int[] table_array)
{
	
	DataBase();
	int execCount = 0;
	try {
		String SQL = "SELECT * from ArtTable where HasXML='yes' and idTable in "+Arrays.toString(table_array);
		SQL = SQL.replace("[", "(").replace("]", ")");
		Statement st = conn.createStatement();
		Instances instances = CreateInstances();
		FastVector fvWekaAttributes = new FastVector(48);
		rs = st.executeQuery(SQL);
		while (rs.next()) {
			Instance iExample = processTable(rs.getInt(1));
			instances.add(iExample);
			

			execCount ++;
			if(execCount>10000){
				conn.close();
				DataBase();
				execCount = 0;	
			}

		}
		System.out.println(instances.toString());
		ArffSaver saver = new ArffSaver();
		saver.setInstances(instances);
		saver.setFile(new File("spptest10.arff"));
		//saver.setDestination(new File("./data/test.arff"));   // **not** necessary in 3.5.4 and later
		saver.writeBatch();
	} catch (Exception ex) {
		ex.printStackTrace();
	}
}
 
Example 17
Source Project: meka   Source File: MLUtils.java    License: GNU General Public License v3.0
/**
 * CountCombinations - return a mapping of each distinct label combination and its count.
 * NOTE: For many applications a sparse representation would be much better, i.e., use toSparseRepresentation(...) instead of toBitString(...).
 * @param	D	dataset 
 * @param	L	number of labels
 * @return	a HashMap where a String representation of each label combination is associated with an Integer count, e.g., "00010010",3
 */
public static final HashMap<String,Integer> countCombinations(Instances D, int L) {
	HashMap<String,Integer> map = new HashMap<String,Integer>();  
	for (int i = 0; i < D.numInstances(); i++) {
		//String y = MLUtils.toSparseRepresentation(D.instance(i),L);
		String y = MLUtils.toBitString(D.instance(i),L);
		Integer c = map.get(y);
		map.put(y,c == null ? 1 : c+1);
	}
	return map;
}
 
Example 18
@Test
public void ensembleEvaluatorTest() throws Exception {
    logger.info("Starting cluster evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = ds.getDataset(DataSetUtils.API_KEY);
    Instances data = new Instances(new BufferedReader(new FileReader(file)));
    data.setClassIndex(data.numAttributes() - 1);
    List<Instances> split = WekaUtil.getStratifiedSplit(data, 42, .05f);

    Instances insts = split.get(0);

    long timeStart = System.currentTimeMillis();

    ReliefFAttributeEval eval = new ReliefFAttributeEval();
    eval.buildEvaluator(insts);

    long timeStartEval = System.currentTimeMillis();

    double attEvalSum = 0;
    for (int i = 0; i < insts.numAttributes(); i++) {
        attEvalSum += eval.evaluateAttribute(i);
    }
    attEvalSum /= insts.numAttributes();

    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.info("Value: " + attEvalSum);
    Assert.assertTrue(attEvalSum > 0);
    logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
    logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
 
Example 19
Source Project: tsml   Source File: CSVLoader.java    License: GNU General Public License v3.0
@Override
public Instances getDataSet() throws IOException {

  if (m_sourceReader == null) {
    throw new IOException("No source has been specified");
  }

  if (getRetrieval() == INCREMENTAL) {
    throw new IOException(
        "Cannot mix getting instances in both incremental and batch modes");
  }
  setRetrieval(BATCH);

  if (m_structure == null) {
    getStructure();
  }

  while (readData(true))
    ;

  m_dataDumper.flush();
  m_dataDumper.close();

  // make final structure
  makeStructure();

  Reader sr = new BufferedReader(new FileReader(m_tempFile));
  ArffReader initialArff = new ArffReader(sr, m_structure, 0);
  Instances initialInsts = initialArff.getData();
  sr.close();
  initialArff = null;

  return initialInsts;
}
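
From the caller's side, the batch path through getDataSet is typically exercised as in the following minimal sketch (the CSV file name is a placeholder, not part of the tsml example):

import java.io.File;

import weka.core.Instances;
import weka.core.converters.CSVLoader;

public class CsvLoadDemo {
    public static void main(String[] args) throws Exception {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File("some.csv"));   // placeholder file name
        Instances data = loader.getDataSet();     // batch mode, as implemented above
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.numInstances() + " instances loaded");
    }
}
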
 
Example 20
Source Project: bestconf   Source File: RBSoDDSOptimization.java    License: Apache License 2.0
private void saveTrainingSet(Instances trainingSet, int round, int subround){
	try {
		DataIOFile.saveDataToArffFile(resumeFolder+"/training_"+round+"_"+subround+"_.arff", samplePoints);
		
		File file = new File(resumeFolder+"/training_"+round+"_"+subround+"_.arff"+"_OK");
		file.createNewFile();
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example 21
Source Project: tsml   Source File: PruneableDecList.java    License: GNU General Public License v3.0
/**
 * Returns a newly created tree.
 *
 * @param train train data
 * @param test test data
 * @param leaf
 * @throws Exception if something goes wrong
 */
protected ClassifierDecList getNewDecList(Instances train, Instances test, 
			    boolean leaf) throws Exception {

  PruneableDecList newDecList = 
    new PruneableDecList(m_toSelectModel, m_minNumObj);
  
  newDecList.buildDecList((Instances)train, test, leaf);
  
  return newDecList;
}
 
Example 22
Source Project: tsml   Source File: ConverterUtils.java    License: GNU General Public License v3.0
/**
    * returns the next element and sets the specified dataset, null if 
    * none available.
    * 
    * @param dataset	the dataset to set for the instance
    * @return		the next Instance
    */
   public Instance nextElement(Instances dataset) {
     Instance	result;
     
     result = null;
     
     if (isIncremental()) {
       // is there still an instance in the buffer?
       if (m_IncrementalBuffer != null) {
         result              = m_IncrementalBuffer;
         m_IncrementalBuffer = null;
       }
       else {
         try {
           result = m_Loader.getNextInstance(dataset);
         }
         catch (Exception e) {
           e.printStackTrace();
           result = null;
         }
       }
     }
     else {
       if (m_BatchCounter < m_BatchBuffer.numInstances()) {
         result = m_BatchBuffer.instance(m_BatchCounter);
         m_BatchCounter++;
       }
     }

     if (result != null) {
       result.setDataset(dataset);
     }
     
     return result;
   }
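
The incremental branch above is what DataSource users hit when they stream a file one instance at a time; a minimal sketch of that calling pattern (the file name is a placeholder):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class IncrementalReadDemo {
    public static void main(String[] args) throws Exception {
        DataSource source = new DataSource("big.arff");   // placeholder file name
        Instances structure = source.getStructure();      // header only

        // nextElement(...) is the method shown above; it returns one instance at a time
        while (source.hasMoreElements(structure)) {
            Instance inst = source.nextElement(structure);
            // process inst here
        }
    }
}
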
 
Example 23
Source Project: browserprint   Source File: Predictor.java    License: MIT License
public static String browserClassify(double classifyMeArray[]) throws Exception{
	Instance classifyMe = new DenseInstance(1.0, classifyMeArray);
	Instances classifyMeDataSet = new Instances("testingDataset", browserAttributes, 0);
	classifyMeDataSet.setClass(browserClassAttribute);
	classifyMe.setDataset(classifyMeDataSet);
	classifyMe.setClassMissing();
	
	double prediction = browserClassifier.classifyInstance(classifyMe);
	return browserClassAttribute.value((int)Math.ceil(prediction));
}
 
Example 24
Source Project: tsml   Source File: DD_DTW.java    License: GNU General Public License v3.0
public static void main(String[] args){
    
    // option 1: simple example of the classifier
    // option 2: recreate the results from the original published work
    
    int option = 1;
    
    try{
        if(option==1){
            String dataName = "ItalyPowerDemand";
            Instances train = DatasetLoading.loadDataNullable(DATA_DIR+dataName+"/"+dataName+"_TRAIN");
            Instances test = DatasetLoading.loadDataNullable(DATA_DIR+dataName+"/"+dataName+"_TEST");
            
            // create the classifier, using DTW as the distance function as an example
            DD_DTW nndw = new DD_DTW(DistanceType.DTW);
            
            // params a and b have not been explicitly set, so buildClassifier will cv to find them
            nndw.buildClassifier(train);
            
            int correct = 0;
            for(int i = 0; i < test.numInstances(); i++){
                if(nndw.classifyInstance(test.instance(i))==test.instance(i).classValue()){
                    correct++;
                }
            }
            System.out.println(dataName+":\t"+new DecimalFormat("#.###").format((double)correct/test.numInstances()*100)+"%");
            
        }else if(option==2){
            recreateResultsTable();
        }
    }catch(Exception e){
        e.printStackTrace();
    }
}
 
Example 25
Source Project: tsml   Source File: UnsupervisedShapelets.java    License: GNU General Public License v3.0
double[] computeDistances(Instances data){
    double[] distances = new double[data.numInstances()];
    double[] shapelet = zNormalise();

    for (int i = 0; i < data.numInstances(); i++){
        Instance inst = data.get(i);
        distances[i] = Double.MAX_VALUE;
        UShapelet subseries = new UShapelet(0, length, inst);

        //Sliding window calculating distance of each section of the series to the shapelet
        for (int n = 0; n < inst.numAttributes() - length; n++){
            subseries.startPoint = n;
            double dist = euclideanDistance(shapelet, subseries.zNormalise());

            if (dist < distances[i]){
                distances[i] = dist;
            }
        }
    }

    double normaliser = Math.sqrt(length);

    for (int i = 0; i < distances.length; i++){
        distances[i] /= normaliser;
    }

    return distances;
}
 
Example 26
Source Project: meka   Source File: SaveCSV.java    License: GNU General Public License v3.0
/**
 * Returns the action lister to use in the menu.
 *
 * @param history   the current history
 * @param index     the selected history item
 * @return          the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
	final Result result = history.getResultAt(index);

	return new ActionListener() {
		@Override
		public void actionPerformed(ActionEvent e) {
			Instances performance = (Instances) result.getMeasurement(IncrementalPerformance.RESULTS_SAMPLED_OVER_TIME);

			int retVal = getFileChooser().showSaveDialog(null);
			if (retVal != MekaFileChooser.APPROVE_OPTION)
				return;
			File file = getFileChooser().getSelectedFile();


			try {

				CSVSaver saver = new CSVSaver();
				saver.setInstances(performance);
				saver.setFile(getFileChooser().getSelectedFile());
				saver.writeBatch();
			} catch (Exception ex) {
				String msg = "Failed to write to '" + file + "'!";
				System.err.println(msg);
				ex.printStackTrace();
				JOptionPane.showMessageDialog(null, msg + "\n" + ex);
			}
		}
	};
}
 
Example 27
Source Project: tsml   Source File: TransformExamples.java    License: GNU General Public License v3.0
public static Instances psTransform(Instances data) {
      PowerSpectrum ps = new PowerSpectrum();
      Instances psTrans = null;
      try {
          ps.setInputFormat(data);
          psTrans = Filter.useFilter(data, ps);
          ps.truncate(psTrans, data.numAttributes() / 4);
      } catch (Exception e) {
          System.out.println(" Exception in PS harness=" + e);
          e.printStackTrace();
          System.exit(0);
      }
      return psTrans;
  }
 
Example 28
Source Project: tsml   Source File: BOSSSpatialPyramids_BD.java    License: GNU General Public License v3.0
protected double[][] MCB(Instances data) {
    double[][][] dfts = new double[data.numInstances()][][];

    int sample = 0;
    for (Instance inst : data) {
        dfts[sample++] = performDFT(disjointWindows(toArrayNoClass(inst))); //approximation
    }

    int numInsts = dfts.length;
    int numWindowsPerInst = dfts[0].length;
    int totalNumWindows = numInsts*numWindowsPerInst;

    breakpoints = new double[wordLength][alphabetSize]; 

    for (int letter = 0; letter < wordLength; ++letter) { //for each dft coeff

        //extract this column from all windows in all instances
        double[] column = new double[totalNumWindows];
        for (int inst = 0; inst < numInsts; ++inst)
            for (int window = 0; window < numWindowsPerInst; ++window) {
                //rounding dft coefficients to reduce noise
                column[(inst * numWindowsPerInst) + window] = Math.round(dfts[inst][window][letter]*100.0)/100.0;   
            }

        //sort, and run through to find breakpoints for equi-depth bins
        Arrays.sort(column);

        double binIndex = 0;
        double targetBinDepth = (double)totalNumWindows / (double)alphabetSize; 

        for (int bp = 0; bp < alphabetSize-1; ++bp) {
            binIndex += targetBinDepth;
            breakpoints[letter][bp] = column[(int)binIndex];
        }

        breakpoints[letter][alphabetSize-1] = Double.MAX_VALUE; //last one can always = infinity
    }

    return breakpoints;
}
 
Example 29
Source Project: tsml   Source File: XMeans.java    License: GNU General Public License v3.0
/**
 * Splits a center within its region. Generates a random vector of
 * length = variance and adds it to and subtracts it from the center
 * vector to obtain two new cluster centers.
 * 
 * @param random random function
 * @param center the center that is split here
 * @param variance variance of the cluster 
 * @param model the data model
 * @return a pair of new centers
 * @throws Exception something in AlgVector goes wrong
 */
protected Instances splitCenter(Random random,
	        Instance center,
	        double variance,
	        Instances model) throws Exception {
  m_NumSplits++;
  AlgVector r = null;
  Instances children = new Instances(model, 2);

  if (m_DebugVectorsFile.exists() && m_DebugVectorsFile.isFile()) {
    Instance nextVector = getNextDebugVectorsInstance(model);
    PFD(D_RANDOMVECTOR, "Random Vector from File " + nextVector);
    r = new AlgVector(nextVector);
  }
  else {
    // random vector of length = variance
    r = new AlgVector(model, random);
  }
  r.changeLength(Math.pow(variance, 0.5));
  PFD(D_RANDOMVECTOR, "random vector *variance "+ r);
  
  // add random vector to center
  AlgVector c = new AlgVector(center);
  AlgVector c2 = (AlgVector) c.clone();
  c = c.add(r);
  Instance newCenter = c.getAsInstance(model, random);
  children.add(newCenter);
  PFD(D_FOLLOWSPLIT, "first child "+ newCenter);
  
  // subtract random vector from center
  c2 = c2.substract(r);
  newCenter = c2.getAsInstance(model, random);
  children.add(newCenter);
  PFD(D_FOLLOWSPLIT, "second child "+ newCenter);

  return children;
}
 
Example 30
Source Project: tsml   Source File: Reciprocal.java    License: GNU General Public License v3.0
public Instances transform(Instances data) {
        // Not ideal, should call a method to get this
        int responsePos = data.numAttributes() - 1;
        double[] response = data.attributeToDoubleArray(responsePos);
        // Find the min value
        double min = response[0];
        for (int i = 0; i < response.length; i++) {
                if (response[i] < min)
                        min = response[i];
        }
        if (min <= zeroOffset) {   // Can't take the reciprocal of a zero or negative value, so offset
                offSet = -min + zeroOffset;
        }
        else
                offSet = 0;
        System.out.println(" Min value = " + min + " offset = " + offSet);

        for (int i = 0; i < data.numInstances(); i++) {
            Instance t = data.instance(i);
            double resp = t.value(responsePos);
            System.out.print(i + " " + resp);
            resp = 1 / (resp + offSet);
            System.out.println(" " + resp);
            t.setValue(responsePos, resp);
        }
        return data;
    }