weka.core.SparseInstance Java Examples

The following examples show how to use weka.core.SparseInstance. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Model.java    From AIDR with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes a word set as a sparse instance over {@code attributeSpecification}.
 * Every word that has a matching attribute gets the value 1; afterwards
 * {@code replaceMissingValues(missingVal)} is applied to the instance.
 *
 * @param words the word set to encode
 * @return the encoded instance, attached to {@code attributeSpecification}
 */
Instance wordsToInstance(WordSet words) {
    final int attributeCount = attributeSpecification.numAttributes();
    Instance encoded = new SparseInstance(attributeCount);
    encoded.setDataset(attributeSpecification);

    for (String token : words.getWords()) {
        Attribute match = attributeSpecification.attribute(token);
        if (match == null) {
            // no attribute carries this word's name; nothing to set
            continue;
        }
        encoded.setValue(match, 1);
    }

    encoded.replaceMissingValues(missingVal);
    return encoded;
}
 
Example #2
Source File: TweetPreprocessor.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a one-row dataset from the values returned by
 * {@code lp.getProcessed(tweet, tagger)} and stores a copy of it in
 * {@code lexicon_instances}.
 */
private void setLexiconInstances(){
    ArrayList<String> sentimentLabels = new ArrayList<String>();
    sentimentLabels.add("positive");
    sentimentLabels.add("negative");

    // six numeric attributes followed by the nominal class attribute
    ArrayList<Attribute> schema = new ArrayList<Attribute>(6);
    String[] numericNames = {"verb", "noun", "adj", "adv", "wordnet", "polarity"};
    for (String numericName : numericNames) {
        schema.add(new Attribute(numericName));
    }
    schema.add(new Attribute("sentimentClassAttribute", sentimentLabels));

    Instances lexiconRows = new Instances("TextInstances", schema, 0);
    double[] featureValues = lp.getProcessed(tweet, tagger);
    lexiconRows.add(new SparseInstance(1.0, featureValues));

    lexicon_instances = new Instances(lexiconRows);
}
 
Example #3
Source File: TweetPreprocessor.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the one-row "complex" dataset and stores a copy of it in
 * {@code complex_instances}.
 *
 * @param processed_text text passed to {@code cp.getProcessed}
 * @return the string returned by {@code cp.getProcessed}
 */
private String getComplexInstances(String processed_text){
    ArrayList<String> labels = new ArrayList<String>();
    labels.add("positive");
    labels.add("negative");

    // attribute 0: nominal class, attribute 1: string attribute holding the text
    ArrayList<Attribute> schema = new ArrayList<Attribute>(2);
    schema.add(new Attribute("sentimentClassAttribute", labels));
    schema.add(new Attribute("text", (ArrayList<String>) null));

    Instances rows = new Instances("TextInstances", schema, 0);
    double[] rowValues = new double[rows.numAttributes()];
    String complexText = cp.getProcessed(processed_text, tagger);
    rowValues[1] = rows.attribute(1).addStringValue(complexText);
    rows.add(new SparseInstance(1.0, rowValues));

    complex_instances = new Instances(rows);
    return complexText;
}
 
Example #4
Source File: TweetPreprocessor.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the one-row text dataset for the current tweet and stores a copy
 * of it in {@code text_instances}.
 *
 * @return the string returned by {@code tp.getProcessed(tweet)}
 */
private String getTextInstances(){
    ArrayList<String> polarityLabels = new ArrayList<String>();
    polarityLabels.add("positive");
    polarityLabels.add("negative");

    ArrayList<Attribute> columns = new ArrayList<Attribute>(2);
    columns.add(new Attribute("sentimentClassAttribute", polarityLabels));
    // a null value list makes "text" a string attribute
    columns.add(new Attribute("text", (ArrayList<String>) null));

    Instances holder = new Instances("TextInstances", columns, 0);
    double[] cells = new double[holder.numAttributes()];
    String processedTweet = tp.getProcessed(tweet);
    cells[1] = holder.attribute(1).addStringValue(processedTweet);
    holder.add(new SparseInstance(1.0, cells));

    text_instances = new Instances(holder);
    return processedTweet;
}
 
Example #5
Source File: CLOPE.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds the items of an instance to this cluster, then refreshes
 * {@code W} from {@code occ.size()} and increments {@code N}.
 *
 * @param inst the instance whose items are added
 */
public void AddInstance(Instance inst) {
  if (!(inst instanceof SparseInstance)) {
    // dense instance: one item per non-missing attribute, keyed by
    // attribute index concatenated with the printed value
    for (int att = 0; att < inst.numAttributes(); att++) {
      if (inst.isMissing(att)) {
        continue;
      }
      AddItem(att + inst.toString(att));
    }
  } else {
    // sparse instance: one item per stored value, keyed by attribute index
    for (int pos = 0; pos < inst.numValues(); pos++) {
      AddItem(inst.index(pos));
    }
  }

  this.W = this.occ.size();
  this.N++;
}
 
Example #6
Source File: CLOPE.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Removes the items of an instance from this cluster, then refreshes
 * {@code W} from {@code occ.size()} and decrements {@code N}.
 *
 * @param inst the instance whose items are removed
 */
public void DeleteInstance(Instance inst) {
  if (!(inst instanceof SparseInstance)) {
    // dense instance: items are keyed by attribute index + printed value
    for (int att = 0; att < inst.numAttributes(); att++) {
      if (inst.isMissing(att)) {
        continue;
      }
      DeleteItem(att + inst.toString(att));
    }
  } else {
    // sparse instance: items are keyed by attribute index
    for (int pos = 0; pos < inst.numValues(); pos++) {
      DeleteItem(inst.index(pos));
    }
  }

  this.W = this.occ.size();
  this.N--;
}
 
Example #7
Source File: FPGrowth.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Increases the frequency of every singleton item that the given instance
 * contains.
 *
 * @param current the instance to scan
 * @param singletons the singleton items, indexed by attribute index
 * @throws Exception declared by the signature
 */
private void processSingleton(Instance current, 
    ArrayList<BinaryItem> singletons) throws Exception {

  if (current instanceof SparseInstance) {
    // every stored value of a sparse instance counts as an occurrence
    for (int pos = 0; pos < current.numValues(); pos++) {
      singletons.get(current.index(pos)).increaseFrequency();
    }
    return;
  }

  for (int att = 0; att < current.numAttributes(); att++) {
    if (current.isMissing(att)) {
      continue;
    }
    // counts when the attribute has a single value or holds the positive index
    boolean counts = current.attribute(att).numValues() == 1
        || current.value(att) == m_positiveIndex - 1;
    if (counts) {
      singletons.get(att).increaseFrequency();
    }
  }
}
 
Example #8
Source File: SentimentAnalyser.java    From sentiment-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies a tweet with a NaiveBayesMultinomial model trained on the
 * current contents of {@code train}. The tweet is appended to {@code train}
 * for filtering and removed again before returning.
 *
 * @param tweet the text to classify
 * @return "positive" or "negative", or an empty string if an exception occurred
 */
private String clarifyOnSlidingWindow(String tweet){
    double[] rowValues = new double[train.numAttributes()];
    rowValues[0] = train.attribute(0).addStringValue(tweet);
    train.add(new SparseInstance(1.0, rowValues));

    String verdict = "";
    try {
        stwv.setInputFormat(train);
        Instances vectorised = Filter.useFilter(train, stwv);

        // all rows but the last are training data; the last row is the tweet
        Instances trainingPart = new Instances(vectorised, 0, train.size() - 1);
        Instances queryPart = new Instances(vectorised, train.size() - 1, 1);

        Classifier mnb = (Classifier) new NaiveBayesMultinomial();
        mnb.buildClassifier(trainingPart);
        double[] distribution = mnb.distributionForInstance(queryPart.get(0));
        verdict = (distribution[0] > 0.5) ? "positive" : "negative";
    } catch (Exception e) {
        e.printStackTrace();
    }

    // drop the temporarily appended tweet again
    train.remove(train.numInstances() - 1);
    return verdict;
}
 
Example #9
Source File: PartitionMembership.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts one instance into its partition-membership representation and
 * passes the result to {@code push}.
 *
 * @param instance the instance to convert
 * @throws Exception if something goes wrong
 */
protected void convertInstance(Instance instance) throws Exception {

  // membership values are computed on a copy whose weight is one
  Instance unitWeightCopy = (Instance) instance.copy();
  unitWeightCopy.setWeight(1.0);

  double[] outputVals = new double[outputFormatPeek().numAttributes()];
  double[] membership = m_partitionGenerator.getMembershipValues(unitWeightCopy);
  System.arraycopy(membership, 0, outputVals, 0, membership.length);

  // when a class is set, its value goes into the last output slot
  boolean hasClass = instance.classIndex() >= 0;
  if (hasClass) {
    outputVals[outputVals.length - 1] = instance.classValue();
  }

  // the output keeps the weight of the original instance
  Instance converted = new SparseInstance(instance.weight(), outputVals);
  push(converted);
}
 
Example #10
Source File: WekaHierarchicalClustering2.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Wraps rows of (x, y) pairs in a Weka dataset with the numeric attributes
 * "X" and "Y".
 *
 * @param data rows whose first two elements are read as x and y
 * @return the populated dataset
 */
private static Instances load(double[][] data) {
    ArrayList<Attribute> axes = new ArrayList<Attribute>();
    axes.add(new Attribute("X"));
    axes.add(new Attribute("Y"));

    Instances points = new Instances("Dataset", axes, M);
    for (int row = 0; row < data.length; row++) {
        Instance point = new SparseInstance(2);
        point.setValue(0, data[row][0]);
        point.setValue(1, data[row][1]);
        points.add(point);
    }
    return points;
}
 
Example #11
Source File: ClusteringTask.java    From mzmine3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the weka data set for clustering of variables (metabolites).
 * One numeric attribute is created per selected raw data file; when the
 * clustering module is {@code HierarClusterer}, a trailing string attribute
 * "name" holds a label built from the row's average m/z and retention time.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // The clusterer type does not change while the data set is built.
  final boolean hierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (hierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  // The formatted text is parsed back with Double.valueOf, which only accepts
  // '.' as decimal separator. Pinning the symbols to Locale.ROOT keeps the
  // round trip working on default locales that format decimals with ','.
  final DecimalFormat twoDForm = new DecimalFormat("#.##",
      java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

  for (int i = 0; i < selectedRows.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);

    if (hierarchical) {
      double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
      double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
      String rowName = "MZ->" + MZ + "/RT->" + RT;
      // the string attribute is always the last one
      values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
    }
    Instance inst = new SparseInstance(1.0, values);
    data.add(inst);
  }
  return data;
}
 
Example #12
Source File: ClusteringTask.java    From mzmine2 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the weka data set for clustering of samples. One numeric
 * attribute is created per column of {@code rawData}; when the clustering
 * module is {@code HierarClusterer}, a trailing string attribute "name"
 * holds the raw data file name of each sample.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  final int variableCount = rawData[0].length;

  FastVector schema = new FastVector();
  for (int v = 0; v < variableCount; v++) {
    schema.addElement(new Attribute("Var" + v));
  }
  if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
    schema.addElement(new Attribute("name", (FastVector) null));
  }

  Instances dataset = new Instances("Dataset", schema, 0);
  for (int sample = 0; sample < rawData.length; sample++) {
    double[] row = new double[dataset.numAttributes()];
    System.arraycopy(rawData[sample], 0, row, 0, variableCount);

    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
      // the string attribute is always the last one
      String sampleName = this.selectedRawDataFiles[sample].getName();
      row[dataset.numAttributes() - 1] = dataset.attribute("name").addStringValue(sampleName);
    }
    dataset.add(new SparseInstance(1.0, row));
  }
  return dataset;
}
 
Example #13
Source File: ClusteringTask.java    From mzmine2 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the weka data set for clustering of variables (metabolites).
 * One numeric attribute is created per selected raw data file; when the
 * clustering module is {@code HierarClusterer}, a trailing string attribute
 * "name" holds a label built from the row's average m/z and retention time.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();

  for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
    attributes.addElement(new Attribute("Var" + i));
  }

  // The clusterer type does not change while the data set is built.
  final boolean hierarchical =
      clusteringStep.getModule().getClass().equals(HierarClusterer.class);
  if (hierarchical) {
    attributes.addElement(new Attribute("name", (FastVector) null));
  }
  Instances data = new Instances("Dataset", attributes, 0);

  // Double.valueOf only parses '.' as decimal separator, so the formatter
  // must not follow the default locale (which may emit ','). Locale.ROOT
  // symbols make the format/parse round trip locale-independent.
  final DecimalFormat twoDForm = new DecimalFormat("#.##",
      java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

  for (int i = 0; i < selectedRows.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);

    if (hierarchical) {
      double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
      double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
      String rowName = "MZ->" + MZ + "/RT->" + RT;
      // the string attribute is always the last one
      values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
    }
    Instance inst = new SparseInstance(1.0, values);
    data.add(inst);
  }
  return data;
}
 
Example #14
Source File: NutchOnlineClassifier.java    From anthelion with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}. Besides the fixed attributes, every token of
 * the URI's path, query and fragment switches on the attribute selected by
 * its hash.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} when
 *         {@code url} is {@code null}.
 */
private static Instance convert(AnthURL url) {
	if (url == null) {
		System.out.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	Instance converted = new SparseInstance(dimension);
	converted.replaceMissingValues(replaceMissingValues);
	converted.setDataset(instances);

	converted.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
	converted.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
	converted.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	converted.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
	// NOTE(review): "nonsempar" is written a second time with the same value;
	// possibly a different attribute was intended here - confirm.
	converted.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	converted.setValue(attributesIndex.get("domain"), url.uri.getHost());

	// a set, so each distinct token is handled once
	Set<String> uniqueTokens = new HashSet<String>();
	uniqueTokens.addAll(tokenizer(url.uri.getPath()));
	uniqueTokens.addAll(tokenizer(url.uri.getQuery()));
	uniqueTokens.addAll(tokenizer(url.uri.getFragment()));

	for (String token : uniqueTokens) {
		String hashedName = getAttributeNameOfHash(getHash(token, hashTrickSize));
		converted.setValue(attributesIndex.get(hashedName), 1);
	}
	return converted;
}
 
Example #15
Source File: NutchOnlineClassifier.java    From anthelion with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}. Fixed attributes are filled from the URL's
 * flags and host; tokens of the URI's path, query and fragment are mapped
 * to attributes through their hash.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} when
 *         {@code url} is {@code null}.
 */
private static Instance convert(AnthURL url) {
	// guard clause: nothing to convert
	if (url == null) {
		System.out.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	Instance result = new SparseInstance(dimension);
	result.replaceMissingValues(replaceMissingValues);
	result.setDataset(instances);

	result.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
	result.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
	result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	result.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
	// NOTE(review): second write of "nonsempar" with an identical value;
	// looks like another attribute may have been meant - confirm.
	result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	result.setValue(attributesIndex.get("domain"), url.uri.getHost());

	Set<String> distinctTokens = new HashSet<String>();
	distinctTokens.addAll(tokenizer(url.uri.getPath()));
	distinctTokens.addAll(tokenizer(url.uri.getQuery()));
	distinctTokens.addAll(tokenizer(url.uri.getFragment()));

	for (String tok : distinctTokens) {
		// the token's hash selects which attribute is set to 1
		String attributeName = getAttributeNameOfHash(getHash(tok, hashTrickSize));
		result.setValue(attributesIndex.get(attributeName), 1);
	}
	return result;
}
 
Example #16
Source File: SentimentAnalyser.java    From sentiment-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Classifies a tweet with the previously built {@code multiNB} model.
 * The tweet is added to {@code test} for filtering, and the instance at
 * index 0 of {@code test} is removed before returning.
 *
 * @param tweet the text to classify
 * @return "light positive" or "light negative", or an empty string if an
 *         exception occurred
 */
private String clarifyOnModel(String tweet){
    // text-based representation of the document
    double[] rowValues = new double[2];
    rowValues[0] = test.attribute(0).addStringValue(tweet);
    test.add(new SparseInstance(1.0, rowValues));

    String verdict = "";
    try {
        stwv.setInputFormat(test);
        Instances vectorised = Filter.useFilter(test, stwv);

        // reformatText makes the attributes compatible with the training set
        Instances aligned = reformatText(vectorised);
        aligned.setClassIndex(0);

        double[] distribution = multiNB.distributionForInstance(aligned.get(0));
        verdict = (distribution[0] > 0.5) ? "light positive" : "light negative";
    } catch (Exception e) {
        e.printStackTrace();
    }

    test.remove(0);
    return verdict;
}
 
Example #17
Source File: TweetPreprocessor.java    From sentiment-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the one-row feature dataset from {@code fp.getProcessed(tweet)}
 * and stores a copy of it in {@code feature_instances}.
 */
private void getFeatureInstances(){
    ArrayList<String> labels = new ArrayList<String>();
    labels.add("positive");
    labels.add("negative");

    ArrayList<Attribute> schema = new ArrayList<Attribute>(2);
    schema.add(new Attribute("sentimentClassAttribute", labels));
    // a null value list makes "text" a string attribute
    schema.add(new Attribute("text", (ArrayList<String>) null));

    Instances rows = new Instances("TextInstances", schema, 0);
    double[] rowValues = new double[rows.numAttributes()];
    Attribute textAttribute = rows.attribute(1);
    rowValues[1] = textAttribute.addStringValue(fp.getProcessed(tweet));
    rows.add(new SparseInstance(1.0, rowValues));

    feature_instances = new Instances(rows);
}
 
Example #18
Source File: KMeans.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Builds a two-attribute ("X", "Y") Weka dataset from an array of points.
 *
 * @param data rows whose first two elements are read as x and y
 * @return the populated dataset
 */
private static Instances load(double[][] data) {
    ArrayList<Attribute> columns = new ArrayList<Attribute>();
    for (String axis : new String[] {"X", "Y"}) {
        columns.add(new Attribute(axis));
    }

    Instances result = new Instances("Dataset", columns, M);
    for (double[] xy : data) {
        Instance row = new SparseInstance(2);
        for (int axis = 0; axis < 2; axis++) {
            row.setValue(axis, xy[axis]);
        }
        result.add(row);
    }
    return result;
}
 
Example #19
Source File: ClusteringTask.java    From mzmine3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the weka data set for clustering of samples. Each column of
 * {@code rawData} becomes a numeric attribute; for the
 * {@code HierarClusterer} module an extra string attribute "name" carries
 * the raw data file name of the sample.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  final int columnCount = rawData[0].length;

  FastVector attributeList = new FastVector();
  for (int col = 0; col < columnCount; col++) {
    attributeList.addElement(new Attribute("Var" + col));
  }
  if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
    attributeList.addElement(new Attribute("name", (FastVector) null));
  }

  Instances result = new Instances("Dataset", attributeList, 0);
  for (int row = 0; row < rawData.length; row++) {
    double[] cells = new double[result.numAttributes()];
    System.arraycopy(rawData[row], 0, cells, 0, columnCount);

    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
      // last slot belongs to the "name" string attribute
      String fileName = this.selectedRawDataFiles[row].getName();
      cells[result.numAttributes() - 1] = result.attribute("name").addStringValue(fileName);
    }
    result.add(new SparseInstance(1.0, cells));
  }
  return result;
}
 
Example #20
Source File: BagOfPatterns.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies an instance by converting it to its bag-of-patterns histogram
 * and passing that to {@code knn}.
 *
 * @param instance the instance to classify
 * @return the value returned by {@code knn.classifyInstance}
 * @throws Exception if the conversion or the classification fails
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    // bag-of-patterns histogram of the incoming instance
    double[] histogram = bop.bagToArray(bop.buildBag(instance));

    // a one-slot dataset with matrix's header gives the new instance its attribute data
    Instances holder = new Instances(matrix, 1);
    Instance asSparse = new SparseInstance(1.0, histogram);
    holder.add(asSparse);

    return knn.classifyInstance(holder.firstInstance());
}
 
Example #21
Source File: BagOfPatterns.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the class distribution of an instance by converting it to its
 * bag-of-patterns histogram and passing that to {@code knn}.
 *
 * @param instance the instance to evaluate
 * @return the array returned by {@code knn.distributionForInstance}
 * @throws Exception if the conversion or the evaluation fails
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // bag-of-patterns histogram of the incoming instance
    double[] histogram = bop.bagToArray(bop.buildBag(instance));

    // a one-slot dataset with matrix's header gives the new instance its attribute data
    Instances holder = new Instances(matrix, 1);
    Instance asSparse = new SparseInstance(1.0, histogram);
    holder.add(asSparse);

    return knn.distributionForInstance(holder.firstInstance());
}
 
Example #22
Source File: FPGrowth.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Inserts a single instance into the FPTree. Only items whose singleton
 * frequency reaches {@code minSupport} take part; they are sorted and
 * added to the tree as one item set with count 1.
 * 
 * @param current the instance to insert
 * @param singletons the singleton item sets
 * @param tree the tree to insert into
 * @param minSupport the minimum support threshold
 */
private void insertInstance(Instance current, ArrayList<BinaryItem> singletons, 
    FPTreeRoot tree, int minSupport) {
  ArrayList<BinaryItem> frequentItems = new ArrayList<BinaryItem>();

  if (current instanceof SparseInstance) {
    // every stored value of a sparse instance is a candidate item
    for (int pos = 0; pos < current.numValues(); pos++) {
      BinaryItem candidate = singletons.get(current.index(pos));
      if (candidate.getFrequency() >= minSupport) {
        frequentItems.add(candidate);
      }
    }
  } else {
    for (int att = 0; att < current.numAttributes(); att++) {
      if (current.isMissing(att)) {
        continue;
      }
      // candidate when the attribute has a single value or holds the positive index
      boolean present = current.attribute(att).numValues() == 1
          || current.value(att) == m_positiveIndex - 1;
      if (!present) {
        continue;
      }
      BinaryItem candidate = singletons.get(att);
      if (candidate.getFrequency() >= minSupport) {
        frequentItems.add(candidate);
      }
    }
  }

  // both instance kinds end the same way
  Collections.sort(frequentItems);
  tree.addItemSet(frequentItems, 1);
}
 
Example #23
Source File: PrincipalComponents.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Convert a pc transformed instance back to the original space.
 * The result is sparse when the input is a {@code SparseInstance} and
 * dense otherwise; it keeps the input's weight.
 * 
 * @param inst        the instance to convert
 * @return            the processed instance
 * @throws Exception  if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst)
  throws Exception {
  // one extra slot at the end when a class value has to be carried over
  final int width = m_hasClass ? m_numAttribs + 1 : m_numAttribs;
  double[] restored = new double[width];

  if (m_hasClass) {
    // the class is read from the last attribute of the incoming instance
    restored[m_numAttribs] = inst.value(inst.numAttributes() - 1);
  }

  final int columns = m_eTranspose[0].length;
  for (int col = 0; col < columns; col++) {
    double projected = 0.0;
    // row 0 of m_eTranspose is not used; row r pairs with input value r - 1
    for (int row = 1; row < m_eTranspose.length; row++) {
      projected += (m_eTranspose[row][col] * inst.value(row - 1));
    }
    restored[col] = projected;
    if (!m_center) {
      restored[col] *= m_stdDevs[col];
    }
    restored[col] += m_means[col];
  }

  return (inst instanceof SparseInstance)
      ? new SparseInstance(inst.weight(), restored)
      : new DenseInstance(inst.weight(), restored);
}
 
Example #24
Source File: WekaHierarchicalClustering.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Turns an array of two-dimensional points into a Weka dataset whose
 * numeric attributes are named "X" and "Y".
 *
 * @param data rows whose first two elements are read as x and y
 * @return the populated dataset
 */
private static Instances load(double[][] data) {
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(new Attribute("X"));
    header.add(new Attribute("Y"));
    Instances loaded = new Instances("Dataset", header, M);

    int index = 0;
    while (index < data.length) {
        double[] pair = data[index++];
        Instance entry = new SparseInstance(2);
        entry.setValue(0, pair[0]);
        entry.setValue(1, pair[1]);
        loaded.add(entry);
    }
    return loaded;
}
 
Example #25
Source File: PrincipalComponents.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Transform an instance in original (unnormalized) format. Convert back
 * to the original space if requested.
 * <p>
 * The instance is copied and passed through the replace-missing filter,
 * the nominal-to-binary filter, the optional attribute filter and then
 * either the standardize or the center filter before it is projected onto
 * the eigenvectors. The result is sparse when the input is a
 * {@code SparseInstance} and dense otherwise, and keeps the input's weight.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed, i.e. the principal
 *         components are not built yet or the instance's header does not
 *         match the training header
 */
public Instance convertInstance(Instance instance) throws Exception {

  if (m_eigenvalues == null) {
    throw new Exception("convertInstance: Principal components not "
                        +"built yet");
  }

  double[] newVals = new double[m_outputNumAtts];
  // work on a copy so the filters never see the caller's instance
  Instance tempInst = (Instance)instance.copy();
  if (!instance.dataset().equalHeaders(m_trainHeader)) {
    throw new Exception("Can't convert instance: header's don't match: "
                        +"PrincipalComponents\n"
                        + instance.dataset().equalHeadersMsg(m_trainHeader));
  }

  // each filter stage below follows the same input / batchFinished / output pattern
  m_replaceMissingFilter.input(tempInst);
  m_replaceMissingFilter.batchFinished();
  tempInst = m_replaceMissingFilter.output();

  /*if (m_normalize) {
    m_normalizeFilter.input(tempInst);
    m_normalizeFilter.batchFinished();
    tempInst = m_normalizeFilter.output();
  }*/

  m_nominalToBinFilter.input(tempInst);
  m_nominalToBinFilter.batchFinished();
  tempInst = m_nominalToBinFilter.output();

  // the attribute filter is optional
  if (m_attributeFilter != null) {
    m_attributeFilter.input(tempInst);
    m_attributeFilter.batchFinished();
    tempInst = m_attributeFilter.output();
  }
  
  // standardize unless centering was requested
  if (!m_center) {
    m_standardizeFilter.input(tempInst);
    m_standardizeFilter.batchFinished();
    tempInst = m_standardizeFilter.output();
  } else {
    m_centerFilter.input(tempInst);
    m_centerFilter.batchFinished();
    tempInst = m_centerFilter.output();
  }

  // the class value comes from the unfiltered instance and goes into the last slot
  if (m_hasClass) {
     newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
  }

  // walk m_sortedEigens from its last index downwards, writing one output
  // value per step, and stop once the accumulated share of
  // m_sumOfEigenValues reaches m_coverVariance
  double cumulative = 0;
  for (int i = m_numAttribs - 1; i >= 0; i--) {
    double tempval = 0.0;
    for (int j = 0; j < m_numAttribs; j++) {
      tempval += (m_eigenvectors[j][m_sortedEigens[i]] * 
                  tempInst.value(j));
     }
    newVals[m_numAttribs - i - 1] = tempval;
    cumulative+=m_eigenvalues[m_sortedEigens[i]];
    if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
      break;
    }
  }
  
  // the result has the same kind (sparse or dense) as the input
  if (!m_transBackToOriginal) {
    if (instance instanceof SparseInstance) {
    return new SparseInstance(instance.weight(), newVals);
    } else {
      return new DenseInstance(instance.weight(), newVals);
    }      
  } else {
    // map the transformed values back to the original space
    if (instance instanceof SparseInstance) {
      return convertInstanceToOriginal(new SparseInstance(instance.weight(), 
                                                          newVals));
    } else {
      return convertInstanceToOriginal(new DenseInstance(instance.weight(),
                                                    newVals));
    }
  }
}
 
Example #26
Source File: SAXVSM.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds one tf x idf weighted term vector per class from bag-of-patterns
 * data, treating each class as a document.
 * <p>
 * If skip = one of <0 ... numInstances-1>, the instance at that index is
 * left out of the corpus. Part of leave one out cv, while avoiding
 * unnecessary repeats of the BoP transformation.
 *
 * @param bopData the bag-of-patterns instances; the first
 *        {@code numAttributes() - 1} attributes are read as term counts
 * @param skip index of the instance to leave out
 * @return a dataset with one sparse instance per class
 */
private Instances tfxidf(Instances bopData, int skip) {
    final int numClasses = bopData.numClasses();
    final int numInstances = bopData.numInstances();
    final int numTerms = bopData.numAttributes() - 1; // minus class attribute

    // classWeights[c][t]: summed count of term t over the instances of class c
    double[][] classWeights = new double[numClasses][numTerms];
    for (int idx = 0; idx < numInstances; idx++) {
        if (idx == skip) { // leave-one-out: ignore 'this' instance
            continue;
        }
        Instance member = bopData.instance(idx);
        double[] bag = classWeights[(int) member.classValue()];
        for (int term = 0; term < numTerms; ++term) {
            bag[term] += member.value(term);
        }
    }

    for (int term = 0; term < numTerms; ++term) {
        // document frequency: number of classes in which the term occurs
        double df = 0;
        for (int cls = 0; cls < numClasses; ++cls) {
            if (classWeights[cls][term] != 0) {
                ++df;
            }
        }

        if (df == 0) {
            continue; // term occurs nowhere, weights stay zero
        }

        if (df == numClasses) {
            // occurs in every class: idf = log(N/df) = log(1) = 0, skip the logs
            for (int cls = 0; cls < numClasses; ++cls) {
                classWeights[cls][term] = 0;
            }
            continue;
        }

        double idf = Math.log(numClasses / df);
        for (int cls = 0; cls < numClasses; ++cls) {
            if (classWeights[cls][term] != 0) {
                classWeights[cls][term] = Math.log(1 + classWeights[cls][term]) * idf;
            }
        }
    }

    Instances tfxidfCorpus = new Instances(bopData, numClasses);
    for (double[] weights : classWeights) {
        tfxidfCorpus.add(new SparseInstance(1.0, weights));
    }
    return tfxidfCorpus;
}
 
Example #27
Source File: CLOPE.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Calculates the change in profit that adding the given instance to this
 * cluster would cause. The cluster itself is not modified.
 *
 * @param inst the instance that would be added
 * @param r exponent applied to the W terms
 * @return the profit delta
 */
public double DeltaAdd(Instance inst, double r) {
  int sAfter = 0;
  int wAfter = occ.size();

  if (inst instanceof SparseInstance) {
    for (int pos = 0; pos < inst.numValues(); pos++) {
      sAfter++;
      // an item without an entry in occ would add a new key
      if ((Integer) this.occ.get(inst.index(pos)) == null) {
        wAfter++;
      }
    }
  } else {
    for (int att = 0; att < inst.numAttributes(); att++) {
      if (inst.isMissing(att)) {
        continue;
      }
      sAfter++;
      if ((Integer) this.occ.get(att + inst.toString(att)) == null) {
        wAfter++;
      }
    }
  }
  sAfter += S;

  // an empty cluster has no current profit to subtract
  if (N == 0) {
    return sAfter / Math.pow(wAfter, r);
  }

  double profitBefore = S * N / Math.pow(W, r);
  double profitAfter = sAfter * (N + 1) / Math.pow(wAfter, r);
  return profitAfter - profitBefore;
}
 
Example #28
Source File: CLOPE.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Moves an instance to the cluster with the highest profit delta. The
 * instance is first removed from its current cluster; if no existing
 * cluster beats the delta of a cluster holding only this instance, a new
 * cluster is created for it.
 *
 * @param inst the instance to move
 * @return the index of the cluster the instance was added to
 */
public int MoveInstanceToBestCluster(Instance inst) {

  clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst);
  m_clusterAssignments.set(m_processed_InstanceID, -1);

  // number of items the instance contributes
  int itemCount = 0;
  if (inst instanceof SparseInstance) {
    itemCount = inst.numValues();
  } else {
    for (int att = 0; att < inst.numAttributes(); att++) {
      if (!inst.isMissing(att)) {
        itemCount++;
      }
    }
  }

  // baseline: delta of a cluster that would contain only this instance
  double bestDelta = itemCount / Math.pow(itemCount, m_Repulsion);
  int bestCluster = -1;
  for (int c = 0; c < clusters.size(); c++) {
    CLOPECluster candidate = clusters.get(c);
    double candidateDelta = candidate.DeltaAdd(inst, m_Repulsion);
    if (candidateDelta > bestDelta) {
      bestDelta = candidateDelta;
      bestCluster = c;
    }
  }

  if (bestCluster != -1) {
    clusters.get(bestCluster).AddInstance(inst);
    return bestCluster;
  }

  // no existing cluster was better than the baseline
  CLOPECluster fresh = new CLOPECluster();
  clusters.add(fresh);
  fresh.AddInstance(inst);
  return clusters.size() - 1;
}
 
Example #29
Source File: AnthOnlineClassifier.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}. Fixed attributes are filled from the URL's
 * flags and host; tokens of the URI's path, query and fragment are mapped
 * to attributes through their hash.
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} when
 *         {@code url} is {@code null} or a {@link NullPointerException}
 *         occurs during the conversion.
 */
private Instance convert(AnthURL url) {
	if (url == null) {
		System.out
				.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	try {
		Instance inst = new SparseInstance(dimension);
		inst.replaceMissingValues(replaceMissingValues);
		inst.setDataset(instances);

		inst.setValue(attributesIndex.get("class"), (url.sem ? "sem"
				: "nonsem"));
		inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1
				: 0));
		inst.setValue(attributesIndex.get("nonsempar"),
				(url.nonSemFather ? 1 : 0));
		inst.setValue(attributesIndex.get("semsib"),
				(url.semSibling ? 1 : 0));
		// NOTE(review): "nonsempar" is written a second time with the same
		// value; possibly a different attribute was intended - confirm.
		inst.setValue(attributesIndex.get("nonsempar"),
				(url.nonSemFather ? 1 : 0));
		inst.setValue(attributesIndex.get("domain"), url.uri.getHost());

		// a set, so each distinct token is handled once
		Set<String> tokens = new HashSet<String>();
		tokens.addAll(tokenizer(url.uri.getPath()));
		tokens.addAll(tokenizer(url.uri.getQuery()));
		tokens.addAll(tokenizer(url.uri.getFragment()));
		for (String tok : tokens) {
			inst.setValue(attributesIndex
					.get(getAttributeNameOfHash(getHash(tok,
							hashTrickSize))), 1);
		}
		return inst;
	} catch (NullPointerException npe) {
		// url is non-null on this path, so only the URI needs a null check
		System.out
				.println("Could not convert AnthURL into Instance for classification of URL: "
						+ (url.uri != null ? url.uri.toString()
								: "URI null"));
		return null;
	}
}
 
Example #30
Source File: ReduceDimensionFilter.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next instance from the input stream and returns its
 * reduced-dimension counterpart. The class value and the non-hashable
 * attributes are copied by name; every other attribute with a positive
 * value switches on the attribute selected by the hash of its name.
 * Note that the instance read from the stream is modified for ignored and
 * binary attributes.
 *
 * @return the reduced instance, attached to {@code newInstances}
 */
public Instance nextInstance() {
	Instance source = this.inputStream.nextInstance();

	Instance reduced = new SparseInstance(hashSize
			+ notHashableAttributes.size());
	reduced.setDataset(newInstances);
	reduced.replaceMissingValues(replacementArray);
	if (newInstances.size() > 0) {
		newInstances.remove(0);
	}

	for (int att = 0; att < source.numAttributes(); att++) {
		if (source.classIndex() == att) {
			// the class value is carried over under the class attribute's name
			reduced.setValue(
					attributesIndex.get(source.classAttribute().name()),
					source.classValue());
			continue;
		}

		// optional manipulation of the source value
		if (ignoreAttributes.contains(att)) {
			source.setValue(att, 0);
		}
		if (makeBinaryAttributes.contains(att) && source.value(att) > 0) {
			source.setValue(att, 1);
		}

		if (notHashableAttributes.contains(att)) {
			// copied unchanged under its own name
			reduced.setValue(
					attributesIndex.get(source.attribute(att).name()),
					source.value(att));
			continue;
		}

		// hashed attribute: a positive value sets the hashed slot to 1
		if (source.value(att) > 0) {
			String hashedName = getAttributeNameOfHash(getHash(source
					.attribute(att).name(), hashSize));
			reduced.setValue(attributesIndex.get(hashedName), 1);
		}
	}
	return reduced;
}