Java Code Examples for weka.core.Instances#stringFreeStructure()

The following examples show how to use weka.core.Instances#stringFreeStructure(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: Filter.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Installs the format of the instances this filter will output. Derived
 * classes call this once they have determined their output format.
 * A non-null format is stored as the result of
 * <code>stringFreeStructure()</code> and its relation name is extended
 * with "-", this filter's class name and, when the filter is an
 * OptionHandler, each of its trimmed options. The output queue is
 * replaced by an empty one in every case.
 *
 * @param outputFormat the new output format; null clears the stored format
 */
protected void setOutputFormat(Instances outputFormat) {

  if (outputFormat == null) {
    m_OutputFormat = null;
  } else {
    m_OutputFormat = outputFormat.stringFreeStructure();
    initOutputLocators(m_OutputFormat, null);

    // Assemble the new relation name:
    // <old name>-<filter class name><option 1><option 2>...
    StringBuilder renamed = new StringBuilder();
    renamed.append(outputFormat.relationName());
    renamed.append("-");
    renamed.append(this.getClass().getName());
    if (this instanceof OptionHandler) {
      for (String option : ((OptionHandler) this).getOptions()) {
        renamed.append(option.trim());
      }
    }
    m_OutputFormat.setRelationName(renamed.toString());
  }

  // The queue is reset whether or not a format was supplied
  m_OutputQueue = new Queue();
}
 
Example 2
Source File: FilteredAttributeEval.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the wrapped attribute evaluator on a filtered version of the
 * training data. The filter is only accepted if it leaves the number,
 * names/order of the attributes and the class index untouched.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the data cannot be handled, the filter alters
 * the attribute layout or class index, or the evaluator cannot be built
 */
public void buildEvaluator(Instances data) throws Exception {
  // can this evaluator handle the raw data?
  getCapabilities().testWithFail(data);

  // Keep an empty copy of the unfiltered structure for comparison
  Instances header = new Instances(data, 0);

  m_filter.setInputFormat(data);
  Instances filtered = Filter.useFilter(data, m_filter);

  // The filter may neither add nor remove attributes
  if (filtered.numAttributes() != header.numAttributes()) {
    throw new Exception("Filter must not alter the number of attributes in the data!");
  }

  // If a class was set on the original data, it must still sit at the same index
  int classIdx = header.classIndex();
  if (classIdx >= 0 && filtered.classIndex() != classIdx) {
    throw new Exception("Filter must not change the class attribute!");
  }

  // Attribute names must match position by position
  int attCount = header.numAttributes();
  for (int att = 0; att < attCount; att++) {
    String filteredName = filtered.attribute(att).name();
    if (!filteredName.equals(header.attribute(att).name())) {
      throw new Exception("Filter must not alter the order of the attributes!");
    }
  }

  // can the wrapped evaluator handle the filtered data?
  ((ASEvaluation) getAttributeEvaluator()).getCapabilities().testWithFail(filtered);
  m_filteredInstances = filtered.stringFreeStructure();

  ((ASEvaluation) m_evaluator).buildEvaluator(filtered);
}
 
Example 3
Source File: FilteredSubsetEval.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the wrapped subset evaluator on a filtered version of the
 * training data. The data is first passed through the configured filter;
 * the result is rejected if the filter changed the attribute count, the
 * attribute names at any position, or the class index.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the data cannot be handled, the filter alters
 * the attribute layout or class index, or the evaluator cannot be built
 */
public void buildEvaluator(Instances data) throws Exception {
  // can this evaluator handle the incoming data?
  getCapabilities().testWithFail(data);

  // Snapshot of the structure before filtering
  Instances unfilteredHeader = new Instances(data, 0);

  m_filter.setInputFormat(data);
  Instances filteredData = Filter.useFilter(data, m_filter);

  // Same number of attributes before and after?
  int expectedAtts = unfilteredHeader.numAttributes();
  if (filteredData.numAttributes() != expectedAtts) {
    throw new Exception("Filter must not alter the number of attributes in the data!");
  }

  // Class index (if one was set) must be preserved
  boolean hadClass = unfilteredHeader.classIndex() >= 0;
  if (hadClass && filteredData.classIndex() != unfilteredHeader.classIndex()) {
    throw new Exception("Filter must not change the class attribute!");
  }

  // Same attribute name at every position?
  for (int pos = 0; pos < unfilteredHeader.numAttributes(); pos++) {
    String before = unfilteredHeader.attribute(pos).name();
    String after = filteredData.attribute(pos).name();
    if (!after.equals(before)) {
      throw new Exception("Filter must not alter the order of the attributes!");
    }
  }

  // can the wrapped subset evaluator handle the filtered data?
  ((ASEvaluation) getSubsetEvaluator()).getCapabilities().testWithFail(filteredData);
  m_filteredInstances = filteredData.stringFreeStructure();

  ((ASEvaluation) m_evaluator).buildEvaluator(filteredData);
}
 
Example 4
Source File: MultiInstanceToPropositional.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the format of the input instances and derives the propositional
 * output format from it. The output structure is the structure of the
 * relation held by attribute 1, with a copy of attribute 0 (the bag
 * index) inserted in front and a copy of the class attribute appended
 * as the new class.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure; attribute 1 must be relation-valued
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if attribute 1 is not relational or the input
 * format can't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  if (instanceInfo.attribute(1).type() != Attribute.RELATIONAL) {
    throw new Exception("Can only handle relational-valued attribute!");
  }
  super.setInputFormat(instanceInfo);

  // Count the bags and the total number of instances inside them
  m_NumBags = instanceInfo.numInstances();
  m_NumInstances = 0;
  for (int bag = 0; bag < m_NumBags; bag++) {
    m_NumInstances += instanceInfo.instance(bag).relationalValue(1).numInstances();
  }

  Attribute classCopy = (Attribute) instanceInfo.classAttribute().copy();
  Attribute bagIdCopy = (Attribute) instanceInfo.attribute(0).copy();

  /* create a new output format (propositional instance format) */
  Instances propositional = instanceInfo.attribute(1).relation().stringFreeStructure();
  propositional.insertAttributeAt(bagIdCopy, 0);
  propositional.insertAttributeAt(classCopy, propositional.numAttributes());
  propositional.setClassIndex(propositional.numAttributes() - 1);

  super.setOutputFormat(propositional.stringFreeStructure());

  // Locators are built over fresh copies of the bag relation's structure
  m_BagStringAtts =
    new StringLocator(instanceInfo.attribute(1).relation().stringFreeStructure());
  m_BagRelAtts =
    new RelationalLocator(instanceInfo.attribute(1).relation().stringFreeStructure());

  return true;
}
 
Example 5
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the format of the input instances and builds the multi-instance
 * output format: a copy of attribute 0 (the bag index), a
 * relation-valued attribute named "bag" holding the remaining
 * attributes, and a copy of the class attribute as the class.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure; attribute 0 must be nominal
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if attribute 0 is not nominal or the input
 * format can't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  if (instanceInfo.attribute(0).type() != Attribute.NOMINAL) {
    throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
  }
  super.setInputFormat(instanceInfo);

  /* create a new output format (multi-instance format) */
  Instances bagStructure = instanceInfo.stringFreeStructure();
  Attribute bagIdCopy = (Attribute) bagStructure.attribute(0).copy();
  Attribute classCopy = (Attribute) bagStructure.classAttribute().copy();

  // strip the bag index (first attribute) ...
  bagStructure.deleteAttributeAt(0);
  // ... then unset the class and strip the last attribute
  bagStructure.setClassIndex(-1);
  bagStructure.deleteAttributeAt(bagStructure.numAttributes() - 1);

  // bag index, relation-valued bag, class
  FastVector attInfo = new FastVector(3);
  attInfo.addElement(bagIdCopy);
  attInfo.addElement(new Attribute("bag", bagStructure));
  attInfo.addElement(classCopy);

  Instances multiInstance = new Instances("Multi-Instance-Dataset", attInfo, 0);
  multiInstance.setClassIndex(multiInstance.numAttributes() - 1);

  super.setOutputFormat(multiInstance.stringFreeStructure());

  Instances bagRelation = multiInstance.attribute(1).relation();
  m_BagStringAtts = new StringLocator(bagRelation);
  m_BagRelAtts    = new RelationalLocator(bagRelation);

  return true;
}
 
Example 6
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Signify that this batch of input to the filter is finished. 
 * The buffered propositional instances are sorted on attribute 0 (the
 * bag index) and every run of instances sharing a bag index is collected
 * into one bag, which is handed to addBag together with the class value
 * of the run's first instance and the summed instance weights. The
 * resulting bags are optionally randomized and pushed to the output
 * queue. A batch without any buffered instances produces no bags.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  Instances input = getInputFormat();
  input.sort(0);   // make sure that bagID is sorted
  Instances output = getOutputFormat();
  Instances bagInsts = output.attribute(1).relation();
  // Scratch instance that is refilled for every input row
  Instance inst = new DenseInstance(bagInsts.numAttributes());
  inst.setDataset(bagInsts);

  // The bag state is seeded from the first instance, so the conversion
  // can only run when at least one instance was buffered.
  if (input.numInstances() > 0) {
    double bagIndex   = input.instance(0).value(0);
    double classValue = input.instance(0).classValue(); 
    double bagWeight  = 0.0;

    // Convert pending input instances
    for(int i = 0; i < input.numInstances(); i++) {
      double currentBagIndex = input.instance(i).value(0);

      // copy the propositional instance value, except the bagIndex and the class value
      for (int j = 0; j < input.numAttributes() - 2; j++) 
        inst.setValue(j, input.instance(i).value(j + 1));
      inst.setWeight(input.instance(i).weight());

      if (currentBagIndex == bagIndex){
        // still inside the current bag
        bagInsts.add(inst);
        bagWeight += inst.weight();
      }
      else{
        // bag index changed: emit the finished bag and start a new one
        addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

        bagInsts   = bagInsts.stringFreeStructure();  
        bagInsts.add(inst);
        bagIndex   = currentBagIndex;
        classValue = input.instance(i).classValue();
        bagWeight  = inst.weight();
      }
    }

    // reach the last instance, create and add the last bag
    addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);
  }

  if (getRandomize())
    output.randomize(new Random(getSeed()));
  
  for (int i = 0; i < output.numInstances(); i++)
    push(output.instance(i));
  
  // Free memory
  flushInput();

  m_NewBatch = true;
  m_FirstBatchDone = true;
  
  return (numPendingOutput() != 0);
}
 
Example 7
Source File: MauiWrapper.java    From maui-2 with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Main method to extract the main topics from a given text.
 * The text is wrapped in a single-instance dataset, fed to the
 * extraction model, and the model's output instances are placed by
 * their rank value (rank 1 goes to position 0). Output instances whose
 * rank falls outside 1..topicsPerDocument are ignored.
 *
 * @param text the document text; must be at least 5 characters long
 * @param topicsPerDocument the maximum number of topics to return
 * @return the extracted topics in rank order; may contain fewer than
 *         topicsPerDocument entries
 * @throws Exception if the text is shorter than 5 characters, or
 *         propagated from the extraction model
 */
public ArrayList<String> extractTopicsFromText(String text, int topicsPerDocument) throws Exception {

	if (text.length() < 5) {
		throw new Exception("Text is too short!");
	}

	extractionModel.setWikipedia("");

	// Dataset structure: three string attributes (filename, doc, keyphrases)
	FastVector atts = new FastVector(3);
	atts.addElement(new Attribute("filename", (FastVector) null));
	atts.addElement(new Attribute("doc", (FastVector) null));
	atts.addElement(new Attribute("keyphrases", (FastVector) null));
	Instances data = new Instances("keyphrase_training_data", atts, 0);

	double[] newInst = new double[3];

	newInst[0] = (double) data.attribute(0).addStringValue("inputFile");
	newInst[1] = (double) data.attribute(1).addStringValue(text);
	newInst[2] = Instance.missingValue();
	data.add(new Instance(1.0, newInst));

	extractionModel.input(data.instance(0));

	data = data.stringFreeStructure();
	Instance[] topRankedInstances = new Instance[topicsPerDocument];
	Instance inst;

	// Iterating over all extracted keyphrases (inst)
	while ((inst = extractionModel.output()) != null) {

		// rank values are turned into zero-based array positions
		int index = (int) inst.value(extractionModel.getRankIndex()) - 1;

		// Guard both bounds: a rank below 1 would otherwise produce a
		// negative index and fail the array store.
		if (index >= 0 && index < topicsPerDocument) {
			topRankedInstances[index] = inst;
		}
	}

	ArrayList<String> topics = new ArrayList<String>();

	// Collect the topic strings in rank order, skipping unfilled ranks
	for (int i = 0; i < topicsPerDocument; i++) {
		if (topRankedInstances[i] != null) {
			String topic = topRankedInstances[i].stringValue(extractionModel
					.getOutputFormIndex());
		
			topics.add(topic);
		}
	}
	extractionModel.batchFinished();
	return topics;
}
 
Example 8
Source File: Filter.java    From tsml with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Sets the format of the input instances. This default implementation
 * validates the format, stores the result of
 * <code>stringFreeStructure()</code> as the input format, clears the
 * output format, replaces the output queue with an empty one, and marks
 * the start of a new (first) batch. Overriders should call
 * <code>super.setInputFormat(Instances)</code>
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true if the outputFormat may be collected immediately; this
 * implementation always returns false
 * @throws Exception if the inputFormat can't be set successfully 
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  // Reject unsupported formats before touching any state
  testInputFormat(instanceInfo);

  Instances structure = instanceInfo.stringFreeStructure();
  m_InputFormat = structure;

  // Forget everything derived from a previous input format
  m_OutputFormat = null;
  m_OutputQueue = new Queue();

  // A fresh format starts a new batch, and no batch has completed yet
  m_NewBatch = true;
  m_FirstBatchDone = false;

  initInputLocators(structure, null);
  return false;
}