Java Code Examples for weka.core.Instance#setDataset()

The following examples show how to use weka.core.Instance#setDataset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CR.java    From meka with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Predicts each label of {@code x} with the classifier trained for that label.
 *
 * @param x the instance to classify; its class index is used as the number of labels L
 * @return an array of length 2*L: entry j holds the index of the largest value in the
 *         distribution returned for label j, entry L+j holds that largest value
 * @throws Exception if one of the base classifiers fails
 */
@Override
public double[] distributionForInstance(Instance x) throws Exception {

	final int numLabels = x.classIndex();
	final double[] result = new double[numLabels * 2];

	for (int label = 0; label < numLabels; label++) {
		// The copy is detached from its header before MLUtils.keepAttributesAt is applied,
		// and afterwards attached to the template of this label.
		Instance single = (Instance) x.copy();
		single.setDataset(null);
		single = MLUtils.keepAttributesAt(single, new int[] { label }, numLabels);
		single.setDataset(m_Templates[label]);

		final double[] dist = m_MultiClassifiers[label].distributionForInstance(single); // e.g. [0.1, 0.8, 0.1]
		final int best = Utils.maxIndex(dist); // e.g. 1
		result[label] = best;
		result[numLabels + label] = dist[best]; // e.g. 0.8
	}

	return result;
}
 
Example 2
Source File: WekaTimeseriesUtil.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Converts a double[][] matrix (number of instances x number of attributes) to
 * Weka instances without any class attribute.
 *
 * @param matrix
 *            The double[][] matrix storing all the attribute values of the
 *            instances; one row per instance. An empty matrix yields an empty
 *            dataset without attributes.
 * @return Returns the Weka Instances object consisting of all instances and the
 *         attribute values
 */
public static Instances matrixToWekaInstances(final double[][] matrix) {
	// The attribute count is taken from the first row; an empty matrix has none.
	final int numAttributes = matrix.length == 0 ? 0 : matrix[0].length;
	final ArrayList<Attribute> attributes = new ArrayList<>(numAttributes);
	for (int i = 0; i < numAttributes; i++) {
		attributes.add(new Attribute("val" + i));
	}

	final Instances wekaInstances = new Instances(I_NAME, attributes, matrix.length);
	// One instance per matrix ROW. The loop must be bounded by the row count, not by the
	// column count, otherwise rows are dropped or an index error occurs for non-square input.
	for (final double[] row : matrix) {
		final Instance inst = new DenseInstance(1, row);
		inst.setDataset(wekaInstances);
		wekaInstances.add(inst);
	}

	return wekaInstances;
}
 
Example 3
Source File: DatasetLoader.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads {@code src/test/resources/numeric/anger.meta.arff} and derives a dataset with two
 * attributes: the first attribute of the loaded data and a nominal attribute "cls" with
 * the values "0" and "1". An instance gets "1" when its original class value is above 0.5.
 *
 * @return the derived dataset, with the class index set to the "cls" attribute
 * @throws Exception if loading the ARFF file fails
 */
public static Instances loadAngerMetaClassification() throws Exception {
  final Instances source =
      DatasetLoader.loadArff("src/test/resources/numeric/anger.meta.arff");

  final ArrayList<Attribute> attributes = new ArrayList<>();
  attributes.add(source.attribute(0));
  attributes.add(new Attribute("cls", Arrays.asList("0", "1")));

  final Instances discretized =
      new Instances("anger-classification", attributes, source.numInstances());
  discretized.setClassIndex(1);

  for (Instance original : source) {
    final Instance converted = (Instance) original.copy();
    // Attach the copy to the new header before writing values through it.
    converted.setDataset(discretized);
    converted.setValue(0, original.stringValue(0));
    final String label = original.classValue() > 0.5 ? "1" : "0";
    converted.setValue(1, label);
    discretized.add(converted);
  }
  return discretized;
}
 
Example 4
Source File: ZooModelTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a dataset named "shrinked" from the first 10 instances of {@code data}, reusing
 * all attributes of {@code data} and using attribute 1 as the class.
 *
 * <p>NOTE(review): the instances obtained from {@code data} are modified directly (class
 * value and dataset reference) before being added to the result; confirm callers do not
 * rely on {@code data} staying untouched.
 *
 * @param data the dataset to take the first 10 instances from
 * @return the reduced dataset
 */
private Instances shrinkInstances(Instances data) {
    final ArrayList<Attribute> attributes = new ArrayList<>();
    final int numAttributes = data.numAttributes();
    for (int a = 0; a < numAttributes; a++) {
        attributes.add(data.attribute(a));
    }

    final Instances subset = new Instances("shrinked", attributes, 10);
    subset.setClassIndex(1);

    int row = 0;
    while (row < 10) {
        final Instance current = data.get(row);
        current.setClassValue(row % 10);
        current.setDataset(subset);
        subset.add(current);
        row++;
    }
    return subset;
}
 
Example 5
Source File: NutchOnlineClassifier.java    From anthelion with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}.
 *
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}, or {@code null} (after printing a message to
 *         standard output) when {@code url} is {@code null}.
 */
private static Instance convert(AnthURL url) {
	if (url == null) {
		System.out.println("Input AnthURL for convertion into instance was null.");
		return null;
	}

	Instance converted = new SparseInstance(dimension);
	converted.replaceMissingValues(replaceMissingValues);
	converted.setDataset(instances);

	// fixed features
	converted.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
	converted.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
	converted.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	converted.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
	// NOTE(review): this repeats the "nonsempar" assignment from two lines above; it looks
	// like a copy-paste slip where a non-semantic sibling feature was intended - confirm.
	converted.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
	converted.setValue(attributesIndex.get("domain"), url.uri.getHost());

	// hashed token features taken from path, query and fragment of the URI
	Set<String> uriTokens = new HashSet<String>();
	uriTokens.addAll(tokenizer(url.uri.getPath()));
	uriTokens.addAll(tokenizer(url.uri.getQuery()));
	uriTokens.addAll(tokenizer(url.uri.getFragment()));
	for (String token : uriTokens) {
		String attributeName = getAttributeNameOfHash(getHash(token, hashTrickSize));
		converted.setValue(attributesIndex.get(attributeName), 1);
	}
	return converted;
}
 
Example 6
Source File: MekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts a labeled instance into a Weka {@link Instance} attached to a dataset that is
 * created from the given schema. Instances that already are {@code MekaInstance}s are
 * unwrapped and returned directly.
 *
 * @param schema the schema describing the attributes and the label; must not be null
 * @param instance the instance to convert
 * @return the Weka instance; the label is written to its last attribute
 * @throws NullPointerException if {@code schema} is null
 * @throws IllegalArgumentException if schema and instance differ in their number of attributes
 * @throws UnsupportedAttributeTypeException if an attribute or the label is neither numeric nor categorical
 */
public static Instance transformInstanceToWekaInstance(final ILabeledInstanceSchema schema, final ILabeledInstance instance) throws UnsupportedAttributeTypeException {
	// The null check has to happen before the schema is dereferenced for the size comparison.
	Objects.requireNonNull(schema, "schema must not be null");
	if (instance.getNumAttributes() != schema.getNumAttributes()) {
		throw new IllegalArgumentException("Schema and instance do not coincide. The schema defines " + schema.getNumAttributes() + " attributes but the instance has " + instance.getNumAttributes() + " attributes.");
	}
	if (instance instanceof MekaInstance) {
		return ((MekaInstance) instance).getElement();
	}
	Instances dataset = createDatasetFromSchema(schema);
	Instance iNew = new DenseInstance(dataset.numAttributes());
	iNew.setDataset(dataset);

	// Copy the attribute values, converted according to the attribute type in the schema.
	for (int i = 0; i < instance.getNumAttributes(); i++) {
		if (schema.getAttribute(i) instanceof INumericAttribute) {
			iNew.setValue(i, ((INumericAttribute) schema.getAttribute(i)).getAsAttributeValue(instance.getAttributeValue(i)).getValue());
		} else if (schema.getAttribute(i) instanceof ICategoricalAttribute) {
			iNew.setValue(i, ((ICategoricalAttribute) schema.getAttribute(i)).getAsAttributeValue(instance.getAttributeValue(i)).getValue());
		} else {
			throw new UnsupportedAttributeTypeException("Only categorical and numeric attributes are supported!");
		}
	}

	// The label is stored in the last attribute of the Weka instance.
	if (schema.getLabelAttribute() instanceof INumericAttribute) {
		iNew.setValue(iNew.numAttributes() - 1, ((INumericAttribute) schema.getLabelAttribute()).getAsAttributeValue(instance.getLabel()).getValue());
	} else if (schema.getLabelAttribute() instanceof ICategoricalAttribute) {
		iNew.setValue(iNew.numAttributes() - 1, ((ICategoricalAttribute) schema.getLabelAttribute()).getAsAttributeValue(instance.getLabel()).getValue());
	} else {
		throw new UnsupportedAttributeTypeException("Only categorical and numeric attributes are supported!");
	}
	return iNew;
}
 
Example 7
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a new bag from the given instances and appends it to the output.
 *
 * @param input       the input dataset
 * @param output      the dataset this bag is added to
 * @param bagInsts    the instances in this bag
 * @param bagIndex    the bagIndex of this bag
 * @param classValue  the associated class value
 * @param bagWeight   the weight of the bag
 */
protected void addBag(
    Instances input,
    Instances output,
    Instances bagInsts,
    int bagIndex,
    double classValue,
    double bagWeight) {

  // copy strings/relational values of every bag member
  for (int member = 0; member < bagInsts.numInstances(); member++) {
    final Instance current = bagInsts.instance(member);
    RelationalLocator.copyRelationalValues(
        current, false, input, m_InputRelAtts, bagInsts, m_BagRelAtts);
    StringLocator.copyStringValues(
        current, false, input, m_InputStringAtts, bagInsts, m_BagStringAtts);
  }

  // register the bag as a relational value of attribute 1 and build the bag instance:
  // attribute 0 = bag index, attribute 1 = relation index, attribute 2 = class value
  final int relationIndex = output.attribute(1).addRelation(bagInsts);
  final Instance bag = new DenseInstance(output.numAttributes());
  bag.setValue(0, bagIndex);
  bag.setValue(1, relationIndex);
  bag.setValue(2, classValue);
  bag.setWeight(bagWeight);
  bag.setDataset(output);
  output.add(bag);
}
 
Example 8
Source File: Word2VecLoader.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the complete vocabulary as a dataset: one instance per word, holding the word
 * vector in the leading attributes and the word itself (as a string value of the
 * "word_id" attribute) in the last attribute.
 *
 * @return the dataset with one instance per vocabulary word
 * @throws IOException if no source has been specified or incremental retrieval is active
 */
@Override
public Instances getDataSet() throws IOException {
  if (m_sourceFile == null) {
    throw new IOException("No source has been specified");
  }

  if (getRetrieval() == INCREMENTAL) {
    throw new IOException("This loader cannot load instances incrementally.");
  }
  setRetrieval(BATCH);

  if (m_structure == null) {
    getStructure();
  }

  final Instances result = new Instances(m_structure);
  final int numAttributes = result.numAttributes();

  for (String word : vec.getVocab().words()) {
    // Look the vector up once per word instead of on every loop iteration.
    final double[] wordVector = this.vec.getWordVector(word);

    final double[] values = new double[numAttributes];
    System.arraycopy(wordVector, 0, values, 0, wordVector.length);
    values[numAttributes - 1] = result.attribute("word_id").addStringValue(word);

    final Instance inst = new DenseInstance(1, values);
    inst.setDataset(result);
    result.add(inst);
  }

  return result;
}
 
Example 9
Source File: CombineScores.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Scores a result described by the given attribute values.
 *
 * @param attributesValues one or more attributes used to score the result e.g., indri rank
 * @return the second entry (index 1) of the distribution returned by the scorer model
 * @throws Exception if the scorer model fails
 */
public double score(double[] attributesValues) throws Exception {
	final Instance candidate = new Instance(1, attributesValues);
	candidate.setDataset(qResultsDataset);
	final double[] distribution = scorerModel.distributionForInstance(candidate);
	return distribution[1];
}
 
Example 10
Source File: DataSetUtils.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Turns a list of matrices into a dataset: each matrix is flattened into one instance and
 * receives the class value of the reference instance at the same position.
 *
 * @param matrices the matrices to convert; must not be null or empty
 * @param refInstances the reference dataset providing the number of classes and the class values
 * @return the new dataset, with the class attribute in last position
 * @throws IllegalArgumentException if {@code matrices} is null or empty
 */
static Instances matricesToInstances(final List<INDArray> matrices, final Instances refInstances) {
	if (matrices == null || matrices.isEmpty()) {
		throw new IllegalArgumentException("Parameter 'matrices' must not be null or empty!");
	}

	// One numeric attribute per element of the first matrix
	final long numValues = matrices.get(0).length();
	final ArrayList<Attribute> attributes = new ArrayList<>();
	for (int v = 0; v < numValues; v++) {
		attributes.add(new Attribute("val" + v));
	}

	// Nominal class attribute; the labels are the class indices rendered as doubles ("0.0", "1.0", ...)
	final int numClasses = refInstances.classAttribute().numValues();
	final List<String> classValues = IntStream.range(0, numClasses)
			.asDoubleStream()
			.mapToObj(String::valueOf)
			.collect(Collectors.toList());
	attributes.add(new Attribute(CLASS_ATT_NAME, classValues));

	final Instances result = new Instances(INSTANCES_DS_NAME, attributes, refInstances.size());
	result.setClassIndex(result.numAttributes() - 1);

	int position = 0;
	for (final INDArray matrix : matrices) {
		// Flattened matrix values plus one placeholder slot for the class
		final double[] flattened = Nd4j.toFlattened(matrix).toDoubleVector();
		final Instance inst = new DenseInstance(1, ArrayUtils.addAll(flattened, 0));
		inst.setDataset(result);

		inst.setClassValue(refInstances.get(position).classValue());
		result.add(inst);
		position++;
	}

	return result;
}
 
Example 11
Source File: RelExTool.java    From Criteria2Query with Apache License 2.0 5 votes vote down vote up
/**
 * Classifies the relation between two entities with the configured classifier.
 *
 * @param en1 type of the first entity (nominal value of "entity1_type")
 * @param en2 type of the second entity (nominal value of "entity2_type")
 * @param e1e value for "entity1_end_index"
 * @param e2s value for "entity2_start_index"
 * @param dis value for "distance"
 * @param shortestdeppath value for "shortestdep"
 * @return the entry of {@code GlobalSetting.relations} at the predicted class index
 * @throws Exception if the classifier fails
 */
public String predict(String en1, String en2, Double e1e, Double e2s, Double dis, Double shortestdeppath)
		throws Exception {
	// Nominal value lists (kept as raw lists, as the element type of the settings is not visible here)
	List firstTypes = Arrays.asList(GlobalSetting.primaryEntities);
	List secondTypes = Arrays.asList(GlobalSetting.atrributes);
	List relations = Arrays.asList(GlobalSetting.relations);

	// Numeric attributes
	Attribute firstEndAttr = new Attribute("entity1_end_index");
	Attribute secondStartAttr = new Attribute("entity2_start_index");
	Attribute distanceAttr = new Attribute("distance");
	Attribute shortestDepAttr = new Attribute("shortestdep");
	// Nominal attributes
	Attribute firstTypeAttr = new Attribute("entity1_type", firstTypes);
	Attribute secondTypeAttr = new Attribute("entity2_type", secondTypes);
	Attribute relationAttr = new Attribute("rel", relations);

	// Header: the attribute order defines the indices; "rel" (index 6) is the class
	ArrayList<Attribute> header = new ArrayList<Attribute>();
	header.add(firstTypeAttr);
	header.add(secondTypeAttr);
	header.add(firstEndAttr);
	header.add(secondStartAttr);
	header.add(distanceAttr);
	header.add(shortestDepAttr);
	header.add(relationAttr);
	Instances dataset = new Instances("TestDataSet", header, 1);

	// The class value is left unset; it is what the classifier predicts.
	Instance query = new DenseInstance(7);
	query.setValue(firstTypeAttr, en1);
	query.setValue(secondTypeAttr, en2);
	query.setValue(secondStartAttr, e2s);
	query.setValue(firstEndAttr, e1e);
	query.setValue(distanceAttr, dis);
	query.setValue(shortestDepAttr, shortestdeppath);
	query.setDataset(dataset);
	dataset.setClassIndex(6);

	double predicted = classifier.classifyInstance(query);
	return (String) relations.get((int) predicted);
}
 
Example 12
Source File: MultiClassClassifier.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects, for one instance, the probability each base classifier assigns to its
 * second "class" (index 1 of its distribution). Used by StackedMultiClassClassifier.
 *
 * @param inst the instance to get the prediction for
 * @return the individual predictions; entries whose classifier is {@code null} stay 0
 * @throws Exception if the predictions can't be computed successfully
 */
public double[] individualPredictions(Instance inst) throws Exception {

  // a single classifier is applied to the instance as it is
  if (m_Classifiers.length == 1) {
    return new double[] { m_Classifiers[0].distributionForInstance(inst)[1] };
  }

  final double[] predictions = new double[m_ClassFilters.length];
  for (int c = 0; c < m_ClassFilters.length; c++) {
    if (m_Classifiers[c] == null) {
      continue;
    }
    if (m_Method == METHOD_1_AGAINST_1) {
      // 1-against-1: classify a copy that is attached to the two-class header
      final Instance twoClassCopy = (Instance) inst.copy();
      twoClassCopy.setDataset(m_TwoClassDataset);
      predictions[c] = m_Classifiers[c].distributionForInstance(twoClassCopy)[1];
    } else {
      // otherwise: pass the instance through the class filter first
      m_ClassFilters[c].input(inst);
      m_ClassFilters[c].batchFinished();
      predictions[c] =
          m_Classifiers[c].distributionForInstance(m_ClassFilters[c].output())[1];
    }
  }
  return predictions;
}
 
Example 13
Source File: ContractRotationForest.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Transforms an instance for the i-th classifier: every attribute group of that
 * classifier is projected with its filter and the projected values (without the
 * last attribute of each projection) are concatenated into a new instance.
 *
 * @param instance the instance to be transformed
 * @param i the base classifier number
 * @return the transformed instance
 * @throws Exception if the instance can't be converted successfully
 */
protected Instance convertInstance( Instance instance, int i )
throws Exception {
  final Instance converted = new DenseInstance( headers.get(i).numAttributes( ) );
  converted.setWeight( instance.weight() );
  converted.setDataset( headers.get(i) );

  final int[][] attributeGroups = groups.get(i);
  int nextTarget = 0;

  // Project the data for each group
  for( int group = 0; group < attributeGroups.length; group++ ) {
    final int[] members = attributeGroups[group];

    // group values followed by the class value
    Instance projected = new DenseInstance( members.length + 1 );
    for( int m = 0; m < members.length; m++ ) {
      projected.setValue( m, instance.value( members[m] ) );
    }
    projected.setValue( members.length, instance.classValue( ) );
    projected.setDataset( reducedHeaders.get(i)[ group ] );

    final Filter projection = projectionFilters.get(i)[ group ];
    projection.input( projected );
    projected = projection.output( );
    projection.batchFinished();

    // append everything but the last attribute of the projection
    final int numProjected = projected.numAttributes() - 1;
    for( int a = 0; a < numProjected; a++ ) {
      converted.setValue( nextTarget++, projected.value( a ) );
    }
  }

  converted.setClassValue( instance.classValue() );
  return converted;
}
 
Example 14
Source File: C45Loader.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the data set incrementally: returns the next instance, or null once there
 * are no more instances. The structure is determined first if that has not happened
 * yet. When the data is exhausted the underlying reader is closed.
 *
 * Incremental and batch retrieval cannot be mixed; an exception is thrown if batch
 * retrieval is already active.
 *
 * @param structure the dataset header information (not read by this implementation,
 *          which attaches instances to the internally stored structure)
 * @return the next instance in the data set as an Instance object or null if
 *         there are no more instances to be read
 * @exception IOException if no source has been specified, if batch retrieval is
 *              active, or if there is an error during parsing
 */
@Override
public Instance getNextInstance(Instances structure) throws IOException {
  if (m_sourceFile == null) {
    throw new IOException("No source has been specified");
  }
  if (getRetrieval() == BATCH) {
    throw new IOException(
        "Cannot mix getting Instances in both incremental and batch modes");
  }
  setRetrieval(INCREMENTAL);

  if (m_structure == null) {
    getStructure();
  }

  final StreamTokenizer tokenizer = new StreamTokenizer(m_dataReader);
  initTokenizer(tokenizer);

  final Instance next = getInstance(tokenizer);
  if (next == null) {
    // nothing left to read: close the stream; a failure to close is only reported
    try {
      m_dataReader.close();
    } catch (Exception ex) {
      ex.printStackTrace();
    }
    return null;
  }

  next.setDataset(m_structure);
  return next;
}
 
Example 15
Source File: WekaTimeseriesUtil.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Converts a given simplified {@link ai.libs.jaicore.ml.classification.singlelabel.timeseries.dataset.TimeSeriesDataset2}
 * object to a Weka Instances object. The values of all variables are concatenated per
 * instance, followed by the target as class value. Variables without a value matrix
 * (null) contribute neither attributes nor values.
 *
 * @param dataSet
 *            Data set which is transformed
 * @param classValues
 *            The values of the nominal class attribute
 * @return Transformed Weka Instances object
 */
public static Instances simplifiedTimeSeriesDatasetToWekaInstances(final ai.libs.jaicore.ml.classification.singlelabel.timeseries.dataset.TimeSeriesDataset2 dataSet, final List<String> classValues) {

	// Null entries are kept in the list so that the attribute names keep the variable index.
	final List<double[][]> matrices = new ArrayList<>();
	for (int i = 0; i < dataSet.getNumberOfVariables(); i++) {
		matrices.add(dataSet.getValues(i));
	}

	// Create attributes
	final ArrayList<Attribute> attributes = new ArrayList<>();
	for (int m = 0; m < matrices.size(); m++) {
		final double[][] matrix = matrices.get(m);
		if (matrix == null) {
			continue;
		}

		for (int i = 0; i < matrix[0].length; i++) {
			attributes.add(new Attribute(String.format("val_%d_%d", m, i)));
		}
	}

	// Add class attribute
	final int[] targets = dataSet.getTargets();
	attributes.add(new Attribute("class", classValues));
	final int numInstances = dataSet.getNumberOfInstances();
	final Instances result = new Instances(I_NAME, attributes, numInstances);
	result.setClassIndex(result.numAttributes() - 1);

	// Create instances
	for (int i = 0; i < numInstances; i++) {

		// Skip null matrices here as well: they have no attributes, and dereferencing
		// them would fail with a NullPointerException.
		double[] concatenatedRow = new double[0];
		for (final double[][] matrix : matrices) {
			if (matrix != null) {
				concatenatedRow = ArrayUtils.addAll(concatenatedRow, matrix[i]);
			}
		}

		concatenatedRow = ArrayUtils.addAll(concatenatedRow, targets[i]);

		// Initialize instance
		final Instance inst = new DenseInstance(1, concatenatedRow);
		inst.setDataset(result);
		result.add(inst);
	}

	return result;
}
 
Example 16
Source File: FTNode.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the class probabilities for an instance given by the Functional Tree.
 * <p>
 * A leaf with a constructor model answers with the model distribution; a leaf without
 * one puts all probability on {@code m_leafclass}. Any other node extends the instance
 * with the model probabilities, lets the local split model choose a branch and
 * delegates to the corresponding child node.
 * <p>
 * NOTE(review): for non-leaf nodes the dataset of {@code instance} is temporarily
 * modified (attributes are inserted at position 0 and removed again afterwards); this
 * looks unsafe for concurrent use of the same dataset - confirm.
 *
 * @param instance the instance
 * @return the array of probabilities
 * @throws Exception declared by the signature; propagated from the called models
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probs;

  if (m_isLeaf && m_hasConstr) { //leaf
    //leaf: use majority class or constructor model
    probs = modelDistributionForInstance(instance);
  } else { 
    if (m_isLeaf && !m_hasConstr)
      {
        // leaf without constructor model: probability 1 for the leaf class, 0 elsewhere
        probs=new double[instance.numClasses()];
        probs[m_leafclass]=(double)1;
      }else{
             
      probs = modelDistributionForInstance(instance);
      //Build auxiliary split instance: room for the class probabilities plus the original values
      Instance instanceSplit=new DenseInstance(instance.numAttributes()+instance.numClasses());
      // the auxiliary instance shares the dataset (header) of the original instance
      instanceSplit.setDataset(instance.dataset());
         
      // Inserts attribute and their value
      // (each new attribute goes to position 0 of the shared dataset; probs[i] is stored at index i)
      for(int i=0; i< instance.numClasses();i++)
        {
          instanceSplit.dataset().insertAttributeAt( new Attribute("N"+ (instance.numClasses()-i)), 0);
          instanceSplit.setValue(i,probs[i]);
        }
      // original attribute values follow, shifted behind the inserted probabilities
      for(int i=0; i< instance.numAttributes();i++)
        instanceSplit.setValue(i+instance.numClasses(),instance.value(i));
         
      //chooses best branch           
      int branch = m_localModel.whichSubset(instanceSplit); //split
         
      //delete added attributes (undoes the insertions at position 0 of the shared dataset)
      for(int i=0; i< instance.numClasses();i++)
        instanceSplit.dataset().deleteAttributeAt(0);
          
      // the chosen child node classifies the original (unextended) instance
      probs = m_sons[branch].distributionForInstance(instance);
    }
  }
  return probs;
	
}
 
Example 17
Source File: FTInnerNode.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the class probabilities for an instance given by the Functional tree.
 * <p>
 * A leaf with a constructor model answers with the model distribution; a leaf without
 * one puts all probability on {@code m_leafclass}. Any other node extends the instance
 * with the model probabilities, lets the local split model choose a branch and
 * delegates to the corresponding child node.
 * <p>
 * NOTE(review): for non-leaf nodes the dataset of {@code instance} is temporarily
 * modified (attributes are inserted at position 0 and removed again afterwards); this
 * looks unsafe for concurrent use of the same dataset - confirm.
 *
 * @param instance the instance
 * @return the array of probabilities
 * @throws Exception declared by the signature; propagated from the called models
 */
public double[] distributionForInstance(Instance instance) throws Exception {
  double[] probs;
                                             
  //also needed for logitboost
  if (m_isLeaf && m_hasConstr) { //leaf
    //leaf: use majority class or constructor model
    probs = modelDistributionForInstance(instance);
  } else {
    if (m_isLeaf && !m_hasConstr)
      {
        // leaf without constructor model: probability 1 for the leaf class, 0 elsewhere
        probs=new double[instance.numClasses()];
        probs[m_leafclass]=(double)1;  
      }else{
             
      probs = modelDistributionForInstance(instance);
      //Build auxiliary split instance: room for the class probabilities plus the original values
      Instance instanceSplit=new DenseInstance(instance.numAttributes()+instance.numClasses());
         
      // the auxiliary instance shares the dataset (header) of the original instance
      instanceSplit.setDataset(instance.dataset());
   
      // insert one attribute per class at position 0 of the shared dataset; probs[i] is stored at index i
      for(int i=0; i< instance.numClasses();i++)
        {
          instanceSplit.dataset().insertAttributeAt( new Attribute("N"+ (instance.numClasses()-i)), 0);
          instanceSplit.setValue(i,probs[i]);
        }
      // original attribute values follow, shifted behind the inserted probabilities
      for(int i=0; i< instance.numAttributes();i++)
        instanceSplit.setValue(i+instance.numClasses(),instance.value(i));
        
         
         
      // let the local split model choose the branch for the extended instance
      int branch = m_localModel.whichSubset(instanceSplit); //split
      // remove the inserted attributes from the shared dataset again
      for(int i=0; i< instance.numClasses();i++)
        instanceSplit.dataset().deleteAttributeAt(0);
          
      //probs = m_sons[branch].distributionForInstance(instance);
      // the chosen child node classifies the original (unextended) instance
      probs = m_sons[branch].distributionForInstance(instance);
    }
  }
  return probs;	
}
 
Example 18
Source File: CSV.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Appends one output row for a prediction: the 1-based index, the actual value,
 * the predicted value, an error column and the prediction/distribution, optionally
 * followed by the attribute values, terminated by a newline.
 *
 * @param dist        the distribution to use
 * @param inst        the instance to generate text from
 * @param index       the index in the dataset
 * @throws Exception  if something goes wrong
 */
protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception {
  final int precision = m_NumDecimals;

  // copy used for printing the attribute values at the end of the row
  final Instance attributeSource = (Instance) inst.copy();
  attributeSource.setDataset(inst.dataset());

  // an all-zero distribution means "no prediction"
  final double predicted;
  if (Utils.sum(dist) == 0) {
    predicted = Utils.missingValue();
  } else if (inst.classAttribute().isNominal()) {
    predicted = Utils.maxIndex(dist);
  } else {
    predicted = dist[0];
  }
  final boolean noPrediction = Utils.isMissingValue(predicted);

  // index
  append("" + (index + 1));

  if (inst.dataset().classAttribute().isNumeric()) {
    // actual
    append(m_Delimiter
        + (inst.classIsMissing() ? "?" : Utils.doubleToString(inst.classValue(), precision)));
    // predicted
    append(m_Delimiter + (noPrediction ? "?" : Utils.doubleToString(predicted, precision)));
    // error
    append(m_Delimiter
        + ((noPrediction || inst.classIsMissing())
            ? "?"
            : Utils.doubleToString(predicted - inst.classValue(), precision)));
  } else {
    // actual
    append(m_Delimiter + ((int) inst.classValue()+1) + ":" + inst.toString(inst.classIndex()));
    // predicted
    if (noPrediction) {
      append(m_Delimiter + "?");
    } else {
      append(m_Delimiter + ((int) predicted+1) + ":" + inst.dataset().classAttribute().value((int) predicted));
    }
    // error?
    final boolean misclassified = !noPrediction
        && !inst.classIsMissing()
        && ((int) predicted+1 != (int) inst.classValue()+1);
    append(m_Delimiter + (misclassified ? "+" : ""));
    // prediction/distribution
    if (!m_OutputDistribution) {
      append(m_Delimiter
          + (noPrediction ? "?" : Utils.doubleToString(dist[(int) predicted], precision)));
    } else if (noPrediction) {
      append(m_Delimiter + "?");
    } else {
      append(m_Delimiter);
      for (int n = 0; n < dist.length; n++) {
        if (n > 0) {
          append(m_Delimiter);
        }
        // the predicted class is marked with an asterisk
        if (n == (int) predicted) {
          append("*");
        }
        append(Utils.doubleToString(dist[n], precision));
      }
    }
  }

  // attributes
  if (m_Attributes != null) {
    append(m_Delimiter + attributeValuesString(attributeSource));
  }
  append("\n");
}
 
Example 19
Source File: TweetToSparseFeatureVector.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Builds the output batch: every processed tweet becomes a sparse instance holding
 * the values of the corresponding input instance followed by the word frequencies
 * of the tweet.
 *
 * @param instances the input batch
 * @return the output batch in the filter's output format
 * @throws Exception if the conversion fails
 */
@Override
protected Instances process(Instances instances) throws Exception {

	Instances result = getOutputFormat();

	// if we are in the testing data we calculate the word vectors again
	if (this.isFirstBatchDone()) {
		this.tweetsToVectors(instances);
	}

	int row = 0;
	for (Object2IntMap<String> wordCounts : this.procTweets) {
		double[] values = new double[result.numAttributes()];

		// copy previous attributes values
		Instance source = instances.instance(row);
		for (int n = 0; n < instances.numAttributes(); n++) {
			values[n] = source.value(n);
		}

		// add words using the frequency as attribute value
		for (String word : wordCounts.keySet()) {
			// we only add the value of valid attributes
			if (result.attribute(word) == null) {
				continue;
			}
			int target = result.attribute(word).index();
			values[target] = (double) wordCounts.getInt(word);
		}

		Instance inst = new SparseInstance(1, values);
		inst.setDataset(result);

		// copy possible strings, relational values...
		copyValues(inst, false, instances, result);

		result.add(inst);
		row++;
	}

	return result;
}
 
Example 20
Source File: TweetToSentiStrengthFeatureVector.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Builds the output batch: every input instance is copied into a sparse instance and
 * extended with the features SentiStrength computes for the tokenized tweet text.
 *
 * @param instances the input batch
 * @return the output batch in the filter's output format
 * @throws Exception if the conversion fails
 */
@Override
protected Instances process(Instances instances) throws Exception {

	// set upper value for text index
	m_textIndex.setUpper(instances.numAttributes() - 1);

	Instances result = getOutputFormat();

	// reference to the content of the message, users index start from zero
	Attribute attrCont = instances.attribute(this.m_textIndex.getIndex());

	// SentiStrength is re-initialized in each batch as it is not serializable
	SentiStrengthEvaluator evaluator = new SentiStrengthEvaluator(
			this.langFolder.getAbsolutePath() + File.separator, "SentiStrength");
	evaluator.processDict();

	for (int row = 0; row < instances.numInstances(); row++) {
		Instance source = instances.instance(row);

		// copy the values of the input instance
		double[] values = new double[result.numAttributes()];
		for (int n = 0; n < instances.numAttributes(); n++) {
			values[n] = source.value(n);
		}

		String content = source.stringValue(attrCont);
		List<String> words = affective.core.Utils.tokenize(content, this.toLowerCase, this.standarizeUrlsUsers, this.reduceRepeatedLetters, this.m_tokenizer,this.m_stemmer,this.m_stopwordsHandler);

		// every computed feature is written to the attribute carrying its name
		Map<String,Double> features = evaluator.evaluateTweet(words);
		for (Map.Entry<String,Double> feature : features.entrySet()) {
			values[result.attribute(feature.getKey()).index()] = feature.getValue();
		}

		Instance inst = new SparseInstance(1, values);
		inst.setDataset(result);

		// copy possible strings, relational values...
		copyValues(inst, false, instances, result);

		result.add(inst);
	}

	return result;
}