Java Code Examples for weka.core.Instances#numInstances()

The following examples show how to use weka.core.Instances#numInstances(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: XMeans.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Assigns an instance to the nearest of the given cluster centers.
 * 
 * @param instance
 *          the instance to assign a cluster to.
 * @param centers
 *          the centers to cluster the instance to.
 * @return the index of the closest center within {@code centers}; 0 when
 *         {@code centers} is empty or no distance compares below infinity.
 */
protected int clusterProcessedInstance(Instance instance, Instances centers) {

  // Seed with +infinity instead of Integer.MAX_VALUE so that distances of
  // 2^31 - 1 or more can still be picked as the minimum.
  double minDist = Double.POSITIVE_INFINITY;
  int bestCluster = 0;
  for (int i = 0; i < centers.numInstances(); i++) {
    double dist = m_DistanceF.distance(instance, centers.instance(i));

    // strict comparison: on ties the center with the lowest index wins
    if (dist < minDist) {
      minDist = dist;
      bestCluster = i;
    }
  }
  return bestCluster;
}
 
Example 2
Source File: CitationKNN.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Checks whether two exemplars hold the same instances in their
 * relational attribute at index 1.
 * <p>
 * Values are compared with {@code !=} on doubles, so a NaN value never
 * compares equal to anything, including another NaN.
 * @param exemplar1 first exemplar
 * @param exemplar2 second exemplar
 * @return true if both bags contain the same number of instances and all
 * attribute values match position by position, false otherwise
 */
public boolean equalExemplars(Instance exemplar1, Instance exemplar2){
  Instances bagA = exemplar1.relationalValue(1);
  Instances bagB = exemplar2.relationalValue(1);

  // bags of different size can never be equal
  if (bagA.numInstances() != bagB.numInstances()) {
    return false;
  }

  for (int row = 0; row < bagA.numInstances(); row++) {
    Instance left = bagA.instance(row);
    Instance right = bagB.instance(row);
    for (int col = 0; col < left.numAttributes(); col++) {
      if (left.value(col) != right.value(col)) {
        return false;
      }
    }
  }
  return true;
}
 
Example 3
Source File: RuleStats.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
  * Static utility function that keeps only the data not covered by any
  * of the rules positioned after the given index in the ruleset.
  * The kept instances are collected in a new Instances object created
  * with new Instances(data, 0).
  *
  * @param data the data to be processed
  * @param rules the ruleset
  * @param index the given index; only rules at index+1 and beyond are checked
  * @return the data not covered by the successive rules
  */
 public static Instances rmCoveredBySuccessives(Instances data, FastVector rules, int index){
   Instances uncovered = new Instances(data, 0);

   nextDatum:
   for (int row = 0; row < data.numInstances(); row++) {
     Instance candidate = data.instance(row);

     for (int r = index + 1; r < rules.size(); r++) {
       Rule successor = (Rule) rules.elementAt(r);
       if (successor.covers(candidate)) {
         // covered by a later rule: drop it and move on to the next datum
         continue nextDatum;
       }
     }

     uncovered.add(candidate);
   }
   return uncovered;
 }
 
Example 4
Source File: C45PruneableClassifierTreeG.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Prepares the data structures used for grafting: a limits array
 * (for numeric attributes) and a per-instance weight index, then starts
 * the recursive function traverseTree.
 *
 * @param data the data for the tree
 * @throws Exception if anything goes wrong
 */
public void doGrafting(Instances data) throws Exception {

  final int attributeCount = data.numAttributes();
  final int instanceCount = data.numInstances();

  // per-attribute bounds: [a][0] is the lower limit, [a][1] the upper limit;
  // every attribute starts out without a limit
  double[][] limits = new double[attributeCount][2];
  for (double[] bounds : limits) {
    bounds[0] = Double.NEGATIVE_INFINITY;
    bounds[1] = Double.POSITIVE_INFINITY;
  }

  // weights are tracked by index instead of creating new Instances objects:
  // row 0 holds the weights at the leaf, row 1 the weights in atbop
  double[][] instanceIndex = new double[2][instanceCount];
  for (int inst = 0; inst < instanceCount; inst++) {
    instanceIndex[0][inst] = 1;
    instanceIndex[1][inst] = 1;  // leaf instances are in atbop
  }

  // first call to graft
  traverseTree(data, instanceIndex, limits, this, 0, -1);
}
 
Example 5
Source File: InstanceTools.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies the attribute values of a dataset into a 2D array with one row
 * per instance and one column per attribute.
 *
 * @param ds the dataset to read the values from
 * @param removeLastVal if true, the last attribute is left out of every row
 * @return an array of size [numInstances][numAttributes], with one column
 *         fewer when removeLastVal is true
 */
public static double[][] fromWekaInstancesArray(Instances ds, boolean removeLastVal) {
    int cols = ds.numAttributes();
    if (removeLastVal) {
        cols--;
    }
    final int rows = ds.numInstances();

    double[][] matrix = new double[rows][cols];
    for (int r = 0; r < rows; r++) {
        double[] target = matrix[r];
        for (int c = 0; c < cols; c++) {
            target[c] = ds.get(r).value(c);
        }
    }
    return matrix;
}
 
Example 6
Source File: SAXVSM.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Leave-one-out cross-validation. The bop transformation is applied once to
 * the full data; only tfxidf is re-applied for every fold, each time
 * ignoring the bag of the held-out instance.
 * 
 * @param data the instances to cross-validate on
 * @return cv accuracy: correct predictions divided by the number of instances
 * @throws Exception if any of the processing or classification calls fails
 */
private double crossValidate(Instances data) throws Exception {
    transformedData = bop.process(data);

    double hits = 0;
    int fold = 0;
    while (fold < data.numInstances()) {
        // rebuild the corpus without the bag of the held-out instance
        corpus = tfxidf(transformedData, fold);

        boolean predictedCorrectly =
                classifyInstance(data.get(fold)) == data.get(fold).classValue();
        if (predictedCorrectly) {
            hits++;
        }
        fold++;
    }

    return hits / data.numInstances();
}
 
Example 7
Source File: MultiInstanceToPropositional.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Sets the format of the input instances and derives the propositional
 * output format from the relational attribute at index 1.
 * <p>
 * NOTE(review): the parameter description says contained instances are
 * ignored, but the body below does iterate over the instances in
 * instanceInfo to count the bags and the instances inside every bag.
 *
 * @param instanceInfo an Instances object containing the input 
 * instance structure (any instances contained in the object are 
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if the input format can't be set 
 * successfully, or if attribute 1 is not relational
 */
public boolean setInputFormat(Instances instanceInfo) 
  throws Exception {

  // the bag contents are expected in attribute 1, which must be relational
  if (instanceInfo.attribute(1).type()!=Attribute.RELATIONAL) {
    throw new Exception("Can only handle relational-valued attribute!");
  }  
  super.setInputFormat(instanceInfo);   

  // count the bags and sum up the number of instances over all bags
  m_NumBags = instanceInfo.numInstances();
  m_NumInstances = 0;
  for (int i=0; i<m_NumBags; i++)
    m_NumInstances += instanceInfo.instance(i).relationalValue(1).numInstances();

  // copies of the class attribute and of attribute 0 (used as bag index below)
  Attribute classAttribute = (Attribute) instanceInfo.classAttribute().copy();
  Attribute bagIndex = (Attribute) instanceInfo.attribute(0).copy();

  /* create a new output format (propositional instance format) */
  // layout: bag index first, then the attributes of the relation, class last
  Instances newData = instanceInfo.attribute(1).relation().stringFreeStructure();
  newData.insertAttributeAt(bagIndex, 0);
  newData.insertAttributeAt(classAttribute, newData.numAttributes());
  newData.setClassIndex(newData.numAttributes() - 1);

  super.setOutputFormat(newData.stringFreeStructure());

  // locators are built on fresh structure copies of the relation, not on newData
  m_BagStringAtts = new StringLocator(instanceInfo.attribute(1).relation().stringFreeStructure());
  m_BagRelAtts    = new RelationalLocator(instanceInfo.attribute(1).relation().stringFreeStructure());

  return true;
}
 
Example 8
Source File: MergeNominalValues.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Processes the given data: for every attribute flagged in
 * m_AttToBeModified, a non-missing value is replaced by the entry that
 * m_Indicators holds for it; all other values are copied unchanged.
 * 
 * @param instances the data to process
 * @return the modified data
 * @throws Exception in case the processing goes wrong
 */
@Override
public Instances process(Instances instances) throws Exception {

  final int rowCount = instances.numInstances();
  final int attCount = instances.numAttributes();

  // output container, sized for all incoming instances
  Instances result = new Instances(getOutputFormat(), rowCount);

  for (int row = 0; row < rowCount; row++) {
    Instance source = instances.instance(row);

    double[] mapped = new double[attCount];
    for (int att = 0; att < attCount; att++) {
      boolean remap = m_AttToBeModified[att] && !source.isMissing(att);
      mapped[att] = remap
          ? m_Indicators[att][(int) source.value(att)]
          : source.value(att);
    }

    DenseInstance replacement = new DenseInstance(1.0, mapped);
    replacement.setDataset(result);

    // copy possible strings, relational values...
    copyValues(replacement, false, source.dataset(), getOutputFormat());

    result.add(replacement);
  }
  return result;
}
 
Example 9
Source File: PowerCepstrum.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Replaces every non-class attribute value of every instance with its
 * natural logarithm, writing the result back with setValue.
 * Math.log yields NaN for negative input and -Infinity for zero.
 *
 * @param out the data set whose values are transformed
 */
public void logDataSet(Instances out ){
    for (int row = 0; row < out.numInstances(); row++) {
        Instance current = out.instance(row);
        for (int att = 0; att < current.numAttributes(); att++) {
            if (att == current.classIndex()) {
                continue; // the class value is left untouched
            }
            double logged = Math.log(current.value(att));
            current.setValue(att, logged);
        }
    }
}
 
Example 10
Source File: StatUtils.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h', under random seed 'r'.
 * <br>
 * A copy of D is shuffled with 'r'; the first 60% (integer arithmetic) is used to train BR, the remainder to test it,
 * and the evaluation result is handed to LEAD2 together with the test split.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC -- on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r)  throws Exception {
	// work on a shuffled copy so that D itself is not reordered
	Instances shuffled = new Instances(D);
	shuffled.randomize(r);

	int total = shuffled.numInstances();
	Instances trainSet = new Instances(shuffled, 0, total*60/100);
	int trainCount = trainSet.numInstances();
	Instances testSet = new Instances(shuffled, trainCount, total - trainCount);

	BR baseline = new BR();
	baseline.setClassifier(h);
	Result evaluation = Evaluation.evaluateModel((MultiLabelClassifier)baseline, trainSet, testSet, "PCut1", "1");
	return LEAD2(testSet, evaluation);
}
 
Example 11
Source File: DD_DTW.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Demo entry point.
 * Option 1 trains the classifier on one dataset and prints its test accuracy;
 * option 2 calls recreateResultsTable. The option is fixed to 1 in the code.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args){

    // option 1: simple example of the classifier
    // option 2: recreate the results from the original published work
    int option = 1;

    try {
        switch (option) {
            case 1: {
                String dataName = "ItalyPowerDemand";
                String basePath = DATA_DIR+dataName+"/"+dataName;
                Instances train = DatasetLoading.loadDataNullable(basePath+"_TRAIN");
                Instances test = DatasetLoading.loadDataNullable(basePath+"_TEST");

                // create the classifier, using DTW as the distance function as an example
                DD_DTW nndw = new DD_DTW(DistanceType.DTW);

                // params a and b have not been explicitly set, so buildClassifier will cv to find them
                nndw.buildClassifier(train);

                int correct = 0;
                for (int i = 0; i < test.numInstances(); i++) {
                    boolean hit = nndw.classifyInstance(test.instance(i)) == test.instance(i).classValue();
                    if (hit) {
                        correct++;
                    }
                }
                String accuracy = new DecimalFormat("#.###").format((double)correct/test.numInstances()*100);
                System.out.println(dataName+":\t"+accuracy+"%");
                break;
            }
            case 2:
                recreateResultsTable();
                break;
            default:
                break;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 12
Source File: OnlineCachedShapeletDistance.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a fresh Stats object and stores a rescaled version of every
 * series of the given data in the data field.
 *
 * @param dataInst the instances whose series are rescaled and stored
 */
@Override
public void init(Instances dataInst) {
    stats = new Stats();

    // Normalise all time series for further processing
    final int seriesCount = dataInst.numInstances();
    data = new double[seriesCount][];

    int row = 0;
    while (row < seriesCount) {
        double[] raw = dataInst.get(row).toDoubleArray();
        data[row] = seriesRescaler.rescaleSeries(raw, true);
        row++;
    }
}
 
Example 13
Source File: StatUtils.java    From meka with GNU General Public License v3.0 5 votes vote down vote up
/**
 * GetP - Get a pairwise empirical joint-probability matrix P[][] from dataset D.
 * <br>
 * Only the diagonal and the upper triangle are filled in: entry [j][j] comes from p(D,j,1) and
 * entry [j][k] with k &gt; j from P(D,j,1,k,1). The lower triangle keeps its initial value 0.
 * <br>
 * NOTE multi-label only
 *
 * @param D the dataset; its class index is used as the number of labels L
 * @return an L x L matrix
 */
public static double[][] getP(Instances D) {
	int L = D.classIndex();
	double[][] P = new double[L][L];
	for(int j = 0; j < L; j++) {
		P[j][j] = p(D,j,1);
		for(int k = j+1; k < L; k++) {
			P[j][k] = P(D,j,1,k,1);
		}
	}
	return P;
}
 
Example 14
Source File: CitationKNN.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the list of the nearest k neighbors of the given bag.
 * If kNN exceeds the number of bags it is reduced to the number of bags
 * minus one. The bag itself (same object reference) is never a neighbor.
 * @param bag the bag to search for neighbors of
 * @param kNN the number of nearest neighbors
 * @param bags the data
 * @return a list of neighbors
 */
protected NeighborList findNeighbors(Instance bag, int kNN, Instances bags){
  if (kNN > bags.numInstances()) {
    kNN = bags.numInstances() - 1;
  }

  NeighborList nearest = new NeighborList(kNN);
  int compared = 0; // number of candidates examined so far
  for (int pos = 0; pos < bags.numInstances(); pos++) {
    Instance candidate = bags.instance(pos);
    if (bag == candidate) {
      continue; // for hold-one-out cross-validation
    }

    double dist = distanceSet(bag, candidate);
    if (m_NeighborListDebug) {
      System.out.println("distance(bag, " + pos + "): " + dist);
    }

    // insert while the list is still filling up, or when the candidate is
    // at least as close as the current last entry
    boolean stillFilling = nearest.isEmpty() || (compared < kNN);
    if (stillFilling || (dist <= nearest.mLast.mDistance)) {
      nearest.insertSorted(dist, candidate, pos);
    }
    compared++;
  }

  if (m_NeighborListDebug) {
    System.out.println("bag neighbors:");
    nearest.printReducedList();
  }

  return nearest;
}
 
Example 15
Source File: ClassifierSplitModel.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
  * Distributes the given instances over m_numSubsets subsets.
  * <p>
  * An instance for which whichSubset returns an index above -1 is added
  * to that subset only. Otherwise it is added to every subset whose entry
  * in weights(instance) is greater than 0 (per Utils.gr), and the added
  * instance gets the original weight multiplied by that entry.
  *
  * @param data the instances to split
  * @return one Instances object per subset
  * @exception Exception if something goes wrong
  */
 public final Instances [] split(Instances data) 
      throws Exception { 

   final int total = data.numInstances();

   // every subset starts empty, with capacity for all instances
   Instances[] subsets = new Instances[m_numSubsets];
   for (int s = 0; s < m_numSubsets; s++) {
     subsets[s] = new Instances(data, total);
   }

   for (int row = 0; row < total; row++) {
     Instance current = data.instance(row);
     // the weights are requested before the subset lookup
     double[] shares = weights(current);
     int target = whichSubset(current);

     if (target > -1) {
       subsets[target].add(current);
       continue;
     }

     for (int s = 0; s < m_numSubsets; s++) {
       if (Utils.gr(shares[s], 0)) {
         double scaled = shares[s] * current.weight();
         subsets[s].add(current);
         subsets[s].lastInstance().setWeight(scaled);
       }
     }
   }

   for (Instances subset : subsets) {
     subset.compactify();
   }

   return subsets;
 }
 
Example 16
Source File: PowerCepstrum.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the header of the output data: half as many numeric attributes as
 * the length reported by fftFilter.findLength, plus a copy of the nominal
 * class attribute (placed last) when the input has a class index set.
 *
 * @param inputFormat the format of the incoming data
 * @return an empty Instances object describing the output format
 * @throws Exception if the format cannot be determined
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {

    //Set up instances size and format.
    final int numOutputs = fftFilter.findLength(inputFormat) / 2;

    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int i = 0; i < numOutputs; i++) {
        attributes.add(new Attribute("PowerSpectrum_" + i));
    }

    final int classIdx = inputFormat.classIndex();
    final boolean hasClass = classIdx >= 0;

    if (hasClass) {	//Classification set, set class
        Attribute target = inputFormat.attribute(classIdx);

        //Collect the class values as a list
        int numLabels = target.numValues();
        ArrayList<String> labels = new ArrayList<>(numLabels);
        for (int v = 0; v < numLabels; v++) {
            labels.add(target.value(v));
        }
        attributes.add(new Attribute(target.name(), labels));
    }

    Instances result = new Instances("Cepstrum" + inputFormat.relationName(), attributes, inputFormat.numInstances());
    if (hasClass) {
        result.setClassIndex(result.numAttributes() - 1);
    }

    return result;
}
 
Example 17
Source File: NSR.java    From meka with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the multi-label dataset D into a single-target dataset whose class
 * attribute "CLASS" (at index 0) has one value per label combination that is
 * left after pruning with m_P.
 * <br>
 * An instance whose combination survived pruning gets that combination as class value.
 * Otherwise, if m_N &gt; 0, a copy of the instance is appended for each subset returned by
 * SuperLabelUtils.getTopNSubsets, weighted by 1 / (number of subsets).
 * Instances left with a missing class are deleted at the end.
 * <br>
 * Side effect: m_InstancesTemplate is set to the header of the converted dataset.
 *
 * @param D the dataset to convert
 * @param L the number of label attributes, which are removed from the copy of D
 * @return the converted dataset
 * @throws Exception if the conversion fails
 */
public Instances convertInstances(Instances D, int L) throws Exception {

		//Gather combinations
		HashMap<String,Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
		if(getDebug())
			System.out.println("Found "+distinctCombinations.size()+" unique combinations");

		//Prune combinations
		MLUtils.pruneCountHashMap(distinctCombinations,m_P);
		if(getDebug())
			System.out.println("Pruned to "+distinctCombinations.size()+" with P="+m_P);

		// Remove all class attributes
		Instances D_ = MLUtils.deleteAttributesAt(new Instances(D),MLUtils.gen_indices(L));
		// Add a new class attribute (parameterized list instead of a raw ArrayList)
		D_.insertAttributeAt(new Attribute("CLASS", new ArrayList<String>(distinctCombinations.keySet())),0); // create the class attribute
		D_.setClassIndex(0);

		//Add class values
		for (int i = 0; i < D.numInstances(); i++) {
			String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i),L));
			// add it
			if(distinctCombinations.containsKey(y)) 	//if its class value exists
				D_.instance(i).setClassValue(y);
			// decomp
			else if(m_N > 0) { 
				String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
				for (String s : d_subsets) {
					// every copy shares the weight of the original equally
					Instance copy = (Instance)(D_.instance(i)).copy();
					copy.setClassValue(s);
					copy.setWeight(1.0 / d_subsets.length);
					D_.add(copy);
				}
			}
		}

		// remove with missing class
		D_.deleteWithMissingClass();

		// keep the header of new dataset for classification
		m_InstancesTemplate = new Instances(D_, 0);

		if (getDebug())
			System.out.println(""+D_);

		return D_;
	}
 
Example 18
Source File: MIWrapper.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Computes the class distribution for a given exemplar (bag).
  * <p>
  * The bag is converted with m_ConvertToProp, the bag index attribute is
  * removed, and the per-instance distributions of m_Classifier are
  * combined according to m_Method (arithmetic mean, geometric mean or
  * maximum probability). The result is normalized; if it sums to 0 a
  * uniform distribution is returned instead.
  *
  * @param exmp the exemplar for which distribution is computed
  * @return the distribution
  * @throws Exception if the distribution can't be computed successfully
  */
 public double[] distributionForInstance(Instance exmp) 
   throws Exception {	

   // wrap the single exemplar in an otherwise empty dataset with the same header
   Instances testData = new Instances (exmp.dataset(),0);
   testData.add(exmp);

   // convert the bag (held in testData) into a single-instance dataset
   m_ConvertToProp.setWeightMethod(
       new SelectedTag(
         MultiInstanceToPropositional.WEIGHTMETHOD_ORIGINAL, 
         MultiInstanceToPropositional.TAGS_WEIGHTMETHOD));
   testData = Filter.useFilter(testData, m_ConvertToProp);
   testData.deleteAttributeAt(0); //remove the bag index attribute

   // Combine the per-instance distributions into one distribution for the bag
   double [] distribution = new double[m_NumClasses];
   // number of instances after the conversion; kept as double because it is used as divisor
   double nI = (double)testData.numInstances();
   // largest probability seen per class (only used by TESTMETHOD_MAXPROB)
   double [] maxPr = new double [m_NumClasses];

   for(int i=0; i<nI; i++){
     double[] dist = m_Classifier.distributionForInstance(testData.instance(i));
     for(int j=0; j<m_NumClasses; j++){

       switch(m_Method){
         case TESTMETHOD_ARITHMETIC:
           // running arithmetic mean
           distribution[j] += dist[j]/nI;
           break;
         case TESTMETHOD_GEOMETRIC:
           // Avoid 0/1 probability
           // (this clamps the value inside the array returned by the classifier)
           if(dist[j]<0.001)
             dist[j] = 0.001;
           else if(dist[j]>0.999)
             dist[j] = 0.999;

           // mean of the logs; exponentiated after the loop
           distribution[j] += Math.log(dist[j])/nI;
           break;
         case TESTMETHOD_MAXPROB:
           if (dist[j]>maxPr[j]) 
             maxPr[j] = dist[j];
           break;
       }
     }
   }

   // turn the mean log-probabilities back into probabilities
   if(m_Method == TESTMETHOD_GEOMETRIC)
     for(int j=0; j<m_NumClasses; j++)
       distribution[j] = Math.exp(distribution[j]);

   // only indices 0 and 1 are written here
   // NOTE(review): this looks like it assumes a two-class problem - confirm
   if(m_Method == TESTMETHOD_MAXPROB){   // for positive bag
     distribution[1] = maxPr[1];
     distribution[0] = 1 - distribution[1];
   }

   // fall back to a uniform distribution when everything is 0, otherwise normalize
   if (Utils.eq(Utils.sum(distribution), 0)) {
     for (int i = 0; i < distribution.length; i++)
distribution[i] = 1.0 / (double) distribution.length;
   }
   else {
     Utils.normalize(distribution);
   }
   
   return distribution;
 }
 
Example 19
Source File: BoxTidwell.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Iteratively estimates a power alpha for the attribute at position pos and
 * replaces the values of that attribute in data by value^alpha.
 * (Presumably the Box-Tidwell procedure, judging by the enclosing source file name - confirm.)
 * <p>
 * Each iteration fits two linear models: one with x^alpha in the row for the attribute,
 * giving coefficient b1, and one with an additional row x^alpha*log(x), giving coefficient b2.
 * alpha is then increased by b2/b1. The loop stops when |b2/b1| drops below 0.1, after
 * 10 iterations, or when |alpha| exceeds 10 (in which case alpha is reset to 1).
 * <p>
 * Math.log is applied to the raw attribute values, so non-positive values produce NaN or -Infinity.
 *
 * @param data the data to transform; it is modified in place and also returned
 * @param pos the index of the attribute to transform
 * @param resultPos the index in powers at which the final alpha is stored
 * @param powers output array receiving the estimated power
 * @return the same data object, with the attribute at pos transformed
 */
public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers)
	{

//1. Get values of the attribute of interest. 
		
//Confusingly, am working with attributes in rows not columns		
		double[] temp=data.attributeToDoubleArray(pos);
		double[] originalData= new double[temp.length];
		double[] logData= new double[temp.length];
		
		// keep a copy of the raw values and of their natural logs
		for(int i=0;i<temp.length;i++)
		{
			originalData[i]=temp[i];
			logData[i]=Math.log(temp[i]);	
		}
		// response values: the class attribute
		double[] y =data.attributeToDoubleArray(data.classIndex()); 
//		I'm not sure if this is a memory copy or a reference copy, so be safe
		// transposeSecond has one extra row (index numAttributes) for the x^alpha*log(x) term
		double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()];
		double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()];
		// row 0 of both matrices is the constant term (all ones)
		for(int j=0;j<data.numInstances();j++)
		{
			transposeFirst[0][j]=transposeSecond[0][j]=1;
		}
		// rows 1..numAttributes-1 hold attributes 0..numAttributes-2
		// NOTE: the chained assignment stores the SAME array object in both matrices,
		// so these rows are shared between transposeFirst and transposeSecond
		for(int i=1;i<data.numAttributes();i++)
		{
			transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1);
		}
//		Add one to pos cos of the ones
		pos=pos+1;
//		Second has an attribute at the end of data for transform
		int workingPos=data.numAttributes();
		LinearModel l1,l2;
		double alpha=1, b1,b2;
		// convergence threshold on |b2/b1|
		double min=0.1;
		boolean finished=false;
		int count=0;
		final int MaxIterations=10;
		//		Initialise alpha to 1
//Find Base SSE		
		//While not termination condition
		while(!finished)
		{
//			System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha);
			//Create new attributes
			//1. Calculate x^alpha
			for(int j=0;j<originalData.length;j++)
			{
				transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha);
			}

			//2. Fit y=b1+ .. b_pos	x^alpha (+ other terms)-> get b_pos
			l1=new LinearModel(transposeFirst,y);	
			l1.fitModel();
			
//Not necessary: 
//			l1.formTrainPredictions();
//			l1.findTrainStatistics();
//			System.out.println(l1+"\nVariance for L1 = "+l1.variance);
			
			b1=l1.paras[pos];
			//3. Fit y=b*1+ .. b*_pos	x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2
			//2. Calculate x^alpha*log(x)
			for(int j=0;j<originalData.length;j++)
				transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j];
			l2=new LinearModel(transposeSecond,y);	
			l2.fitModel();
			
//			Not necessary: 
//			l2.formTrainPredictions();
//			l2.findTrainStatistics();
//			System.out.println(l2+"\nVariance for L2 = "+l2.variance);
			
			b2=l2.paras[workingPos];
			
			alpha+=b2/b1;
			//Work out change term alpha = b*2/b1+alpha0
//			System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2);
			//Update termination criteria: stop if small change: check notes
			count++;
			if(Math.abs(b2/b1)<min || count>=MaxIterations)
				finished=true;
			// alpha ran away: fall back to the identity power and stop
			else if(Math.abs(alpha)>10)
			{
				alpha=1;
				finished=true;
			}
		}
//Fix original 
		// report the estimated power and write the transformed values back into data
		powers[resultPos]=alpha;
		// undo the +1 shift so pos indexes the attribute in data again
		pos=pos-1;
		Instance inst;
		for(int i=0;i<data.numInstances();i++)
		{
			inst=data.instance(i);
			inst.setValue(pos,Math.pow(originalData[i],alpha));
		}
		return data;
	}
 
Example 20
Source File: LexiconDistantSupervision.java    From AffectiveTweets with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds the output data from the instances whose text contains lexicon words
 * of exactly one polarity (only positive or only negative words).
 * Each kept instance receives 1 in the last output attribute when the matched
 * words are positive and 0 when they are negative. If removeMatchingWord is set,
 * the matched words are removed from the text before it is stored.
 *
 * @param instances the data to process
 * @return the output format filled with the labelled instances
 * @throws Exception if the processing fails
 */
@Override
protected Instances process(Instances instances) throws Exception {

	// set upper value for text index
	m_textIndex.setUpper(instances.numAttributes() - 1);

	Instances result = getOutputFormat();

	// reference to the content of the message, users index start from zero
	final int textPos = this.m_textIndex.getIndex();
	Attribute attrCont = instances.attribute(textPos);

	for (int row = 0; row < instances.numInstances(); row++) {
		Instance tweet = instances.instance(row);
		String content = tweet.stringValue(attrCont);

		ArrayList<String> posWords = new ArrayList<String>();
		ArrayList<String> negWords = new ArrayList<String>();

		// sort every token found in the lexicon by its polarity
		this.m_tokenizer.tokenize(content);
		while (this.m_tokenizer.hasMoreElements()) {
			String word = this.m_tokenizer.nextElement();
			if (!this.lex.getNomDict().containsKey(word)) {
				continue;
			}
			String value = this.lex.getNomDict().get(word).get(this.polarityAttName);
			if (value.equals(this.polarityAttPosValName)) {
				posWords.add(word);
			} else if (value.equals(this.polarityAttNegValName)) {
				negWords.add(word);
			}
		}

		// tweets with mixed polarity or without any match are skipped
		boolean onlyPositive = !posWords.isEmpty() && negWords.isEmpty();
		boolean onlyNegative = !negWords.isEmpty() && posWords.isEmpty();
		if (!onlyPositive && !onlyNegative) {
			continue;
		}

		// the matching words are removed from the content if flag is set
		if (this.removeMatchingWord) {
			content = content.replaceAll(patternFromList(onlyPositive ? posWords : negWords), "");
		}

		double[] values = new double[result.numAttributes()];

		// copy other attributes
		for (int n = 0; n < instances.numAttributes(); n++) {
			if (n != textPos) {
				values[n] = tweet.value(n);
			}
		}

		// add the content
		values[textPos] = attrCont.addStringValue(content);

		// label tweet according to the word's polarity
		values[result.numAttributes() - 1] = onlyPositive ? 1 : 0;

		Instance inst = new SparseInstance(1, values);
		inst.setDataset(result);

		// copy possible strings, relational values...
		copyValues(inst, false, instances, result);

		result.add(inst);
	}

	return result;
}