Java Code Examples for weka.core.Instance#stringValue()

The following examples show how to use weka.core.Instance#stringValue(). They are drawn from open source projects; the source file and project for each snippet are listed above it.
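Before the project examples, a minimal, self-contained sketch may help: Weka stores nominal and string attribute values internally as numeric indices, and stringValue() maps such an index back to its text form. The class name, attribute names, and values below are invented purely for illustration and are not taken from any of the projects listed here; both the int-index and Attribute overloads that appear in the examples are shown.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class StringValueSketch {

  public static void main(String[] args) {
    // A string attribute (null value list) and a nominal class attribute.
    Attribute fileAtt = new Attribute("filename", (ArrayList<String>) null);
    ArrayList<String> classLabels = new ArrayList<>();
    classLabels.add("cat");
    classLabels.add("dog");
    Attribute classAtt = new Attribute("class", classLabels);

    ArrayList<Attribute> atts = new ArrayList<>();
    atts.add(fileAtt);
    atts.add(classAtt);
    Instances data = new Instances("demo", atts, 1);
    data.setClassIndex(1);

    // Values are stored as doubles internally; setValue(int, String) records the
    // text and stores its index, stringValue() converts the index back to text.
    Instance inst = new DenseInstance(2);
    inst.setDataset(data);
    inst.setValue(0, "img_001.png");
    inst.setValue(1, "dog");
    data.add(inst);

    System.out.println(inst.stringValue(0));                      // img_001.png
    System.out.println(inst.stringValue(data.classAttribute()));  // dog
  }
}

Running the sketch prints img_001.png and dog.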
Example 1
Source File: ArffMetaDataLabelGenerator.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Constructor which sets the metaData
 *
 * @param metaData Meta data with mapping: filename to label
 * @param path Directory path
 */
public ArffMetaDataLabelGenerator(Instances metaData, String path) {

  // If this path is absolute set it as basepath
  if (new File(path).isAbsolute()) {
    this.basePath = path;
  } else {
    String currentPath = Paths.get(System.getProperty("user.dir")).toString();
    this.basePath = Paths.get(currentPath, path).toString();
  }

  // Fill mapping from image path to label
  fileLabelMap = new TreeMap<>();
  paths = new ArrayList<>();
  labels = new ArrayList<>();
  for (Instance inst : metaData) {
    String fileName = inst.stringValue(0);
    String label = inst.stringValue(1);
    String absPath = Paths.get(this.basePath, fileName).toFile().getAbsolutePath();
    paths.add(absPath);
    labels.add(label);
    fileLabelMap.put(absPath, label);
  }
}
 
Example 2
Source File: ImageInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Test
 */
@Test
public void testGetImageRecordReader() throws Exception {
  final Instances metaData = DatasetLoader.loadMiniMnistMeta();
  Method method =
      ImageInstanceIterator.class.getDeclaredMethod("getImageRecordReader", Instances.class);
  method.setAccessible(true);
  this.idi.setTrainBatchSize(1);
  final ImageRecordReader irr = (ImageRecordReader) method.invoke(this.idi, metaData);

  Set<String> labels = new HashSet<>();
  for (Instance inst : metaData) {
    String label = inst.stringValue(1);
    String itLabel = irr.next().get(1).toString();
    Assert.assertEquals(label, itLabel);
    labels.add(label);
  }
  Assert.assertEquals(10, labels.size());
  Assert.assertTrue(labels.containsAll(irr.getLabels()));
  Assert.assertTrue(irr.getLabels().containsAll(labels));
}
 
Example 3
Source File: HNode.java    From tsml with GNU General Public License v3.0
/**
 * Update the class frequency distribution with the supplied instance
 * 
 * @param inst the instance to update with
 */
public void updateDistribution(Instance inst) {
  if (inst.classIsMissing()) {
    return;
  }
  String classVal = inst.stringValue(inst.classAttribute());

  WeightMass m = m_classDistribution.get(classVal);
  if (m == null) {
    m = new WeightMass();
    m.m_weight = 1.0;

    m_classDistribution.put(classVal, m);
  }
  m.m_weight += inst.weight();
}
 
Example 4
Source File: StringToNominal.java    From tsml with GNU General Public License v3.0
/**
 * Input an instance for filtering. The instance is processed and made
 * available for output immediately.
 * 
 * @param instance the input instance.
 * @return true if the filtered instance may now be collected with output().
 * @throws IllegalStateException if no input structure has been defined.
 */
@Override
public boolean input(Instance instance) {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  if (isOutputFormatDefined()) {
    Instance newInstance = (Instance) instance.copy();

    // make sure that we get the right indexes set for the converted
    // string attributes when operating on a second batch of instances
    for (int i = 0; i < newInstance.numAttributes(); i++) {
      if (newInstance.attribute(i).isString() && !newInstance.isMissing(i)
          && m_AttIndices.isInRange(i)) {
        Attribute outAtt = getOutputFormat().attribute(
            newInstance.attribute(i).name());
        String inVal = newInstance.stringValue(i);
        int outIndex = outAtt.indexOfValue(inVal);
        if (outIndex < 0) {
          newInstance.setMissing(i);
        } else {
          newInstance.setValue(i, outIndex);
        }
      }
    }
    push(newInstance);
    return true;
  }

  bufferInput(instance);
  return false;
}
 
Example 5
Source File: CnnTextFilesEmbeddingInstanceIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0
@Override
public LabeledSentenceProvider getSentenceProvider(Instances data) {
  List<File> files = new ArrayList<>();
  List<String> labels = new ArrayList<>();
  final int clsIdx = data.classIndex();
  for (Instance inst : data) {
    labels.add(String.valueOf(inst.value(clsIdx)));
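    // with exactly two attributes (text-file path and class), 1 - clsIdx is the index of the path attribute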
    final String path = inst.stringValue(1 - clsIdx);
    final File file = Paths.get(textsLocation.getAbsolutePath(), path).toFile();
    files.add(file);
  }

  return new FileLabeledSentenceProvider(files, labels, data.numClasses());
}
 
Example 6
Source File: RnnTextFilesEmbeddingInstanceIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0
@Override
public LabeledSentenceProvider getSentenceProvider(Instances data) {
  List<File> files = new ArrayList<>();
  List<String> labels = new ArrayList<>();
  final int clsIdx = data.classIndex();
  for (Instance inst : data) {
    labels.add(String.valueOf(inst.value(clsIdx)));
    final String path = inst.stringValue(1 - clsIdx);
    final File file = Paths.get(textsLocation.getAbsolutePath(), path).toFile();
    files.add(file);
  }

  return new FileLabeledSentenceProvider(files, labels, data.numClasses());
}
 
Example 7
Source File: ArffMetaDataLabelGeneratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0
/**
 * Test the getLabelForPath method.
 */
@Test
public void testGetLabelForPath() {
  for (Instance inst : this.metaData) {
    String path = Paths.get(this.basePath, inst.stringValue(0)).toString();
    String label = inst.stringValue(1);

    Assert.assertEquals(label, this.gen.getLabelForPath(path).toString());
    Assert.assertEquals(label, this.gen.getLabelForPath(new File(path).toURI()).toString());
  }
}
 
Example 8
Source File: MergeManyValues.java    From tsml with GNU General Public License v3.0
/**
  * Input an instance for filtering. The instance is processed
  * and made available for output immediately.
  *
  * @param instance 	the input instance
  * @return 		true if the filtered instance may now be
  * 			collected with output().
  * @throws IllegalStateException	if no input format has been set.
  */
 public boolean input(Instance instance) {
   if (getInputFormat() == null) {
     throw new IllegalStateException("No input instance format defined");
   }
   if (m_NewBatch) {
     resetQueue();
     m_NewBatch = false;
   }

   Attribute att = getInputFormat().attribute(m_AttIndex.getIndex());
   FastVector newVals = new FastVector(att.numValues() - 1);
   for (int i = 0; i < att.numValues(); i++) {
     boolean inMergeList = false;

     if (att.value(i).equalsIgnoreCase(m_Label)) {
       // don't want to add this one
       inMergeList = true;
     } else {
       inMergeList = m_MergeRange.isInRange(i);
     }

     if (!inMergeList) {
       // add it
       newVals.addElement(att.value(i));
     }
   }
   newVals.addElement(m_Label);

   Attribute temp = new Attribute(att.name(), newVals);

   Instance newInstance = (Instance) instance.copy();
   if (!newInstance.isMissing(m_AttIndex.getIndex())) {
     String currValue = newInstance.stringValue(m_AttIndex.getIndex());
     if (temp.indexOfValue(currValue) == -1) {
       newInstance.setValue(m_AttIndex.getIndex(), temp.indexOfValue(m_Label));
     } else {
       newInstance.setValue(m_AttIndex.getIndex(), temp.indexOfValue(currValue));
     }
   }

   push(newInstance);
   return true;
 }
 
Example 9
Source File: NaiveDTW.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
   	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();
	
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}
		train[i] = new SymbolicSequence(sequence);
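		// stringValue(Attribute) returns the class label as text rather than its internal numeric index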
		String clas = sample.stringValue(classAttribute);
		classMap[i] = clas;
		classedData.get(clas).add(train[i]);
		classedDataIndices.get(clas).add(i);
	}
	
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	
	maxWindow = Math.round(1 * maxLength);
	searchResults = new String[maxWindow+1];
	nns = new int[maxWindow+1][train.length];
	dist = new double[maxWindow+1][train.length];
	
	// Start searching for the best window
	searchBestWarpingWindow();
	
	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 10
Source File: WindowSearcher.java    From tsml with GNU General Public License v3.0
/**
 * This is similar to buildClassifier, but it only produces an estimate.
 * It is used for large datasets, where a full search takes very long to run.
 * The main purpose is to measure the run time rather than actually search for the best window.
 * We use this to draw Figure 1 of our SDM18 paper.
 *
 * @param data the training instances
 * @param estimate the number of training sequences to evaluate
 * @throws Exception
 */
public void buildClassifierEstimate(Instances data, int estimate) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    classedData = new HashMap <>();
    classedDataIndices = new HashMap <>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList <SymbolicSequence>());
        classedDataIndices.put(data.classAttribute().value(c), new ArrayList <Integer>());
    }

    train = new SymbolicSequence[data.numInstances()];
    classMap = new String[train.length];
    maxLength = 0;
    for (int i = 0; i < train.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, sequence.length);
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        train[i] = new SymbolicSequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(train[i]);
        classedDataIndices.get(clas).add(i);
    }

    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    maxWindow = Math.round(1 * maxLength);
    searchResults = new String[maxWindow + 1];
    nns = new int[maxWindow + 1][train.length];
    dist = new double[maxWindow + 1][train.length];

    int[] nErrors = new int[maxWindow + 1];
    double[] score = new double[maxWindow + 1];
    double bestScore = Double.MAX_VALUE;
    double minD;
    bestWarpingWindow = -1;

    // Start searching for the best window.
    // Only loop through a given size of the dataset, but still search for NN from the whole train
    // for every sequence in train, we find NN for all window
    // then in the end, update the best score
    for (int i = 0; i < estimate; i++) {
        SymbolicSequence testSeq = train[i];

        for (int w = 0; w <= maxWindow; w++) {
            testSeq.LB_KeoghFillUL(w, U, L);

            minD = Double.MAX_VALUE;
            String classValue = null;
            for (int j = 0; j < train.length; j++) {
                if (i == j)
                    continue;
                SymbolicSequence trainSeq = train[j];
                if (SymbolicSequence.LB_KeoghPreFilled(trainSeq, U, L) < minD) {
                    double tmpD = testSeq.DTW(trainSeq, w, warpingMatrix);
                    if (tmpD < minD) {
                        minD = tmpD;
                        classValue = classMap[j];
                        nns[w][i] = j;
                    }
                    dist[w][j] = tmpD * tmpD;
                }
            }
            if (classValue == null || !classValue.equals(classMap[i])) {
                nErrors[w]++;
            }
            score[w] = 1.0 * nErrors[w] / train.length;
        }
    }

    for (int w = 0; w < maxWindow; w++) {
        if (score[w] < bestScore) {
            bestScore = score[w];
            bestWarpingWindow = w;
        }
    }

    // Saving best windows found
    System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1 - bestScore));
}
 
Example 11
Source File: WindowSearcher.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
    // Initialise training dataset
    Attribute classAttribute = data.classAttribute();

    classedData = new HashMap <>();
    classedDataIndices = new HashMap <>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList <SymbolicSequence>());
        classedDataIndices.put(data.classAttribute().value(c), new ArrayList <Integer>());
    }

    train = new SymbolicSequence[data.numInstances()];
    classMap = new String[train.length];
    maxLength = 0;
    for (int i = 0; i < train.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        maxLength = Math.max(maxLength, sequence.length);
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        train[i] = new SymbolicSequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(train[i]);
        classedDataIndices.get(clas).add(i);
    }

    warpingMatrix = new double[maxLength][maxLength];
    U = new double[maxLength];
    L = new double[maxLength];

    maxWindow = Math.round(1 * maxLength);
    nns = new int[maxWindow + 1][train.length];
    dist = new double[maxWindow + 1][train.length];

    // Start searching for the best window
    searchBestWarpingWindow();

    // if we are doing length, find the best window in percentage
    if (bestWindowPercent < 0)
        bestWindowPercent = lengthToPercent(bestWarpingWindow);

    // Saving best windows found
    System.out.println("Windows found=" + bestWarpingWindow +
            "(" + bestWindowPercent + ") Best Acc=" + (1 - bestScore));
}
 
Example 12
Source File: UCRSuitePrunedDTW.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
   	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();
	
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}
		train[i] = new SymbolicSequence(sequence);
		String clas = sample.stringValue(classAttribute);
		classMap[i] = clas;
		classedData.get(clas).add(train[i]);
		classedDataIndices.get(clas).add(i);
	}
			
	warpingMatrix = new double[maxLength][maxLength];
	U = new double[maxLength];
	L = new double[maxLength];
	U1 = new double[maxLength];
	L1 = new double[maxLength];
	
	maxWindow = Math.round(1 * maxLength);
	searchResults = new String[maxWindow+1];
	nns = new int[maxWindow+1][train.length];
	dist = new double[train.length][train.length];

	cache = new SequenceStatsCache(train, maxWindow);
	
	lazyUCR = new LazyAssessNNEarlyAbandon[train.length][train.length];
	
	for (int i = 0; i < train.length; i++) {
		for (int j  = 0; j < train.length; j++) {
			lazyUCR[i][j] = new LazyAssessNNEarlyAbandon(cache);
		}
	}
	
	// Start searching for the best window
	searchBestWarpingWindow();

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 13
Source File: Trillion.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();

	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}
		train[i] = new SymbolicSequence(sequence);
		String clas = sample.stringValue(classAttribute);
		classMap[i] = clas;
		classedData.get(clas).add(train[i]);
		classedDataIndices.get(clas).add(i);
	}

	U = new double[maxLength];
	L = new double[maxLength];
	maxWindow = Math.round(1 * maxLength);
	cache = new SequenceStatsCache(train, maxWindow);

	int nbErrors = 0;
	double score;
	bestScore = Double.MAX_VALUE;
	bestWarpingWindow=-1;

	// Start searching for the best window
	for (int w = 0; w <= maxWindow; w++) {
		currentW = w;
		nbErrors = 0;
		for (int i = 0; i < train.length; i++) {
			query = train[i];
			indexQuery = i;
			bestMinDist = Double.MAX_VALUE;
			String classValue = null;
			for (int j = 0; j < train.length; j++) {
				if (i==j)
					continue;
				reference = train[j];
				indexReference = j;

				// LB Kim
				doLBKim();
				if (minDist < bestMinDist) {
					minDist = 0;
					indexStoppedLB = 0;
					// LB Keogh(Q,R)
					doLBKeoghQR(bestMinDist);
					if (minDist < bestMinDist) {
						minDist = 0;
						indexStoppedLB = 0;
						// LB Keogh(R,Q)
						doLBKeoghRQ(bestMinDist);
						if (minDist < bestMinDist) {
							// DTW
							double res = query.DTW(reference, currentW);
							minDist = res * res;
							if(minDist < bestMinDist){
								bestMinDist = minDist;
								classValue = classMap[j];
							}
						}
					}
				}
			}
			if (classValue == null || !classValue.equals(classMap[i])) {
				nbErrors++;
			}
		}
		score = 1.0 * nbErrors / train.length;
		if (score < bestScore) {
			bestScore = score;
			bestWarpingWindow = w;
		}
	}

	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 14
Source File: LbKeoghPrunedDTW.java    From tsml with GNU General Public License v3.0
@Override
public void buildClassifier(Instances data) throws Exception {
   	// Initialise training dataset
	Attribute classAttribute = data.classAttribute();
	
	classedData = new HashMap<>();
	classedDataIndices = new HashMap<>();
	for (int c = 0; c < data.numClasses(); c++) {
		classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
		classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
	}

	train = new SymbolicSequence[data.numInstances()];
	classMap = new String[train.length];
	maxLength = 0;
	for (int i = 0; i < train.length; i++) {
		Instance sample = data.instance(i);
		MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
		maxLength = Math.max(maxLength, sequence.length);
		int shift = (sample.classIndex() == 0) ? 1 : 0;
		for (int t = 0; t < sequence.length; t++) {
			sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
		}
		train[i] = new SymbolicSequence(sequence);
		String clas = sample.stringValue(classAttribute);
		classMap[i] = clas;
		classedData.get(clas).add(train[i]);
		classedDataIndices.get(clas).add(i);
	}
	warpingMatrix = new double[maxLength][maxLength];	
	U = new double[maxLength];
	L = new double[maxLength];
	
	maxWindow = Math.round(1 * maxLength);
	searchResults = new String[maxWindow+1];
	nns = new int[maxWindow+1][train.length];
	dist = new double[train.length][train.length];
	
	// Start searching for the best window
	searchBestWarpingWindow();
	
	// Saving best windows found
	System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
 
Example 15
Source File: MauiFilter.java    From maui-2 with GNU General Public License v3.0
private void  selectCandidates() throws Exception {

		if (debugMode) {
			System.err.println("--- Computing candidates...");
			
		}

		allCandidates = new HashMap<Instance, HashMap<String, Candidate>>();

		// Convert pending input instances into data for classifier
		int totalDocuments = getInputFormat().numInstances();

		if(debugMode){
		    System.err.println("--- totalDocuments: " + totalDocuments);
		}

		for (int i = 0; i < totalDocuments; i++) {

			Instance current = getInputFormat().instance(i);
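			// fileNameAtt and documentAtt index the string attributes holding the file name and the full document text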

			String fileName = current.stringValue(fileNameAtt);
			int j = i+1;
			if (debugMode) {
				System.err.println("---- Processing document " + fileName
						+ ", " + j + " out of " + totalDocuments + "...");
			}

			// Get the phrases for the document
			String documentText = current.stringValue(documentAtt);

			HashMap<String, Candidate> candidateList = getCandidates(documentText);

			if (debugMode) {
			    System.err.println("---- " + candidateList.size() + " candidates");
			}
			allCandidates.put(current, candidateList);
			
		}

	}
 
Example 16
Source File: ArffLexiconWordLabeller.java    From AffectiveTweets with GNU General Public License v3.0
/**
 * Processes  all the dictionary files.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
	BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
	Instances lexInstances = new Instances(reader);
	reader.close();

	// set upper value for word index
	lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

	// keeps all numeric and nominal attributes and discards the word attribute
	for (int i = 0; i < lexInstances.numAttributes(); i++) {
		if (i != this.lexiconWordIndex.getIndex()) {
			if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) {
				this.attributes.add(lexInstances.attribute(i));
			}
		}
	}

	// Maps all words to their affective scores, discarding missing values
	for (Instance inst : lexInstances) {
		if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
			String word = inst.stringValue(this.lexiconWordIndex.getIndex());
			// stems the word
			word = this.m_stemmer.stem(word);

			// map numeric scores
			if (!attributes.isEmpty()) {
				Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
				for (Attribute na : attributes) {
					wordVals.put(na, inst.value(na));
				}
				this.attValMap.put(word, wordVals);
			}
		}
	}
}
 
Example 17
Source File: DistantSupervisionSyntheticFilter.java    From AffectiveTweets with GNU General Public License v3.0
/**
 * Maps tweets from the second batch into instances that are compatible with the ones generated from the first batch.
 * @param inp input Instances
 * @return converted Instances
 */
public Instances mapTargetInstance(Instances inp) {

	// Creates instances with the same format
	Instances result = getOutputFormat();

	Attribute contentAtt = inp.attribute(this.m_textIndex.getIndex());

	for (Instance inst : inp) {
		String content = inst.stringValue(contentAtt);

		// tokenizes the content
		List<String> tokens = affective.core.Utils.tokenize(content, this.toLowerCase, this.standarizeUrlsUsers, this.reduceRepeatedLetters, this.m_tokenizer, this.m_stemmer, this.m_stopwordsHandler);

		// Identifies the distinct terms
		AbstractObjectSet<String> terms = new ObjectOpenHashSet<String>();
		terms.addAll(tokens);

		Object2IntMap<String> docVec = this.calculateDocVec(tokens);

		double[] values = new double[result.numAttributes()];

		values[result.classIndex()] = inst.classValue();

		for (String att : docVec.keySet()) {
			if (this.m_Dictionary.containsKey(att)) {
				int attIndex = this.m_Dictionary.getInt(att);
				// the value is the count of the term in this document
				values[attIndex] = docVec.getInt(att);
			}
		}

		Instance outInst = new SparseInstance(1, values);

		outInst.setDataset(result);

		result.add(outInst);
	}

	return result;
}