Java Code Examples for weka.core.Instance#stringValue()
The following examples show how to use
weka.core.Instance#stringValue() .
You can vote up the ones you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: ArffMetaDataLabelGenerator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/** * Default constructor which sets the metaData * * @param metaData Meta data with mapping: filename to label * @param path Directory path */ public ArffMetaDataLabelGenerator(Instances metaData, String path) { // If this path is absolute set it as basepath if (new File(path).isAbsolute()) { this.basePath = path; } else { String currentPath = Paths.get(System.getProperty("user.dir")).toString(); this.basePath = Paths.get(currentPath, path).toString(); } // Fill mapping from image path to fileLabelMap = new TreeMap<>(); paths = new ArrayList<>(); labels = new ArrayList<>(); for (Instance inst : metaData) { String fileName = inst.stringValue(0); String label = inst.stringValue(1); String absPath = Paths.get(this.basePath, fileName).toFile().getAbsolutePath(); paths.add(absPath); labels.add(label); fileLabelMap.put(absPath, label); } }
Example 2
Source File: ArffMetaDataLabelGenerator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/** * Default constructor which sets the metaData * * @param metaData Meta data with mapping: filename to label * @param path Directory path */ public ArffMetaDataLabelGenerator(Instances metaData, String path) { // If this path is absolute set it as basepath if (new File(path).isAbsolute()) { this.basePath = path; } else { String currentPath = Paths.get(System.getProperty("user.dir")).toString(); this.basePath = Paths.get(currentPath, path).toString(); } // Fill mapping from image path to fileLabelMap = new TreeMap<>(); paths = new ArrayList<>(); labels = new ArrayList<>(); for (Instance inst : metaData) { String fileName = inst.stringValue(0); String label = inst.stringValue(1); String absPath = Paths.get(this.basePath, fileName).toFile().getAbsolutePath(); paths.add(absPath); labels.add(label); fileLabelMap.put(absPath, label); } }
Example 3
Source File: ImageInstanceIteratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 6 votes |
/** * Test */ @Test public void testGetImageRecordReader() throws Exception { final Instances metaData = DatasetLoader.loadMiniMnistMeta(); Method method = ImageInstanceIterator.class.getDeclaredMethod("getImageRecordReader", Instances.class); method.setAccessible(true); this.idi.setTrainBatchSize(1); final ImageRecordReader irr = (ImageRecordReader) method.invoke(this.idi, metaData); Set<String> labels = new HashSet<>(); for (Instance inst : metaData) { String label = inst.stringValue(1); String itLabel = irr.next().get(1).toString(); Assert.assertEquals(label, itLabel); labels.add(label); } Assert.assertEquals(10, labels.size()); Assert.assertTrue(labels.containsAll(irr.getLabels())); Assert.assertTrue(irr.getLabels().containsAll(labels)); }
Example 4
Source File: HNode.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Update the class frequency distribution with the supplied instance * * @param inst the instance to update with */ public void updateDistribution(Instance inst) { if (inst.classIsMissing()) { return; } String classVal = inst.stringValue(inst.classAttribute()); WeightMass m = m_classDistribution.get(classVal); if (m == null) { m = new WeightMass(); m.m_weight = 1.0; m_classDistribution.put(classVal, m); } m.m_weight += inst.weight(); }
Example 5
Source File: StringToNominal.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Input an instance for filtering. The instance is processed and made * available for output immediately. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isOutputFormatDefined()) { Instance newInstance = (Instance) instance.copy(); // make sure that we get the right indexes set for the converted // string attributes when operating on a second batch of instances for (int i = 0; i < newInstance.numAttributes(); i++) { if (newInstance.attribute(i).isString() && !newInstance.isMissing(i) && m_AttIndices.isInRange(i)) { Attribute outAtt = getOutputFormat().attribute( newInstance.attribute(i).name()); String inVal = newInstance.stringValue(i); int outIndex = outAtt.indexOfValue(inVal); if (outIndex < 0) { newInstance.setMissing(i); } else { newInstance.setValue(i, outIndex); } } } push(newInstance); return true; } bufferInput(instance); return false; }
Example 6
Source File: CnnTextFilesEmbeddingInstanceIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
@Override public LabeledSentenceProvider getSentenceProvider(Instances data) { List<File> files = new ArrayList<>(); List<String> labels = new ArrayList<>(); final int clsIdx = data.classIndex(); for (Instance inst : data) { labels.add(String.valueOf(inst.value(clsIdx))); final String path = inst.stringValue(1 - clsIdx); final File file = Paths.get(textsLocation.getAbsolutePath(), path).toFile(); files.add(file); } return new FileLabeledSentenceProvider(files, labels, data.numClasses()); }
Example 7
Source File: RnnTextFilesEmbeddingInstanceIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
@Override public LabeledSentenceProvider getSentenceProvider(Instances data) { List<File> files = new ArrayList<>(); List<String> labels = new ArrayList<>(); final int clsIdx = data.classIndex(); for (Instance inst : data) { labels.add(String.valueOf(inst.value(clsIdx))); final String path = inst.stringValue(1 - clsIdx); final File file = Paths.get(textsLocation.getAbsolutePath(), path).toFile(); files.add(file); } return new FileLabeledSentenceProvider(files, labels, data.numClasses()); }
Example 8
Source File: ArffMetaDataLabelGeneratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/** * Test the getLabelForPath method. */ @Test public void testGetLabelForPath() { for (Instance inst : this.metaData) { String path = Paths.get(this.basePath, inst.stringValue(0)).toString(); String label = inst.stringValue(1); Assert.assertEquals(label, this.gen.getLabelForPath(path).toString()); Assert.assertEquals(label, this.gen.getLabelForPath(new File(path).toURI()).toString()); } }
Example 9
Source File: CnnTextFilesEmbeddingInstanceIterator.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
@Override public LabeledSentenceProvider getSentenceProvider(Instances data) { List<File> files = new ArrayList<>(); List<String> labels = new ArrayList<>(); final int clsIdx = data.classIndex(); for (Instance inst : data) { labels.add(String.valueOf(inst.value(clsIdx))); final String path = inst.stringValue(1 - clsIdx); final File file = Paths.get(textsLocation.getAbsolutePath(), path).toFile(); files.add(file); } return new FileLabeledSentenceProvider(files, labels, data.numClasses()); }
Example 10
Source File: ArffMetaDataLabelGeneratorTest.java From wekaDeeplearning4j with GNU General Public License v3.0 | 5 votes |
/** * Test the getLabelForPath method. */ @Test public void testGetLabelForPath() { for (Instance inst : this.metaData) { String path = Paths.get(this.basePath, inst.stringValue(0)).toString(); String label = inst.stringValue(1); Assert.assertEquals(label, this.gen.getLabelForPath(path).toString()); Assert.assertEquals(label, this.gen.getLabelForPath(new File(path).toURI()).toString()); } }
Example 11
Source File: MergeManyValues.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Input an instance for filtering. The instance is processed * and made available for output immediately. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input format has been set. */ public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } Attribute att = getInputFormat().attribute(m_AttIndex.getIndex()); FastVector newVals = new FastVector(att.numValues() - 1); for (int i = 0; i < att.numValues(); i++) { boolean inMergeList = false; if(att.value(i).equalsIgnoreCase(m_Label)){ //don't want to add this one. inMergeList = true; }else{ inMergeList = m_MergeRange.isInRange(i); } if(!inMergeList){ //add it. newVals.addElement(att.value(i)); } } newVals.addElement(m_Label); Attribute temp = new Attribute(att.name(), newVals); Instance newInstance = (Instance)instance.copy(); if (!newInstance.isMissing(m_AttIndex.getIndex())) { String currValue = newInstance.stringValue(m_AttIndex.getIndex()); if(temp.indexOfValue(currValue) == -1) newInstance.setValue(m_AttIndex.getIndex(), temp.indexOfValue(m_Label)); else newInstance.setValue(m_AttIndex.getIndex(), temp.indexOfValue(currValue)); } push(newInstance); return true; }
Example 12
Source File: NaiveDTW.java From tsml with GNU General Public License v3.0 | 4 votes |
@Override public void buildClassifier(Instances data) throws Exception { // Initialise training dataset Attribute classAttribute = data.classAttribute(); classedData = new HashMap<>(); classedDataIndices = new HashMap<>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>()); classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>()); } train = new SymbolicSequence[data.numInstances()]; classMap = new String[train.length]; maxLength = 0; for (int i = 0; i < train.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; maxLength = Math.max(maxLength, sequence.length); int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } train[i] = new SymbolicSequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(train[i]); classedDataIndices.get(clas).add(i); } warpingMatrix = new double[maxLength][maxLength]; U = new double[maxLength]; L = new double[maxLength]; maxWindow = Math.round(1 * maxLength); searchResults = new String[maxWindow+1]; nns = new int[maxWindow+1][train.length]; dist = new double[maxWindow+1][train.length]; // Start searching for the best window searchBestWarpingWindow(); // Saving best windows found System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore)); }
Example 13
Source File: WindowSearcher.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * This is similar to buildClassifier but it is an estimate.
 * This is used for large datasets where a full run takes very long.
 * The main purpose is to measure run time, not to actually search for the
 * best window; only the first {@code estimate} sequences are used as queries.
 * We use this to draw Figure 1 of our SDM18 paper.
 *
 * @param data the training instances
 * @param estimate number of training sequences to use as queries
 * @throws Exception if training fails
 */
public void buildClassifierEstimate(Instances data, int estimate) throws Exception {
  // Initialise training dataset
  Attribute classAttribute = data.classAttribute();

  classedData = new HashMap <>();
  classedDataIndices = new HashMap <>();
  for (int c = 0; c < data.numClasses(); c++) {
    classedData.put(data.classAttribute().value(c), new ArrayList <SymbolicSequence>());
    classedDataIndices.put(data.classAttribute().value(c), new ArrayList <Integer>());
  }

  train = new SymbolicSequence[data.numInstances()];
  classMap = new String[train.length];
  maxLength = 0;
  for (int i = 0; i < train.length; i++) {
    Instance sample = data.instance(i);
    // One series value per attribute, excluding the class attribute.
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    maxLength = Math.max(maxLength, sequence.length);
    // Skip over the class attribute if it comes first.
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
      sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    train[i] = new SymbolicSequence(sequence);
    String clas = sample.stringValue(classAttribute);
    classMap[i] = clas;
    classedData.get(clas).add(train[i]);
    classedDataIndices.get(clas).add(i);
  }

  // Working buffers for DTW and the LB_Keogh envelopes.
  warpingMatrix = new double[maxLength][maxLength];
  U = new double[maxLength];
  L = new double[maxLength];

  maxWindow = Math.round(1 * maxLength);
  searchResults = new String[maxWindow + 1];
  nns = new int[maxWindow + 1][train.length];
  dist = new double[maxWindow + 1][train.length];

  // Per-window error counts and scores.
  int[] nErrors = new int[maxWindow + 1];
  double[] score = new double[maxWindow + 1];
  double bestScore = Double.MAX_VALUE;
  double minD;
  bestWarpingWindow = -1;

  // Start searching for the best window.
  // Only loop through a given size of the dataset, but still search for NN
  // from the whole train set: for every query sequence we find its NN for
  // all windows, then at the end update the best score.
  for (int i = 0; i < estimate; i++) {
    SymbolicSequence testSeq = train[i];

    for (int w = 0; w <= maxWindow; w++) {
      // Fill the LB_Keogh upper/lower envelopes for this window size.
      testSeq.LB_KeoghFillUL(w, U, L);

      minD = Double.MAX_VALUE;
      String classValue = null;
      for (int j = 0; j < train.length; j++) {
        if (i == j)
          continue;
        SymbolicSequence trainSeq = train[j];

        // Only compute full DTW when the lower bound cannot prune.
        if (SymbolicSequence.LB_KeoghPreFilled(trainSeq, U, L) < minD) {
          double tmpD = testSeq.DTW(trainSeq, w, warpingMatrix);
          if (tmpD < minD) {
            minD = tmpD;
            classValue = classMap[j];
            nns[w][i] = j;
          }
          dist[w][j] = tmpD * tmpD;
        }
      }

      // Leave-one-out NN error for this window.
      if (classValue == null || !classValue.equals(classMap[i])) {
        nErrors[w]++;
      }
      score[w] = 1.0 * nErrors[w] / train.length;
    }
  }

  // Pick the window with the lowest error estimate.
  for (int w = 0; w < maxWindow; w++) {
    if (score[w] < bestScore) {
      bestScore = score[w];
      bestWarpingWindow = w;
    }
  }

  // Saving best windows found
  System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1 - bestScore));
}
Example 14
Source File: WindowSearcher.java From tsml with GNU General Public License v3.0 | 4 votes |
@Override public void buildClassifier(Instances data) throws Exception { // Initialise training dataset Attribute classAttribute = data.classAttribute(); classedData = new HashMap <>(); classedDataIndices = new HashMap <>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList <SymbolicSequence>()); classedDataIndices.put(data.classAttribute().value(c), new ArrayList <Integer>()); } train = new SymbolicSequence[data.numInstances()]; classMap = new String[train.length]; maxLength = 0; for (int i = 0; i < train.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; maxLength = Math.max(maxLength, sequence.length); int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } train[i] = new SymbolicSequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(train[i]); classedDataIndices.get(clas).add(i); } warpingMatrix = new double[maxLength][maxLength]; U = new double[maxLength]; L = new double[maxLength]; maxWindow = Math.round(1 * maxLength); nns = new int[maxWindow + 1][train.length]; dist = new double[maxWindow + 1][train.length]; // Start searching for the best window searchBestWarpingWindow(); // if we are doing length, find the best window in percentage if (bestWindowPercent < 0) bestWindowPercent = lengthToPercent(bestWarpingWindow); // Saving best windows found System.out.println("Windows found=" + bestWarpingWindow + "(" + bestWindowPercent + ") Best Acc=" + (1 - bestScore)); }
Example 15
Source File: UCRSuitePrunedDTW.java From tsml with GNU General Public License v3.0 | 4 votes |
@Override public void buildClassifier(Instances data) throws Exception { // Initialise training dataset Attribute classAttribute = data.classAttribute(); classedData = new HashMap<>(); classedDataIndices = new HashMap<>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>()); classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>()); } train = new SymbolicSequence[data.numInstances()]; classMap = new String[train.length]; maxLength = 0; for (int i = 0; i < train.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; maxLength = Math.max(maxLength, sequence.length); int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } train[i] = new SymbolicSequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(train[i]); classedDataIndices.get(clas).add(i); } warpingMatrix = new double[maxLength][maxLength]; U = new double[maxLength]; L = new double[maxLength]; U1 = new double[maxLength]; L1 = new double[maxLength]; maxWindow = Math.round(1 * maxLength); searchResults = new String[maxWindow+1]; nns = new int[maxWindow+1][train.length]; dist = new double[train.length][train.length]; cache = new SequenceStatsCache(train, maxWindow); lazyUCR = new LazyAssessNNEarlyAbandon[train.length][train.length]; for (int i = 0; i < train.length; i++) { for (int j = 0; j < train.length; j++) { lazyUCR[i][j] = new LazyAssessNNEarlyAbandon(cache); } } // Start searching for the best window searchBestWarpingWindow(); // Saving best windows found System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore)); }
Example 16
Source File: Trillion.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Builds the classifier: converts the training instances into symbolic
 * sequences grouped by class, then for every window size runs a
 * leave-one-out nearest-neighbour evaluation with the UCR/Trillion
 * lower-bound cascade (LB_Kim, LB_Keogh(Q,R), LB_Keogh(R,Q)) before
 * falling back to full DTW, keeping the window with the lowest error.
 *
 * @param data the training instances
 * @throws Exception if training fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
  // Initialise training dataset
  Attribute classAttribute = data.classAttribute();

  classedData = new HashMap<>();
  classedDataIndices = new HashMap<>();
  for (int c = 0; c < data.numClasses(); c++) {
    classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>());
    classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>());
  }

  train = new SymbolicSequence[data.numInstances()];
  classMap = new String[train.length];
  maxLength = 0;
  for (int i = 0; i < train.length; i++) {
    Instance sample = data.instance(i);
    // One series value per attribute, excluding the class attribute.
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    maxLength = Math.max(maxLength, sequence.length);
    // Skip over the class attribute if it comes first.
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
      sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    train[i] = new SymbolicSequence(sequence);
    String clas = sample.stringValue(classAttribute);
    classMap[i] = clas;
    classedData.get(clas).add(train[i]);
    classedDataIndices.get(clas).add(i);
  }

  U = new double[maxLength];
  L = new double[maxLength];
  maxWindow = Math.round(1 * maxLength);
  cache = new SequenceStatsCache(train, maxWindow);

  int nbErrors = 0;
  double score;
  bestScore = Double.MAX_VALUE;
  bestWarpingWindow=-1;

  // Start searching for the best window
  for (int w = 0; w <= maxWindow; w++) {
    currentW = w;
    nbErrors = 0;
    for (int i = 0; i < train.length; i++) {
      query = train[i];
      indexQuery = i;
      bestMinDist = Double.MAX_VALUE;
      String classValue = null;
      for (int j = 0; j < train.length; j++) {
        if (i==j)
          continue;
        reference = train[j];
        indexReference = j;

        // LB Kim
        doLBKim();
        if (minDist < bestMinDist) {
          minDist = 0;
          indexStoppedLB = 0;
          // LB Keogh(Q,R)
          doLBKeoghQR(bestMinDist);
          if (minDist < bestMinDist) {
            minDist = 0;
            indexStoppedLB = 0;
            // LB Keogh(R,Q)
            doLBKeoghRQ(bestMinDist);
            if (minDist < bestMinDist) {
              // All lower bounds failed to prune: compute full DTW.
              double res = query.DTW(reference, currentW);
              minDist = res * res;
              if(minDist < bestMinDist){
                bestMinDist = minDist;
                classValue = classMap[j];
              }
            }
          }
        }
      }
      // Leave-one-out NN error for this window.
      if (classValue == null || !classValue.equals(classMap[i])) {
        nbErrors++;
      }
    }
    score = 1.0 * nbErrors / train.length;
    if (score < bestScore) {
      bestScore = score;
      bestWarpingWindow = w;
    }
  }

  // Saving best windows found
  System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore));
}
Example 17
Source File: LbKeoghPrunedDTW.java From tsml with GNU General Public License v3.0 | 4 votes |
@Override public void buildClassifier(Instances data) throws Exception { // Initialise training dataset Attribute classAttribute = data.classAttribute(); classedData = new HashMap<>(); classedDataIndices = new HashMap<>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<SymbolicSequence>()); classedDataIndices.put(data.classAttribute().value(c), new ArrayList<Integer>()); } train = new SymbolicSequence[data.numInstances()]; classMap = new String[train.length]; maxLength = 0; for (int i = 0; i < train.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; maxLength = Math.max(maxLength, sequence.length); int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } train[i] = new SymbolicSequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(train[i]); classedDataIndices.get(clas).add(i); } warpingMatrix = new double[maxLength][maxLength]; U = new double[maxLength]; L = new double[maxLength]; maxWindow = Math.round(1 * maxLength); searchResults = new String[maxWindow+1]; nns = new int[maxWindow+1][train.length]; dist = new double[train.length][train.length]; // Start searching for the best window searchBestWarpingWindow(); // Saving best windows found System.out.println("Windows found=" + bestWarpingWindow + " Best Acc=" + (1-bestScore)); }
Example 18
Source File: MauiFilter.java From maui-2 with GNU General Public License v3.0 | 3 votes |
private void selectCandidates() throws Exception { if (debugMode) { System.err.println("--- Computing candidates..."); } allCandidates = new HashMap<Instance, HashMap<String, Candidate>>(); // Convert pending input instances into data for classifier int totalDocuments = getInputFormat().numInstances(); if(debugMode){ System.err.println("--- totalDocuments: " + totalDocuments); } for (int i = 0; i < totalDocuments; i++) { Instance current = getInputFormat().instance(i); String fileName = current.stringValue(fileNameAtt); int j = i+1; if (debugMode) { System.err.println("---- Processing document " + fileName + ", " + j + " out of " + totalDocuments + "..."); } // Get the phrases for the document String documentText = current.stringValue(documentAtt); HashMap<String, Candidate> candidateList = getCandidates(documentText); if (debugMode) { System.err.println("---- " + candidateList.size() + " candidates"); } allCandidates.put(current, candidateList); } }
Example 19
Source File: ArffLexiconWordLabeller.java From AffectiveTweets with GNU General Public License v3.0 | 2 votes |
/** * Processes all the dictionary files. * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances=new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); // checks all numeric and nominal attributes and discards the word attribute for(int i=0;i<lexInstances.numAttributes();i++){ if(i!=this.lexiconWordIndex.getIndex()){ if(lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal() ){ this.attributes.add(lexInstances.attribute(i)); } } } // Maps all words with their affective scores discarding missing values for(Instance inst:lexInstances){ if(inst.attribute(this.lexiconWordIndex.getIndex()).isString()){ String word=inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word=this.m_stemmer.stem(word); // map numeric scores if(!attributes.isEmpty()){ Map<Attribute,Double> wordVals=new HashMap<Attribute,Double>(); for(Attribute na:attributes){ wordVals.put(na,inst.value(na)); } this.attValMap.put(word, wordVals); } } } }
Example 20
Source File: DistantSupervisionSyntheticFilter.java From AffectiveTweets with GNU General Public License v3.0 | 2 votes |
/** * Maps tweets from the second batch into instances that are compatible with the ones generated * @param inp input Instances * @return convertes Instances */ public Instances mapTargetInstance(Instances inp){ // Creates instances with the same format Instances result=getOutputFormat(); Attribute contentAtt=inp.attribute(this.m_textIndex.getIndex()); for(Instance inst:inp){ String content=inst.stringValue(contentAtt); // tokenizes the content List<String> tokens = affective.core.Utils.tokenize(content, this.toLowerCase, this.standarizeUrlsUsers, this.reduceRepeatedLetters, this.m_tokenizer,this.m_stemmer,this.m_stopwordsHandler); // Identifies the distinct terms AbstractObjectSet<String> terms=new ObjectOpenHashSet<String>(); terms.addAll(tokens); Object2IntMap<String> docVec=this.calculateDocVec(tokens); double[] values = new double[result.numAttributes()]; values[result.classIndex()]= inst.classValue(); for(String att:docVec.keySet()){ if(this.m_Dictionary.containsKey(att)){ int attIndex=this.m_Dictionary.getInt(att); // we normalise the value by the number of documents values[attIndex]=docVec.getInt(att); } } Instance outInst=new SparseInstance(1, values); inst.setDataset(result); result.add(outInst); } return result; }