Java Code Examples for weka.core.Instance#replaceMissingValues()
The following examples show how to use
weka.core.Instance#replaceMissingValues().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Model.java From AIDR with GNU Affero General Public License v3.0 | 6 votes |
Instance wordsToInstance(WordSet words) { Instance item = new SparseInstance( attributeSpecification.numAttributes()); item.setDataset(attributeSpecification); // Words for (String word : words.getWords()) { Attribute attribute = attributeSpecification.attribute(word); if (attribute != null) { item.setValue(attribute, 1); } } item.replaceMissingValues(missingVal); return item; }
Example 2
Source File: NNge.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Updates the classifier with one new instance.
 *
 * Instances whose class value is missing are ignored. Otherwise missing values
 * are replaced from {@code m_MissingVector}, the instance is appended to
 * {@code m_Train}, and the min/max and mutual-information data are refreshed.
 * When {@code nearestExemplar} returns no exemplar, a new one is created from
 * this instance; otherwise the nearest exemplar is adjusted and the instance
 * is generalised.
 *
 * @param instance the new instance
 * @throws Exception if the update fails
 */
private void update(Instance instance) throws Exception {

    if (instance.classIsMissing()) {
        return;
    }

    instance.replaceMissingValues(m_MissingVector);
    m_Train.add(instance);

    // Refresh the minimum and maximum of every attribute.
    updateMinMax(instance);

    // Refresh the mutual information data.
    updateMI(instance);

    Exemplar closest = nearestExemplar(instance);

    if (closest != null) {
        adjust(instance, closest);
        generalise(instance);
        return;
    }

    // No exemplar was found: start a new one from this instance.
    Exemplar seed = new Exemplar(this, m_Train, 10, instance.classValue());
    seed.generalise(instance);
    initWeight(seed);
    addExemplar(seed);
}
Example 3
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link Instance} representation of an {@link AnthURL} so that it
 * can be passed to the {@link Classifier}.
 *
 * The instance carries the class label, the parent/sibling flags, the host of
 * the URL and one attribute per hashed token of the path, query and fragment.
 *
 * @param url
 *            the {@link AnthURL} to convert.
 * @return the resulting {@link Instance}, or {@code null} when {@code url} is
 *         {@code null}.
 */
private static Instance convert(AnthURL url) {
    if (url == null) {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }

    Instance result = new SparseInstance(dimension);
    result.replaceMissingValues(replaceMissingValues);
    result.setDataset(instances);

    result.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
    result.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
    result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    result.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
    // NOTE(review): this repeats the "nonsempar" assignment above; possibly a
    // different attribute was intended here - confirm against the schema.
    result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    result.setValue(attributesIndex.get("domain"), url.uri.getHost());

    // Collect the distinct tokens of path, query and fragment.
    Set<String> distinctTokens = new HashSet<String>();
    distinctTokens.addAll(tokenizer(url.uri.getPath()));
    distinctTokens.addAll(tokenizer(url.uri.getQuery()));
    distinctTokens.addAll(tokenizer(url.uri.getFragment()));

    // Each token switches on the attribute its hash maps to.
    for (String token : distinctTokens) {
        result.setValue(
                attributesIndex.get(getAttributeNameOfHash(getHash(token, hashTrickSize))), 1);
    }
    return result;
}
Example 4
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link Instance} representation of an {@link AnthURL} so that it
 * can be passed to the {@link Classifier}.
 *
 * The instance carries the class label, the parent/sibling flags, the host of
 * the URL and one attribute per hashed token of the path, query and fragment.
 *
 * @param url
 *            the {@link AnthURL} to convert.
 * @return the resulting {@link Instance}, or {@code null} when {@code url} is
 *         {@code null}.
 */
private static Instance convert(AnthURL url) {
    if (url == null) {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }

    Instance result = new SparseInstance(dimension);
    result.replaceMissingValues(replaceMissingValues);
    result.setDataset(instances);

    result.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
    result.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
    result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    result.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
    // NOTE(review): this repeats the "nonsempar" assignment above; possibly a
    // different attribute was intended here - confirm against the schema.
    result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
    result.setValue(attributesIndex.get("domain"), url.uri.getHost());

    // Collect the distinct tokens of path, query and fragment.
    Set<String> distinctTokens = new HashSet<String>();
    distinctTokens.addAll(tokenizer(url.uri.getPath()));
    distinctTokens.addAll(tokenizer(url.uri.getQuery()));
    distinctTokens.addAll(tokenizer(url.uri.getFragment()));

    // Each token switches on the attribute its hash maps to.
    for (String token : distinctTokens) {
        result.setValue(
                attributesIndex.get(getAttributeNameOfHash(getHash(token, hashTrickSize))), 1);
    }
    return result;
}
Example 5
Source File: ReduceDimensionFilter.java From anthelion with Apache License 2.0 | 4 votes |
/** * Returns the next instances based on the configuration of this class. */ public Instance nextInstance() { Instance inst = this.inputStream.nextInstance(); Instance newInst = new SparseInstance(hashSize + notHashableAttributes.size()); newInst.setDataset(newInstances); newInst.replaceMissingValues(replacementArray); if (newInstances.size() > 0) newInstances.remove(0); // newInstances.add(0, newInst); for (int i = 0; i < inst.numAttributes(); i++) { if (inst.classIndex() == i) { newInst.setValue( attributesIndex.get(inst.classAttribute().name()), inst.classValue()); } else { // check if attributes should be manipulated if (ignoreAttributes.contains(i)) { inst.setValue(i, 0); } if (makeBinaryAttributes.contains(i) && inst.value(i) > 0) { inst.setValue(i, 1); } // check what should be done with the attributes. if (notHashableAttributes.contains(i)) { newInst.setValue( attributesIndex.get(inst.attribute(i).name()), inst.value(i)); } else { // calculate the hash of the attribute name which is // included in // the vector and set it to 1 if (inst.value(i) > 0) { newInst.setValue(attributesIndex .get(getAttributeNameOfHash(getHash(inst .attribute(i).name(), hashSize))), 1); } } } } // System.out.println(newInst.toString()); return newInst; }
Example 6
Source File: AnthOnlineClassifier.java From anthelion with Apache License 2.0 | 4 votes |
/**
 * Builds the {@link Instance} representation of an {@link AnthURL} so that it
 * can be passed to the {@link Classifier}.
 *
 * A {@link NullPointerException} raised while filling the instance is caught;
 * in that case a message is printed and {@code null} is returned.
 *
 * @param url
 *            the {@link AnthURL} to convert.
 * @return the resulting {@link Instance}, or {@code null} when {@code url} is
 *         {@code null} or the conversion hit a {@link NullPointerException}.
 */
private Instance convert(AnthURL url) {
    if (url == null) {
        System.out
                .println("Input AnthURL for convertion into instance was null.");
        return null;
    }

    try {
        Instance result = new SparseInstance(dimension);
        result.replaceMissingValues(replaceMissingValues);
        result.setDataset(instances);

        result.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        result.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        result.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        // NOTE(review): this repeats the "nonsempar" assignment above; possibly a
        // different attribute was intended here - confirm against the schema.
        result.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        result.setValue(attributesIndex.get("domain"), url.uri.getHost());

        // Collect the distinct tokens of path, query and fragment.
        Set<String> distinctTokens = new HashSet<String>();
        distinctTokens.addAll(tokenizer(url.uri.getPath()));
        distinctTokens.addAll(tokenizer(url.uri.getQuery()));
        distinctTokens.addAll(tokenizer(url.uri.getFragment()));

        // Each token switches on the attribute its hash maps to.
        for (String token : distinctTokens) {
            result.setValue(
                    attributesIndex.get(getAttributeNameOfHash(getHash(token, hashTrickSize))),
                    1);
        }
        return result;
    } catch (NullPointerException npe) {
        System.out.println(
                "Could not convert AnthURL into Instance for classification of URL: "
                        + (url != null
                                ? (url.uri != null ? url.uri.toString() : "URI null")
                                : "AnthURL null."));
        return null;
    }
}
Example 7
Source File: DataStore.java From AIDR with GNU Affero General Public License v3.0 | 4 votes |
static Instances createFormattedInstances(Instances headerSet, ArrayList<String[]> wordVectors, ArrayList<String> labels) throws Exception { if (wordVectors.size() != labels.size()) { throw new Exception(); } // Build a dictionary based on words in the documents, and transform // documents into word vectors HashSet<String> uniqueWords = new HashSet<String>(); for (String[] words : wordVectors) { uniqueWords.addAll(Arrays.asList(words)); } // Create the dataset Instances instances = new Instances(headerSet, wordVectors.size()); double[] missingVal = new double[headerSet.numAttributes()]; // Set class index instances.setClassIndex(headerSet.numAttributes() - 1); Attribute classAttribute = instances.classAttribute(); // Get valid class labels HashSet<String> classValues = new HashSet<String>(); Enumeration<?> classEnum = classAttribute.enumerateValues(); while (classEnum.hasMoreElements()) { classValues.add((String) classEnum.nextElement()); } // Add each document as an instance for (int i = 0; i < wordVectors.size(); i++) { if (!classValues.contains(labels.get(i))) { logger.error("New class label found in evaluation set. Discarding value."); continue; /* * TODO: Handle unseen labels in a better way, as this will * over-estimate classification performance. Adding new values * to class attributes requires recreation of the header and * copying of all data to a new Instances. See: * http://comments.gmane.org/gmane.comp.ai.weka/7806 */ } Instance item = new DenseInstance(instances.numAttributes()); item.setDataset(instances); // Words for (String word : wordVectors.get(i)) { Attribute attribute = instances.attribute(word); if (attribute != null) { item.setValue(attribute, 1); } } item.setValue(classAttribute, labels.get(i)); item.replaceMissingValues(missingVal); instances.add(item); } return instances; }