it.unimi.dsi.fastutil.objects.ObjectOpenHashSet Java Examples

The following examples show how to use it.unimi.dsi.fastutil.objects.ObjectOpenHashSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MultipleBlendingRetriever.java    From samantha with MIT License 6 votes vote down vote up
/**
 * Runs every configured retriever in order, expands each result list and blends the entities
 * into one list in which each composed item key appears at most once.
 *
 * <p>When {@code maxHits} is set, the method returns as soon as that many distinct entities
 * have been collected.
 *
 * @param requestContext the request context handed to every retriever and to the expanders
 * @return the blended entities; the reported hit count is {@code maxHits} on early exit,
 *         otherwise the number of entities collected
 */
public RetrievedResult retrieve(RequestContext requestContext) {
    ObjectSet<String> items = new ObjectOpenHashSet<>();
    // maxHits is nullable (see the check in the loop); only use it as a capacity hint when it is
    // set, otherwise unboxing it would throw a NullPointerException.
    List<ObjectNode> entities = (maxHits != null)
            ? new ArrayList<ObjectNode>(maxHits)
            : new ArrayList<ObjectNode>();
    for (Retriever retriever : retrievers) {
        long start = System.currentTimeMillis();
        RetrievedResult results = retriever.retrieve(requestContext);
        Logger.debug("{} time: {}", retriever, System.currentTimeMillis() - start);
        List<ObjectNode> initial = results.getEntityList();
        initial = ExpanderUtilities.expand(initial, expanders, requestContext);
        for (ObjectNode entity : initial) {
            String item = FeatureExtractorUtilities.composeConcatenatedKey(entity, itemAttrs);
            // add() returns false for an already-seen key, so one lookup replaces contains() + add()
            if (items.add(item)) {
                entities.add(entity);
                if (maxHits != null && entities.size() >= maxHits) {
                    return new RetrievedResult(entities, maxHits);
                }
            }
        }
    }
    return new RetrievedResult(entities, entities.size());
}
 
Example #2
Source File: KnnModelTrigger.java    From samantha with MIT License 6 votes vote down vote up
/**
 * Gathers neighbor keys for every base interaction and converts each distinct key into an entity.
 *
 * <p>An interaction without the weight attribute counts with weight 1.0. Weights of at least 0.5
 * are looked up in {@code featureKnnModel}, weights below 0.5 in {@code featureKdnModel}; a model
 * that is {@code null} is skipped.
 *
 * @param bases the base interactions to trigger from
 * @return one entity per collected key, carrying the attribute values decomposed from that key
 */
public List<ObjectNode> getTriggeredFeaturesWithoutScore(List<ObjectNode> bases) {
    ObjectSet<String> neighborKeys = new ObjectOpenHashSet<>();
    for (ObjectNode base : bases) {
        double weight = base.has(weightAttr) ? base.get(weightAttr).asDouble() : 1.0;
        String baseKey = FeatureExtractorUtilities.composeConcatenatedKey(base, feaAttrs);
        // The two tests stay independent so that a NaN weight triggers neither model
        boolean useKnn = weight >= 0.5;
        boolean useKdn = weight < 0.5;
        if (useKnn && featureKnnModel != null) {
            getNeighbors(neighborKeys, featureKnnModel, baseKey);
        }
        if (useKdn && featureKdnModel != null) {
            getNeighbors(neighborKeys, featureKdnModel, baseKey);
        }
    }

    List<ObjectNode> triggered = new ArrayList<>();
    for (String neighborKey : neighborKeys) {
        Map<String, String> attrVals = FeatureExtractorUtilities.decomposeKey(neighborKey);
        ObjectNode entity = Json.newObject();
        for (Map.Entry<String, String> attr : attrVals.entrySet()) {
            entity.put(attr.getKey(), attr.getValue());
        }
        triggered.add(entity);
    }
    return triggered;
}
 
Example #3
Source File: ObjDictionaryMinimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Dictionary-based minimization of an object phrase.
 * The safe minimization is always applied first. The dictionary minimization is skipped when the
 * lower-cased string returned by CoreNLPUtils.listOfWordsToLemmaString for the whole phrase is
 * itself contained in 'collocations'.
 * @param obj: the object phrase
 * @param sg: semantic graph of the sentence
 * @param collocations: dictionary of multi-word expressions
 */
public static void minimizeObject(AnnotatedPhrase obj, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
    // The safe minimization runs unconditionally
    ObjSafeMinimization.minimizeObject(obj, sg);

    // Dictionary key of the whole phrase
    String phraseKey = CoreNLPUtils.listOfWordsToLemmaString(obj.getWordList()).toLowerCase();
    boolean phraseInDictionary = collocations.contains(phraseKey);

    if (!phraseInDictionary) {
        Minimization simp = new Minimization(obj, sg, collocations);

        // Scratch lists shared by the minimization steps:
        // remWords holds the words to be removed, matchWords the words matched by the regex
        List<CoreMap> remWords = new ArrayList<>();
        List<CoreMap> matchWords = new ArrayList<>();

        // Dictionary minimization on the noun phrases, then on the named entities
        simp.nounPhraseDictMinimization(remWords, matchWords);
        simp.namedEntityDictionaryMinimization(remWords, matchWords);
    }
}
 
Example #4
Source File: SubjDictionaryMinimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Dictionary-based minimization of a subject phrase.
 * The safe minimization is always applied first. The dictionary minimization is skipped when the
 * lower-cased string returned by CoreNLPUtils.listOfWordsToLemmaString for the whole phrase is
 * itself contained in 'collocations'.
 * @param subject: the subject phrase
 * @param sg: semantic graph of the sentence
 * @param collocations: dictionary of multi-word expressions
 */
public static void minimizeSubject(AnnotatedPhrase subject, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
    // The safe minimization runs unconditionally
    SubjSafeMinimization.minimizeSubject(subject, sg);

    // Dictionary key of the whole phrase
    String phraseKey = CoreNLPUtils.listOfWordsToLemmaString(subject.getWordList()).toLowerCase();
    boolean phraseInDictionary = collocations.contains(phraseKey);

    if (!phraseInDictionary) {
        Minimization simp = new Minimization(subject, sg, collocations);

        // Scratch lists shared by the minimization steps:
        // remWords holds the words to be removed, matchWords the words matched by the regex
        List<CoreMap> remWords = new ArrayList<>();
        List<CoreMap> matchWords = new ArrayList<>();

        // Noun phrases first, then verbs before nouns, then named entities
        simp.nounPhraseDictMinimization(remWords, matchWords);
        simp.removeVerbsBeforeNouns(remWords, matchWords);
        simp.namedEntityDictionaryMinimization(remWords, matchWords);
    }
}
 
Example #5
Source File: RelDictionaryMinimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Dictionary-based minimization of a relation phrase.
 * The safe minimization is always applied first. The remaining steps are skipped when the
 * lower-cased string returned by CoreNLPUtils.listOfWordsToLemmaString for the whole phrase is
 * itself contained in 'collocations'.
 * @param rel: the relation phrase
 * @param sg: semantic graph of the sentence
 * @param collocations: dictionary of multi-word expressions
 */
public static void minimizeRelation(AnnotatedPhrase rel, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
    // The safe minimization runs unconditionally
    RelSafeMinimization.minimizeRelation(rel, sg);

    // Dictionary key of the whole relation
    String relationKey = CoreNLPUtils.listOfWordsToLemmaString(rel.getWordList()).toLowerCase();
    boolean relationInDictionary = collocations.contains(relationKey);

    if (!relationInDictionary) {
        // NOTE(review): the safe minimization is applied a second time here. It looks like a
        // copy-paste leftover, but it is kept because its idempotence is not visible from this
        // method - confirm before removing.
        RelSafeMinimization.minimizeRelation(rel, sg);

        // Scratch lists shared by the minimization steps:
        // remWords holds the words to be removed, matchWords the words matched by the regex
        List<CoreMap> remWords = new ArrayList<>();
        List<CoreMap> matchWords = new ArrayList<>();

        // Dictionary minimization of the noun phrases and named entities within the relation
        Minimization simp = new Minimization(rel, sg, collocations);
        simp.nounPhraseDictMinimization(remWords, matchWords);
        simp.namedEntityDictionaryMinimization(remWords, matchWords);
    }
}
 
Example #6
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an annotated phrase from a word list and its root. Both arguments are forwarded to the
 * superclass constructor; the quantities list and the dropped-edge/dropped-word sets start empty.
 * @param wList: list of words for the phrase
 * @param root: the root of the phrase
 */
public AnnotatedPhrase(ObjectArrayList<IndexedWord> wList, IndexedWord root) {
    super(wList, root);
    // Annotation state starts out empty
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    quantities = new ObjectArrayList<>();
}
 
Example #7
Source File: Minimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a list of matched core maps (a phrase) and a list of words which are candidates for dropping ('remWords'),
 * check if some of them form sub-constituents of 'matchCoreMaps' which are found in the dictionary.
 * If there are, remove them from 'remWords'. The words left in 'remWords' are the ones that couldn't be matched
 * with a sub-constituent found in the dictionary, i.e. those are the ones that we drop.
 *
 * A candidate is kept (removed from 'remWords') when its lower-cased lemma occurs in ANY of the
 * sub-constituent strings found in the dictionary.
 *
 * @param matchCoreMaps: list of words as a list of CoreMap object (a phrase)
 * @param remWords: list of candidates to be dropped (each word in 'remWord' can also be found in 'matchCoreMaps');
 *                  modified in place
 */
public void dropWordsNotFoundInDict(List<CoreMap> matchCoreMaps, List<CoreMap> remWords){
    // Get all the sub-constituents
    ObjectArrayList<IndexedWord> words = CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(matchCoreMaps);
    SubConstituent sc = new SubConstituent(this.sg, CoreNLPUtils.getRootFromWordList(this.sg, words), words);
    sc.generateSubConstituentsFromLeft();
    ObjectOpenHashSet<String> subconstituents = sc.getStringSubConstituents();

    // Sub-constituents' strings found in the dictionary
    ObjectArrayList<String> scStringsInDict = new ObjectArrayList<>();
    for (String s: subconstituents){
        if (this.mwe.contains(s)){
            scStringsInDict.add(s);
        }
    }

    // If sub-constituents' strings are found in the dictionary, detect the words associated with them
    // and remove them from the drop candidates.
    // The candidates are walked once and each one is tested against every dictionary string.
    // (Sharing one iterator across the dictionary strings would exhaust it on the first string,
    // so the remaining strings would never be checked.)
    if (!scStringsInDict.isEmpty()){
        Iterator<CoreMap> iter = remWords.iterator();
        while (iter.hasNext()){
            CoreLabel cl = new CoreLabel(iter.next());
            String lemma = cl.lemma().toLowerCase();
            for (String stInDict: scStringsInDict){
                if (stInDict.contains(lemma)){
                    iter.remove();
                    break; // already removed; no need to test the other strings
                }
            }
        }
    }

    // Drop the words not found in frequent/collocation sub-constituents
    this.dropWords(remWords, matchCoreMaps);
}
 
Example #8
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an annotated phrase from a word list. The list is forwarded to the superclass
 * constructor; the quantities list and the dropped-edge/dropped-word sets start empty.
 * @param wList: list of indexed words for the phrase
 */
public AnnotatedPhrase(ObjectArrayList<IndexedWord> wList) {
    super(wList);
    // Annotation state starts out empty
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    quantities = new ObjectArrayList<>();
}
 
Example #9
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an annotated phrase from an existing phrase and a given quantities list. The phrase is
 * forwarded to the superclass constructor; 'q' is stored by reference (not copied), and the
 * dropped-edge/dropped-word sets start empty.
 * @param p: the phrase to be annotated
 * @param q: the quantities for phrase 'p'
 */
public AnnotatedPhrase(Phrase p, ObjectArrayList<Quantity> q){
    super(p);
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    // The caller's list is used directly
    quantities = q;
}
 
Example #10
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Default constructor: calls the no-argument superclass constructor and starts with an empty
 * quantities list and empty dropped-edge/dropped-word sets.
 */
public AnnotatedPhrase(){
    super();
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    quantities = new ObjectArrayList<>();
}
 
Example #11
Source File: SubConstituent.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a sub-constituent holder for the given sentence graph. The phrase root is a fresh
 * IndexedWord, and the phrase words and all candidate/sub-constituent collections start empty.
 * @param sentenceSg: the semantic graph of the sentence
 */
public SubConstituent(SemanticGraph sentenceSg){
    sg = sentenceSg;

    // Empty phrase
    phraseWords = new ObjectArrayList<>();
    phraseRoot = new IndexedWord();

    // Empty candidate and result sets
    siblingCandidates = new ObjectOpenHashSet<>();
    chainedCandidates = new ObjectOpenHashSet<>();
    subTreeCandidates = new ObjectOpenHashSet<>();
    stSubconstituents = new ObjectOpenHashSet<>();
    subConstituents = new ObjectOpenHashSet<>();
}
 
Example #12
Source File: SubConstituent.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Default constructor: a new empty SemanticGraph, a fresh IndexedWord as phrase root, and empty
 * phrase words and candidate/sub-constituent collections.
 */
public SubConstituent(){
    sg = new SemanticGraph();

    // Empty phrase
    phraseWords = new ObjectArrayList<>();
    phraseRoot = new IndexedWord();

    // Empty candidate and result sets
    siblingCandidates = new ObjectOpenHashSet<>();
    chainedCandidates = new ObjectOpenHashSet<>();
    subTreeCandidates = new ObjectOpenHashSet<>();
    stSubconstituents = new ObjectOpenHashSet<>();
    subConstituents = new ObjectOpenHashSet<>();
}
 
Example #13
Source File: DVAAlgorithm.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the distinct values of every column of the input relation and reports each count via
 * {@code addStatistic} under {@code NUMBEROFDISTINCT}.
 *
 * <p>Values are trimmed before they are collected; {@code null} values and values that are empty
 * after trimming are ignored.
 *
 * @throws AlgorithmExecutionException declared by the signature; presumably raised by the input
 *         access or the statistic reporting
 */
public void execute() throws AlgorithmExecutionException {
    // Initialisation: open a fresh copy of the input and read its metadata
    input = this.inputGenerator.generateNewCopy();
    this.relationName = input.relationName();
    this.columnNames = input.columnNames();

    // One set of distinct values per column
    final int columnCount = columnNames.size();
    Columns = new ArrayList<>();
    for (int col = 0; col < columnCount; col++) {
        Columns.add(new ObjectOpenHashSet<String>());
    }

    // Single pass over the data
    while (input.hasNext()) {
        List<String> tuple = input.next();
        for (int col = 0; col < columnCount; col++) {
            String raw = tuple.get(col);
            if (raw == null) {
                continue;
            }
            String value = raw.trim();
            if (!value.isEmpty()) {
                Columns.get(col).add(value);
            }
        }
    }

    // Report the number of distinct values of each column
    for (int col = 0; col < columnCount; col++) {
        addStatistic(NUMBEROFDISTINCT, Columns.get(col).size(), columnNames.get(col), relationName);
    }
}
 
Example #14
Source File: SubConstituent.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Parametric constructor: stores the sentence graph, the phrase root and the phrase words as
 * given (by reference). All candidate and sub-constituent sets start empty.
 * @param sentenceSg: the semantic graph of the sentence
 * @param pRoot: the root of the phrase
 * @param pWords: the words of the phrase
 */
public SubConstituent(SemanticGraph sentenceSg, IndexedWord pRoot, ObjectArrayList<IndexedWord> pWords){
    // Caller-supplied state
    phraseWords = pWords;
    phraseRoot = pRoot;
    sg = sentenceSg;

    // Everything else starts empty (there are functions for generating the candidate sub-constituents)
    siblingCandidates = new ObjectOpenHashSet<>();
    chainedCandidates = new ObjectOpenHashSet<>();
    subTreeCandidates = new ObjectOpenHashSet<>();
    stSubconstituents = new ObjectOpenHashSet<>();
    subConstituents = new ObjectOpenHashSet<>();
}
 
Example #15
Source File: URLRespectsRobotsTest.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a pseudo-random set of numeric strings ({@code inset}) together with the subset that no
 * other element of the set is a proper prefix of ({@code pfset}), and checks that
 * {@code URLRespectsRobots.toSortedPrefixFreeCharArrays} returns exactly that subset.
 *
 * The generator is seeded with 0, so the data set is identical on every run.
 */
@Test
public void testPrefixesDeep() {
	Set<String> inset = new ObjectOpenHashSet<>();
	Set<String> pfset = new ObjectOpenHashSet<>();
	Random rand = new Random(0);
	for (int i = 100; i < 999; i++) {
		if (rand.nextDouble() < 0.3) {
			String commonPref = String.valueOf(i);
			// With the three-digit prefix present, every longer string starting with it is redundant
			boolean putPrefix = rand.nextDouble() < 0.9;
			if (putPrefix) {
				pfset.add(commonPref);
				inset.add(commonPref);
			}
			for (int j = 100; j < 450; j++) {
				if (rand.nextDouble() < 0.3) {
					inset.add(commonPref + j);
					// Without the short prefix, the six-digit string itself is expected in the result
					if (! putPrefix) pfset.add(commonPref + j);
				}
			}
		}
	}
	char[][] resultArray = URLRespectsRobots.toSortedPrefixFreeCharArrays(inset);
	Set<String> result = new ObjectOpenHashSet<>();
	for (char[] a: resultArray) result.add(new String(a));
	// JUnit convention is (expected, actual); the reverse order gives misleading failure messages
	assertEquals(pfset, result);
}
 
Example #16
Source File: CollectionUtils.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Removes from both sets every element that they have in common.
 *
 * @param first the first set; modified in place
 * @param second the second set; modified in place
 * @return {@code true} if the two sets shared at least one element, {@code false} otherwise
 */
public static boolean removeIntersectionFrom(Set<String> first, Set<String> second) {
	// TODO: test: Set<String> intersection = Sets.intersection(first, second);

	// Work on a copy so that the shared elements are known before either input is touched
	Set<String> shared = new ObjectOpenHashSet<String>(first);
	shared.retainAll(second);
	boolean hadOverlap = !shared.isEmpty();

	first.removeAll(shared);
	second.removeAll(shared);

	return hadOverlap;
}
 
Example #17
Source File: HostEndsWithOneOf.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/**
 * Compare this object with a given generic one. The suffixes are compared as sets, so their
 * order and any duplicates are irrelevant.
 *
 * @param x the object to be compared
 * @return <code>true</code> if <code>x</code> is an instance of <code>HostEndsWithOneOf</code> and the suffixes allowed by <code>x</code> are allowed by this and vice versa
 */
@Override
public boolean equals(Object x) {
	if (!(x instanceof HostEndsWithOneOf)) return false;

	HostEndsWithOneOf other = (HostEndsWithOneOf)x;
	Set<String> ownSuffixes = new ObjectOpenHashSet<>(suffixes);
	Set<String> otherSuffixes = new ObjectOpenHashSet<>(other.suffixes);
	return ownSuffixes.equals(otherSuffixes);
}
 
Example #18
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an annotated phrase from a word list and a semantic graph. Both arguments are forwarded
 * to the superclass constructor; the quantities list and the dropped-edge/dropped-word sets
 * start empty.
 * @param wList: the list of words for the phrase
 * @param sg: the semantic graph of the phrase (should be the sentence subgraph)
 */
public AnnotatedPhrase(ObjectArrayList<IndexedWord> wList, SemanticGraph sg){
    super(wList, sg);
    // Annotation state starts out empty
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    quantities = new ObjectArrayList<>();
}
 
Example #19
Source File: AnnotatedPhrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an annotated phrase from an existing phrase. The phrase is forwarded to the superclass
 * constructor; the quantities list and the dropped-edge/dropped-word sets start empty.
 * @param p: the phrase to be initialized
 */
public AnnotatedPhrase(Phrase p){
    super(p);
    // Annotation state starts out empty
    droppedWords = new ObjectOpenHashSet<>();
    droppedEdges = new ObjectOpenHashSet<>();
    quantities = new ObjectArrayList<>();
}
 
Example #20
Source File: ObjSafeMinimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Safe minimization of an object phrase: runs the noun-phrase and the named-entity safe
 * minimization steps, using a Minimization created with an empty dictionary.
 * @param object: the objects phrase
 * @param sg: the semantic graph of the whole sentence
 */
public static void minimizeObject(AnnotatedPhrase object, SemanticGraph sg){
    // The Minimization is created with an empty dictionary
    ObjectOpenHashSet<String> emptyDictionary = new ObjectOpenHashSet<>();
    Minimization simp = new Minimization(object, sg, emptyDictionary);

    // Scratch lists shared by the minimization steps:
    // remWords holds the words to be removed, matchWords the words matched by the regex
    List<CoreMap> matchWords = new ArrayList<>();
    List<CoreMap> remWords = new ArrayList<>();

    // Noun phrases first, then named entities
    simp.nounPhraseSafeMinimization(remWords, matchWords);
    simp.namedEntitySafeMinimization(remWords, matchWords);
}
 
Example #21
Source File: Minimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Stores the given phrase, semantic graph and multi-word-expression dictionary (all by
 * reference); the pattern and matcher fields are left null.
 * @param phrase: the phrase to be minimized
 * @param sg: the semantic graph
 * @param mwe: the dictionary of multi-word expressions
 */
public Minimization(AnnotatedPhrase phrase, SemanticGraph sg, ObjectOpenHashSet<String> mwe) {
    // Caller-supplied state
    this.phrase = phrase;
    this.sg = sg;
    this.mwe = mwe;

    // No pattern/matcher yet
    this.tMatcher = null;
    this.tPattern = null;
}
 
Example #22
Source File: Minimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Default constructor: an empty multi-word-expression dictionary; the phrase, semantic graph,
 * pattern and matcher fields are all null.
 */
public Minimization(){
    mwe = new ObjectOpenHashSet<>();
    phrase = null;
    sg = null;
    tMatcher = null;
    tPattern = null;
}
 
Example #23
Source File: Modality.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Default constructor: the modality type is Modality.Type.CERTAINTY and the possibility and
 * certainty word/edge sets are all empty.
 */
public Modality(){
    modalityType = Modality.Type.CERTAINTY;

    // Possibility evidence
    possibilityWords = new ObjectOpenHashSet<>();
    possibilityEdges = new ObjectOpenHashSet<>();

    // Certainty evidence
    certaintyWords = new ObjectOpenHashSet<>();
    certaintyEdges = new ObjectOpenHashSet<>();
}
 
Example #24
Source File: Modality.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a modality of the given type whose possibility and certainty word/edge sets are all
 * empty.
 * @param t: Modality type
 */
public Modality(Modality.Type t){
    modalityType = t;

    // Possibility evidence
    possibilityWords = new ObjectOpenHashSet<>();
    possibilityEdges = new ObjectOpenHashSet<>();

    // Certainty evidence
    certaintyWords = new ObjectOpenHashSet<>();
    certaintyEdges = new ObjectOpenHashSet<>();
}
 
Example #25
Source File: Modality.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a modality of the given type with the supplied possibility words and edges (stored by
 * reference, not copied). The certainty word and edge sets start empty.
 * @param t: modality type
 * @param possWords: possibility words
 * @param possEdges: possibility edges
 */
public Modality(Modality.Type t, ObjectOpenHashSet<IndexedWord> possWords, 
        ObjectOpenHashSet<SemanticGraphEdge> possEdges){
    modalityType = t;

    // Possibility evidence comes from the caller
    possibilityEdges = possEdges;
    possibilityWords = possWords;

    // Certainty evidence starts empty
    certaintyEdges = new ObjectOpenHashSet<>();
    certaintyWords = new ObjectOpenHashSet<>();
}
 
Example #26
Source File: SubjSafeMinimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Safe minimization of a subject phrase: runs the noun-phrase and the named-entity safe
 * minimization steps, using a Minimization created with an empty dictionary.
 * @param subject: the subject phrase
 * @param sg: the semantic graph of the whole sentence
 */
public static void minimizeSubject(AnnotatedPhrase subject, SemanticGraph sg){
    // The Minimization is created with an empty dictionary
    ObjectOpenHashSet<String> emptyDictionary = new ObjectOpenHashSet<>();
    Minimization simp = new Minimization(subject, sg, emptyDictionary);

    // Scratch lists shared by the minimization steps:
    // remWords holds the words to be removed, matchWords the words matched by the regex
    List<CoreMap> matchWords = new ArrayList<>();
    List<CoreMap> remWords = new ArrayList<>();

    // Noun phrases first, then named entities
    simp.nounPhraseSafeMinimization(remWords, matchWords);
    simp.namedEntitySafeMinimization(remWords, matchWords);
}
 
Example #27
Source File: MinIE.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the fields to empty values and then calls minimize with the given, already parsed,
 * sentence.
 * @param sentence - input sentence
 * @param sg - dependency parse graph of the sentence
 * @param mode - the minimization mode
 * @param dict - dictionary of multi-word expressions (for MinIE-D)
 */
public MinIE(String sentence, SemanticGraph sg, Mode mode, Dictionary dict) {
    // Empty state; 'this.sentence' is the field, the bare 'sentence' below is the parameter
    this.propsWithAttribution = new ObjectOpenHashSet<>();
    this.sentence = new ObjectArrayList<>();
    this.sentenceSemGraph = new SemanticGraph();
    this.propositions = new ObjectArrayList<>();

    this.minimize(sentence, sg, mode, dict);
}
 
Example #28
Source File: MinIE.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * @param sentence - input sentence
 * @param parser - dependency parse pipeline of the sentence
 * @param mode - the minimization mode
 * 
 * NOTE: If mode is MinIE-D, then this will proceed as MinIE-D but with empty dictionary 
 * (i.e. will drop every word that is a candidate)
 */
public MinIE(String sentence, StanfordCoreNLP parser, Mode mode) {
    this.propositions = new ObjectArrayList<AnnotatedProposition>();
    this.sentenceSemGraph = new SemanticGraph();
    this.sentence = new ObjectArrayList<>();
    this.propsWithAttribution = new ObjectOpenHashSet<>();
    
    this.minimize(sentence, parser, mode, new Dictionary());
}
 
Example #29
Source File: MinIE.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the fields to empty values and then calls minimize with the given sentence, parser,
 * mode and dictionary.
 * @param sentence - input sentence
 * @param parser - dependency parse pipeline of the sentence
 * @param mode - the minimization mode
 * @param d - dictionary of multi-word expressions (for MinIE-D)
 */
public MinIE(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
    // Empty state; 'this.sentence' is the field, the bare 'sentence' below is the parameter
    this.propsWithAttribution = new ObjectOpenHashSet<>();
    this.sentence = new ObjectArrayList<>();
    this.sentenceSemGraph = new SemanticGraph();
    this.propositions = new ObjectArrayList<>();

    this.minimize(sentence, parser, mode, d);
}
 
Example #30
Source File: MinIE.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Default constructor: empty proposition list, a new empty SemanticGraph, empty sentence list and
 * an empty set of propositions with attribution. No minimization is run.
 */
public MinIE(){
    this.propsWithAttribution = new ObjectOpenHashSet<>();
    this.sentence = new ObjectArrayList<>();
    this.sentenceSemGraph = new SemanticGraph();
    this.propositions = new ObjectArrayList<>();
}