Java Code Examples for it.unimi.dsi.fastutil.ints.IntArrayList#add()

The following examples show how to use it.unimi.dsi.fastutil.ints.IntArrayList#add(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EntityScorer.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a scorer context from the embeddings of the distinct words in {@code words}.
 *
 * Words for which the knowledge base returns no embedding are skipped. The embeddings of the
 * remaining distinct words are written back to back into one float array, and the number of
 * occurrences of each of those words is collected in parallel.
 *
 * @param words the words to build the context from; repeated words are counted
 * @return the context created from the concatenated vectors and the per-word counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> occurrences = TreeMultiset.create();
    occurrences.addAll(words);

    int word_dim = kb.getEmbeddingsSize();
    // Concatenation of the vectors of all words; sized from the multiset's total size,
    // so slots of skipped words simply stay at 0.
    float[] concatenated = new float[occurrences.size() * word_dim];
    IntArrayList counts = new IntArrayList();
    int kept = 0;

    for (Multiset.Entry<String> entry : occurrences.entrySet()) {
        short[] embedding = kb.getWordEmbeddings(entry.getElement());
        if (embedding == null) {
            // No embedding known for this word: leave it out entirely
            continue;
        }
        counts.add(entry.getCount());
        int offset = kept * word_dim;
        for (int d = 0; d < kb.getEmbeddingsSize(); d++) {
            concatenated[offset + d] = embedding[d];
        }
        kept++;
    }
    // Trim before handing out the backing array via elements()
    counts.trim();

    return create_context(concatenated, counts.elements());
}
 
Example 2
Source File: FunctionalDependency.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Renders this dependency as {@code [lhs] --> rhs}, followed by its tab-separated scores.
 *
 * @return the textual form of the set attributes of both sides and all score values
 */
@Override
public String toString() {
	// Collect the indices of all set bits of the left-hand side
	IntArrayList lhsAttributes = new IntArrayList(this.lhs.cardinality());
	int lhsAttribute = this.lhs.nextSetBit(0);
	while (lhsAttribute >= 0) {
		lhsAttributes.add(lhsAttribute);
		lhsAttribute = this.lhs.nextSetBit(lhsAttribute + 1);
	}

	// Same for the right-hand side
	IntArrayList rhsAttributes = new IntArrayList(this.rhs.cardinality());
	int rhsAttribute = this.rhs.nextSetBit(0);
	while (rhsAttribute >= 0) {
		rhsAttributes.add(rhsAttribute);
		rhsAttribute = this.rhs.nextSetBit(rhsAttribute + 1);
	}

	String lhsText = CollectionUtils.concat(lhsAttributes, ",");
	String rhsText = CollectionUtils.concat(rhsAttributes, ",");

	return "[" + lhsText + "] --> " + rhsText + "\t("
			+ this.keyScore() + " | " + this.fdScore() + ")\t"
			+ this.keyLengthScore() + "\t"
			+ this.keyValueScore() + "\t"
			+ this.keyPositionScore() + "\t"
			+ this.fdLengthScore() + "\t"
			+ this.fdPositionScore() + "\t"
			+ this.fdDensityScore();
}
 
Example 3
Source File: FastUtil.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Given a list of lists, return all the combinations between the lists (i.e. their indices), for every
 * combination size from 2 up to the number of lists. For example, suppose we have the list of lists:
 * [[1, 2, 3], [4, 5], [6, 7, 8]]. Then, this function will return:
 * [[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1], 
 *  [0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 1, 0], [2, 0, 1]]
 * @param lists: list of lists (only the number of lists is used, not their contents)
 * @return the index lists produced for each combination size, in order of increasing size
 */
public static <T> ObjectArrayList<IntArrayList> getListsCombinationIndices(ObjectArrayList<ObjectArrayList<T>> lists){
    final int listCount = lists.size();
    ObjectArrayList<IntArrayList> allIndices = new ObjectArrayList<>();
    // Scratch list that collects the permutations of one combination size at a time
    ObjectArrayList<IntArrayList> permutations = new ObjectArrayList<>();

    for (int size = 2; size <= listCount; size++){
        permutations.clear();

        for (int[] combination : getCombinations(size, listCount)) {
            IntArrayList indices = new IntArrayList();
            for (int index : combination) {
                indices.add(index);
            }
            permute(indices, 0, permutations);
        }
        allIndices.addAll(permutations);
    }
    return allIndices;
}
 
Example 4
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects the verbs chained to a pivot word, looking both to its left and to its right.
 * @param sequence: a sequence of words (list of IndexedWord)
 * @param wordInd: the index of the pivot word within the sequence
 * @return the chained verbs on both sides of the pivot word, pivot word included, taken from the
 *         sequence in the order given by FastUtil.sort over their indices
 */
public static ObjectArrayList<IndexedWord> getChainedVerbs(ObjectArrayList<IndexedWord> sequence, int wordInd){
    IntArrayList indices = new IntArrayList();
    
    // Each direction gets its own (empty) copy to fill
    IntArrayList leftIndices = getChainedVerbsFromLeft(sequence, indices.clone(), wordInd);
    IntArrayList rightIndices = getChainedVerbsFromRight(sequence, indices.clone(), wordInd);
    
    // Left part, then the pivot itself, then the right part
    indices.addAll(leftIndices);
    indices.add(wordInd);
    indices.addAll(rightIndices);
    
    // Map the sorted indices back to the words of the sequence
    ObjectArrayList<IndexedWord> verbs = new ObjectArrayList<IndexedWord>();
    for (int index: FastUtil.sort(indices)){
        verbs.add(sequence.get(index));
    }
    
    return verbs;
}
 
Example 5
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects the nouns chained to a pivot word, looking both to its left and to its right.
 * @param sequence: a sequence of words (list of IndexedWord)
 * @param wordInd: the index of the pivot word within the sequence
 * @return the chained nouns on both sides of the pivot word, pivot word included, taken from the
 *         sequence in the order given by FastUtil.sort over their indices
 */
public static ObjectArrayList<IndexedWord> getChainedNouns(ObjectArrayList<IndexedWord> sequence, int wordInd){
    IntArrayList indices = new IntArrayList();
    
    // Each direction gets its own (empty) copy to fill
    IntArrayList leftIndices = getChainedNounsFromLeft(sequence, indices.clone(), wordInd);
    IntArrayList rightIndices = getChainedNounsFromRight(sequence, indices.clone(), wordInd);
    
    // Left part, then the pivot itself, then the right part
    indices.addAll(leftIndices);
    indices.add(wordInd);
    indices.addAll(rightIndices);
    
    // Map the sorted indices back to the words of the sequence
    ObjectArrayList<IndexedWord> nouns = new ObjectArrayList<IndexedWord>();
    for (int index: FastUtil.sort(indices)){
        nouns.add(sequence.get(index));
    }
    
    return nouns;
}
 
Example 6
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a sequence of indexed words and a start index, collect the indices of the consecutive nouns
 * that directly follow that index (i.e. the nouns 'chained' to the word from the right).
 * @param sequence: a list of words
 * @param chainedNouns: the list the found indices are appended to
 * @param wordInd: the word index from where the search starts (this index itself is not added)
 * @return the same 'chainedNouns' list, extended with the indices of the nouns following 'wordInd'
 */
private static IntArrayList getChainedNounsFromRight(ObjectArrayList<IndexedWord> sequence, 
        IntArrayList chainedNouns, int wordInd){
    int current = wordInd;
    // Walk to the right until the end of the sequence is reached or the next word is not a noun
    while (current < sequence.size()-1 && isNoun(sequence.get(current+1).tag())){
        current++;
        chainedNouns.add(current);
    }
    
    return chainedNouns;
}
 
Example 7
Source File: PLIBuilder.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the relational input record by record and builds, per attribute, a map from each value
 * to the list of record ids holding that value. Also updates {@code numRecords} to the number
 * of records read.
 *
 * @param relationalInput the input to read the records from
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared for failures while iterating the input
 */
protected List<HashMap<String, IntArrayList>> calculateClusterMaps(RelationalInput relationalInput, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	this.numRecords = 0;
	while (relationalInput.hasNext()) {
		// A non-positive row limit means "no limit"; otherwise stop once the limit is reached
		if (this.inputRowLimit > 0 && this.inputRowLimit == this.numRecords) {
			break;
		}
		
		List<String> record = relationalInput.next();
		
		int attributeId = 0;
		for (String value : record) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(this.numRecords);
			
			attributeId++;
		}
		
		this.numRecords++;
		if (this.numRecords == Integer.MAX_VALUE - 1) {
			throw new RuntimeException("PLI encoding into integer based PLIs is not possible, because the number of records in the dataset exceeds Integer.MAX_VALUE. Use long based plis instead! (NumRecords = " + this.numRecords + " and Integer.MAX_VALUE = " + Integer.MAX_VALUE);
		}
	}
	
	return clusterMaps;
}
 
Example 8
Source File: DeltaEncodedIntegerCollectionSerializer.java    From lsmtree with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a delta-encoded integer collection: first an int element count, then one delta per
 * element (read via {@code VIntUtils.readVInt}). Each element is the running sum of the deltas
 * read so far, starting from 0.
 *
 * @param in the input to read from
 * @return the decoded values in the order they were read
 * @throws IOException if reading from {@code in} fails
 */
@Override
public Collection<Integer> read(final DataInput in) throws IOException {
    final int count = in.readInt();
    final IntArrayList values = new IntArrayList(count);
    int current = 0;
    for (int read = 0; read < count; read++) {
        // Every stored number is the difference to the previous value
        current += VIntUtils.readVInt(in);
        values.add(current);
    }
    return values;
}
 
Example 9
Source File: DimensionalConfigurationSchema.java    From attic-apex-malhar with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the aggregator ID to the list unless the list already contains it.
 *
 * @param aggIDList the list to extend; modified in place
 * @param aggregatorID the ID to add if it is not present yet
 */
protected void mergeAggregatorID(IntArrayList aggIDList, int aggregatorID)
{
  if (!aggIDList.contains(aggregatorID)) {
    aggIDList.add(aggregatorID);
  }
}
 
Example 10
Source File: PLIBuilder.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Builds, per attribute, a map from each value to the list of ids of the records holding that
 * value. A record's id is its position in {@code records}.
 *
 * @param records the records to index; each record lists its values in attribute order
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared by the signature
 */
protected static List<HashMap<String, IntArrayList>> calculateClusterMapsStatic(ObjectArrayList<List<String>> records, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	for (int recordId = 0; recordId < records.size(); recordId++) {
		int attributeId = 0;
		for (String value : records.get(recordId)) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(recordId);
			
			attributeId++;
		}
	}
	
	return clusterMaps;
}
 
Example 11
Source File: PLIBuilder.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the relational input record by record and builds, per attribute, a map from each value
 * to the list of record ids holding that value. Also updates {@code numRecords} to the number
 * of records read.
 *
 * @param relationalInput the input to read the records from
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared for failures while iterating the input
 */
protected List<HashMap<String, IntArrayList>> calculateClusterMaps(RelationalInput relationalInput, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	this.numRecords = 0;
	while (relationalInput.hasNext()) {
		// A non-positive row limit means "no limit"; otherwise stop once the limit is reached
		if (this.inputRowLimit > 0 && this.inputRowLimit == this.numRecords) {
			break;
		}
		
		List<String> record = relationalInput.next();
		
		int attributeId = 0;
		for (String value : record) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(this.numRecords);
			
			attributeId++;
		}
		
		this.numRecords++;
		if (this.numRecords == Integer.MAX_VALUE - 1) {
			throw new RuntimeException("PLI encoding into integer based PLIs is not possible, because the number of records in the dataset exceeds Integer.MAX_VALUE. Use long based plis instead! (NumRecords = " + this.numRecords + " and Integer.MAX_VALUE = " + Integer.MAX_VALUE);
		}
	}
	
	return clusterMaps;
}
 
Example 12
Source File: PositionListIndex.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the cluster identifier of a record: the list of cluster ids the record has in each of
 * the given inverted PLIs, in the order of those PLIs.
 *
 * @param recordId the id of the record, used as index into every inverted PLI
 * @param lhsInvertedPlis one array per attribute, holding a cluster id for each record id
 * @return the cluster ids of the record, or {@code null} as soon as one of them is negative
 */
protected IntArrayList buildClusterIdentifier(int recordId, int[][] lhsInvertedPlis) { 
	IntArrayList identifier = new IntArrayList(lhsInvertedPlis.length);
	
	for (int[] invertedPli : lhsInvertedPlis) {
		int clusterId = invertedPli[recordId];
		
		// A negative cluster id means no identifier can be built for this record
		if (clusterId < 0) {
			return null;
		}
		
		identifier.add(clusterId);
	}
	return identifier;
}
 
Example 13
Source File: PLIBuilder.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Builds, per attribute, a map from each value to the list of ids of the records holding that
 * value. A record's id is its position in {@code records}.
 *
 * @param records the records to index; each record lists its values in attribute order
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared by the signature
 */
protected static List<HashMap<String, IntArrayList>> calculateClusterMapsStatic(ObjectArrayList<List<String>> records, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	for (int recordId = 0; recordId < records.size(); recordId++) {
		int attributeId = 0;
		for (String value : records.get(recordId)) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(recordId);
			
			attributeId++;
		}
	}
	
	return clusterMaps;
}
 
Example 14
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a list of indexed words and a semantic graph, return the root word of the word list. We assume that
 * all the words from the list can be found in the semantic graph sg, and the words in wordList are connected
 * within the semantic graph of the sentence - sg. The root is the word with the shortest directed path from
 * the first root of the graph. If several words share that shortest path length, the first verb among them
 * (in list order) is chosen.
 * 
 * @param sg: sentence semantic graph
 * @param wordList: list of words from which to choose "root" from
 * @return the chosen root word; the first word with index -2 if there is one; or null when several words
 *         share the shortest path length and none of them is a verb (also for an empty list, provided
 *         FastUtil.countElement does not report exactly one match on an empty list)
 */
public static IndexedWord getVerbRootFromWordList(SemanticGraph sg, ObjectArrayList<IndexedWord> wordList){
    IntArrayList distances = new IntArrayList();
    int shortest = Integer.MAX_VALUE;
    
    for (int i = 0; i < wordList.size(); i++){
        IndexedWord word = wordList.get(i);
        // The words with index -2 are the ones that cannot be found in the semantic graph (synthetic words)
        // This happens in the relations (see in clausie.ClauseDetector.java), and those words are the head words
        if (word.index() == -2){
            return word;
        }
        int distance = sg.getShortestDirectedPathNodes(sg.getFirstRoot(), word).size();
        shortest = Math.min(shortest, distance);
        distances.add(distance);
    }
    
    // Exactly one word at the shortest distance: that word is the root
    if (FastUtil.countElement(shortest, distances) == 1) {
        return wordList.get(distances.indexOf(shortest));
    }
    
    // Otherwise take the first verb among the words at the shortest distance
    for (int i = 0; i < distances.size(); i++){
        if (distances.getInt(i) == shortest && isVerb(wordList.get(i).tag())){
            return wordList.get(i);
        }
    }
    
    return null;
}
 
Example 15
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts a list of indexed words into the list of their indices (as reported by 'index()').
 * @param words: list of indexed words
 * @return the indices of the words, in the same order as the words
 */
public static IntArrayList listOfWordsToIndexList(ObjectArrayList<IndexedWord> words){
    IntArrayList wordIndices = new IntArrayList();
    for (int i = 0; i < words.size(); i++){
        wordIndices.add(words.get(i).index());
    }
    return wordIndices;
}
 
Example 16
Source File: PLIBuilder.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Builds, per attribute, a map from each value to the list of ids of the records holding that
 * value. A record's id is its position in {@code records}.
 *
 * @param records the records to index; each record lists its values in attribute order
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared by the signature
 */
protected static List<HashMap<String, IntArrayList>> calculateClusterMapsStatic(ObjectArrayList<List<String>> records, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	for (int recordId = 0; recordId < records.size(); recordId++) {
		int attributeId = 0;
		for (String value : records.get(recordId)) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(recordId);
			
			attributeId++;
		}
	}
	
	return clusterMaps;
}
 
Example 17
Source File: PLIBuilder.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the relational input record by record and builds, per attribute, a map from each value
 * to the list of record ids holding that value. Also updates {@code numRecords} to the number
 * of records read.
 *
 * @param relationalInput the input to read the records from
 * @param numAttributes the number of cluster maps to create (one per attribute)
 * @return one value-to-record-ids map per attribute, in attribute order
 * @throws InputIterationException declared for failures while iterating the input
 */
protected List<HashMap<String, IntArrayList>> calculateClusterMaps(RelationalInput relationalInput, int numAttributes) throws InputIterationException {
	List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
	for (int attribute = 0; attribute < numAttributes; attribute++) {
		clusterMaps.add(new HashMap<String, IntArrayList>());
	}
	
	this.numRecords = 0;
	while (relationalInput.hasNext()) {
		// A non-positive row limit means "no limit"; otherwise stop once the limit is reached
		if (this.inputRowLimit > 0 && this.inputRowLimit == this.numRecords) {
			break;
		}
		
		List<String> record = relationalInput.next();
		
		int attributeId = 0;
		for (String value : record) {
			HashMap<String, IntArrayList> clusterMap = clusterMaps.get(attributeId);
			
			// Fetch the cluster of this value, creating it on first sight
			IntArrayList cluster = clusterMap.get(value);
			if (cluster == null) {
				cluster = new IntArrayList();
				clusterMap.put(value, cluster);
			}
			cluster.add(this.numRecords);
			
			attributeId++;
		}
		
		this.numRecords++;
		if (this.numRecords == Integer.MAX_VALUE - 1) {
			throw new RuntimeException("PLI encoding into integer based PLIs is not possible, because the number of records in the dataset exceeds Integer.MAX_VALUE. Use long based plis instead! (NumRecords = " + this.numRecords + " and Integer.MAX_VALUE = " + Integer.MAX_VALUE);
		}
	}
	
	return clusterMaps;
}
 
Example 18
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a sequence of indexed words and a pivot index, collect the indices of the consecutive words that
 * directly follow the pivot and carry the given NER (i.e. the NERs 'chained' to the word from the right).
 * @param sequence: a list of words
 * @param chainedNERs: the list the found indices are appended to
 * @param wordInd: the word index from where the search starts (the pivot word; not added itself)
 * @param ner: the NE type of the pivot word
 * @return the same 'chainedNERs' list, extended with the indices of the matching words following 'wordInd'
 */
private static IntArrayList getChainedNERsFromRight(ObjectArrayList<IndexedWord> sequence, 
        IntArrayList chainedNERs, int wordInd, String ner){
    int current = wordInd;
    // Walk to the right until the end of the sequence is reached or the next word has a different NER
    while (current < sequence.size()-1 && sequence.get(current+1).ner().equals(ner)){
        current++;
        chainedNERs.add(current);
    }
    
    return chainedNERs;
}
 
Example 19
Source File: TestRowBlock.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Generates {@code numPositions} positions of the form {@code (7 * i + 3) % numRows} and
 * returns them sorted in ascending order.
 *
 * @param numRows the modulus applied to every generated position
 * @param numPositions the number of positions to generate
 * @return the sorted list of generated positions
 */
private IntArrayList generatePositionList(int numRows, int numPositions)
{
    IntArrayList generated = new IntArrayList(numPositions);
    int index = 0;
    while (index < numPositions) {
        generated.add((7 * index + 3) % numRows);
        index++;
    }
    Collections.sort(generated);
    return generated;
}
 
Example 20
Source File: TestDictionaryAwarePageFilter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Test filter over a single-channel page: selects the positions whose long value passes
 * {@code isSelected(filterRange, value)}, after checking every value with {@code verifyPositive}.
 *
 * Returns an empty range when nothing is selected, a range when the selected positions are
 * consecutive, and otherwise a position list whose backing array is padded with invalid
 * entries on both sides.
 */
@Override
public SelectedPositions filter(ConnectorSession session, Page page)
{
    assertEquals(page.getChannelCount(), 1);
    Block block = page.getBlock(0);

    IntArrayList selectedPositions = new IntArrayList();
    // Stays true for as long as every selected position directly follows the previous one
    boolean sequential = true;
    for (int position = 0; position < block.getPositionCount(); position++) {
        long value = block.getLong(position, 0);
        verifyPositive(value);

        if (!isSelected(filterRange, value)) {
            continue;
        }
        if (sequential && !selectedPositions.isEmpty()) {
            int previous = selectedPositions.getInt(selectedPositions.size() - 1);
            sequential = (position == previous + 1);
        }
        selectedPositions.add(position);
    }

    if (selectedPositions.isEmpty()) {
        return SelectedPositions.positionsRange(0, 0);
    }
    if (sequential) {
        return SelectedPositions.positionsRange(selectedPositions.getInt(0), selectedPositions.size());
    }

    // add 3 invalid elements to the head and tail
    int padding = 3;
    for (int added = 0; added < padding; added++) {
        selectedPositions.add(0, -1);
        selectedPositions.add(-1);
    }

    // verify the input block is the expected type (this is to assure that
    // dictionary processing enabled and disabled as expected)
    // this check is performed last so that dictionary processing that fails
    // is not checked (only the fall back processing is checked)
    assertTrue(expectedType.isInstance(block));

    // Offset and length skip the padding on either side
    return SelectedPositions.positionsList(selectedPositions.elements(), padding, selectedPositions.size() - 2 * padding);
}