it.unimi.dsi.fastutil.ints.IntSet Java Examples

The following examples show how to use it.unimi.dsi.fastutil.ints.IntSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the page indexes that have to be read for a {@code notEq} predicate.
 *
 * @param notEq the not-equal predicate being evaluated
 * @param <T> the type of the compared value
 * @return an iterator over the selected page indexes
 */
@Override
public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotEq<T> notEq) {
  final T target = notEq.getValue();

  // "not equal to null": keep every page whose nullPages flag is not set.
  if (target == null) {
    return IndexIterator.filter(getPageCount(), page -> !nullPages[page]);
  }

  // Nulls satisfy the predicate, so without null counts no page can be ruled out.
  if (nullCounts == null) {
    return IndexIterator.all(getPageCount());
  }

  // Pages selected by the value comparison are collected first ...
  final IntSet valueMatches = new IntOpenHashSet();
  getBoundaryOrder().notEq(createValueComparator(target))
      .forEachRemaining((int page) -> valueMatches.add(page));

  // ... and then merged with the pages that report at least one null.
  return IndexIterator.filter(getPageCount(),
      page -> valueMatches.contains(page) || nullCounts[page] > 0);
}
 
Example #2
Source File: MAPEvaluator.java    From jstarcraft-ai with Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates precision-at-hit over the ranked list and normalises the sum.
 *
 * @param checkCollection the items counted as hits
 * @param rankList        the ranked items; only the first {@code size} entries are used
 * @return the accumulated precision divided by the smaller of the two collection sizes
 */
@Override
protected float measure(IntSet checkCollection, IntList rankList) {
    // Only the head of the ranking is evaluated.
    if (rankList.size() > size) {
        rankList = rankList.subList(0, size);
    }
    int hits = 0;
    int position = 0;
    float precisionSum = 0F;
    for (int itemIndex : rankList) {
        position++;
        if (checkCollection.contains(itemIndex)) {
            hits++;
            // precision at this (1-based) position
            precisionSum += (float) hits / position;
        }
    }
    return precisionSum / Math.min(checkCollection.size(), rankList.size());
}
 
Example #3
Source File: HiddenCategoriesWIDs.java    From tagme with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the category-links source file and collects the
 * {@code CATLINKS_ID_FROM} value of every row whose cleaned category title is
 * "Hidden categories".
 *
 * @return the set of collected ids
 * @throws IOException if the source file cannot be read or parsed
 */
@Override
protected IntSet parseSet() throws IOException {
	final IntSet set = new IntOpenHashSet();
	File input = WikipediaFiles.CAT_LINKS.getSourceFile(lang);

	SQLWikiParser parser = new SQLWikiParser(log) {
		@Override
		public boolean compute(ArrayList<String> values) throws IOException {
			if (cleanPageName(values.get(SQLWikiParser.CAT_TITLE)).equals("Hidden categories")) {
				set.add(Integer.parseInt(values.get(SQLWikiParser.CATLINKS_ID_FROM)));
				return true;
			}
			return false;
		}

	};

	// try-with-resources closes the reader (and the wrapped file stream) even when
	// parsing throws; previously the file handle leaked on any exception.
	try (InputStreamReader reader = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"))) {
		parser.compute(reader);
	}

	return set;
}
 
Example #4
Source File: TopicDocs.java    From tagme with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an array indexed by WID whose entries are the number of the topic
 * document carrying that WID, or -1 when no document was seen for it.
 *
 * @return the WID-to-document lookup array
 * @throws IOException if the datasets or the topic index cannot be read
 */
@Override
protected int[] parseSet() throws IOException
{
	// The array has one slot per WID up to the largest one in the dataset.
	IntSet knownWids = new AllWIDs(lang).getDataset();
	int largestWid = 0;
	for (int candidate : knownWids) {
		if (candidate > largestWid) {
			largestWid = candidate;
		}
	}

	IndexReader reader = Indexes.getReader(RepositoryDirs.TOPICS.getPath(lang));
	int docCount = reader.maxDoc();

	// -1 marks a WID without a document.
	int[] widToDoc = new int[largestWid + 1];
	for (int slot = 0; slot < widToDoc.length; slot++) {
		widToDoc[slot] = -1;
	}

	PLogger progress = new PLogger(log, Step.MINUTE).setEnd(docCount).start();
	for (int doc = 0; doc < docCount; doc++) {
		String widField = reader.document(doc).get(TopicIndexer.FIELD_WID);
		widToDoc[Integer.parseInt(widField)] = doc;
		progress.update();
	}
	progress.stop();

	return widToDoc;
}
 
Example #5
Source File: StandaloneExp.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates an attribute whose single entry (key 0) holds the sorted
 * concatenation of the value lists of the first {@code noMonths} keys, in
 * key-set iteration order, of an existing attribute. The node set is shared
 * with the source attribute.
 *
 * @param attributes attributes by name
 * @param attribute  name of the attribute to read from
 * @param noMonths   number of keys to merge
 * @return the newly created attribute
 */
public Attribute createNewAttribute(HashMap<String, Attribute> attributes,
           String attribute, int noMonths) {
    Attribute source = attributes.get(attribute);
    IntIterator monthIt = source.data.keySet().iterator();
    ArrayList<SpatioTemporalVal> merged = new ArrayList<SpatioTemporalVal>();
    for (int taken = 0; taken < noMonths; taken++) {
        if (!monthIt.hasNext()) {
            // Reported through the project's error helper when fewer keys exist than requested.
            Utilities.er("no. of months is greater than what is present");
        }
        merged.addAll(source.data.get(monthIt.nextInt()));
    }
    Collections.sort(merged);

    Attribute result = new Attribute();
    result.data.put(0, merged);
    result.nodeSet = source.nodeSet;
    return result;
}
 
Example #6
Source File: PositionListIndex.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the hash code from the clusters converted to sets and sorted by
 * their individual hash codes.
 *
 * @return the hash code of this index
 */
@Override
public int hashCode() {
	final int prime = 31;
	int result = 1;

	List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

	// Sorting by hash presumably makes the result independent of cluster order.
	Collections.sort(setCluster, new Comparator<IntSet>() {
		@Override
		public int compare(IntSet o1, IntSet o2) {
			// Integer.compare instead of subtraction: "a - b" overflows for hashes of
			// opposite sign, yielding the wrong sign and violating the comparator contract.
			return Integer.compare(o1.hashCode(), o2.hashCode());
		}
	});
	result = prime * result + (setCluster.hashCode());
	return result;
}
 
Example #7
Source File: NoveltyEvaluator.java    From jstarcraft-ai with Apache License 2.0 6 votes vote down vote up
/**
 * Sums the self-information, in bits, of the ranked items.
 * <p>
 * For every ranked item with a positive entry in {@code itemCounts} the value
 * {@code -ln(count / numberOfUsers)} is accumulated; the total is divided by
 * {@code ln 2} before it is returned.
 *
 * @param checkCollection not read by this implementation
 * @param rankList        the ranked items; only the first {@code size} entries are used
 * @return the accumulated value converted to base 2
 */
@Override
protected float measure(IntSet checkCollection, IntList rankList) {
    if (rankList.size() > size) {
        rankList = rankList.subList(0, size);
    }
    float total = 0F;
    for (int rank : rankList) {
        int occurrences = itemCounts[rank];
        if (occurrences <= 0) {
            // items without a positive count contribute nothing
            continue;
        }
        float probability = ((float) occurrences) / numberOfUsers;
        total += (float) -Math.log(probability);
    }
    return (float) (total / Math.log(2F));
}
 
Example #8
Source File: DioriteRandomUtils.java    From Diorite with MIT License 6 votes vote down vote up
/**
 * Picks a value from the map, using the int keys as weights.
 * <p>
 * The keys are summed, a number is drawn with
 * {@code getRandomLong(random, 0, sum)}, and the keys are subtracted from it
 * in entry-set iteration order; the value of the first entry that makes the
 * remainder negative is returned.
 *
 * @param random  source of randomness
 * @param choices values keyed by their weight
 * @param <T>     type of the values
 * @return the selected value, or {@code null} if no entry makes the remainder negative
 */
@Nullable
public static <T> T getWeightedRandomReversed(Random random, Int2ObjectMap<T> choices)
{
    long totalWeight = 0;
    IntIterator keys = choices.keySet().iterator();
    while (keys.hasNext())
    {
        totalWeight += keys.nextInt();
    }

    long remaining = getRandomLong(random, 0, totalWeight);
    for (Int2ObjectMap.Entry<T> candidate : choices.int2ObjectEntrySet())
    {
        remaining -= candidate.getIntKey();
        if (remaining < 0)
        {
            return candidate.getValue();
        }
    }
    return null;
}
 
Example #9
Source File: AgreeSetGenerator.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the positions registered in {@code index} for the elements
 * of the list have a non-empty intersection.
 *
 * @param actuelList the elements to look up
 * @param index      positions registered per element
 * @return {@code false} as soon as an element has no index entry or the running
 *         intersection becomes empty; {@code true} otherwise
 */
private boolean isSubset(IntList actuelList, Map<Integer, IntSet> index) {

    IntSet positions = new IntArraySet();
    boolean seeded = false;
    for (int e : actuelList) {
        Integer key = Integer.valueOf(e);
        if (!index.containsKey(key)) {
            return false;
        }
        if (seeded) {
            // FIXME: Throws UnsupportedOperationExeption within fastUtil
            // positions.retainAll(index.get(e));
            this.intersect(positions, index.get(key));
        } else {
            // the first element's positions seed the running intersection
            positions.addAll(index.get(key));
            seeded = true;
        }
        if (positions.isEmpty()) {
            return false;
        }
    }
    return true;
}
 
Example #10
Source File: PositionListIndex.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the hash code from the clusters converted to sets and sorted by
 * their individual hash codes.
 *
 * @return the hash code of this index
 */
@Override
public int hashCode() {
	final int prime = 31;
	int result = 1;

	List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

	// Sorting by hash presumably makes the result independent of cluster order.
	Collections.sort(setCluster, new Comparator<IntSet>() {
		@Override
		public int compare(IntSet o1, IntSet o2) {
			// Integer.compare instead of subtraction: "a - b" overflows for hashes of
			// opposite sign, yielding the wrong sign and violating the comparator contract.
			return Integer.compare(o1.hashCode(), o2.hashCode());
		}
	});
	result = prime * result + (setCluster.hashCode());
	return result;
}
 
Example #11
Source File: ObjectSerDeUtils.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a value to its {@code ObjectType} by testing its runtime type against
 * the supported types in a fixed order; the first match wins.
 *
 * @param value the value to classify
 * @return the matching object type
 * @throws IllegalArgumentException if the value matches none of the supported types
 */
public static ObjectType getObjectType(Object value) {
  if (value instanceof String) {
    return ObjectType.String;
  }
  if (value instanceof Long) {
    return ObjectType.Long;
  }
  if (value instanceof Double) {
    return ObjectType.Double;
  }
  if (value instanceof DoubleArrayList) {
    return ObjectType.DoubleArrayList;
  }
  if (value instanceof AvgPair) {
    return ObjectType.AvgPair;
  }
  if (value instanceof MinMaxRangePair) {
    return ObjectType.MinMaxRangePair;
  }
  if (value instanceof HyperLogLog) {
    return ObjectType.HyperLogLog;
  }
  if (value instanceof QuantileDigest) {
    return ObjectType.QuantileDigest;
  }
  if (value instanceof Map) {
    return ObjectType.Map;
  }
  if (value instanceof IntSet) {
    return ObjectType.IntSet;
  }
  if (value instanceof TDigest) {
    return ObjectType.TDigest;
  }
  if (value instanceof DistinctTable) {
    return ObjectType.DistinctTable;
  }
  if (value instanceof Sketch) {
    return ObjectType.DataSketch;
  }
  throw new IllegalArgumentException("Unsupported type of value: " + value.getClass().getSimpleName());
}
 
Example #12
Source File: DateColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new column holding each distinct value of this column once.
 * The copy is named after this column with the suffix " Unique values".
 *
 * @return a column of the distinct values
 */
@Override
public DateColumn unique() {
  // Distinct packed int values, pre-sized to the backing data.
  final IntSet distinct = new IntOpenHashSet(data.size());
  for (int row = 0; row < size(); row++) {
    distinct.add(data.getInt(row));
  }

  final DateColumn result = emptyCopy(distinct.size());
  result.setName(name() + " Unique values");
  result.data = IntArrayList.wrap(distinct.toIntArray());
  return result;
}
 
Example #13
Source File: ThresholdMapBuilder.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the given similarities into a fresh row builder and stores the
 * resulting row under {@code left}.
 *
 * @param left         key under which the row is stored
 * @param similarities the similarities to process into the row
 * @return this builder
 */
ThresholdMapBuilder add(int left, Iterable<To> similarities) {
	RowBuilder row = RowBuilder.emptyRow();
	similarities.forEach(row::process);
	map.put(left, row.getRow());
	return this;
}
 
Example #14
Source File: SetSimilarity.java    From RankSys with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Returns a function that scores other indexes against {@code idx1}.
 * <p>
 * The {@code v1} values of the preferences of {@code idx1} are collected once;
 * for each queried index the function counts how many of its {@code v1}
 * values are in that set and passes the count, the set size and
 * {@code data.numItems(idx2)} to {@code sim}.
 *
 * @param idx1 the index the returned function compares against
 * @return the similarity function for {@code idx1}
 */
@Override
public IntToDoubleFunction similarity(int idx1) {
    IntSet reference = new IntOpenHashSet();
    data.getUidxPreferences(idx1).map(IdxPref::v1).forEach(reference::add);

    return idx2 -> {
        long shared = data.getUidxPreferences(idx2)
                .map(IdxPref::v1)
                .filter(reference::contains)
                .count();

        return sim((int) shared, reference.size(), data.numItems(idx2));
    };
}
 
Example #15
Source File: IntColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new column holding each distinct value of this column once.
 * The new column is named after this column with the suffix " Unique values".
 *
 * @return a column of the distinct values
 */
@Override
public IntColumn unique() {
  final IntSet distinct = new IntOpenHashSet();
  for (int row = 0; row < size(); row++) {
    distinct.add(getInt(row));
  }

  final IntColumn result = IntColumn.create(name() + " Unique values");
  for (int distinctValue : distinct) {
    result.append(distinctValue);
  }
  return result;
}
 
Example #16
Source File: RecallEvaluator.java    From jstarcraft-ai with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the share of the check collection that appears in the ranked list.
 *
 * @param checkCollection the items counted as hits
 * @param rankList        the ranked items; only the first {@code size} entries are used
 * @return the number of hits divided by the size of the check collection
 */
@Override
protected float measure(IntSet checkCollection, IntList rankList) {
    if (rankList.size() > size) {
        rankList = rankList.subList(0, size);
    }
    int hits = 0;
    for (int position = 0; position < rankList.size(); position++) {
        if (checkCollection.contains(rankList.getInt(position))) {
            hits++;
        }
    }
    return (float) hits / checkCollection.size();
}
 
Example #17
Source File: PrecisionEvaluator.java    From jstarcraft-ai with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the share of the evaluated ranking positions that are hits.
 *
 * @param checkCollection the items counted as hits
 * @param rankList        the ranked items; only the first {@code size} entries are used
 * @return the number of hits divided by {@code size}
 */
@Override
protected float measure(IntSet checkCollection, IntList rankList) {
    if (rankList.size() > size) {
        rankList = rankList.subList(0, size);
    }
    int hits = 0;
    for (int position = 0; position < rankList.size(); position++) {
        if (checkCollection.contains(rankList.getInt(position))) {
            hits++;
        }
    }
    return (float) hits / size;
}
 
Example #18
Source File: FastUtilTypeSpecificBenchmarkUnitTest.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Benchmark body: fills a pre-sized {@code IntOpenHashSet} with the values
 * {@code 0 .. setSize - 1}.
 *
 * @return the populated set
 */
@Benchmark
public IntSet givenFastUtilsIntSetWithInitialSizeSet_whenPopulated_checkTimeTaken() {
  IntSet populated = new IntOpenHashSet(setSize);
  int next = 0;
  while (next < setSize) {
    populated.add(next);
    next++;
  }
  return populated;
}
 
Example #19
Source File: RankingTask.java    From jstarcraft-rns with Apache License 2.0 5 votes vote down vote up
/**
 * Ranks every item that does not occur in the user's training module by the
 * score the model predicts for it, highest score first.
 *
 * @param recommender the model used for prediction
 * @param userIndex   index of the user to recommend for
 * @return the item indexes ordered by descending predicted score
 */
@Override
protected IntList recommend(Model recommender, int userIndex) {
    ReferenceModule trainModule = trainModules[userIndex];
    ReferenceModule testModule = testModules[userIndex];

    // Items already present in the training module are left out of the ranking.
    IntSet knownItems = new IntOpenHashSet();
    for (DataInstance instance : trainModule) {
        knownItems.add(instance.getQualityFeature(itemDimension));
    }

    // TODO this code needs refactoring
    ArrayInstance probe = new ArrayInstance(trainMarker.getQualityOrder(), trainMarker.getQuantityOrder());
    probe.copyInstance(testModule.getInstance(0));
    probe.setQualityFeature(userDimension, userIndex);

    List<Integer2FloatKeyValue> scored = new ArrayList<>(itemSize - knownItems.size());
    for (int itemIndex = 0; itemIndex < itemSize; itemIndex++) {
        if (!knownItems.contains(itemIndex)) {
            // Reuse the same instance, only swapping the item feature before predicting.
            probe.setQualityFeature(itemDimension, itemIndex);
            recommender.predict(probe);
            scored.add(new Integer2FloatKeyValue(itemIndex, probe.getQuantityMark()));
        }
    }
    // Arguments are swapped so that larger scores sort first.
    Collections.sort(scored, (left, right) -> Float.compare(right.getValue(), left.getValue()));

    IntList recommendList = new IntArrayList(scored.size());
    for (Integer2FloatKeyValue entry : scored) {
        recommendList.add(entry.getKey());
    }
    return recommendList;
}
 
Example #20
Source File: ArrayPositionListIndex.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an array-backed position list index with {@code maxValue + 1} slots.
 * Every slot starts as the shared empty set before the entries of
 * {@code map} are applied through {@code set(...)}.
 *
 * @return the array-backed index
 */
@Override
public PositionListIndex build() {
	IntSet[] slots = new IntSet[maxValue + 1];
	Arrays.fill(slots, IntSets.EMPTY_SET);
	map.forEach(set(slots));
	return new ArrayPositionListIndex(slots);
}
 
Example #21
Source File: DateColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the distinct values stored in this column.
 *
 * @return the number of distinct values
 */
@Override
public int countUnique() {
  // Pre-sized to the row count so the set does not need to grow.
  IntSet distinct = new IntOpenHashSet(size());
  for (int row = 0; row < size(); row++) {
    distinct.add(data.getInt(row));
  }
  return distinct.size();
}
 
Example #22
Source File: NegativeSamplingExpander.java    From samantha with MIT License 5 votes vote down vote up
/**
 * Draws random indexes in {@code [0, maxVal)} and keeps those that are not in
 * {@code trues}.
 * <p>
 * The number of draws is {@code maxNumSample} when it is set and
 * {@code trues.size()} otherwise. Rejected draws are not retried, so the
 * result may hold fewer entries than draws, and it may hold duplicates.
 *
 * @param trues  indexes that must not be sampled
 * @param maxVal exclusive upper bound of the sampled indexes
 * @return the sampled indexes
 */
private IntList getSampledIndices(IntSet trues, int maxVal) {
    IntList samples = new IntArrayList();
    int num = trues.size();
    if (maxNumSample != null) {
        num = maxNumSample;
    }
    // One generator per call; constructing (and seeding) a new Random for every
    // draw was wasted work.
    Random random = new Random();
    for (int i = 0; i < num; i++) {
        int dice = random.nextInt(maxVal);
        if (!trues.contains(dice)) {
            samples.add(dice);
        }
    }
    return samples;
}
 
Example #23
Source File: CollectingThresholdMap.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Adds to {@code result} the values of all leading entries whose threshold is
 * not below {@code max}, stopping at the first entry below it.
 *
 * @param sortedMap thresholds mapped to their values
 * @param max       the minimum threshold an entry needs to be collected
 * @return {@code result} after the additions
 */
private IntCollection collect(Double2ObjectMap<IntSet> sortedMap, double max) {
	//this function seems to be the most crucial part of the code
	for (Entry<IntSet> candidate : sortedMap.double2ObjectEntrySet()) {
		if (candidate.getDoubleKey() < max) {
			// thresholds are sorted descending, so nothing further can qualify
			return result;
		}
		result.addAll(candidate.getValue());
	}
	return result;
}
 
Example #24
Source File: AccessTrace.java    From cache2k-benchmark with Apache License 2.0 5 votes vote down vote up
/**
 * Scans the trace array once, lowering {@code lowValue} and raising
 * {@code highValue} where needed, and stores the number of distinct values in
 * {@code valueCount}.
 */
private void initStatistics() {
  IntSet distinct = new IntOpenHashSet();
  for (int value : getArray()) {
    if (value < lowValue) {
      lowValue = value;
    }
    if (value > highValue) {
      highValue = value;
    }
    distinct.add(value);
  }
  valueCount = distinct.size();
}
 
Example #25
Source File: AgreeSetGenerator.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Records the list in {@code max} and registers {@code position} under each of
 * its elements in {@code index}, unless {@code isSubset} reports the list as
 * already covered.
 *
 * @param actuelList the list being handled
 * @param position   the position registered for every element of the list
 * @param index      positions per element; updated in place
 * @param max        the collected lists; updated in place
 */
private void handlePartition(IntList actuelList, int position, Int2ObjectMap<IntSet> index, Set<IntList> max) {

    if (this.isSubset(actuelList, index)) {
        return;
    }

    max.add(actuelList);
    for (int e : actuelList) {
        // create the position set on first sight of an element
        if (!index.containsKey(e)) {
            index.put(e, new IntArraySet());
        }
        index.get(e).add(position);
    }
}
 
Example #26
Source File: AgreeSetGenerator.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the arguments in the task's fields; no other work is done here.
 *
 * @param actuelList  the list this task handles
 * @param actuelIndex the index associated with the list
 * @param index       the element-to-positions map
 * @param max         the map of collected lists
 */
private HandlePartitionTask(IntList actuelList, int actuelIndex, Map<Integer, IntSet> index, Map<IntList, Object> max) {

    this.actuelList = actuelList;
    this.actuelIndex = actuelIndex;
    this.index = index;
    this.max = max;
}
 
Example #27
Source File: AllWIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the values of the title-to-WID map into a set.
 *
 * @return the set of all WIDs found in the map
 * @throws IOException if the title map cannot be loaded
 */
@Override
protected IntSet parseSet() throws IOException {

	Object2IntMap<String> titleToWid = new TitlesToWIDMap(lang).getDataset();

	// Allocated with twice the map size, then trimmed once all values are in.
	IntOpenHashSet allWids = new IntOpenHashSet(titleToWid.size() * 2);
	allWids.addAll(titleToWid.values());
	allWids.trim();

	return allWids;
}
 
Example #28
Source File: IntColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the distinct values stored in this column.
 *
 * @return the number of distinct values
 */
@Override
public int countUnique() {
  IntSet distinct = new IntOpenHashSet();
  for (int row = 0; row < size(); row++) {
    distinct.add(getInt(row));
  }
  return distinct.size();
}
 
Example #29
Source File: ValueInTransformFunction.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Keeps the elements of {@code source} that are contained in {@code intSet},
 * preserving their order.
 *
 * @param intSet the values to keep
 * @param source the array to filter
 * @return {@code source} itself when every element was kept, otherwise a new
 *         array with the kept elements
 */
private static int[] filterInts(IntSet intSet, int[] source) {
  IntList kept = new IntArrayList();
  for (int i = 0; i < source.length; i++) {
    int candidate = source[i];
    if (intSet.contains(candidate)) {
      kept.add(candidate);
    }
  }
  // Nothing was dropped: hand back the input array instead of a copy.
  return kept.size() == source.length ? source : kept.toIntArray();
}
 
Example #30
Source File: FastFilters.java    From RankSys with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Item filter that discards items in the training preference data.
 *
 * @param <U> type of the users
 * @param <I> type of the items
 * @param trainData preference data
 * @return function giving, for each user, an item filter that returns true
 * if the user-item pair was not observed in the preference data
 */
public static <U, I> Function<U, IntPredicate> notInTrain(FastPreferenceData<U, I> trainData) {
    return user -> {
        // Item indexes the user has a preference for in the training data.
        IntSet observed = new IntOpenHashSet();
        int uidx = trainData.user2uidx(user);
        trainData.getUidxPreferences(uidx)
                .mapToInt(IdxPref::v1)
                .forEach(observed::add);

        return iidx -> !observed.contains(iidx);
    };
}