it.unimi.dsi.fastutil.ints.IntOpenHashSet Java Examples

The following examples show how to use it.unimi.dsi.fastutil.ints.IntOpenHashSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SlimUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one serialized record from {@code buf} and runs {@code train} for every
 * item id that occurs in it.
 *
 * <p>Read order: an int item id, an int entry count, then per entry an int
 * {@code user}, an int size and that many (int item id, float value) pairs.
 *
 * @param buf buffer to read from; its position advances as values are consumed
 */
private void replayTrain(@Nonnull final ByteBuffer buf) {
    final int itemI = buf.getInt();
    final int knnSize = buf.getInt();

    final Int2ObjectMap<Int2FloatMap> knnItems = new Int2ObjectOpenHashMap<>(1024);
    final IntSet pairItems = new IntOpenHashSet();

    for (int entry = 0; entry < knnSize; entry++) {
        final int user = buf.getInt();
        final int ruSize = buf.getInt();

        final Int2FloatMap userValues = new Int2FloatOpenHashMap(ruSize);
        userValues.defaultReturnValue(0.f); // absent items read back as 0

        for (int read = 0; read < ruSize; read++) {
            final int itemK = buf.getInt();
            final float ruk = buf.getFloat();
            userValues.put(itemK, ruk);
            pairItems.add(itemK);
        }
        knnItems.put(user, userValues);
    }

    // One training call per distinct item id collected above.
    for (int itemJ : pairItems) {
        train(itemI, knnItems, itemJ);
    }
}
 
Example #2
Source File: CallGraphGenerator.java    From fasten with Apache License 2.0 6 votes vote down vote up
/** Generates a random DAG using preferential attachment. First, <code>n0</code> nodes without arcs are registered.
 *  Then, for each remaining node <code>source</code>, an outdegree is drawn from <code>outdegreeDistribution.sample()</code>
 *  and capped at <code>source - 1</code>. Arc targets are sampled from a Fenwick tree whose count for a node is
 *  incremented once when the node is registered and once for every arc that reaches it; a target is never used twice
 *  for the same source.
 *
 * @param n number of nodes.
 * @param n0 number of initial nodes.
 * @param outdegreeDistribution distribution from which outdegrees are sampled.
 * @param random generator used to produce the arcs.
 * @return the generated DAG.
 */
public static ArrayListMutableGraph preferentialAttachmentDAG(final int n, final int n0, final IntegerDistribution outdegreeDistribution, final RandomGenerator random) {
	final ArrayListMutableGraph graph = new ArrayListMutableGraph(n);
	// The tree is addressed with (node index + 1).
	final FenwickTree counts = new FenwickTree(n);

	// Initial nodes: registered, no arcs.
	for (int node = 0; node < n0; node++) {
		counts.incrementCount(node + 1);
	}

	// Remaining nodes: each one picks distinct targets among the nodes registered so far.
	final IntOpenHashSet chosen = new IntOpenHashSet();
	for (int source = n0; source < n; source++) {
		final int outdegree = Math.min(outdegreeDistribution.sample(), source - 1);
		chosen.clear();
		while (chosen.size() < outdegree) {
			final int target = counts.sample(random);
			if (!chosen.add(target)) {
				continue; // already a target of this source: draw again
			}
			counts.incrementCount(target);
			graph.addArc(source, target - 1);
		}
		counts.incrementCount(source + 1);
	}
	return graph;
}
 
Example #3
Source File: ColumnIndexBuilder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the indexes of the pages that may hold values matching the {@code notEq} predicate.
 *
 * <ul>
 *   <li>Predicate value {@code null}: every page not flagged in {@code nullPages}.</li>
 *   <li>No {@code nullCounts} available: all pages, because nulls match and cannot be ruled out.</li>
 *   <li>Otherwise: pages with a positive null count, plus the pages reported by the boundary order.</li>
 * </ul>
 */
@Override
public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotEq<T> notEq) {
  final T value = notEq.getValue();

  if (value == null) {
    return IndexIterator.filter(getPageCount(), pageIndex -> !nullPages[pageIndex]);
  }
  if (nullCounts == null) {
    // Without null statistics no page can be excluded.
    return IndexIterator.all(getPageCount());
  }

  // Collect the pages selected by value, then widen the result with the pages containing nulls.
  final IntSet pagesByValue = new IntOpenHashSet();
  getBoundaryOrder()
      .notEq(createValueComparator(value))
      .forEachRemaining((int index) -> pagesByValue.add(index));

  return IndexIterator.filter(
      getPageCount(),
      pageIndex -> {
        if (nullCounts[pageIndex] > 0) {
          return true;
        }
        return pagesByValue.contains(pageIndex);
      });
}
 
Example #4
Source File: ExpReplay.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Draws a batch of distinct, randomly chosen transitions from the storage.
 *
 * @param size requested batch size; the batch holds {@code min(size, storage.size())} elements
 * @return duplicates ({@code dup()}) of the selected transitions, in draw order
 */
public ArrayList<Transition<A>> getBatch(int size) {
    ArrayList<Transition<A>> batch = new ArrayList<>(size);
    final int available = storage.size();
    final int draws = Math.min(available, size);

    // Rejection sampling: redraw until the index has not been picked before.
    final int[] picked = new int[draws];
    final IntSet seen = new IntOpenHashSet();
    for (int slot = 0; slot < draws; slot++) {
        int candidate;
        do {
            candidate = rnd.nextInt(available);
        } while (!seen.add(candidate));
        picked[slot] = candidate;
    }

    for (int index : picked) {
        batch.add(storage.get(index).dup());
    }
    return batch;
}
 
Example #5
Source File: IntDictionaryMap.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the positions of {@code values} whose key corresponds to one of the given strings.
 *
 * @param strings the strings to look for; strings whose key lookup yields
 *     {@code DEFAULT_RETURN_VALUE} are ignored
 * @return a selection holding every matching position
 */
@Override
public Selection selectIsIn(String... strings) {
  final IntOpenHashSet wantedKeys = new IntOpenHashSet(strings.length);
  for (String candidate : strings) {
    final int key = getKeyForValue(candidate);
    if (key == DEFAULT_RETURN_VALUE) {
      continue;
    }
    wantedKeys.add(key);
  }

  final Selection matches = new BitmapBackedSelection();
  for (int position = 0; position < values.size(); position++) {
    final int keyAtPosition = values.getInt(position);
    if (wantedKeys.contains(keyAtPosition)) {
      matches.add(position);
    }
  }
  return matches;
}
 
Example #6
Source File: PositionListIndex.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the hash code from the clusters converted to sets.
 *
 * <p>The sets are sorted by their own hash codes before the list is hashed, so the
 * result depends only on the hash codes of the sets and not on their original order.
 *
 * @return the hash code of this object
 */
@Override
public int hashCode() {
	final int prime = 31;
	int result = 1;

	List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

	Collections.sort(setCluster, new Comparator<IntSet>() {
		@Override
		public int compare(IntSet o1, IntSet o2) {
			// Subtracting two arbitrary ints can overflow and flip the sign, which breaks
			// the comparator contract; Integer.compare orders them correctly.
			return Integer.compare(o1.hashCode(), o2.hashCode());
		}
	});
	result = prime * result + (setCluster.hashCode());
	return result;
}
 
Example #7
Source File: NotInPredicateEvaluatorFactory.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary) {
  List<String> values = notInPredicate.getValues();
  _nonMatchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
  for (String value : values) {
    int dictId = dictionary.indexOf(value);
    if (dictId >= 0) {
      _nonMatchingDictIdSet.add(dictId);
    }
  }
  _numNonMatchingDictIds = _nonMatchingDictIdSet.size();
  if (_numNonMatchingDictIds == 0) {
    _alwaysTrue = true;
  } else if (dictionary.length() == _numNonMatchingDictIds) {
    _alwaysFalse = true;
  }
  _dictionary = dictionary;
}
 
Example #8
Source File: IntervalTest.java    From database with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks every interval with bounds in [0..9] (plus the empty interval) against the
 * sorted set built from it: equality, hash-set equality, and head/tail/sub-set views.
 */
public void testSubsets() {
	for( int left = 0; left < 10; left++ ) {
		for( int right = left - 1; right < 10; right++ ) {
			final Interval interval;
			if ( right < left ) {
				interval = EMPTY_INTERVAL;
			}
			else {
				interval = Interval.valueOf( left, right );
			}
			final IntSortedSet set = toSortedSet( interval );

			assertEquals( set, interval );
			// NOTE(review): both operands are taken from `set`, so this check cannot fail;
			// presumably one side was meant to be the interval's iterator - confirm.
			assertTrue( Arrays.equals( IntIterators.unwrap( set.iterator() ), IntIterators.unwrap( set.iterator() ) ) );
			assertEquals( new IntOpenHashSet( set ), interval );

			for( int from = right - 1; from <= left + 1; from++ ) {
				// NOTE(review): same self-comparison as above - confirm.
				assertTrue( Arrays.equals( IntIterators.unwrap( set.iterator( from ) ), IntIterators.unwrap( set.iterator( from ) ) ) );
				assertEquals( set.headSet( from ), interval.headSet( from ) );
				assertEquals( set.tailSet( from ), interval.tailSet( from ) );
				for( int to = from; to <= left + 1; to++ ) {
					assertEquals( set.subSet( from, to ), interval.subSet( from, to ) );
				}
			}
		}
	}
}
 
Example #9
Source File: ThresholdMapFlattenerTest.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-row threshold map through a flattener and checks which records are
 * returned by {@code greaterOrEqual} for several thresholds.
 */
@Test
public void test() {
	// Row 1: records {1, 2} at similarity 0.5 and record {3} at similarity 0.6.
	To recordsAtHalf = To.builder()
		.similarity(0.5)
		.records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2))))
		.build();
	To recordAtSixTenths = To.builder()
		.similarity(0.6)
		.records(IntSets.singleton(3))
		.build();
	Collection<To> row1 = Arrays.asList(recordsAtHalf, recordAtSixTenths);

	// Row 2: record {1} at similarity 0.4.
	To recordAtFourTenths = To.builder()
		.similarity(0.4)
		.records(IntSets.singleton(1))
		.build();
	Collection<To> row2 = Collections.singletonList(recordAtFourTenths);

	ThresholdMapFlattener flattener = createFlattener(3);
	ThresholdMap map = CollectingThresholdMap.builder()
		.add(1, row1)
		.add(2, row2)
		.build(flattener);

	assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
	assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));

	assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
	assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));

	assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();

	assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
	assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
 
Example #10
Source File: CollectingThresholdMapTest.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-row collecting threshold map and checks which records are returned
 * by {@code greaterOrEqual} for several thresholds.
 */
@Test
public void test() {
	// Row 1: records {1, 2} at similarity 0.5 and record {3} at similarity 0.6.
	To recordsAtHalf = To.builder()
		.similarity(0.5)
		.records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2))))
		.build();
	To recordAtSixTenths = To.builder()
		.similarity(0.6)
		.records(IntSets.singleton(3))
		.build();
	Collection<To> row1 = Arrays.asList(recordsAtHalf, recordAtSixTenths);

	// Row 2: record {1} at similarity 0.4.
	To recordAtFourTenths = To.builder()
		.similarity(0.4)
		.records(IntSets.singleton(1))
		.build();
	Collection<To> row2 = Collections.singletonList(recordAtFourTenths);

	ThresholdMap map = CollectingThresholdMap.builder()
		.add(1, row1)
		.add(2, row2)
		.build();

	assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
	assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));

	assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
	assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));

	assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();

	assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
	assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
 
Example #11
Source File: InPredicateEvaluatorFactory.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary) {
  List<String> values = inPredicate.getValues();
  _matchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
  for (String value : values) {
    int dictId = dictionary.indexOf(value);
    if (dictId >= 0) {
      _matchingDictIdSet.add(dictId);
    }
  }
  _numMatchingDictIds = _matchingDictIdSet.size();
  if (_numMatchingDictIds == 0) {
    _alwaysFalse = true;
  } else if (dictionary.length() == _numMatchingDictIds) {
    _alwaysTrue = true;
  }
}
 
Example #12
Source File: OneClassPreferenceFMData.java    From RankSys with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Streams, user by user, a shuffled mix of positive and sampled negative instances.
 *
 * <p>For each user index, every item index returned by {@code prefs.getUidxIidxs} becomes an
 * instance with value 1.0. Negatives (value 0.0) are drawn by sampling {@code iidxs.size()}
 * random positions of {@code iidxs}, dropping items the user already has and duplicates, and
 * keeping at most {@code negativeProp} times the number of positives.
 */
@Override
public Stream<? extends FMInstance> stream() {
    return uidxs.stream().flatMap(uidx -> {
        final IntSet observed = new IntOpenHashSet();
        prefs.getUidxIidxs(uidx).forEachRemaining(observed::add);

        final List<FMInstance> userInstances = new ArrayList<>();

        // positives: one instance per observed item
        observed.forEach(iidx -> userInstances.add(getInstance(uidx, iidx, 1.0)));

        // negatives: random unobserved items, capped relative to the positives
        rnd.ints(iidxs.size(), 0, iidxs.size())
                .map(iidxs::getInt)
                .filter(jidx -> !observed.contains(jidx))
                .distinct()
                .limit((int) (negativeProp * observed.size()))
                .forEach(jidx -> userInstances.add(getInstance(uidx, jidx, 0.0)));

        Collections.shuffle(userInstances);
        return userInstances.stream();
    });
}
 
Example #13
Source File: NbestListUtils.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Baseline implementation. Augments the "standard" list with alternatives whose
 * derivation hash code does not occur in the standard list, then sorts the result.
 *
 * <p>Only the standard list feeds the duplicate check; alternatives are not compared
 * with one another.
 *
 * @param standard the base list; all of its items are kept
 * @param alt the alternative list
 * @param maxAltItems only the first {@code maxAltItems} entries of {@code alt} are considered
 * @return a new, sorted list with the standard items and the accepted alternatives
 */
public static <TK,FV> List<RichTranslation<TK,FV>> mergeAndDedup(List<RichTranslation<TK,FV>> standard,
    List<RichTranslation<TK,FV>> alt, int maxAltItems) {

  final IntSet standardDerivations = new IntOpenHashSet(standard.size());
  for (RichTranslation<TK,FV> translation : standard) {
    standardDerivations.add(derivationHashCode(translation.getFeaturizable().derivation));
  }

  final List<RichTranslation<TK,FV>> merged = new ArrayList<>(standard);
  final int altLimit = Math.min(maxAltItems, alt.size());
  for (int i = 0; i < altLimit; ++i) {
    final RichTranslation<TK,FV> candidate = alt.get(i);
    final int candidateHash = derivationHashCode(candidate.getFeaturizable().derivation);
    if (standardDerivations.contains(candidateHash)) {
      continue;
    }
    merged.add(candidate);
  }

  Collections.sort(merged);
  return merged;
}
 
Example #14
Source File: GraphFunctions.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds the undirected adjacency sets of a graph.
 *
 * <p>Any exception raised while building is printed to standard error and not rethrown.
 *
 * @param edges2D edge list; element 0 and element 1 of each row are the two endpoints
 * @param noNodes number of nodes; one adjacency set is allocated per node
 */
public GraphFunctions(int[][] edges2D, int noNodes) {
    try {
        nv = noNodes;
        nodes = new IntOpenHashSet[nv];

        int vertex = 0;
        while (vertex < nv) {
            nodes[vertex] = new IntOpenHashSet();
            vertex++;
        }

        // Each edge is stored in both directions.
        for (int[] edge : edges2D) {
            final int v1 = edge[0];
            final int v2 = edge[1];
            nodes[v1].add(v2);
            nodes[v2].add(v1);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #15
Source File: IntDictionaryMap.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the positions of {@code values} whose key corresponds to one of the given strings.
 *
 * @param strings the strings to look for; strings whose key lookup yields
 *     {@code DEFAULT_RETURN_VALUE} are ignored
 * @return a selection holding every matching position
 */
@Override
public Selection selectIsIn(Collection<String> strings) {
  final IntOpenHashSet wantedKeys = new IntOpenHashSet(strings.size());
  for (String candidate : strings) {
    final int key = getKeyForValue(candidate);
    if (key == DEFAULT_RETURN_VALUE) {
      continue;
    }
    wantedKeys.add(key);
  }

  final Selection matches = new BitmapBackedSelection();
  for (int position = 0; position < values.size(); position++) {
    final int keyAtPosition = values.getInt(position);
    if (wantedKeys.contains(keyAtPosition)) {
      matches.add(position);
    }
  }
  return matches;
}
 
Example #16
Source File: ObjectSerDeUtilsTest.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips randomly filled int sets through serialization and checks that the
 * deserialized set equals the original.
 */
@Test
public void testIntSet() {
  for (int round = 0; round < NUM_ITERATIONS; round++) {
    final int size = RANDOM.nextInt(100);
    final IntSet expected = new IntOpenHashSet(size);
    int remaining = size;
    while (remaining-- > 0) {
      expected.add(RANDOM.nextInt());
    }

    final byte[] serialized = ObjectSerDeUtils.serialize(expected);
    final IntSet actual = ObjectSerDeUtils.deserialize(serialized, ObjectSerDeUtils.ObjectType.IntSet);

    // NOTE: compare as Object rather than as Collection, because iteration order may differ
    assertEquals((Object) actual, expected, ERROR_MESSAGE);
  }
}
 
Example #17
Source File: PositionListIndex.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the hash code from the clusters converted to sets.
 *
 * <p>The sets are sorted by their own hash codes before the list is hashed, so the
 * result depends only on the hash codes of the sets and not on their original order.
 *
 * @return the hash code of this object
 */
@Override
public int hashCode() {
	final int prime = 31;
	int result = 1;

	List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

	Collections.sort(setCluster, new Comparator<IntSet>() {
		@Override
		public int compare(IntSet o1, IntSet o2) {
			// Subtracting two arbitrary ints can overflow and flip the sign, which breaks
			// the comparator contract; Integer.compare orders them correctly.
			return Integer.compare(o1.hashCode(), o2.hashCode());
		}
	});
	result = prime * result + (setCluster.hashCode());
	return result;
}
 
Example #18
Source File: PositionListIndex.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the hash code from the clusters converted to sets.
 *
 * <p>The sets are sorted by their own hash codes before the list is hashed, so the
 * result depends only on the hash codes of the sets and not on their original order.
 *
 * @return the hash code of this object
 */
@Override
public int hashCode() {
	final int prime = 31;
	int result = 1;

	List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

	Collections.sort(setCluster, new Comparator<IntSet>() {
		@Override
		public int compare(IntSet o1, IntSet o2) {
			// Subtracting two arbitrary ints can overflow and flip the sign, which breaks
			// the comparator contract; Integer.compare orders them correctly.
			return Integer.compare(o1.hashCode(), o2.hashCode());
		}
	});
	result = prime * result + (setCluster.hashCode());
	return result;
}
 
Example #19
Source File: IntDictionaryMap.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the positions of {@code values} whose key corresponds to one of the given strings.
 *
 * @param strings the strings to look for; strings whose key lookup yields
 *     {@code DEFAULT_RETURN_VALUE} are ignored
 * @return a selection holding every matching position
 */
@Override
public Selection selectIsIn(String... strings) {
  final IntOpenHashSet wantedKeys = new IntOpenHashSet(strings.length);
  for (String candidate : strings) {
    final int key = getKeyForValue(candidate);
    if (key == DEFAULT_RETURN_VALUE) {
      continue;
    }
    wantedKeys.add(key);
  }

  final Selection matches = new BitmapBackedSelection();
  for (int position = 0; position < values.size(); position++) {
    final int keyAtPosition = values.getInt(position);
    if (wantedKeys.contains(keyAtPosition)) {
      matches.add(position);
    }
  }
  return matches;
}
 
Example #20
Source File: IntColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the distinct int values held by this column.
 *
 * @return the number of different values returned by {@code getInt} over all positions
 */
@Override
public int countUnique() {
  final IntSet distinct = new IntOpenHashSet();
  int position = 0;
  while (position < size()) {
    distinct.add(getInt(position));
    position++;
  }
  return distinct.size();
}
 
Example #21
Source File: FastFilters.java    From RankSys with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Item filter that discards items in the training preference data.
 *
 * @param <U> type of the users
 * @param <I> type of the items
 * @param trainData preference data
 * @return a function that maps each user to an item-index predicate, which returns
 * true if the user-item pair was not observed in the preference data
 */
public static <U, I> Function<U, IntPredicate> notInTrain(FastPreferenceData<U, I> trainData) {
    return user -> {
        // Gather the item indexes of this user's training preferences once per user.
        final int uidx = trainData.user2uidx(user);
        final IntSet observed = new IntOpenHashSet();
        trainData.getUidxPreferences(uidx)
                .mapToInt(IdxPref::v1)
                .forEach(observed::add);

        return iidx -> !observed.contains(iidx);
    };
}
 
Example #22
Source File: IntColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new column holding each distinct value of this column once.
 *
 * @return a column named {@code name() + " Unique values"}; values appear in the
 *     iteration order of the intermediate hash set
 */
@Override
public IntColumn unique() {
  final IntSet distinct = new IntOpenHashSet();
  int position = 0;
  while (position < size()) {
    distinct.add(getInt(position));
    position++;
  }

  final IntColumn result = IntColumn.create(name() + " Unique values");
  for (int distinctValue : distinct) {
    result.append(distinctValue);
  }
  return result;
}
 
Example #23
Source File: DatasetWikiIdExporter.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Collects the concept ids of all tags in the gold standard of the configured dataset.
 *
 * <p>NOTE(review): the method is named "D2W" but requests the {@code C2KB} dataset and
 * reads the C2W gold standard - confirm the naming is intentional.
 *
 * @param config configuration from which the dataset is obtained
 * @return the set of concept ids, or {@code null} if the configuration yields no dataset
 * @throws GerbilException declared by the method; see the calls it makes on {@code config}
 */
private IntOpenHashSet analyzeAsD2W(DatasetConfiguration config) throws GerbilException {
    final C2WDataset dataset = (C2WDataset) config.getDataset(ExperimentType.C2KB);
    if (dataset == null) {
        return null;
    }

    final List<HashSet<Tag>> goldStandard = dataset.getC2WGoldStandardList();
    final IntOpenHashSet conceptIds = new IntOpenHashSet();
    for (HashSet<Tag> documentTags : goldStandard) {
        for (Tag tag : documentTags) {
            conceptIds.add(tag.getConcept());
        }
    }
    return conceptIds;
}
 
Example #24
Source File: NotInPredicateEvaluatorFactory.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
IntRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate) {
  List<String> values = notInPredicate.getValues();
  _nonMatchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
  for (String value : values) {
    _nonMatchingValues.add(Integer.parseInt(value));
  }
}
 
Example #25
Source File: AccessTrace.java    From cache2k-benchmark with Apache License 2.0 5 votes vote down vote up
/**
 * Scans the trace array once, lowering {@code lowValue} and raising {@code highValue}
 * as needed, and stores the number of distinct values in {@code valueCount}.
 */
private void initStatistics() {
  final IntSet distinct = new IntOpenHashSet();
  for (int v : getArray()) {
    if (v < lowValue) {
      lowValue = v;
    }
    if (v > highValue) {
      highValue = v;
    }
    distinct.add(v);
  }
  valueCount = distinct.size();
}
 
Example #26
Source File: TimeColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new column holding each distinct value of {@code data} once.
 *
 * @return a column named {@code name() + " Unique values"} backed by the distinct values
 */
@Override
public TimeColumn unique() {
  final IntSet distinct = new IntOpenHashSet(data);
  final TimeColumn result = emptyCopy(distinct.size());
  final int[] distinctValues = distinct.toIntArray();
  result.data = IntArrayList.wrap(distinctValues);
  result.setName(name() + " Unique values");
  return result;
}
 
Example #27
Source File: DistinctCountAggregationFunction.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value set stored in the holder.
 *
 * @param aggregationResultHolder holder to read the result from
 * @return the stored set, or a new empty set when the holder has no result
 */
@Override
public IntOpenHashSet extractAggregationResult(AggregationResultHolder aggregationResultHolder) {
  final IntOpenHashSet stored = aggregationResultHolder.getResult();
  return stored != null ? stored : new IntOpenHashSet();
}
 
Example #28
Source File: IgnoreWIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the ids of all titles that match the date pattern or, failing that,
 * the ignore pattern of the current language.
 *
 * @return the trimmed set of ids to ignore
 * @throws IOException declared by the method; see the dataset loading it performs
 */
@Override
protected IntSet parseSet() throws IOException
{
	log.info("Loading data...");
	Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
	IntOpenHashSet ignoredIds = new IntOpenHashSet(titles.size());

	Pattern datePattern = WikiPatterns.getPattern(lang, Type.PAGE_DATE);
	Pattern ignorePattern = WikiPatterns.getPattern(lang, Type.PAGE_IGNORE);

	// Counter 0 tracks processed titles, 1 date pages, 2 other ignored pages.
	PLogger progress = new PLogger(log,"titles","dates","others").setEnd(0, titles.size()).start("Parsing ignore-pages...");
	for (String title : titles.keySet())
	{
		progress.update(0);
		if (datePattern.matcher(title).find())
		{
			progress.update(1);
			ignoredIds.add(titles.get(title));
		}
		else if (ignorePattern.matcher(title).find())
		{
			progress.update(2);
			ignoredIds.add(titles.get(title));
		}
	}
	progress.stop();

	ignoredIds.trim();
	return ignoredIds;
}
 
Example #29
Source File: PageToCategoryIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the category-links SQL file and builds, for every page id, the set of ids of
 * the non-hidden categories it belongs to.
 *
 * @return the dump produced by {@code createDump} from the page-to-categories map
 * @throws IOException if reading the source file fails
 */
@Override
protected int[][] parseSet() throws IOException {
	final Int2ObjectMap<IntSet> map = new Int2ObjectOpenHashMap<IntSet>(3000000);
	final IntSet hidden= DatasetLoader.get(new HiddenCategoriesWIDs(lang));
	File input = WikipediaFiles.CAT_LINKS.getSourceFile(lang);
	final Object2IntMap<String> categories=DatasetLoader.get(new CategoriesToWIDMap(lang));
	
	SQLWikiParser parser = new SQLWikiParser(log) {
		@Override
		public boolean compute(ArrayList<String> values) throws IOException {
			String c_title=cleanPageName(values.get(SQLWikiParser.CATLINKS_TITLE_TO));
			int id=Integer.parseInt(values.get(SQLWikiParser.CATLINKS_ID_FROM));
			// Rows pointing to unknown categories are rejected.
			if(!categories.containsKey(c_title)) return false;
			// Look the category id up once instead of on every use.
			final int categoryId=categories.get(c_title).intValue();
			// Rows pointing to hidden categories are rejected as well.
			if(hidden.contains(categoryId)) return false;

			IntSet set = map.get(id);
			if(set == null){
				set = new IntOpenHashSet();
				map.put(id, set);
			}
			set.add(categoryId);
			return true;
		}
		
	};
	InputStreamReader reader = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"));
	try {
		parser.compute(reader);
	} finally {
		// Close the file even when parsing fails, so the handle is not leaked.
		reader.close();
	}
	return createDump(map);
}
 
Example #30
Source File: DatasetWikiIdExporter.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the ids of the configured dataset and prints them.
 *
 * <p>The D2W analysis is tried first; the C2W analysis is used only when the first
 * one returns {@code null}.
 *
 * @param config configuration of the dataset to analyze
 * @param output stream the ids are printed to
 * @throws GerbilException if one of the analysis steps fails
 */
public void analyzeDataset(DatasetConfiguration config, PrintStream output) throws GerbilException {
    final IntOpenHashSet d2wIds = analyzeAsD2W(config);
    final IntOpenHashSet ids = (d2wIds != null) ? d2wIds : analyzeAsC2W(config);
    printIds(ids, output);
}