Java Code Examples for gnu.trove.map.hash.TIntObjectHashMap#iterator()

The following examples show how to use gnu.trove.map.hash.TIntObjectHashMap#iterator(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EntityLookup.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Keeps only those entity candidates that carry at least one of the given types.
 *
 * @param entities Entities to filter.
 * @param filteringTypes Set of types to filter the entities against. When {@code null},
 *                       no filtering happens and {@code entities} is returned as is.
 * @return A new {@code Entities} holding every entity that has at least one type contained in
 *         {@code filteringTypes}; the unmodified input when {@code filteringTypes} is {@code null}.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 */
private Entities filterEntitiesByType(Entities entities, Set<Type> filteringTypes) throws EntityLinkingDataAccessException {
  if (filteringTypes == null) {
    return entities;
  }

  Entities matching = new Entities();
  TIntObjectHashMap<Set<Type>> typesByEntityId = DataAccess.getTypes(entities);
  TIntObjectIterator<Set<Type>> cursor = typesByEntityId.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();
    Set<Type> typesOfEntity = cursor.value();
    // One shared type is enough to keep the entity; anyMatch stops at the first hit.
    boolean hasRequestedType = typesOfEntity.stream().anyMatch(filteringTypes::contains);
    if (hasRequestedType) {
      matching.add(entities.getEntityById(entityId));
    }
  }
  return matching;
}
 
Example 2
Source File: InlinkOverlapEntityEntitySimilarity.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code entity2vector} with one {@code RoaringBitmap} per entity, where the bitmap
 * contains the ids returned as inlink neighbors for that entity.
 *
 * <p>When {@code entities} is empty, only a debug message is logged and nothing is initialized.
 *
 * @param entities Entities whose inlink neighbors are loaded.
 * @throws Exception if loading the inlink neighbors fails.
 */
private void setupEntities(Entities entities) throws Exception {
  if (entities.size() == 0) {
    logger.debug("Skipping initialization of InlinkEntityEntitySimilarity for " + entities.size() + " entities");
    return;
  }
  logger.debug("Initializing InlinkEntityEntitySimilarity for " + entities.size() + " entities");

  entity2vector = new TIntObjectHashMap<>();
  TIntObjectHashMap<int[]> inlinksByEntity = DataAccess.getInlinkNeighbors(entities);

  TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();
    int[] neighborIds = cursor.value();

    // Copy the inlink ids into a bitmap, one bit per neighbor id.
    RoaringBitmap bitmap = new RoaringBitmap();
    for (int i = 0; i < neighborIds.length; i++) {
      bitmap.add(neighborIds[i]);
    }
    entity2vector.put(entityId, bitmap);
  }

  logger.debug("Done initializing InlinkEntityEntitySimilarity");
}
 
Example 3
Source File: MilneWittenEntityEntitySimilarity.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the collection size and fills {@code entity2vector} with one {@code RoaringBitmap}
 * per entity, where the bitmap contains the ids returned as inlink neighbors for that entity.
 *
 * @param entities Entities whose inlink neighbors are loaded.
 * @throws Exception if reading the collection size or the inlink neighbors fails.
 */
private void setupEntities(Entities entities) throws Exception {
  logger.debug("Initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities");

  collectionSize = DataAccess.getCollectionSize();
  TIntObjectHashMap<int[]> inlinksByEntity = DataAccess.getInlinkNeighbors(entities);

  // inlinks are assumed to be pre-sorted.
  entity2vector = new TIntObjectHashMap<>();

  TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();
    int[] neighborIds = cursor.value();

    // Copy the inlink ids into a bitmap, one bit per neighbor id.
    RoaringBitmap bitmap = new RoaringBitmap();
    for (int i = 0; i < neighborIds.length; i++) {
      bitmap.add(neighborIds[i]);
    }
    entity2vector.put(entityId, bitmap);
  }

  logger.debug("Done initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities");
}
 
Example 4
Source File: YagoEntityKeyphraseCooccurrenceDataProviderIterator.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
public YagoEntityKeyphraseCooccurrenceDataProviderIterator(TIntObjectHashMap<TIntIntHashMap> superdocKeyphraseCounts) {
  entitiesIterator = superdocKeyphraseCounts.iterator();
  if (entitiesIterator.hasNext()) {
    entitiesIterator.advance();
    currentEntityKeyphrasesIterator = entitiesIterator.value().iterator();
  } else {
    currentEntityKeyphrasesIterator = null;
  }

}
 
Example 5
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@code Entities} collection for the given internal ids.
 *
 * <p>Each entry of the knowledge-base lookup result is wrapped into an {@code Entity}
 * constructed from the entry's value and its int key.
 *
 * @param internalIds Internal entity ids to look up.
 * @return The entities created from the lookup result.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 */
public static Entities getAidaEntitiesForInternalIds(int[] internalIds) throws EntityLinkingDataAccessException {
  TIntObjectHashMap<KBIdentifiedEntity> kbEntityById = DataAccess.getKnowlegebaseEntitiesForInternalIds(internalIds);
  Entities result = new Entities();
  TIntObjectIterator<KBIdentifiedEntity> cursor = kbEntityById.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    Entity entity = new Entity(cursor.value(), cursor.key());
    result.add(entity);
  }
  return result;
}
 
Example 6
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Computes all entity occurrence probabilities based on their incoming links.
 *
 * <p>The probability of an entity is its number of inlinks divided by the number of inlinks
 * summed over all entities.
 *
 * <p>NOTE(review): when the summed link count is 0, the division is {@code 0.0 / 0.0} and the
 * stored value for every entity is {@code NaN}.
 *
 * @return Map of Entity->Probability.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 */
public static TIntDoubleHashMap getAllEntityProbabilities() throws EntityLinkingDataAccessException {
  TIntObjectHashMap<int[]> inlinksByEntity = getAllInlinks();

  // First pass: total number of links over all entities.
  long linkTotal = 0;
  for (TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator(); cursor.hasNext(); ) {
    cursor.advance();
    linkTotal += cursor.value().length;
  }

  // Second pass: each entity's share of the total.
  TIntDoubleHashMap probabilities = new TIntDoubleHashMap(inlinksByEntity.size(), 0.5f);
  for (TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator(); cursor.hasNext(); ) {
    cursor.advance();
    int[] inlinks = cursor.value();
    double share = (double) inlinks.length / (double) linkTotal;
    probabilities.put(cursor.key(), share);
  }

  return probabilities;
}
 
Example 7
Source File: DataAccessSQLCache.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Copies all entries of {@code entityKeyphrases} into the cache kept for {@code querySignature}.
 *
 * <p>If no cache exists yet for the signature, one is created with the configured
 * {@code ENTITIES_CACHE_SIZE} value and registered in {@code entityKeyphrasesCaches}.
 *
 * @param querySignature Key selecting the per-query cache.
 * @param entityKeyphrases Keyphrase data per entity id to add to the cache.
 */
private synchronized void addToEntityKeyphrasesCache(String querySignature, TIntObjectHashMap<List<EntityKeyphraseData>> entityKeyphrases) {
  CachingHashMap<Integer, List<EntityKeyphraseData>> cacheForQuery = entityKeyphrasesCaches.get(querySignature);
  if (cacheForQuery == null) {
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeyphraseData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeyphrasesCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeyphraseData>> cursor = entityKeyphrases.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    cacheForQuery.put(cursor.key(), cursor.value());
  }
}
 
Example 8
Source File: DataAccessSQLCache.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Copies all entries of {@code entityKeywords} into the cache kept for {@code querySignature}.
 *
 * <p>If no cache exists yet for the signature, one is created with the configured
 * {@code ENTITIES_CACHE_SIZE} value and registered in {@code entityKeywordsCaches}.
 *
 * @param querySignature Key selecting the per-query cache.
 * @param entityKeywords Keyword data per entity id to add to the cache.
 */
private synchronized void addToEntityKeywordsCache(String querySignature, TIntObjectHashMap<List<EntityKeywordsData>> entityKeywords) {
  CachingHashMap<Integer, List<EntityKeywordsData>> cacheForQuery = entityKeywordsCaches.get(querySignature);
  if (cacheForQuery == null) {
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeywordsData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeywordsCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeywordsData>> cursor = entityKeywords.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    cacheForQuery.put(cursor.key(), cursor.value());
  }
}
 
Example 9
Source File: DataAccessKeyValueStore.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Collects, for every mention, the distinct category ids of all entities found for that mention.
 *
 * <p>Every mention returned by {@code getEntitiesForMentions} gets an entry in the result; a
 * mention whose entities have no categories maps to an empty array.
 *
 * @param mentions Mentions to look up.
 * @param language Currently unused (see TODO).
 * @param isNamedEntity Passed through to {@code getEntitiesForMentions}.
 * @return Map from mention to the distinct category ids of its entities.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 */
@Override //TODO: language not used
public Map<String, int[]> getCategoryIdsForMentions(Set<String> mentions, Language language, boolean isNamedEntity) throws EntityLinkingDataAccessException {
  Map<String, Entities> entities = getEntitiesForMentions(mentions, 1.0, 0, isNamedEntity);

  Map<String, Set<Integer>> mentionCategories = new HashMap<>(entities.size());

  for (Entry<String, Entities> entry : entities.entrySet()) {
    Set<Integer> types = mentionCategories.get(entry.getKey());
    if (types == null) {
      types = new HashSet<>();
      // Register the set; otherwise the collected ids never reach the result.
      mentionCategories.put(entry.getKey(), types);
    }

    List<Integer> entityIds = new ArrayList<>();
    for (Entity entity : entry.getValue()) {
      entityIds.add(entity.getId());
    }

    TIntObjectHashMap<int[]> categories = getCategoryIdsForEntitiesIds(ArrayUtils.toPrimitive(entityIds.toArray(new Integer[0])));
    for (TIntObjectIterator<int[]> it = categories.iterator(); it.hasNext(); ) {
      // A Trove iterator must be advanced before key()/value() are read; without this call
      // the loop never moves forward.
      it.advance();
      for (int categoryId : it.value()) {
        types.add(categoryId);
      }
    }
  }

  Map<String, int[]> ret = new HashMap<>(mentionCategories.size());
  for (Entry<String, Set<Integer>> entry : mentionCategories.entrySet()) {
    Integer[] temp = entry.getValue().toArray(new Integer[0]);
    ret.put(entry.getKey(), ArrayUtils.toPrimitive(temp));
  }

  return ret;
}
 
Example 10
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Returns, for every entity id that appears as an inlink neighbor, the set of entity ids it
 * links to (i.e. the inverted inlink relation).
 *
 * <p>If a JSON cache file for the current database identifier exists and parses to a non-null
 * map, its content is returned. Otherwise the map is computed from
 * {@code DataAccess.getInlinkNeighbors} over all entities and, when non-empty, written to that
 * cache file as JSON.
 *
 * <p>NOTE(review): the cache file is read via {@code FileReader} and written via
 * {@code String.getBytes()}, so its encoding follows the platform default charset.
 *
 * @return Map from an inlinking entity id to the ids of the entities it links to.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 * @throws IOException if reading or writing the cache file fails.
 * @throws SQLException declared by the original signature.
 */
private static Map<Integer, Set<Integer>> computeEntityOutlinks() throws EntityLinkingDataAccessException, IOException, SQLException {
	Map<Integer, Set<Integer>> inLinkOutLinkMap = new HashMap<>();
	String file_name = EntityLinkingManager.getAidaDbIdentifierLight() + "_" + ENTITY_OUTLINK_CACHE_JSON;
	if (Files.exists(Paths.get(file_name))) {
		logger.info("Loading " + file_name + " from cache");
		Map<Integer, Set<Integer>> cache;
		// try-with-resources so the file handle is released even when parsing fails.
		try (FileReader fileReader = new FileReader(file_name);
				JsonReader jsonReader = new JsonReader(fileReader)) {
			cache = new GsonBuilder().enableComplexMapKeySerialization().create()
					.fromJson(jsonReader,
							new TypeToken<Map<Integer, Set<Integer>>>() {
							}.getType());
		}

		if (cache != null) {
			inLinkOutLinkMap.putAll(cache);
			return inLinkOutLinkMap;
		}
	}
	logger.info("Computing " + ENTITY_OUTLINK_CACHE_JSON);
	TIntObjectHashMap<int[]> inlinkNeighbors = DataAccess.getInlinkNeighbors(DataAccess.getAllEntities());
	TIntObjectIterator<int[]> iterator = inlinkNeighbors.iterator();

	while (iterator.hasNext()) {
		iterator.advance();
		int outEntity = iterator.key();
		int[] inEntities = iterator.value();
		// Invert the relation: every inlink source gains the current entity as an outlink.
		for (int inEntity : inEntities) {
			inLinkOutLinkMap.computeIfAbsent(inEntity, key -> new HashSet<>()).add(outEntity);
		}
	}

	if (!inLinkOutLinkMap.isEmpty()) {
		Gson gson = new Gson();
		Files.write(Paths.get(file_name), gson.toJson(inLinkOutLinkMap).getBytes());
	}

	return inLinkOutLinkMap;
}
 
Example 11
Source File: DataAccessCassandraIntegrationTest.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the types of the entities found for the mention "Merkel" include the
 * YAGO3 politician type.
 */
@Test
public void testGetTypes() throws EntityLinkingDataAccessException {
  Entities merkelCandidates = DataAccess.getEntitiesForMention("Merkel", 1.0, 0, true);
  TIntObjectHashMap<Set<Type>> typesByEntity = DataAccess.getTypes(merkelCandidates);
  Type politician = new Type("YAGO3", "<wordnet_politician_110450303>");

  // Union of the types of all candidate entities.
  Set<Type> typeUnion = new HashSet<>();
  TIntObjectIterator<Set<Type>> cursor = typesByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    typeUnion.addAll(cursor.value());
  }

  assertTrue(typeUnion.contains(politician));
}
 
Example 12
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Renders the clusters as text, one line per cluster that has at least two members.
 *
 * <p>Indices {@code 0 .. alpahbet.size()-1} are grouped by {@code getHead(i)}. Each emitted
 * line consists of {@code wordProb.get(head)}, a space, then every member string followed by a
 * space, and ends with a newline. Groups with fewer than two members are omitted.
 *
 * @return The textual listing of all clusters with two or more members.
 */
public String toString(){
	// Group the strings of all indices by their head.
	TIntObjectHashMap<TLinkedHashSet<String>> membersByHead = new TIntObjectHashMap<TLinkedHashSet<String>>();
	for(int idx=0;idx<alpahbet.size();idx++){
		int head = getHead(idx);
		TLinkedHashSet<String> members = membersByHead.get(head);
		if(members==null){
			members = new TLinkedHashSet<String>();
			membersByHead.put(head, members);
		}
		members.add(alpahbet.lookupString(idx));
	}

	StringBuilder out = new StringBuilder();
	for(TIntObjectIterator<TLinkedHashSet<String>> cursor = membersByHead.iterator(); cursor.hasNext(); ){
		cursor.advance();
		if(cursor.value().size()>=2){
			out.append(wordProb.get(cursor.key()));
			out.append(" ");
			for(String member : cursor.value()){
				out.append(member).append(" ");
			}
			out.append("\n");
		}
	}

	return out.toString();
}
 
Example 13
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Computes a label for each mention of the given entities and stores the result per entity id.
 *
 * <p>For every entity in the mention lookup result, a label is derived from the entity's
 * type ids via {@code NerType.getNerTypeForTypeIds}. Mentions matching the junk filter below
 * are skipped. The label is overridden with LOCATION for mentions in {@code knownCountries}
 * and with MISC for mentions in {@code languagesList} (the latter check runs last and wins).
 * A labelled copy of each remaining mention is stored under the mention string; the per-entity
 * map is put into both {@code entityMentionLabelsMapCache} and {@code entityMentionLabelsMap}.
 *
 * @param processingEntities Entities whose mentions are labelled.
 * @param entityMentionLabelsMap Output map from entity id to its labelled mentions.
 * @throws EntityLinkingDataAccessException if the underlying data access fails.
 * @throws InterruptedException if the current thread is found interrupted while processing.
 */
private void computeNonprocessedEntityMentionLabels(Set<Entity> processingEntities,
														HashMap<Integer, Map<String, MentionObject>> entityMentionLabelsMap)
			throws EntityLinkingDataAccessException, InterruptedException {
		int[] entityIds = processingEntities
				.stream()
				.mapToInt(Entity::getId)
				.toArray();
		TIntObjectHashMap<int[]> typeIdsByEntity = DataAccess.getTypesIdsForEntitiesIds(entityIds);

		TIntObjectHashMap<List<MentionObject>> mentionsByEntity = DataAccess.getMentionsForEntities(new Entities(processingEntities));

		for (TIntObjectIterator<List<MentionObject>> cursor = mentionsByEntity.iterator(); cursor.hasNext(); ) {
			Map<String, MentionObject> labelledMentions = new HashMap<>();
			cursor.advance();

			int eid = cursor.key();
			NerType.Label entityLabel = NerType.getNerTypeForTypeIds(typeIdsByEntity.get(eid));

			for (MentionObject mention : cursor.value()) {
				if (Thread.interrupted()) {
					throw new InterruptedException();
				}
				String surface = mention.getMention();
				String lowered = surface.toLowerCase();

				// getting rid of the junk from the aida database
				boolean junk = stopwords.contains(lowered)
						|| surface.contains("<SPAN")
						|| surface.contains("=")
						|| surface.contains("<!--")
						|| (surface.contains("(") && surface.contains(")"))
						|| isDate(surface.trim())
						|| surface.matches("[.,\\/#!$%\\^&\\*;:{}=\\-_`~()]")
						|| surface.endsWith("'S");
				if (junk) {
					continue;
				}

				NerType.Label label = entityLabel;
				// i.e. United States is a location, not an organization
				if (knownCountries.contains(lowered)) {
					label = NerType.Label.LOCATION;
				}
				if (languagesList.contains(lowered)) {
					label = NerType.Label.MISC;
				}

				MentionObject labelled = mention.copy();
				labelled.setLabel(label);
				labelledMentions.put(surface, labelled);
			}

			entityMentionLabelsMapCache.put(eid, labelledMentions);
			entityMentionLabelsMap.put(eid, labelledMentions);
		}
	}