// Copyright (c) Committed Software 2018, [email protected]
package uk.gov.dstl.baleen.entity.linking.collector;

import static java.util.stream.Collectors.toList;

import java.util.*;
import java.util.stream.Collectors;

import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import com.google.common.collect.Multimap;

import uk.gov.dstl.baleen.entity.linking.EntityInformation;
import uk.gov.dstl.baleen.entity.linking.InformationCollector;
import uk.gov.dstl.baleen.types.language.Sentence;
import uk.gov.dstl.baleen.types.language.WordToken;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.ReferenceTarget;
import uk.gov.dstl.baleen.uima.utils.ReferentUtils;

/**
 * Collects basic information about the entity from the JCas and restricts the interesting mentions
 * to proper nouns.
 *
 * <p>A mention counts as a proper noun when at least one of the word tokens it covers carries a
 * proper noun part of speech tag, either singular ({@code NNP}) or plural ({@code NNPS}).
 *
 * <p>This requires part of speech tagging to be applied.
 */
public class ProperNounInformationCollector implements InformationCollector {

  /** Penn Treebank part of speech tag for a singular proper noun. */
  private static final String PROPER_NOUN_SINGULAR = "NNP";

  /** Penn Treebank part of speech tag for a plural proper noun. */
  private static final String PROPER_NOUN_PLURAL = "NNPS";

  /**
   * Builds one {@link EntityInformation} per reference target found for the given entity type.
   *
   * <p>The sentences attached to each target are gathered from every mention of that target, while
   * the mention list itself only keeps the mentions that contain a proper noun token. A target
   * whose mentions contain no proper noun still produces an entry, with an empty mention list.
   *
   * @param jCas the JCas to read the entities, sentences and word tokens from
   * @param clazz the entity type to collect information for
   * @return the set of collected entity information, one element per reference target
   */
  @Override
  public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
      JCas jCas, Class<T> clazz) {
    Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
    Map<T, List<Sentence>> coveringSentences =
        JCasUtil.indexCovering(jCas, clazz, Sentence.class);
    Map<T, List<WordToken>> coveredTokens = JCasUtil.indexCovered(jCas, clazz, WordToken.class);

    Set<EntityInformation<T>> infos = new HashSet<>();
    for (Map.Entry<ReferenceTarget, Collection<T>> entry : mentionsByTarget.asMap().entrySet()) {
      Collection<T> mentions = entry.getValue();

      // Sentences are taken from all mentions, not only the proper noun ones; the set removes
      // duplicates when several mentions share a sentence.
      Collection<Sentence> sentences =
          mentions.stream()
              .flatMap(mention -> coveringSentences.get(mention).stream())
              .collect(Collectors.toSet());

      List<T> properNouns =
          mentions.stream()
              .filter(mention -> containsProperNoun(coveredTokens.get(mention)))
              .collect(toList());

      infos.add(new EntityInformation<T>(entry.getKey(), properNouns, sentences));
    }

    return infos;
  }

  /** Returns true if any of the given tokens is tagged as a singular or plural proper noun. */
  private static boolean containsProperNoun(Collection<WordToken> wordTokens) {
    return wordTokens.stream()
        .map(WordToken::getPartOfSpeech)
        .anyMatch(ProperNounInformationCollector::isProperNounTag);
  }

  /** Returns true for the proper noun tags; a null tag (untagged token) never matches. */
  private static boolean isProperNounTag(String partOfSpeech) {
    return PROPER_NOUN_SINGULAR.equals(partOfSpeech) || PROPER_NOUN_PLURAL.equals(partOfSpeech);
  }
}