edu.mit.jwi.item.POS Java Examples

The following examples show how to use edu.mit.jwi.item.POS. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the semantic signatures for a tagged word of the form {@code word#tag}.
 * <p>
 * If word-level vectors exist for the given LKB, the result is a single-element list
 * holding the signature looked up for the word and its raw tag. Otherwise the lookup is
 * delegated to {@code getWordSemSigs} with the tag converted to a {@link POS}.
 *
 * @param taggedWord word and tag separated by {@code #}
 * @param lkb        the LKB to take signatures from
 * @param size       forwarded to the signature lookups
 * @param warnings   forwarded to {@code getWordSemSigs}
 * @return the signatures found for the word
 */
public List<SemSig> getWordPosSemSigs(String taggedWord, LKB lkb, int size, boolean warnings)
{
	String[] parts = taggedWord.split("#");
	String word = parts[0];
	String tag = parts[1];

	POS pos = GeneralUtils.getTagfromTag(tag);

	// no word-level vectors: fall back to the sense-based lookup
	if(!SemSigProcess.getInstance().wordVectorsExist(lkb))
		return getWordSemSigs(word, pos, lkb, size, warnings);

	List<SemSig> single = new ArrayList<SemSig>();
	single.add(SemSigProcess.getInstance().getSemSigFromWord(word, tag, lkb, size));
	return single;
}
 
Example #2
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects the formatted synset offset of every sense of a word for one part of speech.
 *
 * @param word the term to look up
 * @param tag  the part of speech whose senses are requested
 * @return one offset string (as produced by {@code fixOffset}) per sense, in sense order
 */
public static List<String> getWordOffsets(String word, POS tag)
{
	List<IWord> senses = WordNetUtils.getInstance().getSenses(word, tag);
	List<String> offsets = new ArrayList<String>();

	for(IWord sense : senses)
		offsets.add(fixOffset(sense.getSynset().getOffset(), tag));

	return offsets;
}
 
Example #3
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Maps a one-character tag to its part of speech ({@link POS}).
 *
 * @param tag one of {@code 'n'}, {@code 'v'}, {@code 'a'}, {@code 'r'}
 * @return the matching {@link POS}, or {@code null} for any other character
 */
public static POS getTagfromTag(char tag)
{
	if(tag == 'n')
		return POS.NOUN;

	if(tag == 'v')
		return POS.VERB;

	if(tag == 'a')
		return POS.ADJECTIVE;

	if(tag == 'r')
		return POS.ADVERB;

	return null;
}
 
Example #4
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts a textual tag to a {@link POS}, matching case-insensitively on the tag's prefix.
 * <ul>
 *   <li>{@code n...} -> noun</li>
 *   <li>{@code v...} -> verb</li>
 *   <li>{@code r...} or {@code adv...} -> adverb</li>
 *   <li>{@code j...} or {@code a...} (including {@code adj...}) -> adjective</li>
 * </ul>
 *
 * @param tag the tag to convert; may be {@code null}
 * @return the matching {@link POS}, or {@code null} if the tag is {@code null} or unrecognized
 */
public static POS getTagfromTag(String tag)
{
	if(tag == null)
		return null;

	// lower-case once, independent of the JVM default locale
	String lowered = tag.toLowerCase(java.util.Locale.ROOT);

	if(lowered.startsWith("n"))
		return POS.NOUN;

	if(lowered.startsWith("v"))
		return POS.VERB;

	// must be tested before the adjective branch: "adv" also starts with "a"
	if(lowered.startsWith("r") || lowered.startsWith("adv"))
		return POS.ADVERB;

	// "adj" needs no separate test, it is covered by the "a" prefix
	if(lowered.startsWith("j") || lowered.startsWith("a"))
		return POS.ADJECTIVE;

	return null;
}
 
Example #5
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Maps a {@link POS} to its one-character tag.
 *
 * @param tag the part of speech to convert
 * @return {@code 'n'}, {@code 'v'}, {@code 'a'} or {@code 'r'} for noun, verb, adjective
 *         and adverb respectively; {@code 'x'} for any other value
 */
public static char getTagfromTag(POS tag)
{
	char symbol;

	switch(tag)
	{
		case NOUN:
			symbol = 'n';
			break;

		case VERB:
			symbol = 'v';
			break;

		case ADJECTIVE:
			symbol = 'a';
			break;

		case ADVERB:
			symbol = 'r';
			break;

		default:
			symbol = 'x';
			break;
	}

	return symbol;
}
 
Example #6
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds one semantic signature per WordNet offset of a word.
 *
 * @param word             the word to look up
 * @param tag              part of speech to restrict the lookup to; when {@code null} the
 *                         offsets of all parts of speech are used
 * @param lkb              forwarded to {@code getSemSigFromOffset}
 * @param size             forwarded to {@code getSemSigFromOffset}
 * @param warnings         whether to log when no (or not all) signatures were produced
 * @param normalizationLKB forwarded to {@code getSemSigFromOffset}
 * @return the signatures, one per offset, in offset order
 */
public List<SemSig> getWordSemSigs(String word, POS tag, LKB lkb, int size, boolean warnings, LKB normalizationLKB)
{
	List<String> offsets;
	if(tag != null)
		offsets = GeneralUtils.getWordOffsets(word, tag);
	else
		offsets = GeneralUtils.getWordOffsets(word);

	List<SemSig> signatures = new ArrayList<SemSig>();
	for(String offset : offsets)
	{
		signatures.add(getSemSigFromOffset(offset, lkb, size, normalizationLKB));
	}

	boolean incomplete = signatures.isEmpty() || signatures.size() != offsets.size();
	if(incomplete && warnings)
	{
		log.info("[Warning! no vector or incomplete vectors generated for "+word+":"+tag+"]");
	}

	return signatures;
}
 
Example #7
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Averages the semantic signatures of all senses of a list of tagged words.
 *
 * @param wordsPOS         entries of the form {@code lemma#[n,v,a,r]}
 * @param lkb              forwarded to {@code getWordSemSigs}
 * @param size             forwarded to {@code getWordSemSigs}
 * @param warnings         forwarded to {@code getWordSemSigs}
 * @param normalizationLKB forwarded to {@code getWordSemSigs}
 * @return the result of {@code SemSigUtils.averageSemSigs} over all collected signatures
 */
public SemSig getAveragedWordsSemSig(
		List<String> wordsPOS,
		LKB lkb,
		int size,
		boolean warnings,
		LKB normalizationLKB)
{
	List<SemSig> collected = new ArrayList<SemSig>();

	for(String entry : wordsPOS)
	{
		String[] parts = entry.split("#");
		String lemma = parts[0];
		POS pos = GeneralUtils.getTagfromTag(parts[1]);

		collected.addAll(getWordSemSigs(lemma, pos, lkb, size, warnings, normalizationLKB));
	}

	return SemSigUtils.averageSemSigs(collected);
}
 
Example #8
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Resolves a textual word sense to the corresponding {@link IWord}.
 * <p>
 * The input is matched against {@code wordSenseFormat}; groups 1, 2 and 3 are taken as the
 * word, its part-of-speech tag and the 1-based sense number.
 *
 * @param wordSense the word sense string to resolve
 * @return the sense at the given 1-based position, or {@code null} if the input does not
 *         match, no senses are available, or the sense number is out of range
 * @throws NumberFormatException if group 3 is not a parsable {@code int}
 */
public IWord mapWordSenseToIWord(String wordSense)
{
	Matcher m = wordSenseFormat.matcher(wordSense);

	if(!m.find())
		return null;

	String word = m.group(1);
	POS tag = GeneralUtils.getTagfromTag(m.group(2));
	int sense = Integer.parseInt(m.group(3));

	List<IWord> senses = getSenses(word, tag);

	// Sense numbers are 1-based: reject 0/negative values as well as values past the end,
	// otherwise senses.get(sense - 1) would throw IndexOutOfBoundsException.
	if(senses == null || sense < 1 || sense > senses.size())
		return null;

	return senses.get(sense - 1);
}
 
Example #9
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Renders a synset offset as {@code word.rank}, where rank is the 1-based position of the
 * sense of {@code word} (for the given part of speech) that belongs to that synset.
 *
 * @param offset the synset offset, resolved through {@code getSynsetFromOffset}
 * @param word   the word whose senses are searched
 * @param tag    the part of speech of the word
 * @return {@code word + "." + rank}, or the literal string {@code "null"} (after logging a
 *         warning) when no sense of the word belongs to the synset
 */
public String mapOffsetToReadableForm(String offset, String word, POS tag)
{
	List<IWord> senses = getSenses(word, tag);
	ISynset syn = getSynsetFromOffset(offset);

	int senseRank = 1;
	for(IWord sense : senses)
	{
		// Compare by value: the synset reached through a sense and the one looked up by
		// offset are not guaranteed to be the same object instance.
		if(syn != null && syn.equals(sense.getSynset()))
			break;

		senseRank++;
	}

	// the loop ran to completion without a match
	if(senseRank > senses.size())
	{
		log.warn("[ERROR: could not generate the readable form for "+word+" "+offset);
		return "null";
	}

	return word+"."+senseRank;
}
 
Example #10
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fills {@code allWNWords} and {@code allWNPOSWords} with the words returned by
 * {@code getAllWords} for nouns, verbs, adjectives and adverbs.
 * Does nothing when {@code allWNWords} has already been set.
 */
private void setAllWordNetWords()
{
	if(allWNWords == null)
	{
		allWNWords = new HashSet<String>();
		allWNPOSWords = HashMultimap.create();

		POS[] partsOfSpeech = { POS.NOUN, POS.VERB, POS.ADJECTIVE, POS.ADVERB };
		for(POS pos : partsOfSpeech)
		{
			Set<String> lemmas = WordNetUtils.getInstance().getAllWords(pos);

			allWNWords.addAll(lemmas);
			allWNPOSWords.putAll(pos, lemmas);
		}
	}
}
 
Example #11
Source File: Lemmatizer.java    From easyccg with MIT License 6 votes vote down vote up
/**
 * Maps a tag to a WordNet {@link POS} by prefix ({@code NN}, {@code VB}, {@code RB},
 * {@code JJ}), remembering successful mappings in {@code synsetTypeForPOS}.
 *
 * @param pos the tag to map
 * @return the value stored in {@code synsetTypeForPOS} for the tag after the lookup;
 *         tags with none of the four prefixes are not stored
 */
private static POS getSynsetType(String pos) {
  if (synsetTypeForPOS.containsKey(pos)) {
    return synsetTypeForPOS.get(pos);
  }

  POS mapped;
  if (pos.startsWith("NN")) {
    mapped = POS.NOUN;
  } else if (pos.startsWith("VB")) {
    mapped = POS.VERB;
  } else if (pos.startsWith("RB")) {
    mapped = POS.ADVERB;
  } else if (pos.startsWith("JJ")) {
    mapped = POS.ADJECTIVE;
  } else {
    mapped = null;
  }

  // only successful mappings are cached
  if (mapped != null) {
    synsetTypeForPOS.put(pos, mapped);
  }

  return synsetTypeForPOS.get(pos);
}
 
Example #12
Source File: Lemmatizer.java    From easyccg with MIT License 6 votes vote down vote up
/**
 * Loads the exception list {@code <name>.exc} from the classpath (relative to
 * {@code Lemmatizer}) and registers it in {@code exceptions} under the given part of speech.
 * <p>
 * Each line is split on single spaces; the first token is mapped to the second. Lines with
 * fewer than two tokens (for example a blank line) are skipped.
 *
 * @param type the part of speech the exceptions belong to
 * @param name the resource name without the {@code .exc} extension
 * @throws NullPointerException if the resource cannot be found
 * @throws RuntimeException     wrapping any {@link IOException} raised while reading
 */
static void loadExceptions(POS type, String name) {
  InputStream is = Lemmatizer.class.getResourceAsStream(name + ".exc");
  if (is == null) {
    // getResourceAsStream signals a missing resource with null; fail with a clear message
    throw new NullPointerException("Exception list resource not found: " + name + ".exc");
  }

  BufferedReader br = new BufferedReader(new InputStreamReader(is));
  try {
    Map<String, String> map = new HashMap<String, String>();
    exceptions.put(type, map);

    String line;
    while ((line = br.readLine()) != null) {
      String[] split = line.split(" ");
      if (split.length < 2) {
        continue;
      }
      // NOTE(review): the explicit copies are kept from the original; presumably they
      // detach the tokens from the line's backing storage on older JVMs.
      map.put(new String(split[0]), new String(split[1]));
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    try {
      br.close();
    } catch (IOException ignored) {
      // a failure while closing a read-only stream leaves nothing to recover
    }
  }
}
 
Example #13
Source File: POSTag.java    From senti-storm with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a one-letter ARK tag to a WordNet {@link POS}.
 *
 * @param arkTag the ARK tag; {@code N}, {@code O}, {@code ^}, {@code S} and {@code Z} map
 *               to noun, {@code V} to verb, {@code A} to adjective, {@code R} to adverb
 * @return the matching {@link POS}, or {@code null} for any other tag
 */
public static POS convertArk(String arkTag) {
  boolean nounLike = arkTag.equals("N") || arkTag.equals("O") || arkTag.equals("^")
      || arkTag.equals("S") || arkTag.equals("Z");

  POS converted;
  if (nounLike) {
    converted = POS.NOUN;
  } else if (arkTag.equals("V")) {
    converted = POS.VERB;
  } else if (arkTag.equals("A")) {
    converted = POS.ADJECTIVE;
  } else if (arkTag.equals("R")) {
    converted = POS.ADVERB;
  } else {
    converted = null;
  }
  return converted;
}
 
Example #14
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Formats a numeric synset offset as a string left-padded with zeros to at least eight
 * characters, followed by {@code -} and a part-of-speech tag.
 *
 * @param offset the numeric offset
 * @param tag    the part of speech; adjectives always get the suffix {@code -a}, all other
 *               values use {@code tag.getTag()}
 * @return the padded offset with its tag suffix
 */
public static String fixOffset(int offset, POS tag)
{
	StringBuilder padded = new StringBuilder(Integer.toString(offset));

	while(padded.length() < 8)
		padded.insert(0, "0");

	if(tag.equals(POS.ADJECTIVE))
		padded.append("-a");
	else
		padded.append("-"+tag.getTag());

	return padded.toString();
}
 
Example #15
Source File: WordNet.java    From senti-storm with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether any stem of the word, for any part of speech, has an index entry in the
 * dictionary.
 *
 * @param word the word to look up
 * @return {@code true} as soon as one stem yields a non-null index word, else {@code false}
 */
public boolean contains(String word) {
  POS[] allPos = POS.values();
  for (int i = 0; i < allPos.length; i++) {
    POS pos = allPos[i];
    for (String stem : m_wordnetStemmer.findStems(word, pos)) {
      if (m_dict.getIndexWord(stem, pos) != null) {
        return true;
      }
    }
  }
  return false;
}
 
Example #16
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the offsets of all senses of a term across nouns, verbs, adjectives and
 * adverbs, in that order.
 *
 * @param word the term to look up
 * @return the concatenation of {@code getWordOffsets(word, tag)} for the four parts of speech
 */
public static List<String> getWordOffsets(String word)
{
	POS[] partsOfSpeech = { POS.NOUN, POS.VERB, POS.ADJECTIVE, POS.ADVERB };
	List<String> collected = new ArrayList<String>();

	for(POS tag : partsOfSpeech)
	{
		List<String> offsetsForTag = getWordOffsets(word, tag);
		collected.addAll(offsetsForTag);
	}

	return collected;
}
 
Example #17
Source File: GeneralUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Turns an integer offset into its string form, left-padded with zeros to at least eight
 * characters and suffixed with {@code -} plus the character returned by
 * {@code getTagfromTag(tag)}.
 *
 * @param intOffset the numeric offset
 * @param tag       the part of speech supplying the suffix character
 * @return the offset in full form
 */
public static String fixOffset(int intOffset, POS tag)
{
	StringBuilder padded = new StringBuilder(Integer.toString(intOffset));

	while(padded.length() < 8)
		padded.insert(0, "0");

	padded.append("-").append(getTagfromTag(tag));

	return padded.toString();
}
 
Example #18
Source File: SemSigComparator.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Compares every sense of {@code w1} with every sense of {@code w2} and prints the results
 * to standard output: one tab-separated line per pair (sense1, sense2, similarity),
 * followed by the best pair and its similarity on three separate lines.
 * <p>
 * A pair only replaces the current best when its similarity is strictly greater, so if no
 * pair scores above 0 the two senses printed at the end are {@code null}.
 *
 * @param w1      first word
 * @param tag1    part of speech of the first word
 * @param w2      second word
 * @param tag2    part of speech of the second word
 * @param lkb     forwarded to {@code getSemSigFromOffset}
 * @param measure comparison method forwarded to {@code compareSortedNormalizedMaps}
 * @param size    forwarded to the signature lookup and the comparison
 */
public void getClosestSenses(String w1, POS tag1, String w2, POS tag2, LKB lkb, SignatureComparison measure, int size)
{
	WordNetUtils wordNet = WordNetUtils.getInstance();
	SemSigProcess sigProcess = SemSigProcess.getInstance();

	// The senses of w2 and their signatures do not depend on the sense of w1, so fetch
	// them once up front instead of once per sense of w1.
	java.util.List<IWord> senses2 = wordNet.getSenses(w2, tag2);
	java.util.List<SemSig> signatures2 = new java.util.ArrayList<SemSig>(senses2.size());
	for(IWord sense2 : senses2)
		signatures2.add(sigProcess.getSemSigFromOffset(GeneralUtils.fixOffset(sense2.getSynset().getOffset(), tag2), lkb, size));

	double maxSim = 0;
	IWord src = null;
	IWord trg = null;

	for(IWord sense1 : wordNet.getSenses(w1, tag1))
	{
		SemSig v1 = sigProcess.getSemSigFromOffset(GeneralUtils.fixOffset(sense1.getSynset().getOffset(), tag1), lkb, size);

		for(int i = 0; i < senses2.size(); i++)
		{
			IWord sense2 = senses2.get(i);
			SemSig v2 = signatures2.get(i);

			double currentSim = SemSigComparator.compareSortedNormalizedMaps(v1.getVector(), v2.getVector(), measure, size);

			System.out.print(sense1+"\t");
			System.out.print(sense2+"\t");
			System.out.println(currentSim);

			if(maxSim < currentSim)
			{
				src = sense1;
				trg = sense2;
				maxSim = currentSim;
			}
		}
	}

	System.out.println(src);
	System.out.println(trg);
	System.out.println(maxSim);
}
 
Example #19
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * gets a word#pos and returns all the offsets associated with that word and part of speech
 * @param wordPos
 * @return
 */
public Set<String> getWordNetOffsets(String wordPos)
{
	Set<String> offsets = new HashSet<String>();

	wordPos = wordPos.trim();
	
	if(wordPos.matches("[0-9]*-[nrva]"))
	{
		offsets.add(wordPos);
	}
	else
	{
		String comps[] = wordPos.split("#");
		
		if(comps.length != 2)
		{
			log.error("mal-formatted word-pos: "+wordPos);
			return null;
		}
		
		String word = comps[0];
		POS pos = GeneralUtils.getTagfromTag(comps[1]);
		
		List<IWord> senses = WordNetUtils.getInstance().getSenses(word, pos);
		
		if(senses == null)
			return null;
		
		for(IWord sense : senses)
		{
			offsets.add(GeneralUtils.fixOffset(sense.getSynset().getOffset(), pos));
		}
	}
	
	return offsets;
}
 
Example #20
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Gathers the stems that {@code wnStemmer} finds for a word under every {@link POS} value.
 *
 * @param word the surface form to stem
 * @return the union of the stems found for all parts of speech
 */
public Set<String> getWordNetStems(String word)
{
	Set<String> collected = new HashSet<String>();

	POS[] allPos = POS.values();
	for(int i = 0; i < allPos.length; i++)
	{
		collected.addAll(wnStemmer.findStems(word, allPos[i]));
	}

	return collected;
}
 
Example #21
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the lemma of every index word the dictionary lists for a part of speech.
 *
 * @param pos the part of speech to enumerate
 * @return the distinct lemmas
 */
public Set<String> getAllWords(POS pos)
{
	Set<String> lemmas = new HashSet<String>();

	for(Iterator<IIndexWord> it = dictionary.getIndexWordIterator(pos); it.hasNext(); )
	{
		IIndexWord indexWord = it.next();
		lemmas.add(indexWord.getLemma());
	}

	return lemmas;
}
 
Example #22
Source File: WordNet.java    From senti-storm with Apache License 2.0 5 votes vote down vote up
/**
 * Guesses the most likely part of speech of a word from the tag counts of its senses.
 * <p>
 * Whitespace and underscores are removed from the word first. For every part of speech and
 * every stem found for it, the tag counts of all senses of the stem's index word are
 * summed; the part of speech with the strictly highest sum wins.
 *
 * @param word the surface form to analyse
 * @return the part of speech with the highest summed tag count, or {@code null} if the
 *         normalized word is empty or no stem reaches a count above zero
 */
public synchronized POS findPOS(String word) {
  // Normalize once, outside the loop: the result does not depend on the part of speech.
  String surface = word.replaceAll("\\s", "").replaceAll("_", "");
  if (surface.isEmpty()) {
    // nothing left to stem; the JWI stemmer is documented to reject empty surface forms
    return null;
  }

  int maxCount = 0;
  POS mostLikelyPOS = null;
  for (POS pos : POS.values()) {
    List<String> stems = m_wordnetStemmer.findStems(surface, pos);
    for (String stem : stems) {
      IIndexWord indexWord = m_dict.getIndexWord(stem, pos);
      if (indexWord == null) {
        continue;
      }

      int count = 0;
      for (IWordID wordId : indexWord.getWordIDs()) {
        // NOTE(review): both lookups are assumed to be able to return null for a
        // missing entry; such entries contribute nothing to the count.
        IWord aWord = m_dict.getWord(wordId);
        if (aWord == null) {
          continue;
        }
        ISenseEntry senseEntry = m_dict.getSenseEntry(aWord.getSenseKey());
        if (senseEntry != null) {
          count += senseEntry.getTagCount();
        }
      }

      if (count > maxCount) {
        maxCount = count;
        mostLikelyPOS = pos;
      }
    }
  }

  return mostLikelyPOS;
}
 
Example #23
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Demo entry point: prints what {@code getSemSigFromWord} returns for the noun "monkey"
 * with {@code LKB.WordNetGloss}.
 *
 * @param args unused
 */
public static void main(String args[])
{
	SemSigProcess process = SemSigProcess.getInstance();

	System.out.println(process.getSemSigFromWord("monkey", POS.NOUN, LKB.WordNetGloss, 0, null));
}
 
Example #24
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Tells whether a word is listed in WordNet for the given part of speech.
 * <p>
 * The per-POS vocabulary is loaded through {@code setAllWordNetWords()} on first use.
 *
 * @param word the word to look up
 * @param pos  the part of speech to look the word up under
 * @return {@code true} if {@code allWNPOSWords} holds the word under {@code pos}
 */
public boolean inVocabulary(String word, POS pos)
{
	if(allWNPOSWords == null)
		setAllWordNetWords();

	// allWNPOSWords is keyed by POS with words as values, so the lookup must be on the
	// (pos, word) entry; asking for the word as a key can never match.
	return allWNPOSWords.containsEntry(pos, word);
}
 
Example #25
Source File: WordNetUtils.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Finds the sense whose sense key equals the given string.
 * <p>
 * The lemma is taken as the part of the key before the first {@code %}; the senses of that
 * lemma under every {@link POS} value are gathered and searched.
 *
 * @param sensekey the sense key to look for
 * @return the matching sense, or {@code null} if none of the gathered senses has that key
 */
public IWord getSenseFromSenseKey(final String sensekey)
{
	final String lemma = sensekey.split("%")[0];

	final Set<IWord> candidates = new HashSet<IWord>();
	for (POS pos : POS.values())
	{
		candidates.addAll(getSenses(lemma, pos));
	}

	IWord match = null;
	for (IWord candidate : candidates)
	{
		if (candidate.getSenseKey().toString().equals(sensekey))
		{
			match = candidate;
			break;
		}
	}

	return match;
}
 
Example #26
Source File: POSTag.java    From senti-storm with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a tag string into a {@link POS} by looking at its first character.
 *
 * @param tag the tag; its first character must be {@code n}, {@code v}, {@code a} or {@code r}
 * @return the matching {@link POS}
 * @throws IllegalStateException if the first character is none of the four known tags
 */
public static POS parseString(String tag) {
  char initial = tag.charAt(0);

  if (initial == 'n') {
    return POS.NOUN;
  }
  if (initial == 'v') {
    return POS.VERB;
  }
  if (initial == 'a') {
    return POS.ADJECTIVE;
  }
  if (initial == 'r') {
    return POS.ADVERB;
  }
  throw new IllegalStateException("Unknown POS tag '" + tag + "'!");
}
 
Example #27
Source File: POSTag.java    From senti-storm with Apache License 2.0 5 votes vote down vote up
/**
 * Renders a {@link POS} as its one-letter tag.
 *
 * @param posTag the part of speech to render
 * @return {@code "n"}, {@code "v"}, {@code "a"} or {@code "r"}
 * @throws IllegalStateException for any other value
 */
public static String toString(POS posTag) {
  String symbol;

  switch (posTag) {
    case NOUN:
      symbol = "n";
      break;
    case VERB:
      symbol = "v";
      break;
    case ADJECTIVE:
      symbol = "a";
      break;
    case ADVERB:
      symbol = "r";
      break;
    default:
      throw new IllegalStateException("Unknown POS tag '" + posTag + "'!");
  }

  return symbol;
}
 
Example #28
Source File: POSTag.java    From senti-storm with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Penn Treebank tag to a WordNet {@link POS} by prefix.
 *
 * @param pennTag the Penn Treebank tag
 * @return noun for {@code NN*} (which includes proper nouns), verb for {@code VB*},
 *         adjective for {@code JJ*}, adverb for {@code RB*}; {@code null} otherwise
 */
public static POS convertPTB(String pennTag) {
  POS converted;

  if (pennTag.startsWith("NN")) {
    converted = POS.NOUN;
  } else if (pennTag.startsWith("VB")) {
    converted = POS.VERB;
  } else if (pennTag.startsWith("JJ")) {
    converted = POS.ADJECTIVE;
  } else if (pennTag.startsWith("RB")) {
    converted = POS.ADVERB;
  } else {
    converted = null;
  }

  return converted;
}
 
Example #29
Source File: Preprocess.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Rebuilds a sentence token by token, resolving tokens that contain {@code -}, {@code /},
 * {@code <} or {@code >} against the WordNet vocabulary.
 * <p>
 * Backslashes are removed from every token. A token with one of the separator characters
 * is kept as is if the singular noun form of it is in the vocabulary; otherwise the
 * separators are deleted if that yields a known word, and replaced by spaces if not.
 *
 * @param sentence the sentence to clean
 * @return the processed tokens joined by single spaces, trimmed
 */
public static String removeHyphens(String sentence)
{
	StringBuilder cleaned = new StringBuilder();

	for(String token : StanfordTokenizer.getInstance().tokenizeString(sentence))
	{
		String word = token.replaceAll("\\\\", "");

		boolean hasSeparator = word.contains("-") || word.contains("/") || word.contains("<") || word.contains(">");

		String kept;
		if(!hasSeparator)
		{
			kept = word;
		}
		else if(WordNetUtils.getInstance().inVocabulary(WordNetUtils.getInstance().getSingularOf(word, POS.NOUN)))
		{
			// the token itself is a known word: leave it untouched
			kept = word;
		}
		else
		{
			String joined = word.replaceAll("[-/<>]", "").trim();

			if(WordNetUtils.getInstance().inVocabulary(WordNetUtils.getInstance().getSingularOf(joined, POS.NOUN)))
				kept = joined;
			else
				kept = word.replaceAll("[-/><]", " ").trim();
		}

		cleaned.append(kept).append(" ");
	}

	return cleaned.toString().trim();
}
 
Example #30
Source File: RandG.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Scores the closest pair of signatures of two words using {@code WeightedOverlap}.
 * <p>
 * The signatures of both words are obtained for {@code LKB.WordNetGloss} and compared
 * pairwise; the extreme score over all pairs is returned.
 *
 * @param word1         first word
 * @param word2         second word
 * @param tag           part of speech used for both words
 * @param size          forwarded to the signature lookup and to {@code getVector}
 * @param smallerBetter when {@code true} the smallest score is kept, starting from 1000;
 *                      otherwise the largest score is kept, starting from 0
 * @return the best score found, or the starting value if no pair improves on it
 */
public static double getClosestPairScore(
		String word1, String word2,
		POS tag,
		int size,
		boolean smallerBetter)
{
	List<SemSig> firstSigs = SemSigProcess.getInstance().getWordSemSigs(word1, tag, LKB.WordNetGloss, size, true);
	List<SemSig> secondSigs = SemSigProcess.getInstance().getWordSemSigs(word2, tag, LKB.WordNetGloss, size, true);

	WeightedOverlap overlap = new WeightedOverlap();

	double best = smallerBetter ? 1000 : 0;

	for(SemSig first : firstSigs)
	{
		for(SemSig second : secondSigs)
		{
			double score = overlap.compare(first.getVector(size), second.getVector(size), false);

			boolean improves = smallerBetter ? (score < best) : (score > best);
			if(improves)
				best = score;
		}
	}

	return best;
}