"""Small NLTK demo: chunk runs of proper nouns with a RegexpParser grammar."""
import sys

import nltk

# Earlier experiment, kept disabled inside a bare string literal so it is
# never executed. It is the only place in this file that references `sys`.
"""
sentence = "Satya Nadella is the CEO of Microsoft."
words = nltk.word_tokenize(sentence)
tagged = nltk.pos_tag(words)
chunks = nltk.ne_chunk(tagged, binary=True)
print(tagged)
print(chunks)
sys.exit(0)
print(nltk.tree2conlltags(nltk.ne_chunk(tagged)))

grammar = r"NAMED-ENTITY: {<NNP>+}"
cp = nltk.RegexpParser(grammar)
print(cp.parse(tagged))
"""

# One chunk rule: one or more consecutive NNP-tagged tokens form a
# NAMED-ENTITY chunk.
grammar = r"NAMED-ENTITY: {<NNP>+}"
cp = nltk.RegexpParser(grammar)

samplestrings = [
    "Microsoft Azure is a cloud service",
    "Bill Gates announces Satya Nadella as new CEO of Microsoft",
]


def demo(samplestrings):
    """Tokenize, POS-tag and chunk each sentence, printing the result.

    Each sentence is chunked with the module-level ``cp`` parser. The
    resulting tree is printed twice: first in CoNLL IOB string form (via
    ``nltk.tree2conllstr``), then as the tree's own string representation.

    Args:
        samplestrings: Iterable of sentences (``str``) to process.

    Returns:
        None. Output goes to stdout.
    """
    for s in samplestrings:
        words = nltk.word_tokenize(s)
        tagged = nltk.pos_tag(words)
        # Disabled alternative: chunks = nltk.ne_chunk(tagged)
        chunks = cp.parse(tagged)
        print(nltk.tree2conllstr(chunks))
        print(chunks)


demo(samplestrings)

# Hand-written CoNLL-style samples: one token per line, with columns
# "word POS-tag IOB-chunk-tag". They are only referenced by the disabled
# calls at the bottom of the file.
strings = [
    """
Microsoft NNP B-COMPANY
Azure NNP I-CLOUDSERVICE
is VBZ O
a DT O
cloud JJ O
service NN O
""",
    """
Bill NNP B-PERSON
Gates NNP I-PERSON
announces NNS O
Satya NNP B-PERSON
Nadella NNP I-PERSON
as IN O
new JJ O
CEO NNP B-NAMED-ENTITY
of IN O
Microsoft NNP B-COMPANY
""",
]

# print(nltk.conllstr2tree(strings[1]))
# print(nltk.ne_chunk(nltk.conllstr2tree(strings[1])))