Python nltk.corpus.brown.sents() Examples

The following are 6 code examples showing how to use nltk.corpus.brown.sents(). The examples are extracted from open source projects; the project, author, file, and license are noted above each example.

You may also want to check out the other available functions and classes of the nltk.corpus.brown module.
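For reference, brown.sents() returns the Brown corpus as a sequence of tokenized sentences, each sentence being a list of word strings. A minimal sketch of loading and inspecting it (the nltk.download call is only needed if the corpus is not already installed):

import nltk
nltk.download('brown')  # fetch the corpus if it is not installed yet

from nltk.corpus import brown

sents = brown.sents()
print(len(sents))   # number of sentences in the corpus
print(sents[0])     # first sentence as a list of word tokens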

Example 1
Project: razzy-spinner   Author: rafasashi   File: tnt.py   License: GNU General Public License v3.0
def demo():
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j+100]
        for i in range(len(s)):
            print(s[i],'--', t[i])
        print() 
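Examples 1, 3, and 4 below are variants of the same demo() function that ships inside NLTK's tnt.py, so the TnT class is already in scope there. To use the same train/tag pattern outside that module, the tagger has to be imported explicitly; a minimal sketch:

from nltk.corpus import brown
from nltk.tag import tnt

tagger = tnt.TnT()
tagger.train(list(brown.tagged_sents())[:800])   # train on a slice of tagged sentences

# tagdata() tags a list of untagged (plain token-list) sentences
tagged = tagger.tagdata(list(brown.sents())[100:105])
print(tagged[0])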
Example 2
import math
from nltk.corpus import brown

# Module-level state used by info_content (defined at module scope in the
# original project).
N = 0
brown_freqs = dict()

def info_content(lookup_word):
    """
    Uses the Brown corpus available in NLTK to calculate a Laplace
    smoothed frequency distribution of words, then uses this information
    to compute the information content of the lookup_word.
    """
    global N
    if N == 0:
        # poor man's lazy evaluation
        for sent in brown.sents():
            for word in sent:
                word = word.lower()
                if not word in brown_freqs:
                    brown_freqs[word] = 0
                brown_freqs[word] = brown_freqs[word] + 1
                N = N + 1
    lookup_word = lookup_word.lower()
    n = 0 if not lookup_word in brown_freqs else brown_freqs[lookup_word]
    return 1.0 - (math.log(n + 1) / math.log(N + 1)) 
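The returned value lies in [0, 1]: a very frequent word gives a comparatively low value, while a word that never occurs in the Brown corpus (n = 0) gives exactly 1.0, because log(1) = 0. A quick check (the first call is slow, since it builds the frequency table over the whole corpus):

print(info_content('the'))       # very frequent word: low information content
print(info_content('zzzxqqq'))   # word absent from the corpus: 1.0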
Example 3
Project: luscan-devel   Author: blackye   File: tnt.py   License: GNU General Public License v2.0
def demo():
    from nltk.tag import tnt
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = tnt.TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j+100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
Example 4
Project: V1EngineeringInc-Docs   Author: V1EngineeringInc   File: tnt.py   License: Creative Commons Attribution Share Alike 4.0 International
def demo():
    from nltk.corpus import brown

    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print() 
Example 5
Project: neural_tokenizer   Author: Kyubyong   File: data_load.py   License: MIT License
import re
import numpy as np

# `hp` (hyperparameters providing minlen/maxlen) and `load_vocab` come from
# the project's own modules and are assumed to be importable here.

def load_data(mode="train"):
    word2idx, idx2word = load_vocab()

    from nltk.corpus import brown
    sents = [" ".join(words) for words in brown.sents()]

    xs, ys = [], []
    for sent in sents:
        sent = re.sub(r"[^ A-Za-z']", "", sent)
        if hp.minlen <= len(sent) <= hp.maxlen:
            x, y = [], []
            for word in sent.split():
                for char in word:
                    x.append(word2idx[char])
                    y.append(0) # 0: no space
                y[-1] = 1 # space for end of a word
            y[-1] = 0 # no space for end of sentence

            xs.append(x + [0] * (hp.maxlen-len(x)))
            ys.append(y + [0] * (hp.maxlen-len(x)))

    # Convert to ndarrays
    X = np.array(xs, np.int32)
    Y = np.array(ys, np.int32)

    # mode
    if mode=="train":
        X, Y = X[: int(len(X) * .8)], Y[: int(len(Y) * .8)]
        # X, Y = X[: 128], Y[: 128]
    elif mode=="val":
        X, Y = X[int(len(X) * .8): -int(len(X) * .1)], Y[int(len(X) * .8): -int(len(X) * .1)]
    else:
        X, Y = X[-int(len(X) * .1):], Y[-int(len(X) * .1):]

    return X, Y 
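load_data() returns two int32 arrays of shape (num_sentences, hp.maxlen): X holds the character indices and Y the per-character space/no-space labels, split roughly 80/10/10 between the train, val, and test modes. A typical call:

X_train, Y_train = load_data(mode="train")
X_val, Y_val = load_data(mode="val")
print(X_train.shape, Y_train.shape)   # (num_train_sentences, hp.maxlen) each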
Example 6
Project: PyRATA   Author: nicolashernandez   File: do_benchmark.py   License: Apache License 2.0
def test_clause():

  """                            
  """

  print ('Measuring time performance on # {} sentences over # {} iterations for recognizing Clause'.format(size, iteration_number))

  from nltk.corpus import brown
  brown_sents = brown.sents()[:size]
  import nltk
  global brown_pos_tag_sents
  brown_pos_tag_sents = [nltk.pos_tag(sentence) for sentence in brown_sents] 
  #print (brown_pos_tag_sents[0])


  # ----------------------------------------------------
  # nltk_parser 
  # ----------------------------------------------------
  analyzer_name='nltk_parser'
  

  times, averagetime, mintime = measure_time(nltk_parse_clause_in_the_whole_text, iteration_number)
  grammar = "clause"
  print ('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))


  # ----------------------------------------------------
  # pyrata 
  # ----------------------------------------------------
  analyzer_name='pyrata'

  global sentences_dict_list_list
  sentences_dict_list_list = []

  for s in brown_pos_tag_sents:
    sentences_dict_list_list.append([{'raw':w, 'pos':p} for (w, p) in s])
  # data -> sentences_dict_list_list
  #data = data[0]
  # flatten a list of list i.e. sentences of words becomes a text of words 
  # data = [val for sublist in data for val in sublist]
  #print (data[:10])
  #print ('len(data):', len(data))

  times, averagetime, mintime = measure_time(pyrata_recognize_clause_in_the_whole_text, iteration_number)
  grammar = "clause"
  print ('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))