import nltk

# Download the Brown corpus if it is not already available locally
nltk.download('brown')

from nltk.corpus import brown

# Split the input text into chunks, where each chunk
# contains num_words words (the last chunk may be shorter)
def splitter(data, num_words):
    words = data.split(' ')
    output = []

    cur_words = []
    for word in words:
        cur_words.append(word)

        # Once the current chunk reaches the target size, store it
        if len(cur_words) == num_words:
            output.append(' '.join(cur_words))
            cur_words = []

    # Append any leftover words as the final, shorter chunk; without
    # this check, an empty chunk would be appended whenever the word
    # count is an exact multiple of num_words
    if cur_words:
        output.append(' '.join(cur_words))

    return output
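

# A more compact alternative, added here as a sketch for comparison
# (not part of the original recipe): the same fixed-size chunking can
# be expressed with list slicing. splitter_slices is a hypothetical
# name; the script below uses splitter() above.
def splitter_slices(data, num_words):
    words = data.split(' ')
    return [' '.join(words[i:i + num_words])
            for i in range(0, len(words), num_words)]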

if __name__ == '__main__':
    # Read the first 10000 words from the Brown corpus
    data = ' '.join(brown.words()[:10000])

    # Number of words in each chunk
    num_words = 1700

    text_chunks = splitter(data, num_words)

    print("Number of text chunks =", len(text_chunks))