Python nltk.corpus.names.words() Examples

The following are code examples for showing how to use nltk.corpus.names.words(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: Natural-Language-Processing-with-Python-Cookbook   Author: PacktPublishing   File: Anaphora.py    MIT License 5 votes vote down vote up
def __init__(self):
        """Train the gender classifier from the NLTK names corpus.

        Names from ``male.txt`` and ``female.txt`` are labelled with their
        gender, shuffled, turned into featuresets with ``self.feature`` and
        used to train a Naive Bayes classifier kept in ``self._classifier``.
        """
        # Male names first, then female names, exactly one pair per corpus entry.
        labelled = [(name, 'male') for name in names.words('male.txt')]
        labelled += [(name, 'female') for name in names.words('female.txt')]

        # Shuffle in place so the two genders are interleaved before training.
        random.shuffle(labelled)

        examples = []
        for name, gender in labelled:
            examples.append((self.feature(name), gender))
        self._classifier = nltk.NaiveBayesClassifier.train(examples)
Example 2
Project: nltk-on-gae   Author: sivu22   File: svm.py    Apache License 2.0 5 votes vote down vote up
def demo():
    """Train an SVM gender classifier on the names corpus and print a report.

    The labelled names are shuffled with a fixed seed, the first 500
    featuresets are held out as the test set and the rest are used for
    training.  The report shows the classifier's internal mappings, one
    worked example instance and the overall accuracy on the test set.
    """

    def gender_features(word):
        # The featureset is simply the last two letters of the name.
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    import random

    from nltk.classify import accuracy
    from nltk.corpus import names

    # Keep the labelled list under its own name so that it does not shadow
    # the ``names`` corpus reader imported above.
    labelled_names = ([(name, 'male') for name in names.words('male.txt')] +
                      [(name, 'female') for name in names.words('female.txt')])
    # Fixed seed so that the train/test split is reproducible.
    random.seed(60221023)
    random.shuffle(labelled_names)

    featuresets = [(gender_features(n), g) for (n, g) in labelled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    print('--- nltk.classify.svm demo ---')
    print('Number of training examples:', len(train_set))
    classifier = SvmClassifier.train(train_set)
    print('Total SVM dimensions:', len(classifier._svmfeatureindex))
    print('Label mapping:', classifier._labelmapping)
    print('--- Processing an example instance ---')
    # labelled_names[0] is the name behind test_set[0] (same order, same index).
    print('Reference instance:', labelled_names[0])
    print('NLTK-format features:\n    ' + str(test_set[0]))
    print('SVMlight-format features:\n    ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)))
    distr = classifier.prob_classify(test_set[0][0])
    print('Instance classification and confidence:', distr.max(), distr.prob(distr.max()))
    print('--- Measuring classifier performance ---')
    print('Overall accuracy:', accuracy(classifier, test_set))
Example 3
Project: luscan-devel   Author: blackye   File: svm.py    GNU General Public License v2.0 5 votes vote down vote up
def demo():

    def gender_features(word):
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    from nltk.classify import accuracy
    from nltk.corpus import names


    import random
    names = ([(name, 'male') for name in names.words('male.txt')] +
             [(name, 'female') for name in names.words('female.txt')])
    import random
    random.seed(60221023)
    random.shuffle(names)

    featuresets = [(gender_features(n), g) for (n,g) in names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    print '--- nltk.classify.svm demo ---'
    print 'Number of training examples:', len(train_set)
    classifier = SvmClassifier.train(train_set)
    print 'Total SVM dimensions:', len(classifier._svmfeatureindex)
    print 'Label mapping:', classifier._labelmapping
    print '--- Processing an example instance ---'
    print 'Reference instance:', names[0]
    print 'NLTK-format features:\n    ' + str(test_set[0])
    print 'SVMlight-format features:\n    ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex))
    distr = classifier.prob_classify(test_set[0][0])
    print 'Instance classification and confidence:', distr.max(), distr.prob(distr.max())
    print '--- Measuring classifier performance ---'
    print 'Overall accuracy:', accuracy(classifier, test_set) 
Example 4
Project: Decoy-Browsing   Author: plummerfernandez   File: DecoyFacebookBrowsing.py    GNU General Public License v2.0 5 votes vote down vote up
def createName():
	"""Return a randomly picked first name from the NLTK names corpus.

	All male and female names are pooled, the pool is shuffled and the name
	that ends up first is returned as a ``str`` with any spaces removed.
	"""
	pool = []
	for filename, label in (('male.txt', 'male'), ('female.txt', 'female')):
		pool.extend((name, label) for name in names.words(filename))

	random.shuffle(pool)

	# Only the name is needed; the gender label is discarded.
	picked, _label = pool[0]
	return str(picked).replace(' ', '')
Example 5
Project: hobbs   Author: cmward   File: hobbs.py    MIT License 5 votes vote down vote up
def gender_match(tree, pos, pro):
    """ Takes a proposed antecedent and pronoun and checks whether
    they match in gender. Only checks for mismatches between singular
    proper name antecedents and singular pronouns.

    tree -- the parse tree containing the proposed antecedent
    pos  -- the tree position of the proposed antecedent within tree
    pro  -- the pronoun being resolved, compared against lowercase
            pronoun forms

    Returns False as soon as a child of tree[pos] with a nominal label
    starts with a recognized male name, female name or numeral that
    conflicts with the pronoun; returns True otherwise.
    """
    # Build real sets rather than generator expressions: a generator is
    # consumed by the first `in` test, so every later lookup (including
    # those for subsequent children in the loop below) would silently
    # miss names that had already been iterated past.
    male_names = {name.lower() for name in names.words('male.txt')}
    female_names = {name.lower() for name in names.words('female.txt')}
    male_pronouns = ("he", "him", "himself")
    female_pronouns = ("she", "her", "herself")
    neuter_pronouns = ("it", "itself")

    for c in tree[pos]:
        if not (isinstance(c, nltk.Tree) and c.label() in nominal_labels):
            continue

        first_leaf = c.leaves()[0]
        lowered = first_leaf.lower()

        # If the proposed antecedent is a recognized male name,
        # but the pronoun being resolved is either female or
        # neuter, they don't match
        if lowered in male_names:
            if pro in female_pronouns or pro in neuter_pronouns:
                return False
        # If the proposed antecedent is a recognized female name,
        # but the pronoun being resolved is either male or
        # neuter, they don't match
        elif lowered in female_names:
            if pro in male_pronouns or pro in neuter_pronouns:
                return False
        # If the proposed antecedent is a numeral, but the
        # pronoun being resolved is not neuter, they don't match
        elif first_leaf.isdigit():
            if pro in male_pronouns or pro in female_pronouns:
                return False

    return True
Example 6
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Label every name in the corpus: males first, then females.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # A fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    # Training.
    print('Training classifier...')
    training_data = [(features(word), label) for (word, label) in train]
    classifier = trainer(training_data)

    # Evaluation on the held-out names.
    print('Testing classifier...')
    gold_data = [(features(word), label) for (word, label) in test]
    acc = accuracy(classifier, gold_data)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped entirely if anything in this section
    # raises NotImplementedError.
    try:
        test_featuresets = [features(word) for (word, _label) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(label)
              for ((_word, label), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        for ((word, label), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if label == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 7
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 8
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Label every name in the corpus: males first, then females.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # A fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    # Training.
    print('Training classifier...')
    training_data = [(features(word), label) for (word, label) in train]
    classifier = trainer(training_data)

    # Evaluation on the held-out names.
    print('Testing classifier...')
    gold_data = [(features(word), label) for (word, label) in test]
    acc = accuracy(classifier, gold_data)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped entirely if anything in this section
    # raises NotImplementedError.
    try:
        test_featuresets = [features(word) for (word, _label) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(label)
              for ((_word, label), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        for ((word, label), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if label == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 9
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 10
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Label every name in the corpus: males first, then females.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # A fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    # Training.
    print('Training classifier...')
    training_data = [(features(word), label) for (word, label) in train]
    classifier = trainer(training_data)

    # Evaluation on the held-out names.
    print('Testing classifier...')
    gold_data = [(features(word), label) for (word, label) in test]
    acc = accuracy(classifier, gold_data)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped entirely if anything in this section
    # raises NotImplementedError.
    try:
        test_featuresets = [features(word) for (word, _label) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(label)
              for ((_word, label), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        for ((word, label), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if label == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 11
Project: OpenBottle   Author: xiaozhuchacha   File: util.py    MIT License 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 12
Project: Health-Checker   Author: KriAga   File: util.py    MIT License 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Label every name in the corpus: males first, then females.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # A fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    # Training.
    print('Training classifier...')
    training_data = [(features(word), label) for (word, label) in train]
    classifier = trainer(training_data)

    # Evaluation on the held-out names.
    print('Testing classifier...')
    gold_data = [(features(word), label) for (word, label) in test]
    acc = accuracy(classifier, gold_data)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped entirely if anything in this section
    # raises NotImplementedError.
    try:
        test_featuresets = [features(word) for (word, _label) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(label)
              for ((_word, label), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        for ((word, label), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if label == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 13
Project: Health-Checker   Author: KriAga   File: util.py    MIT License 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 14
Project: FancyWord   Author: EastonLee   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Label every name in the corpus: males first, then females.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # A fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    # Training.
    print('Training classifier...')
    training_data = [(features(word), label) for (word, label) in train]
    classifier = trainer(training_data)

    # Evaluation on the held-out names.
    print('Testing classifier...')
    gold_data = [(features(word), label) for (word, label) in test]
    acc = accuracy(classifier, gold_data)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped entirely if anything in this section
    # raises NotImplementedError.
    try:
        test_featuresets = [features(word) for (word, _label) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(label)
              for ((_word, label), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        for ((word, label), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if label == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 15
Project: FancyWord   Author: EastonLee   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 16
Project: nltk-on-gae   Author: sivu22   File: util.py    Apache License 2.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    :param trainer: callable that receives a list of ``(featureset, label)``
        pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    # Construct a list of classified names, using the names corpus.
    namelist = ([(name, 'male') for name in names.words('male.txt')] +
                [(name, 'female') for name in names.words('female.txt')])

    # Randomly split the names into a test & train set; the fixed seed
    # keeps the split reproducible.
    random.seed(123456)
    random.shuffle(namelist)
    train = namelist[:5000]
    test = namelist[5000:5500]

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer([(features(n), g) for (n, g) in train])

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), g) for (n, g) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, g) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, gender), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct gender.
            if gender == 'male':
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 17
Project: nltk-on-gae   Author: sivu22   File: util.py    Apache License 2.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate an "is this name male?" classifier from
    positive and unlabeled examples taken from the NLTK names corpus.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``,
        where both arguments are lists of featuresets; it returns a
        classifier whose labels are ``True`` (male) and ``False``.
    :param features: callable that maps a name to its featureset.
    :return: the trained classifier.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # A fixed seed keeps the sampled subsets reproducible.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled training examples.  Built as a
    # real list: on Python 3 ``map`` would return a one-shot iterator,
    # which a trainer cannot measure or traverse twice.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled training examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3, so build a list before
        # taking the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct answer.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 18
Project: luscan-devel   Author: blackye   File: util.py    GNU General Public License v2.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    from nltk.corpus import names
    import random

    # Construct a list of classified names, using the names corpus.
    namelist = ([(name, 'male') for name in names.words('male.txt')] +
                [(name, 'female') for name in names.words('female.txt')])

    # Randomly split the names into a test & train set.
    random.seed(123456)
    random.shuffle(namelist)
    train = namelist[:5000]
    test = namelist[5000:5500]

    # Train up a classifier.
    print 'Training classifier...'
    classifier = trainer( [(features(n), g) for (n,g) in train] )

    # Run the classifier on the test data.
    print 'Testing classifier...'
    acc = accuracy(classifier, [(features(n),g) for (n,g) in test])
    print 'Accuracy: %6.4f' % acc

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n,g) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print 'Avg. log likelihood: %6.4f' % (sum(ll)/len(test))
        print
        print 'Unseen Names      P(Male)  P(Female)\n'+'-'*40
        for ((name, gender), pdist) in zip(test, pdists)[:5]:
            if gender == 'male':
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print fmt % (name, pdist.prob('male'), pdist.prob('female'))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier 
Example 19
Project: luscan-devel   Author: blackye   File: util.py    GNU General Public License v2.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train a classifier from positive and unlabeled names, then evaluate it.

    Male names serve as the positive class.  After shuffling with a fixed
    seed, 2000 male names become positive examples, 500 male plus 500
    female names become unlabeled examples, and a further 250 male and
    250 female names form the labeled test set (``True`` means male).

    Written in the subset of Python that behaves the same on Python 2
    and Python 3 (parenthesised single-argument ``print``, list
    comprehensions instead of ``map``, materialised ``zip``).

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Create a list of male names to be used as positive-labeled examples
    # for training.  A list comprehension yields a real list on both
    # Python 2 and Python 3 (``map`` is lazy on Python 3).
    positive = [features(name) for name in male_names[:2000]]

    # Create a list of male and female names to be used as unlabeled examples
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Create a test set with correctly-labeled male and female names
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.  If the
    # classifier raises NotImplementedError the report is skipped.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        # NOTE(review): other copies of this demo call
        # ``prob_classify_many`` here -- confirm which name the NLTK
        # version used by this project provides.
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        # print('') emits a blank line on both Python 2 and 3, whereas
        # print() would show "()" under the Python 2 print statement.
        print('')
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() is a non-subscriptable iterator on Python 3, so build a
        # list before slicing off the first five rows.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 20
Project: honours_project   Author: JFriel   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train a classifier on the NLTK names corpus and print an evaluation.

    The male and female name lists are labelled, shuffled with a fixed
    seed, and split into 5000 training names and 500 test names.

    :param trainer: callable that receives a list of
        ``(featureset, label)`` pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    import random
    from nltk.corpus import names

    # Label every corpus name with the gender of the file it came from.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # The fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train_pairs, test_pairs = labelled[:5000], labelled[5000:5500]

    print('Training classifier...')
    train_set = [(features(word), label) for word, label in train_pairs]
    classifier = trainer(train_set)

    print('Testing classifier...')
    test_set = [(features(word), label) for word, label in test_pairs]
    score = accuracy(classifier, test_set)
    print('Accuracy: %6.4f' % score)

    # Probability report: skipped when the classifier raises
    # NotImplementedError.
    try:
        featuresets = [features(word) for word, _ in test_pairs]
        pdists = classifier.prob_classify_many(featuresets)
        logprobs = []
        for (_, gold), pdist in zip(test_pairs, pdists):
            logprobs.append(pdist.logprob(gold))
        mean_logprob = sum(logprobs) / len(test_pairs)
        print('Avg. log likelihood: %6.4f' % mean_logprob)
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # The '*' in each row template marks the gold label's column.
        male_row = '  %-15s *%6.4f   %6.4f'
        female_row = '  %-15s  %6.4f  *%6.4f'
        preview = list(zip(test_pairs, pdists))[:5]
        for (word, label), pdist in preview:
            row = male_row if label == 'male' else female_row
            print(row % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 21
Project: honours_project   Author: JFriel   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train a classifier from positive and unlabeled names, then evaluate it.

    Male names serve as the positive class.  After shuffling with a fixed
    seed, 2000 male names become positive examples, 500 male plus 500
    female names become unlabeled examples, and a further 250 male and
    250 female names form the labeled test set (``True`` means male).

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Create a list of male names to be used as positive-labeled examples
    # for training.  Real lists (rather than lazy ``map`` objects) let a
    # trainer take len() of, or iterate more than once over, its inputs.
    positive = [features(name) for name in male_names[:2000]]

    # Create a list of male and female names to be used as unlabeled examples
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Create a test set with correctly-labeled male and female names
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.  If the
    # classifier raises NotImplementedError the report is skipped.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() returns an iterator on Python 3; subscripting it directly
        # raises TypeError, which the handler below would not catch.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 22
Project: honours_project   Author: JFriel   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Run a gender-classification demo over the NLTK names corpus.

    Names from ``male.txt`` and ``female.txt`` are tagged, shuffled
    under a fixed seed, and divided into 5000 training and 500 test
    items.  Progress, accuracy and (when available) probability
    details are printed.

    :param trainer: callable taking a list of ``(featureset, label)``
        pairs and returning a classifier.
    :param features: callable turning a name into a featureset.
    :return: the trained classifier.
    """
    import random
    from nltk.corpus import names

    corpus = []
    for gender, fileid in (('male', 'male.txt'), ('female', 'female.txt')):
        corpus.extend((entry, gender) for entry in names.words(fileid))

    # Seeded so that the same names land in train and test every run.
    random.seed(123456)
    random.shuffle(corpus)
    training_names = corpus[:5000]
    held_out = corpus[5000:5500]

    print('Training classifier...')
    classifier = trainer(
        [(features(entry), gender) for entry, gender in training_names])

    print('Testing classifier...')
    gold_featuresets = [(features(entry), gender)
                        for entry, gender in held_out]
    print('Accuracy: %6.4f' % accuracy(classifier, gold_featuresets))

    # The probability section is abandoned if the classifier raises
    # NotImplementedError.
    try:
        held_out_features = [features(entry) for entry, _ in held_out]
        pdists = classifier.prob_classify_many(held_out_features)
        paired = list(zip(held_out, pdists))
        log_likelihoods = [dist.logprob(gold) for (_, gold), dist in paired]
        average = sum(log_likelihoods) / len(held_out)
        print('Avg. log likelihood: %6.4f' % average)
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # Row templates keyed by whether the gold label is 'male'; the
        # '*' sits in front of the gold label's probability.
        templates = {True: '  %-15s *%6.4f   %6.4f',
                     False: '  %-15s  %6.4f  *%6.4f'}
        for (entry, gender), dist in paired[:5]:
            line = templates[gender == 'male']
            print(line % (entry, dist.prob('male'), dist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 23
Project: honours_project   Author: JFriel   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Demonstrate a positive/unlabeled trainer on the NLTK names corpus.

    Male names are the positive class.  The shuffled corpus (fixed seed)
    supplies 2000 positive male names, an unlabeled pool of 500 male and
    500 female names, and a test set of 250 male (``True``) and 250
    female (``False``) names.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Positive-labeled training examples.  Built as a list, not a lazy
    # ``map`` object, so the trainer may size or re-read it.
    positive = [features(name) for name in male_names[:2000]]

    # Mixed male and female names used as unlabeled examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped if the classifier raises
    # NotImplementedError.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # A Python 3 zip object cannot be sliced; turn it into a list
        # first so the first five rows can be taken without a TypeError.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 24
Project: aop-helpFinder   Author: jecarvaill   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train a classifier on the NLTK names corpus and print an evaluation.

    The male and female name lists are labelled, shuffled with a fixed
    seed, and split into 5000 training names and 500 test names.

    :param trainer: callable that receives a list of
        ``(featureset, label)`` pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    import random
    from nltk.corpus import names

    # Label every corpus name with the gender of the file it came from.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # The fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train_pairs, test_pairs = labelled[:5000], labelled[5000:5500]

    print('Training classifier...')
    train_set = [(features(word), label) for word, label in train_pairs]
    classifier = trainer(train_set)

    print('Testing classifier...')
    test_set = [(features(word), label) for word, label in test_pairs]
    score = accuracy(classifier, test_set)
    print('Accuracy: %6.4f' % score)

    # Probability report: skipped when the classifier raises
    # NotImplementedError.
    try:
        featuresets = [features(word) for word, _ in test_pairs]
        pdists = classifier.prob_classify_many(featuresets)
        logprobs = []
        for (_, gold), pdist in zip(test_pairs, pdists):
            logprobs.append(pdist.logprob(gold))
        mean_logprob = sum(logprobs) / len(test_pairs)
        print('Avg. log likelihood: %6.4f' % mean_logprob)
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # The '*' in each row template marks the gold label's column.
        male_row = '  %-15s *%6.4f   %6.4f'
        female_row = '  %-15s  %6.4f  *%6.4f'
        preview = list(zip(test_pairs, pdists))[:5]
        for (word, label), pdist in preview:
            row = male_row if label == 'male' else female_row
            print(row % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 25
Project: aop-helpFinder   Author: jecarvaill   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train a classifier from positive and unlabeled names, then evaluate it.

    Male names serve as the positive class.  After shuffling with a fixed
    seed, 2000 male names become positive examples, 500 male plus 500
    female names become unlabeled examples, and a further 250 male and
    250 female names form the labeled test set (``True`` means male).

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Create a list of male names to be used as positive-labeled examples
    # for training.  Real lists (rather than lazy ``map`` objects) let a
    # trainer take len() of, or iterate more than once over, its inputs.
    positive = [features(name) for name in male_names[:2000]]

    # Create a list of male and female names to be used as unlabeled examples
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Create a test set with correctly-labeled male and female names
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.  If the
    # classifier raises NotImplementedError the report is skipped.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() returns an iterator on Python 3; subscripting it directly
        # raises TypeError, which the handler below would not catch.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 26
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Run a gender-classification demo over the NLTK names corpus.

    Names from ``male.txt`` and ``female.txt`` are tagged, shuffled
    under a fixed seed, and divided into 5000 training and 500 test
    items.  Progress, accuracy and (when available) probability
    details are printed.

    :param trainer: callable taking a list of ``(featureset, label)``
        pairs and returning a classifier.
    :param features: callable turning a name into a featureset.
    :return: the trained classifier.
    """
    import random
    from nltk.corpus import names

    corpus = []
    for gender, fileid in (('male', 'male.txt'), ('female', 'female.txt')):
        corpus.extend((entry, gender) for entry in names.words(fileid))

    # Seeded so that the same names land in train and test every run.
    random.seed(123456)
    random.shuffle(corpus)
    training_names = corpus[:5000]
    held_out = corpus[5000:5500]

    print('Training classifier...')
    classifier = trainer(
        [(features(entry), gender) for entry, gender in training_names])

    print('Testing classifier...')
    gold_featuresets = [(features(entry), gender)
                        for entry, gender in held_out]
    print('Accuracy: %6.4f' % accuracy(classifier, gold_featuresets))

    # The probability section is abandoned if the classifier raises
    # NotImplementedError.
    try:
        held_out_features = [features(entry) for entry, _ in held_out]
        pdists = classifier.prob_classify_many(held_out_features)
        paired = list(zip(held_out, pdists))
        log_likelihoods = [dist.logprob(gold) for (_, gold), dist in paired]
        average = sum(log_likelihoods) / len(held_out)
        print('Avg. log likelihood: %6.4f' % average)
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # Row templates keyed by whether the gold label is 'male'; the
        # '*' sits in front of the gold label's probability.
        templates = {True: '  %-15s *%6.4f   %6.4f',
                     False: '  %-15s  %6.4f  *%6.4f'}
        for (entry, gender), dist in paired[:5]:
            line = templates[gender == 'male']
            print(line % (entry, dist.prob('male'), dist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 27
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Demonstrate a positive/unlabeled trainer on the NLTK names corpus.

    Male names are the positive class.  The shuffled corpus (fixed seed)
    supplies 2000 positive male names, an unlabeled pool of 500 male and
    500 female names, and a test set of 250 male (``True``) and 250
    female (``False``) names.

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Positive-labeled training examples.  Built as a list, not a lazy
    # ``map`` object, so the trainer may size or re-read it.
    positive = [features(name) for name in male_names[:2000]]

    # Mixed male and female names used as unlabeled examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped if the classifier raises
    # NotImplementedError.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # A Python 3 zip object cannot be sliced; turn it into a list
        # first so the first five rows can be taken without a TypeError.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 28
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train a classifier on the NLTK names corpus and print an evaluation.

    The male and female name lists are labelled, shuffled with a fixed
    seed, and split into 5000 training names and 500 test names.

    :param trainer: callable that receives a list of
        ``(featureset, label)`` pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    import random
    from nltk.corpus import names

    # Label every corpus name with the gender of the file it came from.
    labelled = [(word, 'male') for word in names.words('male.txt')]
    labelled += [(word, 'female') for word in names.words('female.txt')]

    # The fixed seed keeps the train/test split reproducible.
    random.seed(123456)
    random.shuffle(labelled)
    train_pairs, test_pairs = labelled[:5000], labelled[5000:5500]

    print('Training classifier...')
    train_set = [(features(word), label) for word, label in train_pairs]
    classifier = trainer(train_set)

    print('Testing classifier...')
    test_set = [(features(word), label) for word, label in test_pairs]
    score = accuracy(classifier, test_set)
    print('Accuracy: %6.4f' % score)

    # Probability report: skipped when the classifier raises
    # NotImplementedError.
    try:
        featuresets = [features(word) for word, _ in test_pairs]
        pdists = classifier.prob_classify_many(featuresets)
        logprobs = []
        for (_, gold), pdist in zip(test_pairs, pdists):
            logprobs.append(pdist.logprob(gold))
        mean_logprob = sum(logprobs) / len(test_pairs)
        print('Avg. log likelihood: %6.4f' % mean_logprob)
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # The '*' in each row template marks the gold label's column.
        male_row = '  %-15s *%6.4f   %6.4f'
        female_row = '  %-15s  %6.4f  *%6.4f'
        preview = list(zip(test_pairs, pdists))[:5]
        for (word, label), pdist in preview:
            row = male_row if label == 'male' else female_row
            print(row % (word, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
Example 29
Project: serverless-chatbots-workshop   Author: datteswararao   File: util.py    Apache License 2.0 4 votes vote down vote up
def partial_names_demo(trainer, features=names_demo_features):
    """Train a classifier from positive and unlabeled names, then evaluate it.

    Male names serve as the positive class.  After shuffling with a fixed
    seed, 2000 male names become positive examples, 500 male plus 500
    female names become unlabeled examples, and a further 250 male and
    250 female names form the labeled test set (``True`` means male).

    :param trainer: callable invoked as ``trainer(positive, unlabeled)``
        with two lists of featuresets; must return a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    male_names = names.words('male.txt')
    female_names = names.words('female.txt')

    # Fixed seed keeps the sampled splits reproducible between runs.
    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Create a list of male names to be used as positive-labeled examples
    # for training.  Real lists (rather than lazy ``map`` objects) let a
    # trainer take len() of, or iterate more than once over, its inputs.
    positive = [features(name) for name in male_names[:2000]]

    # Create a list of male and female names to be used as unlabeled examples
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Create a test set with correctly-labeled male and female names
    test = ([(name, True) for name in male_names[2500:2750]] +
            [(name, False) for name in female_names[500:750]])

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.  If the
    # classifier raises NotImplementedError the report is skipped.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() returns an iterator on Python 3; subscripting it directly
        # raises TypeError, which the handler below would not catch.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
Example 30
Project: hltdi-l3   Author: LowResourceLanguages   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def names_demo(trainer, features=names_demo_features):
    """Train a classifier on the NLTK names corpus and print an evaluation.

    The male and female name lists are labelled, shuffled with a fixed
    seed, and split into 5000 training names and 500 test names.

    :param trainer: callable that receives a list of
        ``(featureset, label)`` pairs and returns a classifier.
    :param features: callable that maps a name to its featureset.
    :return: the classifier produced by ``trainer``.
    """
    from nltk.corpus import names
    import random

    # Construct a list of classified names, using the names corpus.
    namelist = ([(name, 'male') for name in names.words('male.txt')] +
                [(name, 'female') for name in names.words('female.txt')])

    # Randomly split the names into a test & train set.  The fixed seed
    # keeps the split reproducible between runs.
    random.seed(123456)
    random.shuffle(namelist)
    train = namelist[:5000]
    test = namelist[5000:5500]

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer([(features(n), g) for (n, g) in train])

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), g) for (n, g) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.  If the
    # classifier raises NotImplementedError the report is skipped.
    try:
        test_featuresets = [features(n) for (n, g) in test]
        # NOTE(review): other copies of this demo call
        # ``prob_classify_many`` here -- confirm which name the NLTK
        # version used by this project provides.
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() returns an iterator on Python 3; subscripting it directly
        # raises TypeError, which the handler below would not catch.
        for ((name, gender), pdist) in list(zip(test, pdists))[:5]:
            # The '*' marks the column of the gold label.
            if gender == 'male':
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier