Python source code of texttiling

Project: razzy-spinner (GitHub Link)

razzy-spinner-master
- demo
  - index.php
  - proccess_spin.php
  - assets
    - img
    - fonts
      - roboto
        Roboto-Bold.eot
        Roboto-Regular.ttf
        Roboto-Medium.ttf
        Roboto-Bold.woff
        Roboto-Light.woff
        Roboto-Thin.woff
        Roboto-Light.ttf
        Roboto-Light.eot
        Roboto-Medium.woff2
        Roboto-Thin.eot
        Roboto-Thin.ttf
        Roboto-Medium.woff
        Roboto-Regular.woff2
        Roboto-Regular.woff
        Roboto-Bold.woff2
        Roboto-Regular.eot
        Roboto-Bold.ttf
        Roboto-Medium.eot
        Roboto-Thin.woff2
        Roboto-Light.woff2
    - js
      - inputTags.jquery.min.js
      - inputTags.jquery.js
      - LICENSE
      - materialize.min.js
      - app.js
      - jquery.blockUI.js
      - README.md
      - sweetalert2.min.js
    - font
      - material-design-icons
        Material-Design-Icons.woff
        Material-Design-Icons.woff2
        Material-Design-Icons.ttf
        Material-Design-Icons.eot
        LICENSE.txt
      - roboto
        Roboto-Bold.eot
        Roboto-Regular.ttf
        Roboto-Medium.ttf
        Roboto-Bold.woff
        Roboto-Light.woff
        Roboto-Thin.woff
        Roboto-Light.ttf
        Roboto-Light.eot
        Roboto-Medium.woff2
        Roboto-Thin.eot
        Roboto-Thin.ttf
        Roboto-Medium.woff
        Roboto-Regular.woff2
        Roboto-Regular.woff
        Roboto-Bold.woff2
        Roboto-Regular.eot
        Roboto-Bold.ttf
        Roboto-Medium.eot
        Roboto-Thin.woff2
        Roboto-Light.woff2
    - css
      - inputTags.min.css
      - inputTags.css
      - materialize.min.css
      - materialize.css
      - inputTags.less
      - sweetalert2.min.css
- LICENSE
- autoload.php
- README.md
- core
  - resources
    - lang
  - libs
    - razzy
      - therausus.php
  - classes
    - pos_tagger.class.php
    - cache.class.php
    - spinner.class.php
    - lang.class.php
    - functions.class.php
  - composer.lock
  - snippets
    - countries.php
    - stopwords.php
  - vendor
    - icanboogie
      - inflector
        LICENSE
        lib
        inflector.php
        inflections.php
        inflections
        tr.php
        nb.php
        fr.php
        es.php
        en.php
        pt.php
        helpers.php
        composer.json
        README.md
    - composer
      - autoload_classmap.php
      - LICENSE
      - ClassLoader.php
      - autoload_psr4.php
      - autoload_files.php
      - autoload_real.php
      - autoload_namespaces.php
      - installed.json
    - autoload.php
    - fire015
      - flintstone
        src
        Exception.php
        Cache
        CacheInterface.php
        ArrayCache.php
        Flintstone.php
        Database.php
        Formatter
        SerializeFormatter.php
        JsonFormatter.php
        FormatterInterface.php
        Config.php
        phpunit.xml.dist
        UPGRADE.md
        CHANGELOG.md
        composer.json
        LICENSE.md
        .travis.yml
        README.md
        tests
        ConfigTest.php
        FlintstoneTest.php
        DatabaseTest.php
        .gitignore
  - composer.json
  - bin
    - python
      - en
        numeral.pyc
        commonsense.pyc
        commonsense.py
        rid.py
        quantify.py
        ogden
        __init__.pyc
        __init__.py
        ogden_2000.txt
        tags.pyc
        ordinal.pyc
        article.py
        singular.pyc
        __init__.pyc
        rid.pyc
        article.pyc
        quantify.pyc
        verb
        __init__.pyc
        __init__.py
        singular.py
        tags.py
        __init__.py
        wordnet
        wntools.py
        wordnet2
        compile
        src
        tkAppInit.c
        Makefile.in
        wnb
        stubs.c
        wn.c
        Makefile.am
        ChangeLog
        AUTHORS
        NEWS
        Makefile.in
        include
        wngrind.h
        Makefile
        Makefile.in
        Makefile.am
        wn.h
        acinclude.m4
        INSTALL
        README
        lib
        Makefile.in
        wnhelp.c
        binsrch.c
        wnrtl.c
        search.c
        morph.c
        wnutil.c
        wnres
        license.txt
        Makefile.in
        wngloss.man
        Makefile.am
        wnb.man
        wn.xbm
        Makefile.am
        wnglobal.c
        configure
        COPYING
        configure.ac
        aclocal.m4
        missing
        Makefile.am
        config.h.in
        dict
        adj.exc
        Makefile
        index.adv
        Makefile.in
        noun.exc
        sentidx.vrb
        adv.exc
        sents.vrb
        frames.vrb
        verb.Framestext
        lexnames
        Makefile.am
        verb.exc
        log.grind.2.1
        install-sh
        depcomp
        doc
        pdf
        Makefile
        Makefile.in
        Makefile.am
        Makefile
        Makefile.in
        html
        binsrch.3WN.html
        wnintro.1WN.html
        wnsearch.3WN.html
        wnintro.3WN.html
        Makefile
        wndb.5WN.html
        Makefile.in
        wnlicens.7WN.html
        grind.1WN.html
        lexnames.5WN.html
        wnpkgs.7WN.html
        cntlist.5WN.html
        senseidx.5WN.html
        wn.1WN.html
        wnintro.7WN.html
        morphy.7WN.html
        wnutil.3WN.html
        wngroups.7WN.html
        wnstats.7WN.html
        uniqbeg.7WN.html
        Makefile.am
        wnb.1WN.html
        wninput.5WN.html
        wnintro.5WN.html
        morph.3WN.html
        wngloss.7WN.html
        Makefile.am
        man
        wninput.5
        wnintro.5
        grind.1
        Makefile
        wnlicens.7
        Makefile.in
        morphy.7
        wnutil.3
        wn.1
        wnstats.7
        wnintro.1
        wnintro.7
        senseidx.5
        wndb.5
        wnintro.3
        binsrch.3
        wngloss.7
        lexnames.5
        wnpkgs.7
        Makefile.am
        morph.3
        wnb.1
        wnsearch.3
        wngroups.7
        uniqbeg.7
        cntlist.5
        ps
        wnstats.7.ps
        cntlist.5.ps
        Makefile
        wnlicens.7.ps
        Makefile.in
        uniqbeg.7.ps
        wnintro.1.ps
        grind.1.ps
        wndb.5.ps
        wngroups.7.ps
        wnutil.3.ps
        wnpkgs.7.ps
        wnintro.5.ps
        wngloss.7.ps
        wn.1.ps
        wnintro.7.ps
        wnb.1.ps
        Makefile.am
        morph.3.ps
        wninput.5.ps
        wnsearch.3.ps
        senseidx.5.ps
        lexnames.5.ps
        wnintro.3.ps
        binsrch.3.ps
        morphy.7.ps
        concordance.py
        __init__.pyc
        README.txt
        wordnet.pyc
        docsrc
        index.xml
        build.xml
        download.xml
        styles.in.css
        release-notes.xml
        contributors.xml
        requirements.xml
        publish.xsl
        contributors-src.xml
        format-contributors.xsl
        schemas.xml
        install.xml
        license.xml
        make-navigation.xsl
        examples.xml
        make-css-prefix.xsl
        setup.py
        PKG-INFO
        wntools.pyc
        __init__.py
        docs
        installation.html
        contents.html
        styles.css
        nav.html.inc
        contributors.html
        styles.css.prefix
        release-notes.html
        index.html
        examples.html
        license.html
        download.html
        LICENSE.txt
        wordnet.py
        spelling
        __init__.pyc
        __init__.py
        plural.py
        ordinal.py
        numeral.py
        _en-test.py
        LICENSE.txt
        plural.pyc
        parser
        __init__.pyc
        __init__.py
        nltk_lite
        draw
        srparser.py
        rdparser.py
        plot.py
        chart.py
        __init__.py
        tree.py
        cfg.py
        dispersion.py
        probability.pyc
        utilities.py
        semantics
        utilities.py
        evaluate.py
        logic.py
        __init__.py
        evaluate.py
        corpora
        toolbox.py
        ieer.py
        words.py
        ppattach.py
        timit.py
        conll2000.py
        ycoe.py
        state_union.py
        inaugural.py
        treebank.py
        stopwords.py
        senseval.py
        brown.py
        cmudict.py
        __init__.py
        genesis.py
        names.py
        gutenberg.py
        sinica_treebank.py
        chat
        zen.py
        eliza.py
        rude.py
        iesha.py
        __init__.py
        tag
        hmm.py
        unigram.py
        brill.py
        ngram.py
        __init__.py
        test
        doctest_driver.py
        __init__.py
        probability.py
        stem
        regexp.py
        __init__.py
        porter.py
        parse
        treetransforms.py
        rd.pyc
        featurestructure.pyc
        chunk.pyc
        grammarfile.py
        viterbi.py
        viterbi.pyc
        pchart.py
        cfg.pyc
        chart.pyc
        category.py
        chunk.py
        sr.pyc
        sr.py
        featurechart.py
        __init__.pyc
        tree.pyc
        generate.py
        chart.py
        rd.py
        featurestructure.py
        __init__.py
        tree.py
        pcfg.py
        pcfg.pyc
        cfg.py
        __init__.pyc
        README.txt
        etree
        ElementPath.py
        __init__.py
        ElementInclude.py
        ElementTree.py
        misc
        wordfinder.py
        __init__.py
        sort.py
        LICENSE.TXT
        setup.py
        PKG-INFO
        __init__.py
        wordnet
        wntools.py
        concordance.py
        __init__.py
        wordnet.py
        tokenize
        regexp.py
        simple.pyc
        simple.py
        __init__.pyc
        regexp.pyc
        __init__.py
        model
        __init__.py
        cluster
        gaac.py
        em.py
        __init__.py
        kmeans.py
        contrib
        paradigmquery.py
        hole.py
        combined.py
        featurelite.py
        toolbox
        data.py
        utilities.py
        text.py
        lexicon.py
        __init__.py
        settings.py
        errors.py
        kimmo.py
        __init__.py
        lambda.py
        fsa.py
        featuredemo.py
        marshal.py
        paradigm.py
        marshalbrill.py
        concord.py
        INSTALL.TXT
        Brill_license.txt
      - nltk_core.pyc
      - convert_tense.py
      - __pycache__
        nltk_core.cpython-34.pyc
      - pos_tagger.py
      - nltk
        classify
        util.py
        svm.pyc
        weka.pyc
        rte_classify.py
        decisiontree.py
        __pycache__
        senna.cpython-34.pyc
        tadm.cpython-34.pyc
        maxent.cpython-34.pyc
        textcat.cpython-34.pyc
        megam.cpython-34.pyc
        rte_classify.cpython-34.pyc
        decisiontree.cpython-34.pyc
        weka.cpython-34.pyc
        positivenaivebayes.cpython-34.pyc
        naivebayes.cpython-34.pyc
        scikitlearn.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        senna.pyc
        weka.py
        naivebayes.pyc
        positivenaivebayes.pyc
        util.pyc
        __init__.pyc
        textcat.py
        senna.py
        textcat.pyc
        scikitlearn.pyc
        maxent.py
        tadm.pyc
        maxent.pyc
        naivebayes.py
        api.py
        api.pyc
        rte_classify.pyc
        megam.py
        __init__.py
        tadm.py
        svm.py
        decisiontree.pyc
        megam.pyc
        positivenaivebayes.py
        scikitlearn.py
        data.py
        treetransforms.py
        draw
        util.py
        __pycache__
        dispersion.cpython-34.pyc
        tree.cpython-34.pyc
        cfg.cpython-34.pyc
        table.cpython-34.pyc
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        cfg.pyc
        table.py
        util.pyc
        __init__.pyc
        tree.pyc
        __init__.py
        tree.py
        cfg.py
        dispersion.pyc
        dispersion.py
        table.pyc
        toolbox.py
        probability.pyc
        decorators.py
        tgrep.py
        util.py
        decorators.pyc
        lazyimport.py
        featstruct.py
        book.py
        help.pyc
        text.py
        __pycache__
        featstruct.cpython-34.pyc
        decorators.cpython-34.pyc
        grammar.cpython-34.pyc
        tree.cpython-34.pyc
        probability.cpython-34.pyc
        help.cpython-34.pyc
        text.cpython-34.pyc
        compat.cpython-34.pyc
        internals.cpython-34.pyc
        jsontags.cpython-34.pyc
        treetransforms.cpython-34.pyc
        data.cpython-34.pyc
        downloader.cpython-34.pyc
        wsd.cpython-34.pyc
        toolbox.cpython-34.pyc
        util.cpython-34.pyc
        six.cpython-34.pyc
        lazyimport.cpython-34.pyc
        __init__.cpython-34.pyc
        collocations.cpython-34.pyc
        internals.py
        jsontags.py
        collocations.py
        wsd.pyc
        twitter
        util.py
        twitterclient.pyc
        util.pyc
        __init__.pyc
        twitter_demo.py
        api.py
        api.pyc
        twitterclient.py
        common.py
        __init__.py
        common.pyc
        twitter_demo.pyc
        six.py
        translate
        ibm5.pyc
        gdfa.py
        ibm_model.pyc
        gdfa.pyc
        ibm5.py
        stack_decoder.py
        __pycache__
        ibm1.cpython-34.pyc
        ibm_model.cpython-34.pyc
        ibm4.cpython-34.pyc
        ibm5.cpython-34.pyc
        ibm2.cpython-34.pyc
        bleu_score.cpython-34.pyc
        metrics.cpython-34.pyc
        api.cpython-34.pyc
        stack_decoder.cpython-34.pyc
        ibm3.cpython-34.pyc
        __init__.cpython-34.pyc
        ibm1.pyc
        ibm3.py
        phrase_based.pyc
        metrics.py
        ibm_model.py
        ibm2.py
        ibm3.pyc
        gale_church.pyc
        ibm1.py
        __init__.pyc
        ibm4.py
        api.py
        api.pyc
        ibm2.pyc
        gale_church.py
        __init__.py
        metrics.pyc
        bleu_score.pyc
        phrase_based.py
        ibm4.pyc
        stack_decoder.pyc
        bleu_score.py
        corpus
        util.py
        __pycache__
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        util.pyc
        __init__.pyc
        europarl_raw.py
        europarl_raw.pyc
        __init__.py
        reader
        wordlist.py
        toolbox.py
        opinion_lexicon.py
        ieer.py
        tagged.pyc
        chasen.py
        util.py
        indian.pyc
        lin.pyc
        cmudict.pyc
        ppattach.py
        timit.py
        nps_chat.py
        pl196x.pyc
        verbnet.py
        udhr.pyc
        ycoe.py
        nps_chat.pyc
        __pycache__
        conll.cpython-34.pyc
        xmldocs.cpython-34.pyc
        semcor.cpython-34.pyc
        timit.cpython-34.pyc
        verbnet.cpython-34.pyc
        sinica_treebank.cpython-34.pyc
        plaintext.cpython-34.pyc
        senseval.cpython-34.pyc
        nps_chat.cpython-34.pyc
        sentiwordnet.cpython-34.pyc
        nkjp.cpython-34.pyc
        rte.cpython-34.pyc
        comparative_sents.cpython-34.pyc
        bracket_parse.cpython-34.pyc
        categorized_sents.cpython-34.pyc
        childes.cpython-34.pyc
        twitter.cpython-34.pyc
        cmudict.cpython-34.pyc
        knbc.cpython-34.pyc
        chunked.cpython-34.pyc
        ieer.cpython-34.pyc
        opinion_lexicon.cpython-34.pyc
        mte.cpython-34.pyc
        propbank.cpython-34.pyc
        crubadan.cpython-34.pyc
        pl196x.cpython-34.pyc
        wordnet.cpython-34.pyc
        ppattach.cpython-34.pyc
        ycoe.cpython-34.pyc
        nombank.cpython-34.pyc
        udhr.cpython-34.pyc
        indian.cpython-34.pyc
        switchboard.cpython-34.pyc
        tagged.cpython-34.pyc
        chasen.cpython-34.pyc
        string_category.cpython-34.pyc
        ipipan.cpython-34.pyc
        wordlist.cpython-34.pyc
        bnc.cpython-34.pyc
        toolbox.cpython-34.pyc
        pros_cons.cpython-34.pyc
        aligned.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        reviews.cpython-34.pyc
        lin.cpython-34.pyc
        framenet.cpython-34.pyc
        __init__.cpython-34.pyc
        dependency.cpython-34.pyc
        string_category.py
        plaintext.pyc
        lin.py
        comparative_sents.pyc
        ieer.pyc
        bnc.py
        semcor.py
        framenet.py
        rte.pyc
        xmldocs.py
        switchboard.pyc
        ipipan.py
        indian.py
        toolbox.pyc
        mte.py
        util.pyc
        propbank.pyc
        __init__.pyc
        wordnet.pyc
        wordlist.pyc
        sinica_treebank.pyc
        chunked.py
        rte.py
        dependency.pyc
        mte.pyc
        switchboard.py
        conll.py
        bracket_parse.pyc
        bnc.pyc
        categorized_sents.py
        nombank.py
        childes.py
        senseval.py
        crubadan.pyc
        bracket_parse.py
        api.py
        reviews.py
        knbc.py
        plaintext.py
        api.pyc
        timit.pyc
        string_category.pyc
        nkjp.pyc
        cmudict.py
        twitter.py
        opinion_lexicon.pyc
        crubadan.py
        __init__.py
        chasen.pyc
        verbnet.pyc
        knbc.pyc
        nombank.pyc
        ppattach.pyc
        sentiwordnet.pyc
        conll.pyc
        pros_cons.py
        sinica_treebank.py
        pros_cons.pyc
        comparative_sents.py
        nkjp.py
        aligned.py
        ipipan.pyc
        senseval.pyc
        sentiwordnet.py
        childes.pyc
        semcor.pyc
        propbank.py
        reviews.pyc
        framenet.pyc
        aligned.pyc
        udhr.py
        dependency.py
        pl196x.py
        tagged.py
        chunked.pyc
        twitter.pyc
        ycoe.pyc
        xmldocs.pyc
        wordnet.py
        categorized_sents.pyc
        chat
        zen.py
        zen.pyc
        util.py
        suntsu.py
        eliza.py
        rude.py
        util.pyc
        __init__.pyc
        suntsu.pyc
        iesha.py
        iesha.pyc
        rude.pyc
        __init__.py
        eliza.pyc
        tag
        crf.py
        perceptron.py
        brill_trainer.py
        hmm.pyc
        hunpos.pyc
        hmm.py
        util.py
        sequential.py
        __pycache__
        stanford.cpython-34.pyc
        senna.cpython-34.pyc
        crf.cpython-34.pyc
        perceptron.cpython-34.pyc
        brill.cpython-34.pyc
        brill_trainer.cpython-34.pyc
        tnt.cpython-34.pyc
        mapping.cpython-34.pyc
        sequential.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        hunpos.cpython-34.pyc
        hmm.cpython-34.pyc
        __init__.cpython-34.pyc
        mapping.py
        senna.pyc
        crf.pyc
        util.pyc
        __init__.pyc
        stanford.pyc
        senna.py
        mapping.pyc
        api.py
        api.pyc
        brill.py
        tnt.py
        __init__.py
        hunpos.py
        tnt.pyc
        stanford.py
        sequential.pyc
        brill_trainer.pyc
        perceptron.pyc
        brill.pyc
        test
        childes_fixt.py
        gluesemantics_malt_fixt.pyc
        parse.doctest
        util.doctest
        compat_fixt.pyc
        childes_fixt.pyc
        classify_fixt.pyc
        bleu.doctest
        semantics_fixt.py
        dependency.doctest
        tree.doctest
        sentiwordnet.doctest
        nonmonotonic_fixt.pyc
        wordnet_fixt.py
        gluesemantics_malt.doctest
        discourse.doctest
        portuguese_en.doctest
        generate.doctest
        grammartestsuites.doctest
        translate.doctest
        resolution.doctest
        stem.doctest
        probability.doctest
        wsd.doctest
        paice.doctest
        ccg.doctest
        childes.doctest
        inference.doctest
        tag.doctest
        metrics.doctest
        gluesemantics_malt_fixt.py
        collocations.doctest
        index.doctest
        bnc.doctest
        segmentation_fixt.pyc
        semantics.doctest
        portuguese_en_fixt.pyc
        nonmonotonic.doctest
        __init__.pyc
        toolbox.doctest
        tokenize.doctest
        wordnet_fixt.pyc
        featgram.doctest
        sentiment.doctest
        grammar.doctest
        portuguese_en_fixt.py
        classify_fixt.py
        chunk.doctest
        compat_fixt.py
        treetransforms.doctest
        segmentation_fixt.py
        gensim_fixt.py
        drt.doctest
        japanese.doctest
        framenet.doctest
        gensim.doctest
        crubadan.doctest
        discourse_fixt.pyc
        inference_fixt.pyc
        translate_fixt.pyc
        runtests.py
        doctest_nose_plugin.py
        discourse_fixt.py
        compat.doctest
        __init__.py
        chat80.doctest
        probability_fixt.py
        propbank.doctest
        corpus.doctest
        translate_fixt.py
        semantics_fixt.pyc
        relextract.doctest
        wordnet.doctest
        runtests.pyc
        probability_fixt.pyc
        corpus_fixt.pyc
        gensim_fixt.pyc
        inference_fixt.py
        wordnet_lch.doctest
        corpus_fixt.py
        nonmonotonic_fixt.py
        unit
        test_tgrep.pyc
        test_seekable_unicode_stream_reader.py
        test_json2csv_corpus.py
        test_hmm.pyc
        test_collocations.pyc
        test_stem.py
        test_hmm.py
        test_corpus_views.pyc
        translate
        test_ibm1.pyc
        test_ibm4.pyc
        test_bleu.py
        test_ibm5.pyc
        test_ibm2.pyc
        test_ibm3.py
        __init__.pyc
        test_ibm4.py
        test_stack_decoder.pyc
        test_ibm3.pyc
        test_ibm1.py
        test_ibm_model.py
        test_stack_decoder.py
        test_bleu.pyc
        test_ibm5.py
        __init__.py
        test_ibm_model.pyc
        test_ibm2.py
        test_seekable_unicode_stream_reader.pyc
        test_tag.pyc
        test_stem.pyc
        test_twitter_auth.pyc
        __init__.pyc
        test_tgrep.py
        test_classify.py
        test_2x_compat.py
        test_classify.pyc
        test_naivebayes.pyc
        __init__.py
        utils.py
        test_naivebayes.py
        test_corpus_views.py
        test_corpora.pyc
        test_2x_compat.pyc
        test_twitter_auth.py
        utils.pyc
        test_tag.py
        test_corpora.py
        test_json2csv_corpus.pyc
        test_collocations.py
        all.py
        data.doctest
        logic.doctest
        treeprettyprinter.doctest
        simple.doctest
        classify.doctest
        all.pyc
        featstruct.doctest
        internals.doctest
        misc.doctest
        gluesemantics.doctest
        doctest_nose_plugin.pyc
        probability.py
        stem
        snowball.py
        regexp.py
        util.py
        __pycache__
        lancaster.cpython-34.pyc
        snowball.cpython-34.pyc
        rslp.cpython-34.pyc
        porter.cpython-34.pyc
        regexp.cpython-34.pyc
        wordnet.cpython-34.pyc
        isri.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        rslp.py
        lancaster.pyc
        util.pyc
        __init__.pyc
        wordnet.pyc
        snowball.pyc
        porter.pyc
        isri.pyc
        api.py
        api.pyc
        rslp.pyc
        regexp.pyc
        __init__.py
        lancaster.py
        isri.py
        porter.py
        wordnet.py
        parse
        projectivedependencyparser.pyc
        earleychart.py
        util.py
        viterbi.py
        viterbi.pyc
        shiftreduce.py
        pchart.py
        recursivedescent.pyc
        __pycache__
        shiftreduce.cpython-34.pyc
        dependencygraph.cpython-34.pyc
        pchart.cpython-34.pyc
        transitionparser.cpython-34.pyc
        earleychart.cpython-34.pyc
        bllip.cpython-34.pyc
        projectivedependencyparser.cpython-34.pyc
        viterbi.cpython-34.pyc
        malt.cpython-34.pyc
        nonprojectivedependencyparser.cpython-34.pyc
        recursivedescent.cpython-34.pyc
        chart.cpython-34.pyc
        featurechart.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        evaluate.cpython-34.pyc
        __init__.cpython-34.pyc
        evaluate.py
        dependencygraph.py
        nonprojectivedependencyparser.py
        nonprojectivedependencyparser.pyc
        bllip.py
        transitionparser.py
        chart.pyc
        evaluate.pyc
        generate.pyc
        earleychart.pyc
        featurechart.py
        util.pyc
        recursivedescent.py
        __init__.pyc
        projectivedependencyparser.py
        stanford.pyc
        shiftreduce.pyc
        generate.py
        chart.py
        api.py
        transitionparser.pyc
        api.pyc
        bllip.pyc
        dependencygraph.pyc
        __init__.py
        malt.py
        stanford.py
        pchart.pyc
        featurechart.pyc
        malt.pyc
        six.pyc
        toolbox.pyc
        treeprettyprinter.py
        util.pyc
        __init__.pyc
        tree.pyc
        misc
        minimalset.py
        wordfinder.py
        __pycache__
        minimalset.cpython-34.pyc
        chomsky.cpython-34.pyc
        babelfish.cpython-34.pyc
        wordfinder.cpython-34.pyc
        __init__.cpython-34.pyc
        chomsky.pyc
        sort.pyc
        babelfish.py
        minimalset.pyc
        babelfish.pyc
        __init__.pyc
        chomsky.py
        __init__.py
        wordfinder.pyc
        sort.py
        data.pyc
        featstruct.pyc
        treetransforms.pyc
        sem
        linearlogic.pyc
        drt_glue_demo.py
        util.py
        glue.py
        cooper_storage.py
        __pycache__
        glue.cpython-34.pyc
        linearlogic.cpython-34.pyc
        boxer.cpython-34.pyc
        lfg.cpython-34.pyc
        drt.cpython-34.pyc
        relextract.cpython-34.pyc
        skolemize.cpython-34.pyc
        logic.cpython-34.pyc
        util.cpython-34.pyc
        evaluate.cpython-34.pyc
        __init__.cpython-34.pyc
        evaluate.py
        glue.pyc
        logic.py
        chat80.py
        logic.pyc
        relextract.py
        linearlogic.py
        evaluate.pyc
        hole.py
        hole.pyc
        drt_glue_demo.pyc
        util.pyc
        boxer.pyc
        __init__.pyc
        lfg.py
        lfg.pyc
        skolemize.py
        __init__.py
        cooper_storage.pyc
        drt.py
        drt.pyc
        skolemize.pyc
        boxer.py
        chat80.pyc
        relextract.pyc
        downloader.py
        grammar.pyc
        jsontags.pyc
        text.pyc
        tbl
        rule.pyc
        erroranalysis.py
        erroranalysis.pyc
        __pycache__
        template.cpython-34.pyc
        feature.cpython-34.pyc
        erroranalysis.cpython-34.pyc
        rule.cpython-34.pyc
        __init__.cpython-34.pyc
        template.pyc
        demo.py
        __init__.pyc
        demo.pyc
        template.py
        rule.py
        api.py
        api.pyc
        feature.pyc
        __init__.py
        feature.py
        sentiment
        sentiment_analyzer.pyc
        util.py
        util.pyc
        sentiment_analyzer.py
        __init__.pyc
        vader.pyc
        __init__.py
        vader.py
        internals.pyc
        collocations.pyc
        lazyimport.pyc
        grammar.py
        VERSION
        tgrep.pyc
        __init__.py
        wsd.py
        chunk
        regexp.py
        util.py
        named_entity.pyc
        __pycache__
        regexp.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        named_entity.py
        util.pyc
        __init__.pyc
        api.py
        api.pyc
        regexp.pyc
        __init__.py
        tokenize
        punkt.py
        regexp.py
        util.py
        simple.pyc
        simple.py
        punkt.pyc
        treebank.pyc
        sexpr.pyc
        __pycache__
        stanford.cpython-34.pyc
        mwe.cpython-34.pyc
        simple.cpython-34.pyc
        texttiling.cpython-34.pyc
        regexp.cpython-34.pyc
        treebank.cpython-34.pyc
        punkt.cpython-34.pyc
        sexpr.cpython-34.pyc
        casual.cpython-34.pyc
        api.cpython-34.pyc
        util.cpython-34.pyc
        __init__.cpython-34.pyc
        util.pyc
        __init__.pyc
        stanford.pyc
        mwe.pyc
        treebank.py
        api.py
        api.pyc
        regexp.pyc
        texttiling.py
        __init__.py
        mwe.py
        stanford.py
        texttiling.pyc
        casual.py
        casual.pyc
        sexpr.py
        help.py
        tree.py
        compat.pyc
        book.pyc
        inference
        resolution.pyc
        nonmonotonic.py
        __pycache__
        tableau.cpython-34.pyc
        prover9.cpython-34.pyc
        resolution.cpython-34.pyc
        mace.cpython-34.pyc
        discourse.cpython-34.pyc
        api.cpython-34.pyc
        __init__.cpython-34.pyc
        nonmonotonic.pyc
        tableau.pyc
        prover9.pyc
        discourse.pyc
        __init__.pyc
        api.py
        api.pyc
        mace.pyc
        mace.py
        __init__.py
        discourse.py
        prover9.py
        resolution.py
        tableau.py
        app
        rdparser_app.py
        concordance_app.pyc
        chunkparser_app.pyc
        collocations_app.pyc
        rdparser_app.pyc
        wordnet_app.py
        chartparser_app.pyc
        srparser_app.py
        nemo_app.py
        chunkparser_app.py
        __init__.pyc
        wordfreq_app.pyc
        nemo_app.pyc
        wordfreq_app.py
        chartparser_app.py
        srparser_app.pyc
        __init__.py
        concordance_app.py
        wordnet_app.pyc
        collocations_app.py
        metrics
        distance.pyc
        spearman.py
        __pycache__
        scores.cpython-34.pyc
        spearman.cpython-34.pyc
        paice.cpython-34.pyc
        confusionmatrix.cpython-34.pyc
        segmentation.cpython-34.pyc
        association.cpython-34.pyc
        distance.cpython-34.pyc
        agreement.cpython-34.pyc
        __init__.cpython-34.pyc
        scores.py
        agreement.pyc
        spearman.pyc
        paice.pyc
        agreement.py
        association.py
        __init__.pyc
        scores.pyc
        confusionmatrix.pyc
        segmentation.pyc
        association.pyc
        __init__.py
        segmentation.py
        distance.py
        paice.py
        confusionmatrix.py
        compat.py
        cluster
        util.py
        em.pyc
        gaac.py
        util.pyc
        __init__.pyc
        gaac.pyc
        em.py
        api.py
        api.pyc
        __init__.py
        kmeans.py
        kmeans.pyc
        downloader.pyc
        treeprettyprinter.pyc
        ccg
        __pycache__
        lexicon.cpython-34.pyc
        chart.cpython-34.pyc
        api.cpython-34.pyc
        combinator.cpython-34.pyc
        __init__.cpython-34.pyc
        chart.pyc
        combinator.py
        __init__.pyc
        chart.py
        lexicon.py
        api.py
        api.pyc
        __init__.py
        combinator.pyc
        lexicon.pyc
      - nltk_core.py
      - numeric_to_spoken.py
      - lemmatize.py

# Natural Language Toolkit: TextTiling
#
# Copyright (C) 2001-2015 NLTK Project
# Author: George Boutsioukis
#
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

import re
import math

try:
    import numpy
except ImportError:
    pass

from nltk.tokenize.api import TokenizerI

# Values accepted by the ``similarity_method`` parameter of TextTilingTokenizer.
BLOCK_COMPARISON = 0
VOCABULARY_INTRODUCTION = 1

# Values accepted by the ``cutoff_policy`` parameter of TextTilingTokenizer.
LC = 0
HC = 1

# Value accepted by the ``smoothing_method`` parameter of TextTilingTokenizer.
DEFAULT_SMOOTHING = [0]


class TextTilingTokenizer(TokenizerI):
    """Tokenize a document into topical sections using the TextTiling algorithm.
    This algorithm detects subtopic shifts based on the analysis of lexical
    co-occurrence patterns.

    The process starts by tokenizing the text into pseudosentences of
    a fixed size w. Then, depending on the method used, similarity
    scores are assigned at sentence gaps. The algorithm proceeds by
    detecting the peak differences between these scores and marking
    them as boundaries. The boundaries are normalized to the closest
    paragraph break and the segmented text is returned.

    :param w: Pseudosentence size
    :type w: int
    :param k: Size (in sentences) of the block used in the block comparison method
    :type k: int
    :param similarity_method: The method used for determining similarity scores:
       `BLOCK_COMPARISON` (default) or `VOCABULARY_INTRODUCTION`.
    :type similarity_method: constant
    :param stopwords: A list of stopwords that are filtered out (defaults to NLTK's stopwords corpus)
    :type stopwords: list(str)
    :param smoothing_method: The method used for smoothing the score plot:
      `DEFAULT_SMOOTHING` (default)
    :type smoothing_method: constant
    :param smoothing_width: The width of the window used by the smoothing method
    :type smoothing_width: int
    :param smoothing_rounds: The number of smoothing passes
    :type smoothing_rounds: int
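    :param cutoff_policy: The policy used to determine the number of boundaries:
      `HC` (default) or `LC`
    :type cutoff_policy: constant
    :param demo_mode: If true, `tokenize` returns the gap scores, the smoothed
      scores, the depth scores and the boundary flags instead of the segmented text
    :type demo_mode: bool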

    >>> from nltk.corpus import brown
    >>> tt = TextTilingTokenizer(demo_mode=True)
    >>> text = brown.raw()[:10000]
    >>> s, ss, d, b = tt.tokenize(text)
    >>> b
    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
     0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
     0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
    """

    def __init__(self,
                 w=20,
                 k=10,
                 similarity_method=BLOCK_COMPARISON,
                 stopwords=None,
                 smoothing_method=DEFAULT_SMOOTHING,
                 smoothing_width=2,
                 smoothing_rounds=1,
                 cutoff_policy=HC,
                 demo_mode=False):
        """Store the tokenizer configuration on the instance.

        Every argument is kept as an instance attribute of the same name.
        When *stopwords* is None, the English list of NLTK's stopwords
        corpus is loaded and stored instead.
        """
        if stopwords is None:
            # Imported lazily so the corpus is only needed when no explicit
            # stopword list is supplied.
            from nltk.corpus import stopwords as stopwords_corpus
            stopwords = stopwords_corpus.words('english')

        self.w = w
        self.k = k
        self.similarity_method = similarity_method
        self.stopwords = stopwords
        self.smoothing_method = smoothing_method
        self.smoothing_width = smoothing_width
        self.smoothing_rounds = smoothing_rounds
        self.cutoff_policy = cutoff_policy
        self.demo_mode = demo_mode

    def tokenize(self, text):
        """Return a tokenized copy of *text*, where each "token" represents
        a separate topic.

        The text is lowercased and stripped of punctuation, divided into
        pseudosentences, and filtered for stopwords.  Similarity scores are
        then computed at every pseudosentence gap, smoothed, and converted
        to depth scores from which the boundaries are chosen.  Finally the
        boundaries are snapped to the paragraph breaks of the original text.

        :param text: the document to segment
        :type text: str
        :return: the list of segments, or, when ``demo_mode`` is set, the
            tuple ``(gap_scores, smooth_scores, depth_scores,
            segment_boundaries)``
        :raises NotImplementedError: if ``similarity_method`` is
            `VOCABULARY_INTRODUCTION`
        :raises ValueError: if ``similarity_method`` or ``smoothing_method``
            is not a supported value (errors raised by the helper methods,
            e.g. when no paragraph break is found, propagate as well)
        """

        lowercase_text = text.lower()
        paragraph_breaks = self._mark_paragraph_breaks(text)

        # Tokenization step starts here

        # Remove punctuation: keep only lowercase ASCII letters, hyphens,
        # apostrophes and whitespace, in a single pass over the text.
        nopunct_text = re.sub(r"[^a-z\-' \n\t]", "", lowercase_text)
        nopunct_par_breaks = self._mark_paragraph_breaks(nopunct_text)

        tokseqs = self._divide_to_tokensequences(nopunct_text)

        # The morphological stemming step mentioned in the TextTiling
        # paper is not implemented.  A comment in the original C
        # implementation states that it offers no benefit to the
        # process. It might be interesting to test the existing
        # stemmers though.

        # Filter stopwords (a set makes each membership test O(1))
        stopwords = frozenset(self.stopwords)
        for ts in tokseqs:
            ts.wrdindex_list = [wi for wi in ts.wrdindex_list
                                if wi[0] not in stopwords]

        token_table = self._create_token_table(tokseqs, nopunct_par_breaks)
        # End of the Tokenization step

        # Lexical score determination
        if self.similarity_method == BLOCK_COMPARISON:
            gap_scores = self._block_comparison(tokseqs, token_table)
        elif self.similarity_method == VOCABULARY_INTRODUCTION:
            raise NotImplementedError("Vocabulary introduction not implemented")
        else:
            # Previously this fell through and failed later with an
            # unbound-local error; report the real problem instead.
            raise ValueError("Unknown similarity method: %r"
                             % (self.similarity_method,))

        if self.smoothing_method == DEFAULT_SMOOTHING:
            smooth_scores = self._smooth_scores(gap_scores)
        else:
            raise ValueError("Unknown smoothing method: %r"
                             % (self.smoothing_method,))
        # End of Lexical score Determination

        # Boundary identification
        depth_scores = self._depth_scores(smooth_scores)
        segment_boundaries = self._identify_boundaries(depth_scores)

        if self.demo_mode:
            # The demo only needs the raw scores; skip building segments.
            return gap_scores, smooth_scores, depth_scores, segment_boundaries

        normalized_boundaries = self._normalize_boundaries(text,
                                                           segment_boundaries,
                                                           paragraph_breaks)
        # End of Boundary Identification
        segmented_text = []
        prevb = 0

        for b in normalized_boundaries:
            if b == 0:
                continue
            segmented_text.append(text[prevb:b])
            prevb = b

        # Boundaries are offsets into the original text, so compare against
        # its length when appending whatever remains after the last one.
        if prevb < len(text):
            segmented_text.append(text[prevb:])

        if not segmented_text:
            segmented_text = [text]

        return segmented_text

    def _block_comparison(self, tokseqs, token_table):
        "Implements the block comparison method"
        def blk_frq(tok, block):
            ts_occs = filter(lambda o: o[0] in block,
                             token_table[tok].ts_occurences)
            freq = sum([tsocc[1] for tsocc in ts_occs])
            return freq

        gap_scores = []
        numgaps = len(tokseqs)-1

        for curr_gap in range(numgaps):
            score_dividend, score_divisor_b1, score_divisor_b2 = 0.0, 0.0, 0.0
            score = 0.0
            #adjust window size for boundary conditions
            if curr_gap < self.k-1:
                window_size = curr_gap + 1
            elif curr_gap > numgaps-self.k:
                window_size = numgaps - curr_gap
            else:
                window_size = self.k

            b1 = [ts.index
                  for ts in tokseqs[curr_gap-window_size+1 : curr_gap+1]]
            b2 = [ts.index
                  for ts in tokseqs[curr_gap+1 : curr_gap+window_size+1]]

            for t in token_table:
                score_dividend += blk_frq(t, b1)*blk_frq(t, b2)
                score_divisor_b1 += blk_frq(t, b1)**2
                score_divisor_b2 += blk_frq(t, b2)**2
            try:
                score = score_dividend/math.sqrt(score_divisor_b1*
                                                 score_divisor_b2)
            except ZeroDivisionError:
                pass # score += 0.0

            gap_scores.append(score)

        return gap_scores

    def _smooth_scores(self, gap_scores):
        """Smooth the gap scores with the module-level ``smooth`` function.

        The window length passed to ``smooth`` is ``self.smoothing_width + 1``
        and a single pass is made; ``self.smoothing_rounds`` is not consulted
        here.

        :param gap_scores: the raw gap scores
        :return: the smoothed scores as a plain list
        """
        signal = numpy.array(gap_scores[:])
        window_length = self.smoothing_width + 1
        smoothed = smooth(signal, window_len=window_length)
        return list(smoothed)

    def _mark_paragraph_breaks(self, text):
        """Identifies indented text or line breaks as the beginning of
        paragraphs"""

        MIN_PARAGRAPH = 100
        pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*")
        matches = pattern.finditer(text)

        last_break = 0
        pbreaks = [0]
        for pb in matches:
            if pb.start()-last_break < MIN_PARAGRAPH:
                continue
            else:
                pbreaks.append(pb.start())
                last_break = pb.start()

        return pbreaks

    def _divide_to_tokensequences(self, text):
        """Divide *text* into pseudosentences of ``self.w`` words each.

        Words are maximal runs of ``\\w`` characters; each one is recorded
        together with its character offset in *text*.  The last
        pseudosentence may hold fewer than ``self.w`` words.

        :param text: the (punctuation-free) text to divide
        :return: one TokenSequence per pseudosentence, numbered from 0
        :rtype: list(TokenSequence)
        """
        w = self.w
        wrdindex_list = [(match.group(), match.start())
                         for match in re.finditer(r"\w+", text)]
        # Floor division keeps the sequence index an int; true division
        # would produce 0.0, 1.0, ... on Python 3.
        return [TokenSequence(i // w, wrdindex_list[i:i + w])
                for i in range(0, len(wrdindex_list), w)]

    def _create_token_table(self, token_sequences, par_breaks):
        "Creates a table of TokenTableFields"
        token_table = {}
        current_par = 0
        current_tok_seq = 0
        pb_iter = par_breaks.__iter__()
        current_par_break = next(pb_iter)
        if current_par_break == 0:
            try:
                current_par_break = next(pb_iter) #skip break at 0
            except StopIteration:
                raise ValueError(
                    "No paragraph breaks were found(text too short perhaps?)"
                    )
        for ts in token_sequences:
            for word, index in ts.wrdindex_list:
                try:
                    while index > current_par_break:
                        current_par_break = next(pb_iter)
                        current_par += 1
                except StopIteration:
                    #hit bottom
                    pass

                if word in token_table:
                    token_table[word].total_count += 1

                    if token_table[word].last_par != current_par:
                        token_table[word].last_par = current_par
                        token_table[word].par_count += 1

                    if token_table[word].last_tok_seq != current_tok_seq:
                        token_table[word].last_tok_seq = current_tok_seq
                        token_table[word]\
                                .ts_occurences.append([current_tok_seq,1])
                    else:
                        token_table[word].ts_occurences[-1][1] += 1
                else: #new word
                    token_table[word] = TokenTableField(first_pos=index,
                                                        ts_occurences= \
                                                          [[current_tok_seq,1]],
                                                        total_count=1,
                                                        par_count=1,
                                                        last_par=current_par,
                                                        last_tok_seq= \
                                                          current_tok_seq)

            current_tok_seq += 1

        return token_table

    def _identify_boundaries(self, depth_scores):
        """Identifies boundaries at the peaks of similarity score
        differences.

        A gap is a candidate when its depth score exceeds a cutoff derived
        from the mean and standard deviation of all depth scores: the
        liberal `LC` policy uses ``mean - stdev`` and the conservative `HC`
        policy uses ``mean - stdev/2``, as in the TextTiling paper.
        Candidates are visited from the deepest down, and one is dropped
        when another boundary has already been placed fewer than 4 gaps away.

        :param depth_scores: the depth score of every gap
        :return: one flag per gap, 1 for a boundary and 0 otherwise
        :rtype: list(int)
        """
        if len(depth_scores) == 0:
            # Nothing to segment; avoids dividing by zero below.
            return []

        boundaries = [0 for _ in depth_scores]

        avg = sum(depth_scores) / len(depth_scores)
        stdev = numpy.std(depth_scores)

        # Both branches used to compute the same value, which made the
        # cutoff policy a no-op.  LC now uses the lower (more liberal) cutoff.
        if self.cutoff_policy == LC:
            cutoff = avg - stdev
        else:
            cutoff = avg - stdev / 2.0

        # (score, gap) pairs are unique, so sorting in reverse is the same
        # as sorting and then reversing.
        ranked = sorted(zip(depth_scores, range(len(depth_scores))),
                        reverse=True)
        candidates = [gap for score, gap in ranked if score > cutoff]

        for gap in candidates:
            # Only candidate gaps are ever flagged, so looking at the flags
            # of the neighbouring gaps is enough to detect a close boundary.
            nearby = range(max(0, gap - 3), min(len(boundaries), gap + 4))
            crowded = any(boundaries[other] == 1
                          for other in nearby if other != gap)
            if not crowded:
                boundaries[gap] = 1
        return boundaries

    def _depth_scores(self, scores):
        """Calculates the depth of each gap, i.e. the average difference
        between the left and right peaks and the gap's score"""

        depth_scores = [0 for x in scores]
        #clip boundaries: this holds on the rule of thumb(my thumb)
        #that a section shouldn't be smaller than at least 2
        #pseudosentences for small texts and around 5 for larger ones.

        clip = min(max(len(scores)/10, 2), 5)
        index = clip

        for gapscore in scores[clip:-clip]:
            lpeak = gapscore
            for score in scores[index::-1]:
                if score >= lpeak:
                    lpeak = score
                else:
                    break
            rpeak = gapscore
            for score in scores[index:]:
                if score >= rpeak:
                    rpeak = score
                else:
                    break
            depth_scores[index] = lpeak + rpeak - 2 * gapscore
            index += 1

        return depth_scores

    def _normalize_boundaries(self, text, boundaries, paragraph_breaks):
        """Normalize the boundaries identified to the original text's
        paragraph breaks"""

        norm_boundaries = []
        char_count, word_count, gaps_seen = 0, 0, 0
        seen_word = False

        for char in text:
            char_count += 1
            if char in " \t\n" and seen_word:
                seen_word = False
                word_count += 1
            if char not in " \t\n" and not seen_word:
                seen_word=True
            if gaps_seen < len(boundaries) and word_count > \
                                               (max(gaps_seen*self.w, self.w)):
                if boundaries[gaps_seen] == 1:
                    #find closest paragraph break
                    best_fit = len(text)
                    for br in paragraph_breaks:
                        if best_fit > abs(br-char_count):
                            best_fit = abs(br-char_count)
                            bestbr = br
                        else:
                            break
                    if bestbr not in norm_boundaries: #avoid duplicates
                        norm_boundaries.append(bestbr)
                gaps_seen += 1

        return norm_boundaries


class TokenTableField(object):
    """Per-token record of the token table.

    Holds the statistics gathered for one token while the table is built,
    for use by the later scoring steps.
    """
    def __init__(self,
                 first_pos,
                 ts_occurences,
                 total_count=1,
                 par_count=1,
                 last_par=0,
                 last_tok_seq=None):
        # Each argument is stored as an attribute of the same name.
        self.first_pos = first_pos
        self.ts_occurences = ts_occurences
        self.total_count = total_count
        self.par_count = par_count
        self.last_par = last_par
        self.last_tok_seq = last_tok_seq

class TokenSequence(object):
    """A list of (word, offset) pairs together with its index and its
    original length."""
    def __init__(self,
                 index,
                 wrdindex_list,
                 original_length=None):
        self.index = index
        self.wrdindex_list = wrdindex_list
        # A missing (or otherwise falsy) length defaults to the list's size.
        if not original_length:
            original_length = len(wrdindex_list)
        self.original_length = original_length

#Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth
def smooth(x, window_len=11, window='flat'):
    """smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) in both ends so that transient parts are minimized
    in the beginning and end part of the output signal.

    :param x: the input signal, a 1-dimensional numpy array
    :param window_len: the dimension of the smoothing window; should be an odd integer
    :param window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
        flat window will produce a moving average smoothing.

    :return: the smoothed signal; *x* itself is returned unchanged when
        ``window_len`` is smaller than 3
    :raises ValueError: if *x* is not 1-dimensional, if it has fewer than
        ``window_len`` samples, or if *window* is not one of the names above

    example::

        t = numpy.arange(-2, 2, 0.1)
        x = numpy.sin(t) + numpy.random.randn(len(t)) * 0.1
        y = smooth(x)

    :see also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve,
        scipy.signal.lfilter

    NOTE: when ``x.size == window_len`` the left reflection is one sample
    short, so the result has one sample fewer than *x*.

    TODO: the window parameter could be the window itself if an array instead of a string
    """

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad both ends with point-reflected copies of the signal.
    s = numpy.r_[2 * x[0] - x[window_len:1:-1],
                 x,
                 2 * x[-1] - x[-1:-window_len:-1]]

    if window == 'flat': #moving average
        w = numpy.ones(window_len, 'd')
    else:
        # The name was validated above; look the window function up
        # directly instead of evaluating a string of code.
        w = getattr(numpy, window)(window_len)

    y = numpy.convolve(w / w.sum(), s, mode='same')

    # Trim the padding added above.
    return y[window_len - 1:-window_len + 1]


def demo(text=None):
    """Plot the scores the tokenizer computes for *text*.

    Runs a demo-mode TextTilingTokenizer and plots the gap scores, the
    smoothed gap scores and the depth scores as lines, with the boundary
    flags as a stem plot.  When *text* is None, the first 10000 characters
    of the Brown corpus are used.
    """
    from nltk.corpus import brown
    from matplotlib import pylab

    tokenizer = TextTilingTokenizer(demo_mode=True)
    sample = brown.raw()[:10000] if text is None else text
    gap_scores, smoothed, depths, flags = tokenizer.tokenize(sample)

    pylab.xlabel("Sentence Gap index")
    pylab.ylabel("Gap Scores")
    curves = (
        (gap_scores, "Gap Scores"),
        (smoothed, "Smoothed Gap scores"),
        (depths, "Depth scores"),
    )
    for series, caption in curves:
        pylab.plot(range(len(series)), series, label=caption)
    pylab.stem(range(len(flags)), flags)
    pylab.legend()
    pylab.show()