python source code of wnli

fine-lm-master
- src
  - tensor2tensor
    - AUTHORS
    - tensor2tensor
      - serving
        export.py
        query.py
        __init__.py
        README.md
        serving_utils.py
      - utils
        video2gif.py
        avg_checkpoints.py
        expert_utils_test.py
        data_reader_test.py
        quantization.py
        trainer_lib_test.py
        modality.py
        cloud_tpu.py
        usr_dir.py
        get_rouge.py
        metrics_test.py
        learning_rate.py
        devices.py
        yellowfin_test.py
        metrics.py
        cloud_mlengine.py
        decoding.py
        adv_attack_utils.py
        rouge.py
        video_metrics.py
        diet_test.py
        flags.py
        adafactor.py
        get_ende_bleu.sh
        diet.py
        data_reader.py
        rouge_test.py
        registry.py
        restore_hook.py
        checkpoint_compatibility_test.py
        t2t_model.py
        yellowfin.py
        __init__.py
        multistep_optimizer_test.py
        registry_test.py
        bleu_hook_test.py
        optimize.py
        expert_utils.py
        compute_video_metrics.py
        metrics_hook_test.py
        beam_search.py
        beam_search_test.py
        trainer_lib.py
        get_cnndm_rouge.sh
        metrics_hook.py
        multistep_optimizer.py
        bleu_hook.py
      - layers
        discretization.py
        latent_layers.py
        discretization_test.py
        rev_block_test.py
        common_hparams.py
        modalities_test.py
        common_image_attention.py
        rev_block.py
        common_message_passing_attention.py
        common_layers.py
        common_attention.py
        common_attention_test.py
        common_image_attention_test.py
        __init__.py
        modalities.py
        common_layers_test.py
      - models
        revnet_test.py
        slicenet.py
        distillation.py
        transformer.py
        basic_test.py
        slicenet_test.py
        neural_gpu.py
        basic.py
        bytenet_test.py
        image_transformer_2d.py
        xception.py
        bytenet.py
        image_transformer_2d_test.py
        resnet.py
        lstm_test.py
        image_transformer.py
        lstm.py
        xception_test.py
        __init__.py
        README.md
        shake_shake.py
        transformer_test.py
        resnet_test.py
        research
        universal_transformer_util.py
        rl.py
        autoencoders.py
        transformer_moe.py
        cycle_gan.py
        autoencoders_test.py
        gene_expression.py
        multimodel.py
        attention_lm_moe.py
        transformer_vae_test.py
        gene_expression_test.py
        transformer_revnet.py
        adafactor_experiments.py
        transformer_nat.py
        transformer_revnet_test.py
        transformer_vae.py
        attention_lm.py
        universal_transformer_test.py
        next_frame.py
        lm_experiments.py
        transformer_symshard.py
        super_lm.py
        transformer_sketch.py
        multimodel_test.py
        __init__.py
        universal_transformer.py
        next_frame_test.py
        aligned.py
        neural_gpu_test.py
        revnet.py
        image_transformer_test.py
        vanilla_gan.py
      - rl
        model_rl_experiment_test.py
        rl_trainer_lib_test.py
        ppo.py
        model_rl_experiment_stochastic_test.py
        t2t_rl_trainer.py
        model_rl_experiment.py
        rl_trainer_lib.py
        __init__.py
        README.md
        collect.py
        envs
        py_func_batch_env.py
        batch_env_factory.py
        tf_atari_wrappers.py
        in_graph_batch_env.py
        batch_env.py
        __init__.py
        utils.py
        simulated_batch_env.py
      - test_data
        example_usr_dir
        my_submodule.py
        __init__.py
        requirements.txt
        vocab.ende.8192
        transformer_test_ckpt
        flags.txt
        model.ckpt-1.index
        hparams.json
        checkpoint
        model.ckpt-1.data-00000-of-00002
      - notebooks
        hello_t2t-rl.ipynb
        asr_transformer.ipynb
      - data_generators
        translate_test.py
        translate_enmk.py
        snli.py
        translate_enzh.py
        gym_utils.py
        twentybn.py
        mscoco_test.py
        speech_recognition.py
        audio_test.py
        gym_problems.py
        imdb.py
        gym_problems_specs.py
        timeseries_data_generator.py
        desc2code.py
        wsj_parsing.py
        tokenizer.py
        cipher.py
        gene_expression.py
        wikitext103.py
        mnist.py
        celeba.py
        librispeech.py
        translate_ende.py
        timeseries.py
        gene_expression_test.py
        ptb.py
        problem_hparams.py
        dna_encoder_test.py
        translate_enid.py
        text_encoder_test.py
        lm1b.py
        generator_utils.py
        dna_encoder.py
        cnn_dailymail.py
        wnli.py
        fsns.py
        mscoco.py
        celeba_test.py
        imagenet_test.py
        babi_qa.py
        desc2code_test.py
        text_encoder_build_subword.py
        algorithmic_test.py
        tokenizer_test.py
        video_generated.py
        program_search.py
        image_utils.py
        audio.py
        ice_parsing.py
        image_lsun.py
        algorithmic_math.py
        style_transfer.py
        test_data
        vocab-1.txt
        vocab-2.txt
        corpus-1.txt
        corpus-2.txt
        multi_problem.py
        algorithmic.py
        rte.py
        video_utils.py
        ocr.py
        problem.py
        subject_verb_agreement.py
        timeseries_data_generator_test.py
        bair_robot_pushing.py
        sst_binary.py
        image_utils_test.py
        gym_problems_test.py
        text_problems.py
        translate_enet.py
        generator_utils_test.py
        gh_function_docstring_encoder.py
        __init__.py
        text_problems_test.py
        common_voice.py
        README.md
        google_robot_pushing.py
        timeseries_test.py
        text_encoder.py
        program_search_test.py
        wikisum
        delete_instances.sh
        validate_data.py
        wikisum.py
        utils_test.py
        parallel_launch.py
        get_references_web_single_group.py
        test_data
        para_good1.txt
        para_bad1.txt
        get_references_commoncrawl.py
        get_references_web.py
        produce_examples.py
        __init__.py
        README.md
        utils.py
        generate_vocab.py
        translate_envi.py
        multinli.py
        all_problems.py
        algorithmic_math_test.py
        translate.py
        lm1b_imdb.py
        inspect_tfrecord.py
        cola.py
        imagenet.py
        cifar.py
        lambada.py
        wiki.py
        qnli.py
        translate_enfr.py
        quora_qpairs.py
        translate_encs.py
        squad.py
      - __init__.py
      - visualization
        visualization_test.py
        attention.py
        TransformerVisualization.ipynb
        attention.js
        __init__.py
        visualization.py
      - bin
        t2t-trainer
        t2t_trainer.py
        t2t_translate_all.py
        t2t-translate-all
        t2t_datagen.py
        t2t-decoder
        t2t-make-tf-configs
        t2t-avg-all
        make_tf_configs.py
        t2t_avg_all.py
        t2t_trainer_test.py
        t2t_distill.py
        t2t_attack.py
        t2t-exporter
        t2t-bleu
        __init__.py
        t2t-insights-server
        t2t_bleu.py
        t2t_decoder.py
        t2t-datagen
        t2t-query-server
      - problems_test.py
      - problems.py
      - insights
        query_processor.py
        graph.py
        insight_configuration.proto
        server.py
        polymer
        explore_view
        explore-view.js
        explore-view.html
        query_card
        query-card.js
        query-card.html
        insights_app
        insights-app.html
        insights-app.js
        language_selector
        language-selector-content.html
        language-selector-content.js
        language-selector.html
        language-selector.js
        attention_visualization
        attention-visualization.js
        attention-visualization.html
        processing_visualization
        processing-visualization.js
        processing-visualization.html
        bower.json
        translation_result
        translation-result.html
        translation-result.js
        common-types.js
        graph_visualization
        graph-visualization.html
        graph-visualization.js
        .bowerrc
        tensor2tensor.html
        index.html
        __init__.py
        README.md
        transformer_model.py
    - LICENSE
    - CONTRIBUTING.md
    - ISSUE_TEMPLATE.md
    - setup.py
    - .travis.yml
    - README.md
    - pylintrc
    - .gitignore
    - docs
      - new_model.md
      - walkthrough.md
      - cloud_mlengine.md
      - index.md
      - overview.md
      - tutorials
        asr_with_transformer.md
      - distributed_training.md
      - new_problem.md
      - cloud_tpu.md
  - README.md
  - scripts
    - opennmt-unmt
      - train.sh
      - train.py
      - inference.py
    - en-lm.sh
    - gcloud-ctpu-startup.sh
- LICENSE
- imgs
- presentations
  - intro
    - xaringan-themer.css
    - libs
      - dt-core
        js
        jquery.dataTables.min.js
        css
        jquery.dataTables.min.css
        jquery.dataTables.extra.css
      - jquery
        jquery.min.js
        LICENSE.txt
      - crosstalk
        js
        crosstalk.min.js.map
        crosstalk.js.map
        crosstalk.min.js
        crosstalk.js
        css
        crosstalk.css
      - datatables-css
        datatables-crosstalk.css
      - datatables-binding
        datatables.js
      - htmlwidgets
        htmlwidgets.js
    - imgs
      - stupendousman.jpeg
    - intro-slides_files
      - figure-html
        cars-1.svg
    - index.html
    - intro-slides.html
    - index.Rmd
  - lit
    - delayed-impact
      - xaringan-themer.css
      - imgs
      - icml.html
      - icml.Rmd
  - progress
    - xaringan-themer.css
    - libs
      - dt-core
        js
        jquery.dataTables.min.js
        css
        jquery.dataTables.min.css
        jquery.dataTables.extra.css
      - jquery
        jquery.min.js
        LICENSE.txt
      - crosstalk
        js
        crosstalk.min.js.map
        crosstalk.js.map
        crosstalk.min.js
        crosstalk.js
        css
        crosstalk.css
      - datatables-css
        datatables-crosstalk.css
      - datatables-binding
        datatables.js
      - htmlwidgets
        htmlwidgets.js
    - imgs
      - stupendousman.jpeg
    - index.html
    - index.Rmd
- README.md
- .gitignore
- docs
  - xaringan-themer.css
  - libs
    - dt-core
      - js
        jquery.dataTables.min.js
      - css
        jquery.dataTables.min.css
        jquery.dataTables.extra.css
    - jquery
      - jquery.min.js
      - LICENSE.txt
    - crosstalk
      - js
        crosstalk.min.js.map
        crosstalk.js.map
        crosstalk.min.js
        crosstalk.js
      - css
        crosstalk.css
    - datatables-css
      - datatables-crosstalk.css
    - datatables-binding
      - datatables.js
    - htmlwidgets
      - htmlwidgets.js
  - imgs
    - stupendousman.jpeg
  - index_files
    - figure-html
  - index.html
  - index.Rmd
- fine-lm.Rproj

# coding=utf-8
# Copyright 2018 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data generators for the Winograd NLI dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import zipfile
import six
from tensor2tensor.data_generators import generator_utils
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.data_generators import text_problems
from tensor2tensor.utils import registry
import tensorflow as tf

# Module-level alias for the end-of-sequence token defined in text_encoder.
# NOTE(review): not referenced anywhere else in the visible part of this
# module; presumably kept for parity with sibling data generators -- confirm
# before removing.
EOS = text_encoder.EOS


@registry.register_problem
class WinogradNLI(text_problems.TextConcat2ClassProblem):
  """Winograd NLI (WNLI) sentence-pair classification problem.

  Downloads the WNLI archive linked from GLUE, reads its tab-separated
  train/dev files and yields pairs of sentences with an integer label.
  """

  # Link to data from GLUE: https://gluebenchmark.com/tasks
  _WNLI_URL = ("https://firebasestorage.googleapis.com/v0/b/"
               "mtl-sentence-representations.appspot.com/o/"
               "data%2FWNLI.zip?alt=media&token=068ad0a0-ded7-"
               "4bd7-99a5-5e00222e0faf")

  @property
  def is_generate_per_split(self):
    """Samples are generated separately for each dataset split."""
    return True

  @property
  def dataset_splits(self):
    """One shard for the training split and one for the eval split."""
    return [{
        "split": problem.DatasetSplit.TRAIN,
        "shards": 1,
    }, {
        "split": problem.DatasetSplit.EVAL,
        "shards": 1,
    }]

  @property
  def approx_vocab_size(self):
    """Approximate target vocabulary size (2**13)."""
    return 2**13  # 8k vocab suffices for this small dataset.

  @property
  def vocab_filename(self):
    """Vocabulary file name, suffixed with the approximate vocab size."""
    return "vocab.wnli.%d" % self.approx_vocab_size

  @property
  def num_classes(self):
    """Number of target classes; matches the length of `class_labels`."""
    return 2

  @property
  def concat_token(self):
    """Token string used to join the two input sentences."""
    return "<EN-PR-HYP>"

  @property
  def concat_id(self):
    """Integer id associated with `concat_token`.

    Character-level vocabularies use `problem.TaskID.EN_PR_HYP`; every other
    vocabulary type uses the literal id 2.
    """
    if self.vocab_type == text_problems.VocabType.CHARACTER:
      return problem.TaskID.EN_PR_HYP
    # NOTE(review): presumably the first id after the reserved tokens of the
    # non-character vocabularies -- confirm against the base class.
    return 2

  def class_labels(self, data_dir):
    """Returns the label names, indexed by the integer label.

    Args:
      data_dir: unused.

    Returns:
      A list of two label strings.
    """
    del data_dir
    # Note this binary classification is different from usual MNLI.
    return ["not_entailment", "entailment"]

  def _maybe_download_corpora(self, tmp_dir):
    """Downloads and extracts the WNLI archive unless it is already present.

    Args:
      tmp_dir: directory the zip file is downloaded to and extracted in.

    Returns:
      The path `<tmp_dir>/WNLI`, where the extracted data is expected to live.
    """
    wnli_filename = "WNLI.zip"
    wnli_finalpath = os.path.join(tmp_dir, "WNLI")
    if not tf.gfile.Exists(wnli_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, wnli_filename, self._WNLI_URL)
      # The context manager closes the archive even if extraction fails.
      with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
        zip_ref.extractall(tmp_dir)

    return wnli_finalpath

  def example_generator(self, filename):
    """Yields examples parsed from a WNLI tab-separated file.

    The first line is treated as a header and skipped, as are blank lines.
    Every remaining line must consist of exactly four tab-separated fields:
    the first is ignored, the second and third are the two sentences and the
    fourth is the integer label.

    Args:
      filename: path of the .tsv file to read.

    Yields:
      A dict with "inputs" (list holding the two sentences as unicode text)
      and "label" (int).

    Raises:
      ValueError: if a non-blank line does not have exactly four fields or
        its label is not an integer.
    """
    # Opening through a context manager guarantees the handle is released
    # once the generator is exhausted, closed or fails part-way through.
    with tf.gfile.Open(filename, "rb") as tsv_file:
      for idx, line in enumerate(tsv_file):
        if idx == 0:
          continue  # skip header
        if six.PY2:
          line = unicode(line.strip(), "utf-8")  # pylint: disable=undefined-variable
        else:
          line = line.strip().decode("utf-8")
        if not line:
          continue  # tolerate blank lines, e.g. a trailing newline
        _, s1, s2, l = line.split("\t")
        yield {
            "inputs": [s1, s2],
            "label": int(l)
        }

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Yields the samples of one dataset split.

    Args:
      data_dir: unused here.
      tmp_dir: directory used to download and extract the raw data.
      dataset_split: `problem.DatasetSplit.TRAIN` reads "train.tsv"; any
        other value reads "dev.tsv".

    Yields:
      The example dicts produced by `example_generator`.
    """
    wnli_dir = self._maybe_download_corpora(tmp_dir)
    if dataset_split == problem.DatasetSplit.TRAIN:
      filesplit = "train.tsv"
    else:
      filesplit = "dev.tsv"

    filename = os.path.join(wnli_dir, filesplit)
    for example in self.example_generator(filename):
      yield example


@registry.register_problem
class WinogradNLICharacters(WinogradNLI):
  """Character-level variant of the Winograd NLI classification problem."""

  @property
  def task_id(self):
    """Task identifier for this problem: `problem.TaskID.EN_NLI`."""
    return problem.TaskID.EN_NLI

  @property
  def vocab_type(self):
    """Vocabulary type: `text_problems.VocabType.CHARACTER`."""
    return text_problems.VocabType.CHARACTER