Python six.PY2 Examples

The following are 30 code examples showing how to use six.PY2. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module six, or try the search function.

Example 1
Project: BERT-Classification-Tutorial   Author: Socialbird-AILab   File: tokenization.py    License: Apache License 2.0 6 votes vote down vote up
def convert_to_unicode(text):
    """Return `text` as a Unicode string, decoding byte strings as UTF-8.

    Bytes that are not valid UTF-8 are dropped (the "ignore" error handler).

    Raises:
        ValueError: if `text` is neither a text nor a byte string, or if the
            interpreter is neither Python 2 nor Python 3.
    """
    # Pick the (text, bytes) type pair of the running interpreter. `unicode`
    # is only looked up on Python 2, where the name exists.
    if six.PY3:
        already_text, needs_decoding = str, bytes
    elif six.PY2:
        already_text, needs_decoding = unicode, str
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, already_text):
        return text
    if isinstance(text, needs_decoding):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 2
Project: BERT-Classification-Tutorial   Author: Socialbird-AILab   File: tokenization.py    License: Apache License 2.0 6 votes vote down vote up
def printable_text(text):
    """Return `text` as the native `str` type, ready for print or `tf.logging`.

    The native `str` is a Unicode string on Python 3 and a byte string on
    Python 2, so the direction of the conversion depends on the interpreter.

    Raises:
        ValueError: if `text` is neither a text nor a byte string, or if the
            interpreter is neither Python 2 nor Python 3.
    """
    if not six.PY3 and not six.PY2:
        raise ValueError("Not running on Python2 or Python 3?")

    # A native `str` is already what the caller wants on either interpreter.
    if isinstance(text, str):
        return text

    if six.PY3:
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
        return text.encode("utf-8")

    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 3
Project: neuropythy   Author: noahbenson   File: __init__.py    License: GNU Affero General Public License v3.0 6 votes vote down vote up
def reload_neuropythy():
    '''
    reload_neuropythy() reloads all of the modules of neuropythy and returns the reloaded
    neuropythy module. This is similar to reload(neuropythy) except that it reloads all the
    neuropythy submodules prior to reloading neuropythy.

    Only the names listed in `submodules` (defined outside this function) that are currently
    present in sys.modules are reloaded.

    Example:
      import neuropythy as ny
      # ... some nonsense that breaks the library ...
      ny = ny.reload_neuropythy()
    '''
    import sys
    # Bind `reload` unconditionally. A function-level import makes the name local to the
    # function, so skipping the import on Python 2 left it unbound and the calls below
    # raised UnboundLocalError instead of reaching the builtin. six.moves.reload_module
    # resolves to the builtin reload() on Python 2 and to importlib/imp reload on Python 3.
    from six.moves import reload_module as reload
    for mdl in submodules:
        if mdl in sys.modules:
            sys.modules[mdl] = reload(sys.modules[mdl])
    return reload(sys.modules['neuropythy'])
Example 4
Project: OpenNRE   Author: thunlp   File: utils.py    License: MIT License 6 votes vote down vote up
def convert_to_unicode(text):
    """Coerce `text` to Unicode, treating byte strings as UTF-8 data.

    Invalid UTF-8 sequences are silently skipped while decoding.

    Raises:
        ValueError: for inputs that are neither text nor bytes, or when the
            interpreter is neither Python 2 nor Python 3.
    """
    # `unicode` is only referenced on the Python 2 path, where it is defined.
    if six.PY3:
        unicode_type, byte_type = str, bytes
    elif six.PY2:
        unicode_type, byte_type = unicode, str
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, unicode_type):
        return text
    if isinstance(text, byte_type):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 5
Project: OpenNRE   Author: thunlp   File: utils.py    License: MIT License 6 votes vote down vote up
def printable_text(text):
    """Convert `text` to the interpreter's native `str` for print or `tf.logging`.

    Those consumers want `str` on both Python 2 and Python 3, but `str` is a
    byte string on the former and a Unicode string on the latter.

    Raises:
        ValueError: for inputs that are neither text nor bytes, or when the
            interpreter is neither Python 2 nor Python 3.
    """
    if not six.PY3 and not six.PY2:
        raise ValueError("Not running on Python2 or Python 3?")

    # Native `str` passes through untouched on both interpreters.
    if isinstance(text, str):
        return text

    if six.PY3:
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
        return text.encode("utf-8")

    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 6
Project: fine-lm   Author: akzaidi   File: quora_qpairs.py    License: MIT License 6 votes vote down vote up
def example_generator(self, filename):
    """Yield `{"inputs", "label"}` dicts parsed from a tab-separated file.

    The first line is treated as a header and skipped. Lines with fewer than
    six tab-separated fields are counted, logged and skipped. The fields from
    index 3 onward are unpacked as (s1, s2, label), so any line that is kept
    must have exactly six fields. Every kept line produces two examples, one
    per ordering of the sentence pair.

    Args:
      filename: path of the file to read; opened in binary mode and decoded
        as UTF-8.

    Yields:
      dict with "inputs" (a two-element list of strings) and "label" (int).
    """
    skipped = 0
    # The `with` block closes the handle once the generator is exhausted or
    # closed early; a bare Open() call would leave it open.
    with tf.gfile.Open(filename, "rb") as tsv_file:
      for idx, line in enumerate(tsv_file):
        if idx == 0: continue  # skip header
        if six.PY2:
          line = unicode(line.strip(), "utf-8")
        else:
          line = line.strip().decode("utf-8")
        split_line = line.split("\t")
        if len(split_line) < 6:
          skipped += 1
          tf.logging.info("Skipping %d" % skipped)
          continue
        s1, s2, l = split_line[3:]
        # A neat data augmentation trick from Radford et al. (2018)
        # https://blog.openai.com/language-unsupervised/
        inputs = [[s1, s2], [s2, s1]]
        for inp in inputs:
          yield {
              "inputs": inp,
              "label": int(l)
          }
Example 7
Project: fine-lm   Author: akzaidi   File: multinli.py    License: MIT License 6 votes vote down vote up
def example_generator(self, filename):
    """Yield `{"inputs", "label"}` dicts parsed from a tab-separated file.

    The first line is treated as a header and skipped. For every other line
    the sentence pair is taken from columns 8 and 9, and the label is the
    index of the last column's value within `self.class_labels(data_dir=None)`.

    Args:
      filename: path of the file to read; opened in binary mode and decoded
        as UTF-8.

    Yields:
      dict with "inputs" (a two-element list of strings) and "label" (int).

    Raises:
      ValueError: if a line's last column is not one of the class labels, or
        if columns 8 and 9 are not both present.
    """
    label_list = self.class_labels(data_dir=None)
    # The `with` block closes the handle once the generator is exhausted or
    # closed early; a bare Open() call would leave it open.
    with tf.gfile.Open(filename, "rb") as tsv_file:
      for idx, line in enumerate(tsv_file):
        if idx == 0: continue  # skip header
        if six.PY2:
          line = unicode(line.strip(), "utf-8")
        else:
          line = line.strip().decode("utf-8")
        split_line = line.split("\t")
        # Works for both splits even though dev has some extra human labels.
        s1, s2 = split_line[8:10]
        l = label_list.index(split_line[-1])
        inputs = [s1, s2]
        yield {
            "inputs": inputs,
            "label": l
        }
Example 8
Project: fine-lm   Author: akzaidi   File: generator_utils.py    License: MIT License 6 votes vote down vote up
def to_example(dictionary):
  """Build a tf.Example from a dict mapping names to non-empty value lists.

  The feature kind is chosen from the type of each list's first element:
  integers become an int64 list, floats a float list, and text or byte
  strings a bytes list. On Python 3, text strings are UTF-8 encoded first.

  Raises:
    ValueError: if a value list is empty or its first element has an
      unsupported type.
  """
  features = {}
  for name, values in six.iteritems(dictionary):
    if not values:
      raise ValueError("Empty generated field: %s" % str((name, values)))

    first = values[0]
    if isinstance(first, six.integer_types):
      feature = tf.train.Feature(int64_list=tf.train.Int64List(value=values))
    elif isinstance(first, float):
      feature = tf.train.Feature(float_list=tf.train.FloatList(value=values))
    elif isinstance(first, six.string_types):
      if not six.PY2:  # Convert in python 3.
        values = [bytes(item, "utf-8") for item in values]
      feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=values))
    elif isinstance(first, bytes):
      feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=values))
    else:
      raise ValueError("Value for %s is not a recognized type; v: %s type: %s" %
                       (name, str(first), str(type(first))))
    features[name] = feature

  return tf.train.Example(features=tf.train.Features(feature=features))
Example 9
Project: gnocchi   Author: gnocchixyz   File: utils.py    License: Apache License 2.0 6 votes vote down vote up
def ResourceUUID(value, creator):
    """Turn a resource id into a `uuid.UUID`.

    A `uuid.UUID` instance is returned as is, and a string that parses as a
    UUID is converted directly. Any other string of at most 255 characters is
    mapped to a version-5 UUID derived from the string and `creator`.

    Raises:
        ValueError: if `value` contains '/', or is not a UUID string and is
            longer than 255 characters.
    """
    if isinstance(value, uuid.UUID):
        return value
    if '/' in value:
        raise ValueError("'/' is not supported in resource id")

    try:
        return uuid.UUID(value)
    except ValueError:
        if len(value) > 255:
            raise ValueError(
                'transformable resource id >255 max allowed characters')

        owner = "\x00" if creator is None else creator
        # value/creator must be str (unicode) in Python 3 and str (bytes)
        # in Python 2. It's not logical, I know.
        if six.PY2:
            value = value.encode('utf-8')
            owner = owner.encode('utf-8')
        return uuid.uuid5(RESOURCE_ID_NAMESPACE, value + "\x00" + owner)
Example 10
Project: tudouNLP   Author: fennuDetudou   File: tokenization.py    License: MIT License 6 votes vote down vote up
def convert_to_unicode(text):
  """Return `text` as a Unicode string, decoding byte strings as UTF-8.

  Bytes that are not valid UTF-8 are dropped (the "ignore" error handler).

  Raises:
    ValueError: if `text` is neither a text nor a byte string, or if the
      interpreter is neither Python 2 nor Python 3.
  """
  # Pick the (text, bytes) type pair of the running interpreter. `unicode`
  # is only looked up on Python 2, where the name exists.
  if six.PY3:
    already_text, needs_decoding = str, bytes
  elif six.PY2:
    already_text, needs_decoding = unicode, str
  else:
    raise ValueError("Not running on Python2 or Python 3?")

  if isinstance(text, already_text):
    return text
  if isinstance(text, needs_decoding):
    return text.decode("utf-8", "ignore")
  raise ValueError("Unsupported string type: %s" % (type(text)))
Example 11
Project: tudouNLP   Author: fennuDetudou   File: tokenization.py    License: MIT License 6 votes vote down vote up
def printable_text(text):
  """Return `text` as the native `str` type, ready for print or `tf.logging`.

  The native `str` is a Unicode string on Python 3 and a byte string on
  Python 2, so the direction of the conversion depends on the interpreter.

  Raises:
    ValueError: if `text` is neither a text nor a byte string, or if the
      interpreter is neither Python 2 nor Python 3.
  """
  if not six.PY3 and not six.PY2:
    raise ValueError("Not running on Python2 or Python 3?")

  # A native `str` is already what the caller wants on either interpreter.
  if isinstance(text, str):
    return text

  if six.PY3:
    if isinstance(text, bytes):
      return text.decode("utf-8", "ignore")
  elif isinstance(text, unicode):
    return text.encode("utf-8")

  raise ValueError("Unsupported string type: %s" % (type(text)))
Example 12
Project: pwnypack   Author: edibledinos   File: bytecode.py    License: MIT License 6 votes vote down vote up
def to_code(self):
        """
        Build a native python code object from this instance's ``co_*``
        fields. This only works if the internals of the code object are
        compatible with those of the running python version.

        Returns:
            types.CodeType: The native python code object.

        Raises:
            ValueError: If ``self.internals`` is not the object returned by
                ``get_py_internals()``.
        """

        if self.internals is not get_py_internals():
            raise ValueError('CodeObject is not compatible with the running python internals.')

        # On Python 3 the constructor takes one extra positional field,
        # co_kwonlyargcount, directly after co_argcount.
        if six.PY2:
            leading = (self.co_argcount,)
        else:
            leading = (self.co_argcount, self.co_kwonlyargcount)

        remaining = (
            self.co_nlocals, self.co_stacksize, self.co_flags, self.co_code,
            self.co_consts, self.co_names, self.co_varnames, self.co_filename,
            self.co_name, self.co_firstlineno, self.co_lnotab,
            self.co_freevars, self.co_cellvars,
        )
        return types.CodeType(*(leading + remaining))
Example 13
Project: custodia   Author: latchset   File: config.py    License: GNU General Public License v3.0 6 votes vote down vote up
def create_parser(self):
        """Create a ConfigParser pre-populated with ENV and global sections.

        The parser uses extended interpolation, case-sensitive option names
        and ``self.defaults`` as defaults. The ``ENV`` section mirrors the
        process environment; the ``global`` section receives fixed defaults.

        Returns:
            The configured parser.
        """
        parser = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation(),
            defaults=self.defaults
        )
        # Keep option names exactly as written instead of lower-casing them.
        parser.optionxform = str

        # Mirror the environment, skipping values that contain CR, LF or NUL.
        parser.add_section(u'ENV')
        for name, raw in os.environ.items():
            if set(raw).intersection('\r\n\x00'):
                continue
            if six.PY2:
                name = name.decode('utf-8', 'replace')
                raw = raw.decode('utf-8', 'replace')
            # '$' is doubled so it is not taken as an interpolation marker.
            parser.set(u'ENV', name, raw.replace(u'$', u'$$'))

        # Default globals, set in this fixed order.
        parser.add_section(u'global')
        global_defaults = (
            (u'auditlog', u'${logdir}/audit.log'),
            (u'debug', u'false'),
            (u'umask', u'027'),
            (u'makedirs', u'false'),
        )
        for option, default in global_defaults:
            parser.set(u'global', option, default)

        return parser
Example 14
Project: recipes-py   Author: luci   File: luci_context.py    License: Apache License 2.0 6 votes vote down vote up
def _initial_load():
  """Populate the module-level `_CUR_CONTEXT` from the file named by ENV_KEY.

  `_CUR_CONTEXT` is set to the loaded JSON content when the file can be read,
  parsed and accepted by `_check_ok`; otherwise it is set to an empty dict.
  Open, read and decode failures are logged rather than raised.
  """
  global _CUR_CONTEXT

  ctx_path = os.environ.get(ENV_KEY)
  if not ctx_path:
    _CUR_CONTEXT = {}
    return

  if six.PY2:
    ctx_path = ctx_path.decode(sys.getfilesystemencoding())
  _LOGGER.debug('Loading LUCI_CONTEXT: %r', ctx_path)

  context = {}
  try:
    with open(ctx_path, 'r') as f:
      candidate = _to_utf8(json.load(f))
      if _check_ok(candidate):
        context = candidate
  except OSError as ex:
    _LOGGER.error('LUCI_CONTEXT failed to open: %s', ex)
  except IOError as ex:
    _LOGGER.error('LUCI_CONTEXT failed to read: %s', ex)
  except ValueError as ex:
    _LOGGER.error('LUCI_CONTEXT failed to decode: %s', ex)

  _CUR_CONTEXT = context
Example 15
Project: UDPipe-Future   Author: CoNLL-UD-2018   File: bert_wrapper.py    License: Mozilla Public License 2.0 6 votes vote down vote up
def convert_to_unicode(text):
    """Coerce `text` to Unicode, treating byte strings as UTF-8 data.

    Invalid UTF-8 sequences are silently skipped while decoding.

    Raises:
        ValueError: for inputs that are neither text nor bytes, or when the
            interpreter is neither Python 2 nor Python 3.
    """
    # `unicode` is only referenced on the Python 2 path, where it is defined.
    if six.PY3:
        unicode_type, byte_type = str, bytes
    elif six.PY2:
        unicode_type, byte_type = unicode, str
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, unicode_type):
        return text
    if isinstance(text, byte_type):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 16
Project: UDPipe-Future   Author: CoNLL-UD-2018   File: bert_wrapper.py    License: Mozilla Public License 2.0 6 votes vote down vote up
def printable_text(text):
    """Convert `text` to the interpreter's native `str` for print or `tf.logging`.

    Those consumers want `str` on both Python 2 and Python 3, but `str` is a
    byte string on the former and a Unicode string on the latter.

    Raises:
        ValueError: for inputs that are neither text nor bytes, or when the
            interpreter is neither Python 2 nor Python 3.
    """
    if not six.PY3 and not six.PY2:
        raise ValueError("Not running on Python2 or Python 3?")

    # Native `str` passes through untouched on both interpreters.
    if isinstance(text, str):
        return text

    if six.PY3:
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
        return text.encode("utf-8")

    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 17
Project: decaNLP   Author: salesforce   File: example.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fromCSV(cls, data, fields):
        """Parse one CSV-formatted line and build an instance from its columns.

        Trailing newlines are stripped from `data`, the line is split with the
        standard `csv` module, and the resulting column list is handed to
        `cls.fromlist` together with `fields`.
        """
        text = data.rstrip("\n")

        if six.PY2:
            # The Python 2 csv module does not take unicode input: parse the
            # UTF-8 encoded bytes, then decode every column back to unicode
            # (the original input format).
            for row in csv.reader([text.encode('utf-8')]):
                columns = [six.text_type(cell, 'utf-8') for cell in row]
                break
        else:
            columns = list(csv.reader([text]))[0]

        return cls.fromlist(columns, fields)
Example 18
Project: decaNLP   Author: salesforce   File: field.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def preprocess(self, x):
        """Prepare a single raw example for this field.

        Steps, in order: a Python 2 byte string is decoded to Unicode as
        UTF-8; if `self.sequential` is set and the input is text, it is
        tokenized after its trailing newlines are stripped; if `self.lower`
        is set it is lowercased; finally the user-provided
        `self.preprocessing` Pipeline, when present, is applied."""
        is_py2_byte_string = (
            six.PY2
            and isinstance(x, six.string_types)
            and not isinstance(x, six.text_type))
        if is_py2_byte_string:
            decode = Pipeline(lambda s: six.text_type(s, encoding='utf-8'))
            x = decode(x)

        if self.sequential and isinstance(x, six.text_type):
            x = self.tokenize(x.rstrip('\n'))

        if self.lower:
            lowercase = Pipeline(six.text_type.lower)
            x = lowercase(x)

        if self.preprocessing is None:
            return x
        return self.preprocessing(x)
Example 19
Project: linter-pylama   Author: AtomLinter   File: helpers.py    License: MIT License 6 votes vote down vote up
def _function_type(function, builtins):
    """Build the proxy class matching the runtime type name of `function`.

    The class name is chosen from the node kind and, for bound and unbound
    methods, from the running Python major version; the result comes from
    `_build_proxy_class(cls_name, builtins)`.
    """
    # NOTE: a node that is none of the three kinds below leaves `cls_name`
    # unbound, so the final call raises UnboundLocalError.
    if isinstance(function, scoped_nodes.Lambda):
        defined_in_builtins = function.root().name == BUILTINS
        cls_name = ('builtin_function_or_method' if defined_in_builtins
                    else 'function')
    elif isinstance(function, bases.BoundMethod):
        cls_name = 'instancemethod' if six.PY2 else 'method'
    elif isinstance(function, bases.UnboundMethod):
        cls_name = 'instancemethod' if six.PY2 else 'function'
    return _build_proxy_class(cls_name, builtins)
Example 20
Project: linter-pylama   Author: AtomLinter   File: python3.py    License: MIT License 6 votes vote down vote up
def visit_raise(self, node):
        """Check a raise statement for old-style syntax and for its raised
        value.

        On Python 2, a raise that has both an exception and an instance part
        is reported as 'old-raise-syntax'. The raised expression is then
        passed to `_check_raise_value`; if that returns a false value, the
        first inferred value of the expression is checked as well.
        """
        raised = node.exc
        if six.PY2 and raised is not None and node.inst is not None:
            self.add_message('old-raise-syntax', node=node)

        # Ignore empty raise.
        if raised is None:
            return
        if self._check_raise_value(node, raised):
            return

        try:
            inferred = next(astroid.unpack_infer(raised))
        except astroid.InferenceError:
            return
        self._check_raise_value(node, inferred)
Example 21
Project: plugin.video.emby   Author: MediaBrowser   File: _parser.py    License: GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, instream):
        """Set up the lexer state around a character stream.

        `instream` may be a byte string, a text string, or any object with a
        `read` attribute. Byte input is decoded and text input is wrapped in
        a StringIO.

        Raises:
            TypeError: if `instream` is neither a string nor an object with
                a `read` attribute.
        """
        if six.PY2:
            # In Python 2, we can't duck type properly because unicode has
            # a 'decode' function, and we'd be double-decoding
            must_decode = isinstance(instream, (binary_type, bytearray))
        else:
            must_decode = getattr(instream, 'decode', None) is not None
        if must_decode:
            instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        self.charstack = []
        self.tokenstack = []
        self.eof = False
Example 22
Project: BERT-for-Chinese-Question-Answering   Author: eva-n27   File: tokenization.py    License: Apache License 2.0 6 votes vote down vote up
def convert_to_unicode(text):
    """Return `text` as a Unicode string, decoding byte strings as UTF-8.

    Bytes that are not valid UTF-8 are dropped (the "ignore" error handler).

    Raises:
        ValueError: if `text` is neither a text nor a byte string, or if the
            interpreter is neither Python 2 nor Python 3.
    """
    # Pick the (text, bytes) type pair of the running interpreter. `unicode`
    # is only looked up on Python 2, where the name exists.
    if six.PY3:
        already_text, needs_decoding = str, bytes
    elif six.PY2:
        already_text, needs_decoding = unicode, str
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, already_text):
        return text
    if isinstance(text, needs_decoding):
        return text.decode("utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 23
Project: BERT-for-Chinese-Question-Answering   Author: eva-n27   File: tokenization.py    License: Apache License 2.0 6 votes vote down vote up
def printable_text(text):
    """Return `text` as the native `str` type, ready for print or `tf.logging`.

    The native `str` is a Unicode string on Python 3 and a byte string on
    Python 2, so the direction of the conversion depends on the interpreter.

    Raises:
        ValueError: if `text` is neither a text nor a byte string, or if the
            interpreter is neither Python 2 nor Python 3.
    """
    if not six.PY3 and not six.PY2:
        raise ValueError("Not running on Python2 or Python 3?")

    # A native `str` is already what the caller wants on either interpreter.
    if isinstance(text, str):
        return text

    if six.PY3:
        if isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
        return text.encode("utf-8")

    raise ValueError("Unsupported string type: %s" % (type(text)))
Example 24
Project: recruit   Author: Frank-qlu   File: _parser.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, instream):
        """Initialise the lexer around a character stream.

        `instream` may be a byte string, a text string, or any object that
        has a `read` attribute. Byte input is decoded and text input is
        wrapped in a StringIO.

        Raises:
            TypeError: if `instream` is neither a string nor an object with
                a `read` attribute.
        """
        if six.PY2:
            # In Python 2, we can't duck type properly because unicode has
            # a 'decode' function, and we'd be double-decoding
            must_decode = isinstance(instream, (bytes, bytearray))
        else:
            must_decode = getattr(instream, 'decode', None) is not None
        if must_decode:
            instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        self.charstack = []
        self.tokenstack = []
        self.eof = False
Example 25
Project: recruit   Author: Frank-qlu   File: _common.py    License: Apache License 2.0 6 votes vote down vote up
def tzname_in_python2(namefunc):
    """Decorator that makes a tzname-style function return bytestrings on
    Python 2.

    tzname() API changed in Python 3. It used to return bytes, but was changed
    to unicode strings. On Python 3 the function is returned unchanged; on
    Python 2 its non-None results are encoded.
    """
    if not PY2:
        return namefunc

    @wraps(namefunc)
    def adjust_encoding(*args, **kwargs):
        name = namefunc(*args, **kwargs)
        # None passes through untouched.
        return name if name is None else name.encode()

    return adjust_encoding


# The following is adapted from Alexander Belopolsky's tz library
# https://github.com/abalkin/tz 
Example 26
Project: ciocheck   Author: ContinuumIO   File: tools.py    License: MIT License 5 votes vote down vote up
def setup_pytest_coverage_args(self, paths):
        """Assemble `self.pytest_args` for a pytest run with coverage.

        The final list is the JSON report option, then the xdist options
        (Python 3 only), then the coverage options. Coverage targets the
        first entry of `paths` that is a directory; a mapping contributes
        its sorted keys. The coverage config file under `self.cmd_root` is
        passed along when it exists.
        """
        if isinstance(paths, (dict, OrderedDict)):
            paths = list(sorted(paths.keys()))

        # Only the first existing directory is covered.
        coverage_args = next(
            (['--cov={0}'.format(candidate)]
             for candidate in paths if os.path.isdir(candidate)),
            [])

        config_path = os.path.join(self.cmd_root, COVERAGE_CONFIGURATION_FILE)
        if os.path.isfile(config_path):
            coverage_args = ['--cov-config', config_path] + coverage_args

        # xdist appears to lock up the test suite with python2, maybe due
        # to an interaction with coverage
        enable_xdist = [] if PY2 else ['-n', str(cpu_count())]

        report_arg = '--json={0}'.format(self.REPORT_FILE)
        self.pytest_args = [report_arg] + enable_xdist + coverage_args
Example 27
Project: neuropythy   Author: noahbenson   File: filemap.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def url_download(url, topath=None, create_dirs=True):
    '''
    url_download(url) yields the contents of the given url as a byte-string.
    url_download(url, topath) downloads the given url to the given path, topath and yields that path
      on success. User-home and environment-variable references in topath are expanded first, and
      the expanded path is the one that is returned.

    The option create_dirs (default: True) may be set to False to prevent the topath directory from
    being created.
    '''
    from contextlib import closing
    # ensure directory exists
    if topath: topath = os.path.expanduser(os.path.expandvars(topath))
    if create_dirs and topath:
        # Resolve the directory before testing it: a bare filename has an empty dirname, which
        # is not a directory, and makedirs on the (existing) working directory would then raise.
        dnm = os.path.abspath(os.path.dirname(topath))
        if not os.path.isdir(dnm): os.makedirs(dnm, 0o755)
    # closing() releases the response on both Python 2 (where it is not a context manager and
    # was previously never closed) and Python 3, so one code path serves both.
    with closing(urllib.request.urlopen(url)) as response:
        if topath is None: return response.read()
        with open(topath, 'wb') as fl:
            shutil.copyfileobj(response, fl)
    return topath
Example 28
Project: DOTA_models   Author: ringringyi   File: ops.py    License: Apache License 2.0 5 votes vote down vote up
def padded_one_hot_encoding(indices, depth, left_pad):
  """Returns a zero padded one-hot tensor.

  This function converts a sparse representation of indices (e.g., [4]) to a
  zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result will simply be a tensor of
  shape (0, depth + left_pad). If depth = 0, then this function just returns
  `None`.

  Args:
    indices: an integer tensor of shape [num_indices].
    depth: depth for the one-hot tensor (integer).
    left_pad: number of zeros to left pad the one-hot tensor with (integer).

  Returns:
    padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns
      `None` if the depth is zero.

  Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth` are
      either negative or non-integers.

  TODO: add runtime checks for depth and indices.
  """
  # Check the type before comparing: on Python 3, ordering a non-numeric value
  # against 0 raises TypeError, which would pre-empt the documented ValueError.
  if not isinstance(depth, six.integer_types) or depth < 0:
    raise ValueError('`depth` must be a non-negative integer.')
  if not isinstance(left_pad, six.integer_types) or left_pad < 0:
    raise ValueError('`left_pad` must be a non-negative integer.')
  if depth == 0:
    return None
  if len(indices.get_shape().as_list()) != 1:
    raise ValueError('`indices` must have rank 1')

  def one_hot_and_pad():
    one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
                                 on_value=1, off_value=0), tf.float32)
    return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
  # Empty `indices` take the zeros branch; the reshape below gives both
  # branches the same (-1, depth + left_pad) layout.
  result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
                   lambda: tf.zeros((depth + left_pad, 0)))
  return tf.reshape(result, [-1, depth + left_pad])
Example 29
Project: fine-lm   Author: akzaidi   File: wnli.py    License: MIT License 5 votes vote down vote up
def example_generator(self, filename):
    """Yield `{"inputs", "label"}` dicts parsed from a tab-separated file.

    The first line is treated as a header and skipped. Every other line must
    split into exactly four tab-separated fields; the first is ignored, the
    next two form the input pair and the last is converted to an int label.

    Args:
      filename: path of the file to read; opened in binary mode and decoded
        as UTF-8.

    Yields:
      dict with "inputs" (a two-element list of strings) and "label" (int).
    """
    # The `with` block closes the handle once the generator is exhausted or
    # closed early; a bare Open() call would leave it open.
    with tf.gfile.Open(filename, "rb") as tsv_file:
      for idx, line in enumerate(tsv_file):
        if idx == 0: continue  # skip header
        if six.PY2:
          line = unicode(line.strip(), "utf-8")
        else:
          line = line.strip().decode("utf-8")
        _, s1, s2, l = line.split("\t")
        inputs = [s1, s2]
        yield {
            "inputs": inputs,
            "label": int(l)
        }
Example 30
Project: fine-lm   Author: akzaidi   File: cnn_dailymail.py    License: MIT License 5 votes vote down vote up
def example_generator(all_files, urls_path, sum_token):
  """Generate one "story + summary" string per story file.

  Each file returned by `example_splits(urls_path, all_files)` is read line by
  line. Lines before the first "@highlight" marker form the story and the
  non-marker lines after it form the summary. Files that end up without a
  story or without a summary are skipped.

  Args:
    all_files: passed through to `example_splits`.
    urls_path: passed through to `example_splits`.
    sum_token: if truthy, story and summary are joined with " <summary> ";
      otherwise with a single space.

  Yields:
    The space-joined story lines, the separator, and the space-joined summary
    lines, concatenated into one string.
  """

  def fix_run_on_sents(line):
    # Append a period to non-empty lines that are not markers and do not
    # already end with one of END_TOKENS.
    if u"@highlight" in line:
      return line
    if not line:
      return line
    if line[-1] in END_TOKENS:
      return line
    return line + u"."

  filelist = example_splits(urls_path, all_files)
  story_summary_split_token = u" <summary> " if sum_token else " "

  for story_file in filelist:
    story = []
    summary = []
    reading_highlights = False
    # Close every story file as soon as it has been read; a bare Open() call
    # would leave one handle open per file.
    with tf.gfile.Open(story_file, "rb") as story_handle:
      for line in story_handle:
        if six.PY2:
          line = unicode(line.strip(), "utf-8")
        else:
          line = line.strip().decode("utf-8")
        line = fix_run_on_sents(line)
        if not line:
          continue
        elif line.startswith(u"@highlight"):
          if not story:
            break  # No article text.
          reading_highlights = True
        elif reading_highlights:
          summary.append(line)
        else:
          story.append(line)

    if (not story) or not summary:
      continue

    yield " ".join(story) + story_summary_split_token + " ".join(summary)