Python six.ensure_text() Examples

The following are 30 code examples of six.ensure_text(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module six, or try the search function.
Example #1
Source File: checkpoint_hooks.py    From tensor2robot with Apache License 2.0 6 votes vote down vote up
def _copy_savedmodel(self, source_dir, destination):
    """Copies a SavedModel directory underneath `destination`.

    The copy is placed at `destination/<basename of source_dir>`; the copying
    itself is delegated to `copy_fn`. `source_dir` is assumed to follow the
    SavedModel layout.

    Args:
      source_dir: Path of the directory to copy (bytes or text).
      destination: Base directory that receives the copy (bytes or text).

    Returns:
      The path the model was copied to.
    """
    # Coerce both paths to text so the os.path helpers never mix str and bytes.
    src = six.ensure_text(source_dir)
    target = os.path.join(
        six.ensure_text(destination), os.path.basename(src))
    copy_fn(src, target)
    return target
Example #2
Source File: tfjs_rewriter.py    From tfx with Apache License 2.0 6 votes vote down vote up
def _rewrite(self, original_model: rewriter.ModelDescription,
               rewritten_model: rewriter.ModelDescription):
    """Converts the original model into a TFJS model.

    Args:
      original_model: A `ModelDescription` whose `path` locates the model to
        convert.
      rewritten_model: A `ModelDescription` giving the requested output type
        and the `path` where the converted model is written.

    Raises:
      ValueError: If the requested output type is neither `TFJS_MODEL` nor
        `ANY_MODEL`.
    """
    supported_types = (rewriter.ModelType.TFJS_MODEL,
                       rewriter.ModelType.ANY_MODEL)
    if rewritten_model.model_type not in supported_types:
      raise ValueError('TFJSConverter can only convert to the TFJS format.')

    # Paths may arrive as bytes; the converter is handed text in both cases.
    source_path = six.ensure_text(original_model.path)
    target_path = six.ensure_text(rewritten_model.path)
    _convert_tfjs_model(source_path, target_path)
Example #3
Source File: print_tf_records.py    From lingvo with Apache License 2.0 6 votes vote down vote up
def _CustomShortDebugString(tf_example):
  """Renders the features of `tf_example` as sorted 'name: value' lines.

  Bytes features are decoded as UTF-8 before formatting when
  `FLAGS.bytes_as_utf8` is set; int64 features are formatted with `repr`.

  Args:
    tf_example: Object exposing `features.feature`, a name -> feature mapping.

  Returns:
    A single string with one 'name: value' entry per line.
  """
  lines = []
  features = tf_example.features.feature
  for name, value in sorted(six.iteritems(features)):
    if value.HasField('bytes_list'):
      raw_values = value.bytes_list.value
      if FLAGS.bytes_as_utf8:
        raw_values = [six.ensure_text(v, 'utf-8') for v in raw_values]
      value_string = _ListDebugString(raw_values)
    elif value.HasField('float_list'):
      value_string = _ListDebugString(value.float_list.value)
    elif value.HasField('int64_list'):
      value_string = _ListDebugString(value.int64_list.value, to_string=repr)
    # NOTE(review): if none of the three list fields is set, `value_string`
    # keeps the previous iteration's value (or is unbound on the first pass).
    # Confirm such features cannot occur.
    lines.append('%s: %s' % (name, value_string))
  return '\n'.join(lines)
Example #4
Source File: albert_tokenization.py    From bert-for-tf2 with MIT License 6 votes vote down vote up
def printable_text(text):
    """Returns `text` as the native `str` type of the running interpreter.

    Printing and `tf.logging` want `str` on both Python 2 and Python 3, but
    `str` is a Unicode string on Python 3 and a byte string on Python 2, so
    the non-native string type is converted accordingly.

    Raises:
      ValueError: If `text` is not a string type, or the interpreter is
        neither Python 2 nor Python 3.
    """
    if six.PY3:
        if isinstance(text, str):
            return text
        if isinstance(text, bytes):
            # Undecodable bytes are dropped rather than raising.
            return six.ensure_text(text, "utf-8", "ignore")
    elif six.PY2:
        if isinstance(text, str):
            return text
        if isinstance(text, six.text_type):
            return six.ensure_binary(text, "utf-8")
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    # Reached only when a supported interpreter got an unsupported type.
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #5
Source File: wpm_encoder.py    From lingvo with Apache License 2.0 6 votes vote down vote up
def __init__(self, wpm_filepath, merge_prob=1.):
    """Creates a WPM encoder from a vocabulary file.

    Each line of the file contributes one piece: the text before the first
    tab, after stripping surrounding whitespace.

    Args:
      wpm_filepath: Path to the file containing the vocabulary.
      merge_prob: Probability of merging tokens while encoding.
    """
    self._pieces = []
    for raw_line in py_utils.ReadFileLines(wpm_filepath):
      # Lines may come back as bytes; decode them before splitting.
      if isinstance(raw_line, six.binary_type):
        raw_line = six.ensure_text(raw_line, 'utf-8')
      piece, _, _ = raw_line.strip().partition('\t')
      self._pieces.append(piece)
    self._merge_prob = merge_prob
Example #6
Source File: albert_tokenization.py    From bert-for-tf2 with MIT License 6 votes vote down vote up
def convert_to_unicode(text):
    """Returns `text` as a Unicode string, decoding byte strings as UTF-8.

    Bytes that are not valid UTF-8 are silently dropped.

    Raises:
      ValueError: If `text` is neither a text nor a byte string, or the
        interpreter is neither Python 2 nor Python 3.
    """
    if not (six.PY3 or six.PY2):
        raise ValueError("Not running on Python2 or Python 3?")

    # six.text_type / six.binary_type are (str, bytes) on Python 3 and
    # (unicode, str) on Python 2, so one pair of checks covers both.
    if isinstance(text, six.text_type):
        return text
    if isinstance(text, six.binary_type):
        return six.ensure_text(text, "utf-8", "ignore")
    raise ValueError("Unsupported string type: %s" % (type(text)))
Example #7
Source File: testutils.py    From pythonfinder with MIT License 6 votes vote down vote up
def normalize_path(path):
    # type: (AnyStr) -> AnyStr
    """
    Expand, absolutize and case-normalize a path.

    User (``~``) and environment-variable references are expanded first. On
    Windows, an existing path is resolved to its long form through
    ``GetLongPathNameW`` when ``ctypes`` is importable; every other case goes
    through ``os.path.normcase`` and ``os.path.normpath``.

    :param str path: The non-normalized path
    :return: A normalized, expanded, case-normalized path
    :rtype: str
    """

    expanded = os.path.expanduser(str(path))
    expanded = os.path.abspath(os.path.expandvars(expanded))

    if os.name != "nt" or not os.path.exists(expanded):
        return os.path.normpath(os.path.normcase(expanded))

    try:
        from ctypes import create_unicode_buffer, windll
    except ImportError:
        return os.path.normpath(os.path.normcase(expanded))

    # The long path name is written into a fixed-size wide-character buffer.
    buffer_length = 500
    long_name = create_unicode_buffer(buffer_length)
    windll.kernel32.GetLongPathNameW(
        six.ensure_text(expanded), long_name, buffer_length)
    return long_name.value
Example #8
Source File: tokenization.py    From albert with Apache License 2.0 6 votes vote down vote up
def preprocess_text(inputs, remove_space=True, lower=False):
  """Normalizes `inputs`: whitespace, Unicode form, accents and case.

  Args:
    inputs: The string to preprocess.
    remove_space: If True, strip the ends and collapse whitespace runs into
      single spaces.
    lower: If True, lowercase the result.

  Returns:
    The NFKD-normalized text with combining characters removed.
  """
  text = " ".join(inputs.strip().split()) if remove_space else inputs

  if six.PY2 and isinstance(text, str):
    # Python 2 byte string: try UTF-8 first, then fall back to latin-1.
    try:
      text = six.ensure_text(text, "utf-8")
    except UnicodeDecodeError:
      text = six.ensure_text(text, "latin-1")

  decomposed = unicodedata.normalize("NFKD", text)
  stripped = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
  return stripped.lower() if lower else stripped
Example #9
Source File: albert_tokenization.py    From bert-for-tf2 with MIT License 6 votes vote down vote up
def preprocess_text(inputs, remove_space=True, lower=False):
    """Cleans up `inputs` for tokenization.

    Optionally collapses whitespace, decodes Python 2 byte strings, applies
    NFKD normalization, removes combining characters and optionally
    lowercases.

    Args:
        inputs: The string to preprocess.
        remove_space: Whether to strip and collapse whitespace.
        lower: Whether to lowercase the result.

    Returns:
        The preprocessed string.
    """
    if remove_space:
        cleaned = " ".join(inputs.strip().split())
    else:
        cleaned = inputs

    if six.PY2 and isinstance(cleaned, str):
        # Byte strings on Python 2: prefer UTF-8, fall back to latin-1.
        try:
            cleaned = six.ensure_text(cleaned, "utf-8")
        except UnicodeDecodeError:
            cleaned = six.ensure_text(cleaned, "latin-1")

    kept_chars = [
        char
        for char in unicodedata.normalize("NFKD", cleaned)
        if not unicodedata.combining(char)
    ]
    cleaned = "".join(kept_chars)
    if not lower:
        return cleaned
    return cleaned.lower()
Example #10
Source File: os_utilities.py    From luci-py with Apache License 2.0 6 votes vote down vote up
def get_hostname():
  """Returns the machine's hostname as text.

  On GCE (outside a Docker container) the hostname from the GCE metadata is
  used when present. Otherwise the lower-cased FQDN is used, falling back to
  the plain host name when the FQDN is a reverse-DNS '.in-addr.arpa' name.
  """
  if platforms.is_gce() and not os.path.isfile('/.dockerenv'):
    # When running on GCE, always use the hostname as defined by GCE. It's
    # possible the VM hadn't learned about it yet. We ignore GCE hostname when
    # running inside a Docker container and instead use its own hostname.
    meta = platforms.gce.get_metadata() or {}
    hostname = meta.get('instance', {}).get('hostname')
    if hostname:
      # ensure_text accepts both bytes and text, whereas a bare .decode()
      # fails on a Python 3 str. This also matches the final return below.
      return six.ensure_text(hostname, 'utf-8')

  # Windows enjoys putting random case in there. Enforces lower case for sanity.
  hostname = socket.getfqdn().lower()
  if hostname.endswith('.in-addr.arpa'):
    # When OSX fails to get the FQDN, it returns as the base name the IPv4
    # address reversed, which is not useful. Get the base hostname as defined by
    # the host itself instead of the FQDN since the returned FQDN is useless.
    hostname = socket.gethostname()
  return six.ensure_text(hostname)
Example #11
Source File: tokenization.py    From albert with Apache License 2.0 6 votes vote down vote up
def printable_text(text):
  """Converts `text` to the interpreter's native `str` for printing/logging.

  `str` is Unicode on Python 3 and bytes on Python 2, so the conversion
  direction depends on the interpreter: bytes are decoded on Python 3 and
  Unicode is encoded on Python 2, both as UTF-8.

  Raises:
    ValueError: If `text` is not a string type, or the interpreter is neither
      Python 2 nor Python 3.
  """
  if not (six.PY3 or six.PY2):
    raise ValueError("Not running on Python2 or Python 3?")

  # Already the native string type: nothing to do on either interpreter.
  if isinstance(text, str):
    return text

  if six.PY3:
    if isinstance(text, bytes):
      return six.ensure_text(text, "utf-8", "ignore")
  elif isinstance(text, six.text_type):
    return six.ensure_binary(text, "utf-8")

  raise ValueError("Unsupported string type: %s" % (type(text)))
Example #12
Source File: tokenization.py    From albert with Apache License 2.0 6 votes vote down vote up
def convert_to_unicode(text):
  """Returns the Unicode form of `text`, treating byte strings as UTF-8.

  Invalid UTF-8 sequences are ignored while decoding.

  Raises:
    ValueError: If `text` is not a string type, or the interpreter is neither
      Python 2 nor Python 3.
  """
  if six.PY3:
    if isinstance(text, str):
      return text
    if isinstance(text, bytes):
      return six.ensure_text(text, "utf-8", "ignore")
  elif six.PY2:
    # On Python 2 `str` is the byte string and must be decoded.
    if isinstance(text, str):
      return six.ensure_text(text, "utf-8", "ignore")
    if isinstance(text, six.text_type):
      return text
  else:
    raise ValueError("Not running on Python2 or Python 3?")

  raise ValueError("Unsupported string type: %s" % (type(text)))
Example #13
Source File: onsets_frames_transcription_transcribe.py    From magenta with Apache License 2.0 6 votes vote down vote up
def create_example(filename, sample_rate, load_audio_with_librosa):
  """Processes an audio file into a serialized Example proto.

  Args:
    filename: Path of the audio file to read; also used as the example id.
    sample_rate: Sample rate forwarded to the record processing.
    load_audio_with_librosa: Forwarded unchanged to the record processing.

  Returns:
    The serialized form of the single example produced for the file.
  """
  # Read inside a context manager so the file handle is closed promptly
  # instead of being left for garbage collection.
  with tf.gfile.Open(filename, 'rb') as audio_file:
    wav_data = audio_file.read()
  example_list = list(
      audio_label_data_utils.process_record(
          wav_data=wav_data,
          sample_rate=sample_rate,
          ns=music_pb2.NoteSequence(),
          # decode to handle filenames with extended characters.
          example_id=six.ensure_text(filename, 'utf-8'),
          min_length=0,
          max_length=-1,
          allow_empty_notesequence=True,
          load_audio_with_librosa=load_audio_with_librosa))
  assert len(example_list) == 1
  return example_list[0].SerializeToString()
Example #14
Source File: tokenization.py    From embedding-as-service with MIT License 6 votes vote down vote up
def preprocess_text(inputs, remove_space=True, lower=False):
    """Preprocesses text by squeezing whitespace and normalizing characters.

    Args:
        inputs: The string to preprocess.
        remove_space: When True, leading/trailing whitespace is removed and
            inner whitespace runs become single spaces.
        lower: When True, the result is lowercased.

    Returns:
        The NFKD-normalized string with combining characters removed.
    """
    result = inputs
    if remove_space:
        words = inputs.strip().split()
        result = " ".join(words)

    needs_decoding = six.PY2 and isinstance(result, str)
    if needs_decoding:
        # Python 2 byte strings: UTF-8 first, latin-1 as the fallback.
        try:
            result = six.ensure_text(result, "utf-8")
        except UnicodeDecodeError:
            result = six.ensure_text(result, "latin-1")

    result = unicodedata.normalize("NFKD", result)
    result = "".join(
        symbol for symbol in result if not unicodedata.combining(symbol))
    if lower:
        return result.lower()
    return result
Example #15
Source File: plugin.py    From fairness-indicators with Apache License 2.0 6 votes vote down vote up
def _get_evaluation_result(self, request):
    """Responds with the slicing metrics of the run named in the request.

    The 'run' query argument selects the run. The first string value of its
    first tensor under this plugin's name is used as the evaluation result
    directory. On a `KeyError` or proto parse error an empty list is returned
    instead.

    Args:
      request: The incoming HTTP request; `request.args` is read for 'run'.

    Returns:
      A JSON HTTP response built by `http_util.Respond`.
    """
    run = request.args.get('run')
    try:
      run = six.ensure_text(run)
    except (UnicodeDecodeError, AttributeError, TypeError):
      # Best effort: six.ensure_text raises TypeError for non-string input
      # (e.g. None when 'run' is missing). Keep the raw value and let the
      # lookup below decide how to handle it.
      pass

    data = []
    try:
      eval_result_output_dir = six.ensure_text(
          self._multiplexer.Tensors(run, FairnessIndicatorsPlugin.plugin_name)
          [0].tensor_proto.string_val[0])
      eval_result = tfma.load_eval_result(output_path=eval_result_output_dir)
      # TODO(b/141283811): Allow users to choose different model output names
      # and class keys in case of multi-output and multi-class model.
      data = widget_view.convert_slicing_metrics_to_ui_input(
          eval_result.slicing_metrics)
    except (KeyError, json_format.ParseError) as error:
      logging.info('Error while fetching evaluation data, %s', error)
    return http_util.Respond(request, data, content_type='application/json')
Example #16
Source File: tokenization.py    From embedding-as-service with MIT License 6 votes vote down vote up
def convert_to_unicode(text):
    """Ensures `text` is a Unicode string, assuming UTF-8 for byte input.

    Byte sequences that are not valid UTF-8 are skipped during decoding.

    Raises:
        ValueError: If `text` is not a string type, or the interpreter is
            neither Python 2 nor Python 3.
    """
    known_interpreter = six.PY3 or six.PY2
    if not known_interpreter:
        raise ValueError("Not running on Python2 or Python 3?")

    # six.binary_type is `bytes` on Python 3 and `str` on Python 2, which are
    # exactly the types that need decoding on each interpreter.
    if isinstance(text, six.binary_type):
        return six.ensure_text(text, "utf-8", "ignore")
    if not isinstance(text, six.text_type):
        raise ValueError("Unsupported string type: %s" % (type(text)))
    return text
Example #17
Source File: tokenization.py    From embedding-as-service with MIT License 6 votes vote down vote up
def printable_text(text):
    """Returns `text` in the `str` type expected by print and `tf.logging`.

    On Python 3 `str` is Unicode, so bytes are decoded as UTF-8 (ignoring
    errors). On Python 2 `str` is a byte string, so Unicode is encoded as
    UTF-8.

    Raises:
        ValueError: If `text` is not a string type, or the interpreter is
            neither Python 2 nor Python 3.
    """
    if six.PY3:
        other_type = bytes
    elif six.PY2:
        other_type = six.text_type
    else:
        raise ValueError("Not running on Python2 or Python 3?")

    if isinstance(text, str):
        return text
    if not isinstance(text, other_type):
        raise ValueError("Unsupported string type: %s" % (type(text)))

    # `text` is the non-native string type; convert toward native `str`.
    if six.PY3:
        return six.ensure_text(text, "utf-8", "ignore")
    return six.ensure_binary(text, "utf-8")
Example #18
Source File: test_json_serialization.py    From scalyr-agent-2 with Apache License 2.0 6 votes vote down vote up
def _test_json_encode(benchmark, json_lib, log_tuple):
    """
    Benchmark ``json_encode`` on text read from a log fixture file.

    The decoded fixture text is cached per ``log_tuple`` so the file is read
    only once.

    :param benchmark: Benchmark fixture; its ``pedantic`` method runs the test.
    :param json_lib: JSON library to use.
    :param log_tuple: Tuple with (log_filename, log_bytes_to_use).
    """
    set_json_lib(json_lib)

    file_name, bytes_to_read = log_tuple

    encode_cache = CACHED_TEST_DATA["encode"]
    if log_tuple not in encode_cache:
        raw = read_bytes_from_log_fixture_file(file_name, bytes_to_read)
        encode_cache[log_tuple] = six.ensure_text(raw)

    data = encode_cache[log_tuple]

    def run_benchmark():
        return json_encode(data)

    result = benchmark.pedantic(run_benchmark, iterations=20, rounds=50)

    # The library must not change during the run, and the output must be text.
    assert get_json_lib() == json_lib
    assert isinstance(result, six.text_type)
Example #19
Source File: plugin.py    From fairness-indicators with Apache License 2.0 6 votes vote down vote up
def _get_evaluation_result_from_remote_path(self, request):
    """Responds with slicing metrics loaded from a caller-supplied path.

    The 'evaluation_output_path' query argument names the evaluation output
    file. Results are loaded from its parent directory, using the file format
    reported by `_get_output_file_format`. On a `KeyError` or proto parse
    error an empty list is returned instead.

    Args:
      request: The incoming HTTP request; `request.args` is read.

    Returns:
      A JSON HTTP response built by `http_util.Respond`.
    """
    remote_path = request.args.get('evaluation_output_path')
    try:
      remote_path = six.ensure_text(remote_path)
    except (UnicodeDecodeError, AttributeError):
      pass

    # Default payload, replaced only when loading succeeds.
    data = []
    try:
      result_dir = os.path.dirname(remote_path)
      file_format = self._get_output_file_format(remote_path)
      eval_result = tfma.load_eval_result(
          result_dir, output_file_format=file_format)
      data = widget_view.convert_slicing_metrics_to_ui_input(
          eval_result.slicing_metrics)
    except (KeyError, json_format.ParseError) as error:
      logging.info('Error while fetching evaluation data, %s', error)
    return http_util.Respond(request, data, content_type='application/json')
Example #20
Source File: visualization.py    From tensor2robot with Apache License 2.0 6 votes vote down vote up
def tf_put_text(imgs, texts, text_size=1, text_pos=(0, 30),
                text_color=(0, 0, 1)):
  """Draws one text string onto each image of an image tensor.

  Rendering happens in a Python function wrapped with `tf.py_func`.

  Args:
    imgs: Image tensor; the first dimension indexes the images.
    texts: Per-image texts, indexed like `imgs`.
    text_size: Font scale passed to `cv2.putText`.
    text_pos: Position passed to `cv2.putText`.
    text_color: Color passed to `cv2.putText`.

  Returns:
    The op created by `tf.py_func`, with the same dtype as `imgs`.
  """

  def _put_text(image_batch, text_batch):
    """Renders `text_batch[i]` onto `image_batch[i]` for every image."""
    rendered = np.empty_like(image_batch)
    for idx in range(image_batch.shape[0]):
      caption = text_batch[idx]
      if isinstance(caption, bytes):
        caption = six.ensure_text(caption)
      # You may need to adjust text size and position.
      # If your images are in [0, 255] range replace (0, 0, 1) with (0, 0, 255)
      rendered[idx, :, :, :] = cv2.putText(
          image_batch[idx, :, :, :], str(caption), text_pos,
          cv2.FONT_HERSHEY_COMPLEX, text_size, text_color, 1)
    return rendered

  return tf.py_func(_put_text, [imgs, texts], Tout=imgs.dtype)
Example #21
Source File: util.py    From scalyr-agent-2 with Apache License 2.0 6 votes vote down vote up
def json_encode(obj, output=None, binary=False):
    """Serializes an object to JSON.

    In text mode the result of `_json_encode(obj, output)` is returned as
    text. In binary mode the JSON is produced as bytes and either written to
    `output` (returning None) or returned when no `output` is given.

    @param obj: The object to serialize
    @param output: If not None, a file-like object to which the serialization should be written.
    @param binary: If True return binary string, otherwise text string.
    @type obj: dict|list|six.text_type
    @type binary: bool
    """
    # 2->TODO encode json according to 'binary' flag.
    if not binary:
        return six.ensure_text(_json_encode(obj, output))

    encoded = six.ensure_binary(_json_encode(obj, None))
    if not output:
        return encoded
    output.write(encoded)
    return None
Example #22
Source File: scalyr_logging.py    From scalyr-agent-2 with Apache License 2.0 6 votes vote down vote up
def formatException(self, ei):
        """Formats the exception with every line indented by two spaces.

        The indent makes it easier to write a parsing rule that detects stack
        traces.
        """
        # 2->TODO 'logging.Formatter.formatException' returns binary data (str) in python2,
        #  so it is coerced to text before the lines are indented.
        trace_text = six.ensure_text(logging.Formatter.formatException(self, ei))

        # keepends=True preserves the original line terminators.
        return "".join("  " + line for line in trace_text.splitlines(True))
Example #23
Source File: loghandler.py    From jellyfin-kodi with GNU General Public License v3.0 6 votes vote down vote up
def formatException(self, exc_info):
        """Formats a traceback, shortening file paths under the plugin dir.

        Entries starting with '  File "' have their file name replaced by a
        path relative to the real plugin path when the file lives under it.
        """
        plugin_root = os.path.realpath(__pluginpath__)
        formatted = []

        for entry in traceback.format_exception(*exc_info):
            entry = ensure_text(entry, get_filesystem_encoding())

            if entry.startswith('  File "'):
                # If this split can't handle your file names, you should seriously consider renaming your files.
                after_prefix = entry.split('  File "', 2)[1]
                file_name = after_prefix.split('", line ', 1)[0]
                real_name = os.path.realpath(file_name)
                if real_name.startswith(plugin_root):
                    relative = os.path.relpath(real_name, plugin_root)
                    entry = entry.replace(file_name, relative)

            formatted.append(entry)

        return ''.join(formatted)
Example #24
Source File: checkpoint_hooks.py    From tensor2robot with Apache License 2.0 6 votes vote down vote up
def __init__(self,
               export_fn,
               export_dir,
               num_versions=None):
    """Initializes a `CheckpointExportListener`.

    The export directory is created if needed. When `num_versions` is set, a
    version garbage collector is created and told about everything already
    present in the export directory, in sorted order.

    Args:
      export_fn: function which exports the model.
      export_dir: directory to export models
      num_versions: number of exports to keep. If unset, keep all exports.
    """
    self._export_fn = export_fn
    self._export_dir = six.ensure_text(export_dir)
    tf.io.gfile.makedirs(self._export_dir)
    self._gc = None
    if num_versions:
      self._gc = _DirectoryVersionGC(num_versions)
      # Use tf.io.gfile here too, matching makedirs above, rather than the
      # legacy tf.gfile alias.
      export_dir_contents = sorted(tf.io.gfile.listdir(self._export_dir))
      self._gc.observe_multiple([
          os.path.join(self._export_dir, filename)
          for filename in export_dir_contents
      ])
Example #25
Source File: convert_stix.py    From cti-stix-elevator with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def convert_test_mechanism(indicator, indicator_instance):
    """Copy the indicator's test mechanisms into the instance's pattern fields.

    Does nothing when `indicator.test_mechanisms` is None. YARA, Snort and
    OpenIOC mechanisms set `indicator_instance["pattern"]` and
    `indicator_instance["pattern_type"]`; other cases only emit warnings.

    :param indicator: Source indicator; its `test_mechanisms` are read.
    :param indicator_instance: Target object, updated in place via subscript.
    """
    if indicator.test_mechanisms is not None:
        # With spec version 2.0, only a warning is emitted unless the
        # _ALLOW_YARA_AND_SNORT_PATTENS flag is set.
        if not _ALLOW_YARA_AND_SNORT_PATTENS and get_option_value("spec_version") == "2.0":
            warn("YARA/SNORT/IOC or other patterns are not supported in STIX 2.0. See %s", 504, indicator_instance["id"])
            return
        # NOTE(review): `indicator_instance` is read and written by subscript
        # everywhere else, yet the presence of a pattern is tested with
        # hasattr(). For a plain dict this is always False; confirm the
        # object's type or whether `"pattern" in indicator_instance` was meant.
        if hasattr(indicator_instance, "pattern"):
            # TODO: maybe put in description
            warn("Only one type pattern can be specified in %s - using 'stix'", 712, indicator_instance["id"])
        else:
            for tm in indicator.test_mechanisms:
                # Once a pattern is set, further mechanisms are only reported.
                if hasattr(indicator_instance, "pattern"):
                    msg = "Only one alternative test mechanism allowed for %s in STIX 2.1 - used %s, dropped %s"
                    warn(msg, 506, indicator_instance["id"], indicator_instance["pattern_type"], determine_pattern_type(tm))
                else:
                    if isinstance(tm, YaraTestMechanism):
                        indicator_instance["pattern"] = text_type(tm.rule.value)
                        indicator_instance["pattern_type"] = "yara"
                    elif isinstance(tm, SnortTestMechanism):
                        # All Snort rules are joined into one comma-separated pattern.
                        list_of_strings = []
                        for rule in tm.rules:
                            list_of_strings.append(text_type(rule.value))
                        indicator_instance["pattern"] = ", ".join(list_of_strings)
                        indicator_instance["pattern_type"] = "snort"
                    elif isinstance(tm, OpenIOCTestMechanism):
                        # The IOC is kept as serialized XML text, not translated.
                        warn("IOC indicator in %s cannot be converted to a STIX pattern", 410, indicator_instance["id"])
                        indicator_instance["pattern"] = ensure_text(etree.tostring(tm.ioc))
                        indicator_instance["pattern_type"] = "openioc"
Example #26
Source File: agent_runner.py    From scalyr-agent-2 with Apache License 2.0 5 votes vote down vote up
def write_line(self, path, data):
        # type: (Path, six.text_type) -> None
        """Write `data`, coerced to text and followed by a newline, to `path`."""
        line = "{0}\n".format(six.ensure_text(data))
        self.write_to_file(path, line)
Example #27
Source File: _inspector.py    From colabtools with Apache License 2.0 5 votes vote down vote up
def _getdef(self, obj, oname=''):
    """Safe variant of oinspect.Inspector._getdef.

    The upstream _getdef method includes the full string representation of all
    default arguments, which may run arbitrary code. Here the argspec comes
    from the custom _getargspec wrapper and default values are rendered with
    _safe_repr.

    Args:
      obj: function whose definition we want to format.
      oname: (optional) If provided, prefix the definition with this name.

    Returns:
      A formatted definition, or None when no argspec is available or an
      exception was raised (the exception is logged).
    """

    def formatvalue(value):
      return '=' + _safe_repr(value)

    try:
      argspec = _getargspec(obj)
      if argspec is not None:
        signature = inspect.formatargspec(*argspec, formatvalue=formatvalue)
        return six.ensure_text(oname + signature)
    except:  # pylint: disable=bare-except
      logging.exception('Exception raised in ColabInspector._getdef')
    return None
Example #28
Source File: _inspector.py    From colabtools with Apache License 2.0 5 votes vote down vote up
def _getdoc(obj):
  """Custom wrapper for inspect.getdoc.

  IPython.core.oinspect.getdoc wraps Python's inspect.getdoc to catch exceptions
  and allow for objects with a custom getdoc() method. However, there are two
  problems:
   * inspect.getdoc already catches any exceptions
   * it then calls get_encoding, which calls inspect.getfile, which may call
     repr(obj) (to use in an error string, which oinspect.getdoc throws away).

  This wrapper still honors a custom getdoc() method, but never calls
  inspect.getfile.

  Args:
    obj: an object to fetch a docstring for

  Returns:
    A docstring or ''.
  """
  if hasattr(obj, 'getdoc'):
    try:
      custom_doc = obj.getdoc()
    except Exception:  # pylint: disable=broad-except
      # A failing custom getdoc() falls through to inspect.getdoc below.
      custom_doc = None
    if isinstance(custom_doc, six.string_types):
      return custom_doc

  # In principle, we want to find the file associated with obj, and use that
  # encoding here. However, attempting to find the file may lead to calling
  # repr(obj), so we instead assume UTF8 and replace non-UTF8 characters.
  fallback_doc = inspect.getdoc(obj) or ''
  return six.ensure_text(fallback_doc, errors='backslashreplace')
Example #29
Source File: utils.py    From django-drf-filepond with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _get_file_id():
    """Return a freshly generated shortuuid identifier as text."""
    return six.ensure_text(shortuuid.uuid())
Example #30
Source File: jsonrpc.py    From monero-python with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def send_transaction(self, blob, relay=True):
        """Submit a raw transaction through '/sendrawtransaction'.

        :param blob: binary transaction data; sent hex-encoded
        :param relay: when False, the request sets 'do_not_relay'
        :return: the response when its status is 'OK'
        :raises exceptions.TransactionBroadcastError: for any other status
        """
        payload = {
            'tx_as_hex': six.ensure_text(binascii.hexlify(blob)),
            'do_not_relay': not relay}
        res = self.raw_request('/sendrawtransaction', payload)
        if res['status'] != 'OK':
            raise exceptions.TransactionBroadcastError(
                    "{status}: {reason}".format(**res),
                    details=res)
        return res