Python absl.logging.warning() Examples

The following are 30 code examples of absl.logging.warning(), collected from open-source projects. Each example notes its original project and source file. You may also want to check out the other available functions and classes of the absl.logging module.
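Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how absl.logging.warning() is typically called. It assumes only the absl-py package; the lazy %-style formatting and the exc_info keyword mirror patterns that recur in the examples.

from absl import app
from absl import logging


def main(argv):
  del argv  # Unused.
  logging.set_verbosity(logging.INFO)
  retries = 3
  # Arguments are interpolated lazily, only if the record is emitted.
  logging.warning('Request failed, will retry up to %d times.', retries)
  try:
    1 / 0
  except ZeroDivisionError:
    # exc_info=True attaches the current traceback to the log record.
    logging.warning('Ignoring error during cleanup.', exc_info=True)


if __name__ == '__main__':
  app.run(main)  # Parses flags and initializes absl logging before calling main().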
Example #1
Source File: agent.py    From recsim with Apache License 2.0
def unbundle(self, checkpoint_dir, iteration_number, bundle_dict):
    """Restores the agent from a checkpoint.

    Args:
      checkpoint_dir: A string that represents the path to the checkpoint and is
        used when we save TensorFlow objects by tf.Save.
      iteration_number: An integer that represents the checkpoint version and is
        used when restoring replay buffer.
      bundle_dict: A dict containing additional Python objects owned by the
        agent. Each key is an object name and the value is the actual object.

    Returns:
      bool, True if unbundling was successful.
    """
    del checkpoint_dir  # Unused.
    del iteration_number  # Unused.
    if 'episode_num' not in bundle_dict:
      logging.warning(
          'Could not unbundle from checkpoint files with exception.')
      return False
    self._episode_num = bundle_dict['episode_num']
    return True 
Example #2
Source File: space_serializer.py    From trax with Apache License 2.0
def __init__(self, space, vocab_size, precision=2, max_range=(-100.0, 100.0)):
    self._precision = precision

    # Some gym envs (e.g. CartPole) have unreasonably high bounds for
    # observations. We clip so we can represent them.
    bounded_space = copy.copy(space)
    (min_low, max_high) = max_range
    bounded_space.low = np.maximum(space.low, min_low)
    bounded_space.high = np.minimum(space.high, max_high)
    if (not np.allclose(bounded_space.low, space.low) or
        not np.allclose(bounded_space.high, space.high)):
      logging.warning(
          'Space limits %s, %s out of bounds %s. Clipping to %s, %s.',
          str(space.low), str(space.high), str(max_range),
          str(bounded_space.low), str(bounded_space.high)
      )

    super(BoxSpaceSerializer, self).__init__(bounded_space, vocab_size) 
Example #3
Source File: mock_text_seq_label_data.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      train_file_path = os.path.join(self.data_dir, self.train_file)
      dev_file_path = os.path.join(self.data_dir, self.dev_file)
      test_file_path = os.path.join(self.data_dir, self.test_file)

      text_vocab_file = os.path.join(self.data_dir, self.text_vocab)
      label_vocab_file = os.path.join(self.data_dir, self.label_vocab)

      mock_data(self.samples, train_file_path, dev_file_path, test_file_path,
                text_vocab_file, self.text_vocab_list, label_vocab_file, self.label_vocab_list)

    except Exception as e:
      logging.warning(traceback.format_exc())
      return False
    return True 
Example #4
Source File: cats_vs_dogs.py    From datasets with Apache License 2.0
def _generate_examples(self, archive):
    """Generate Cats vs Dogs images and labels given a directory path."""
    num_skipped = 0
    for fname, fobj in archive:
      res = _NAME_RE.match(fname)
      if not res:  # README file, ...
        continue
      label = res.group(1).lower()
      if tf.compat.as_bytes("JFIF") not in fobj.peek(10):
        num_skipped += 1
        continue
      record = {
          "image": fobj,
          "image/filename": fname,
          "label": label,
      }
      yield fname, record

    if num_skipped != _NUM_CORRUPT_IMAGES:
      raise ValueError("Expected %d corrupt images, but found %d" % (
          _NUM_CORRUPT_IMAGES, num_skipped))
    logging.warning("%d images were corrupted and were skipped", num_skipped) 
Example #5
Source File: wmt.py    From datasets with Apache License 2.0
def _parse_tsv(path, language_pair=None):
  """Generates examples from TSV file."""
  if language_pair is None:
    lang_match = re.match(r".*\.([a-z][a-z])-([a-z][a-z])\.tsv", path)
    assert lang_match is not None, "Invalid TSV filename: %s" % path
    l1, l2 = lang_match.groups()
  else:
    l1, l2 = language_pair
  with tf.io.gfile.GFile(path) as f:
    for j, line in enumerate(f):
      cols = line.split("\t")
      if len(cols) != 2:
        logging.warning(
            "Skipping line %d in TSV (%s) with %d != 2 columns.",
            j, path, len(cols))
        continue
      s1, s2 = cols
      yield j, {
          l1: s1.strip(),
          l2: s2.strip()
      } 
Example #6
Source File: eval_coco_format.py    From MAX-Image-Segmenter with Apache License 2.0
def _build_metric(metric,
                  num_categories,
                  ignored_label,
                  max_instances_per_category,
                  intersection_offset=None,
                  normalize_by_image_size=True):
  """Creates a metric aggregator objet of the given name."""
  if metric == 'pq':
    logging.warning('One should check Panoptic Quality results against the '
                    'official COCO API code. Small numerical differences '
                    '(< 0.1%) can be magnified by rounding.')
    return panoptic_quality.PanopticQuality(num_categories, ignored_label,
                                            max_instances_per_category,
                                            intersection_offset)
  elif metric == 'pc':
    return parsing_covering.ParsingCovering(
        num_categories, ignored_label, max_instances_per_category,
        intersection_offset, normalize_by_image_size)
  else:
    raise ValueError('No implementation for metric "%s"' % metric) 
Example #7
Source File: eval_coco_format.py    From MAX-Image-Segmenter with Apache License 2.0
def _is_thing_array(categories_json, ignored_label):
  """is_thing[category_id] is a bool on if category is "thing" or "stuff"."""
  is_thing_dict = {}
  for category_json in categories_json:
    is_thing_dict[category_json['id']] = bool(category_json['isthing'])

  # Check our assumption that the category ids are consecutive.
  # Usually metrics should be able to handle this case, but adding a warning
  # here.
  max_category_id = max(six.iterkeys(is_thing_dict))
  if len(is_thing_dict) != max_category_id + 1:
    seen_ids = six.viewkeys(is_thing_dict)
    all_ids = set(six.moves.range(max_category_id + 1))
    unseen_ids = all_ids.difference(seen_ids)
    if unseen_ids != {ignored_label}:
      logging.warning(
          'Nonconsecutive category ids or no category JSON specified for ids: '
          '%s', unseen_ids)

  is_thing_array = np.zeros(max_category_id + 1)
  for category_id, is_thing in six.iteritems(is_thing_dict):
    is_thing_array[category_id] = is_thing

  return is_thing_array 
Example #8
Source File: mock_text_cls_data.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      for data_type in self.samples_dict:

        samples = self.samples_dict[data_type]
        text_vocab_list = self.text_vocab_dict[data_type]

        train_file_path = os.path.join(self.data_dir,
                                       self.train_file.replace("txt", "") + data_type + ".txt")
        dev_file_path = os.path.join(self.data_dir,
                                     self.dev_file.replace("txt", "") + data_type + ".txt")
        test_file_path = os.path.join(self.data_dir,
                                      self.test_file.replace("txt", "") + data_type + ".txt")
        text_vocab_file = os.path.join(self.data_dir,
                                       self.text_vocab.replace("txt", "") + data_type + ".txt")

        mock_data(samples, train_file_path, dev_file_path, test_file_path, text_vocab_file, text_vocab_list)

    except Exception as e:
      logging.warning(traceback.format_exc())
      return False
    return True 
Example #9
Source File: generate.py    From mathematics_dataset with Apache License 2.0
def main(unused_argv):
  """Prints Q&As from modules according to FLAGS.filter."""
  init_modules()

  text_wrapper = textwrap.TextWrapper(
      width=80, initial_indent=' ', subsequent_indent='  ')

  for regime, flat_modules in six.iteritems(filtered_modules):
    per_module = counts[regime]
    for module_name, module in six.iteritems(flat_modules):
      # These magic print constants make the header bold.
      print('\033[1m{}/{}\033[0m'.format(regime, module_name))
      num_dropped = 0
      for _ in range(per_module):
        problem, extra_dropped = sample_from_module(module)
        num_dropped += extra_dropped
        text = text_wrapper.fill(
            '{}  \033[92m{}\033[0m'.format(problem.question, problem.answer))
        print(text)
      if num_dropped > 0:
        logging.warning('Dropped %d examples', num_dropped) 
Example #10
Source File: kaldi_dir.py    From delta with Apache License 2.0
def validate(self):
    ''' Sanity check. Make sure everything is (probably) OK. '''
    # TODO: more efficient and robust. Also check speakers.
    for utt_key in self.utts.keys():
      first_utt_key = utt_key
      break
    num_props = len(self.utts[first_utt_key])
    for utt_key, utt in self.utts.items():
      if len(utt) != num_props:
        logging.warning('Utt %s has unequal number of props with %s.' % \
                     (utt_key, first_utt_key))
        return False
      if 'spkid' not in utt:
        utt['spkid'] = self.spks[utt.spk].id
    logging.warning(
        'All utts have same number of props, data dir appears to be OK.')
    return True 
Example #11
Source File: config.py    From delta with Apache License 2.0
def config_join_project_path(project_dir: str, config: dict,
                             key_path: List[Union[str, int]]):
  """join project dir on a path"""
  d = config
  try:
    for k in key_path[:-1]:
      d = d[k]
    original_path = d[key_path[-1]]
  except KeyError as e:
    logging.warning(f"key_path: {key_path} not found!")
    raise KeyError(repr(e))
  if isinstance(original_path, list):
    d[key_path[-1]] = [os.path.join(project_dir, p) for p in original_path]
  elif isinstance(original_path, str):
    d[key_path[-1]] = os.path.join(project_dir, original_path)
  else:
    logging.warning(f"key_path: {key_path} error.")
    raise TypeError("path is not str or list!") 
Example #12
Source File: premade.py    From lattice with Apache License 2.0
def from_config(cls, config, custom_objects=None):
    model = super(CalibratedLinear, cls).from_config(
        config, custom_objects=custom_objects)
    try:
      model_config = tf.keras.utils.deserialize_keras_object(
          config.get('model_config'), custom_objects=custom_objects)
      premade_lib.verify_config(model_config)
      model.model_config = model_config
    except ValueError:
      logging.warning(
          'Could not load model_config. Constructing model without it: %s',
          str(config.get('model_config')))
    return model


# TODO: add support for tf.map_fn and inputs of shape (B, ?, input_dim)
# as well as non-ragged inputs using padding/mask. 
Example #13
Source File: executor.py    From tfx with Apache License 2.0
def CheckBlessing(self, input_dict: Dict[Text, List[types.Artifact]]) -> bool:
    """Check that model is blessed by upstream validators.

    Args:
      input_dict: Input dict from input key to a list of artifacts:
        - model_blessing: A `ModelBlessing` artifact from model validator or
          evaluator.
          Pusher looks for a custom property `blessed` in the artifact to check
          it is safe to push.
        - infra_blessing: An `InfraBlessing` artifact from infra validator.
          Pusher looks for a custom property `blessed` in the artifact to
          determine whether the model is mechanically servable from the model
          server to which Pusher is going to push.

    Returns:
      True if the model is blessed by validator.
    """
    # TODO(jyzhao): should this be in driver or executor.
    maybe_model_blessing = input_dict.get(MODEL_BLESSING_KEY)
    if maybe_model_blessing:
      model_blessing = artifact_utils.get_single_instance(maybe_model_blessing)
      if not model_utils.is_model_blessed(model_blessing):
        logging.info('Model on %s was not blessed by model validation',
                     model_blessing.uri)
        return False
    maybe_infra_blessing = input_dict.get(INFRA_BLESSING_KEY)
    if maybe_infra_blessing:
      infra_blessing = artifact_utils.get_single_instance(maybe_infra_blessing)
      if not model_utils.is_infra_validated(infra_blessing):
        logging.info('Model on %s was not blessed by infra validator',
                     infra_blessing.uri)
        return False
    if not maybe_model_blessing and not maybe_infra_blessing:
      logging.warning('Pusher is going to push the model without validation. '
                      'Consider using Evaluator or InfraValidator in your '
                      'pipeline.')
    return True 
Example #14
Source File: register.py    From delta with Apache License 2.0
def _handle_errors(errors):
  """Log out and possibly reraise errors during import."""
  if not errors:
    return
  for name, err in errors:
    logging.warning("Module {} import failed: {}".format(name, err))
  logging.fatal("Please check these modules.") 
Example #15
Source File: register.py    From delta with Apache License 2.0
def __setitem__(self, key, value):
    if not callable(value):
      raise Exception("Value of a Registry must be a callable.")
    if key is None:
      key = value.__name__
    if key in self._dict:
      logging.warning("Key %s already in registry %s." % (key, self._name))
    self._dict[key] = value 
Example #16
Source File: mock_text_nlu_joint_data.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      train_file_path = os.path.join(self.data_dir, self.train_file)
      dev_file_path = os.path.join(self.data_dir, self.dev_file)
      test_file_path = os.path.join(self.data_dir, self.test_file)
      text_vocab_file = os.path.join(self.data_dir, self.text_vocab)

      mock_data(self.samples, train_file_path, dev_file_path,
                test_file_path, text_vocab_file, self.text_vocab_list)

    except Exception as e:
      logging.warning(traceback.format_exc())
      return False
    return True 
Example #17
Source File: atis.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      summary_joint_nlu_data(os.path.join(self.download_dir, "atis.train.pkl"),
                             os.path.join(self.data_dir, self.train_file))
      summary_joint_nlu_data(os.path.join(self.download_dir, "atis.test.pkl"),
                             os.path.join(self.data_dir, self.test_file))
    except Exception as e:

      logging.warning(traceback.format_exc())
      return False
    return True 
Example #18
Source File: atis.py    From delta with Apache License 2.0
def download(self) -> bool:
    train_url = "https://github.com/howl-anderson/ATIS_dataset/raw/master/" \
                "data/raw_data/ms-cntk-atis/atis.train.pkl"
    test_url = "https://github.com/howl-anderson/ATIS_dataset/raw/master/" \
               "data/raw_data/ms-cntk-atis/atis.test.pkl"
    try:
      wget.download(train_url, self.download_dir)
      wget.download(test_url, self.download_dir)
    except Exception as e:
      logging.warning(repr(e))
      return False
    return True 
Example #19
Source File: atis2.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      shutil.move(os.path.join(self.download_dir, "JointSLU/data"),
                  os.path.join(self.download_dir, "origin_data"))
      shutil.rmtree(os.path.join(self.download_dir, "JointSLU"))
      self.to_standard_format(os.path.join(self.download_dir, self.train_download),
                              os.path.join(self.data_dir, self.train_file))
      self.to_standard_format(os.path.join(self.download_dir, self.dev_download),
                              os.path.join(self.data_dir, self.dev_file))
      self.to_standard_format(os.path.join(self.download_dir, self.test_download),
                              os.path.join(self.data_dir, self.test_file))
    except Exception as e:
      logging.warning(traceback.format_exc())
      return False
    return True 
Example #20
Source File: conll_2003.py    From delta with Apache License 2.0
def after_download(self) -> bool:
    try:
      download_file = os.path.join(self.download_dir, "yahoo_answers_csv.tgz")
      os.system(f"tar zxvf {download_file}  -C {self.download_dir}")
      self.to_standard_format(os.path.join(self.download_dir, self.train_file),
                              os.path.join(self.data_dir, self.train_file))
      self.to_standard_format(os.path.join(self.download_dir, self.dev_file),
                              os.path.join(self.data_dir, self.dev_file))
      self.to_standard_format(os.path.join(self.download_dir, self.test_file),
                              os.path.join(self.data_dir, self.test_file))
    except Exception as e:
      logging.warning(traceback.format_exc())
      return False
    return True 
Example #21
Source File: conll_2003.py    From delta with Apache License 2.0
def download(self) -> bool:
    train_url = "https://raw.githubusercontent.com/kyzhouhzau/BERT-NER/master/data/train.txt"
    dev_url = "https://raw.githubusercontent.com/kyzhouhzau/BERT-NER/master/data/dev.txt"
    test_url = "https://raw.githubusercontent.com/kyzhouhzau/BERT-NER/master/data/test.txt"
    try:
      wget.download(train_url, self.download_dir)
      wget.download(dev_url, self.download_dir)
      wget.download(test_url, self.download_dir)
    except Exception as e:
      logging.warning(repr(e))
      return False
    return True 
Example #22
Source File: layout_optimizer.py    From mesh with Apache License 2.0
def evaluate_layout(self, layout):
    """The current objective value for the given layout.

    TODO(joshuawang): The current function does not check that the given
    layout is valid.

    Args:
      layout: a string, representing a layout to evaluate (e.g.
          "d_ff:m1;heads:m2").

    Returns:
      A float, the objective value.
    """
    layout_dict = {}
    if layout:
      for pair in layout.split(";"):
        mtf_dimension_name, mesh_dimension_name = pair.split(":", 1)
        if (mtf_dimension_name in
            self._layout_validator.splittable_mtf_dimension_names):
          layout_dict[mtf_dimension_name] = mesh_dimension_name
        else:
          logging.warning("Skipping unsplittable dimension %s.",
                          mtf_dimension_name)

    tensor_memory = {}  # {string: float}, size of each tensor under our layout
    for tensor_name in self._graph.get_all_tensor_names():
      if self._graph.is_tensor_on_canonical_device(tensor_name):
        tensor_memory[tensor_name] = self._graph.get_tensor_size(
            tensor_name, layout_dict,
            self._layout_validator.mesh_dimension_name_to_size)
      else:
        tensor_memory[tensor_name] = 0.0

    peak_memory_usage = 0.0
    for tensor_names in self._get_memory_contents():
      memory_usage = 0.0
      for tensor_name in tensor_names:
        memory_usage += tensor_memory[tensor_name]
      peak_memory_usage = max(peak_memory_usage, memory_usage)
    return peak_memory_usage 
Example #23
Source File: executor.py    From tfx with Apache License 2.0
def _Cleanup(self):
    for cleanup in self._cleanups:
      try:
        cleanup()
      except:  # pylint: disable=broad-except, bare-except
        logging.warning('Error occurred during cleanup.', exc_info=True) 
Example #24
Source File: saved_model_v2_predictor.py    From tensor2robot with Apache License 2.0
def restore(self):
    """Restores the model parameters from the latest available data."""

    logging.info('Trying to restore saved model from %s',
                 self._saved_model_path)
    # Get the expected assets filename.
    t2r_assets_dir = os.path.join(self._saved_model_path,
                                  tensorspec_utils.EXTRA_ASSETS_DIRECTORY)
    t2r_assets_filename = os.path.join(t2r_assets_dir,
                                       tensorspec_utils.T2R_ASSETS_FILENAME)

    start_time = time.time()
    while time.time() - start_time < self._timeout:
      # Check for the assets.extra/t2r_assets.pbtxt file which is materialized
      # last. Otherwise we should check for saved_model.pb
      if tf.io.gfile.exists(t2r_assets_filename):
        break

      logging.info('Waiting for a saved model to become available at %s.',
                   self._saved_model_path)
      time.sleep(_BUSY_WAITING_SLEEP_TIME_IN_SECS)
    else:
      logging.warning('No saved_model found after %s seconds.',
                      str(self._timeout))
      return False

    # Loading assets for features and labels.
    t2r_assets_file_path = os.path.join(self._saved_model_path,
                                        tensorspec_utils.EXTRA_ASSETS_DIRECTORY,
                                        tensorspec_utils.T2R_ASSETS_FILENAME)
    t2r_assets = tensorspec_utils.load_t2r_assets_to_file(t2r_assets_file_path)

    self._feature_spec = tensorspec_utils.TensorSpecStruct.from_proto(
        t2r_assets.feature_spec)  # pytype: disable=wrong-arg-types
    self._label_spec = tensorspec_utils.TensorSpecStruct.from_proto(
        t2r_assets.label_spec)  # pytype: disable=wrong-arg-types

    self._model = tf.saved_model.load(self._saved_model_path)
    return True 
Example #25
Source File: tensorspec_utils.py    From tensor2robot with Apache License 2.0
def map_feed_dict_unsafe(feature_placeholders_spec, np_inputs_spec):
  """Deprecated function to create a feed_dict to be passed to session.run.

  tensorspec_utils.map_feed_dict should be used instead.  map_feed_dict_unsafe
  does not check that there is actually any agreement between
  feature_placeholders_spec or np_inputs spec in terms of dtype, shape
  or additional unused attributes within np_inputs_spec.

  Args:
    feature_placeholders_spec: An TensorSpecStruct containing
      {str: tf.placeholder}.
    np_inputs_spec: The numpy input according to the same spec.

  Returns:
    A mapping {placeholder: np.ndarray} which can be fed to a tensorflow
      session.run.
  """
  logging.warning('map_feed_dict_unsafe is deprecated. '
                  'Please update to map_feed_dict.')
  flat_spec = flatten_spec_structure(feature_placeholders_spec)
  flat_np_inputs = flatten_spec_structure(np_inputs_spec)
  for key, value in flat_np_inputs.items():
    if key not in flat_spec:
      logging.warn(
          'np_inputs has an input: %s, not found in the tensorspec.', key)
  feed_dict = {}
  for key, value in flat_spec.items():
    feed_dict[value] = flat_np_inputs[key]
  return feed_dict 
Example #26
Source File: sun.py    From datasets with Apache License 2.0
def _decode_image(fobj, session, filename):
  """Reads and decodes an image from a file object as a Numpy array.

  The SUN dataset contains images in several formats (despite the fact that
  all of them have .jpg extension). Some of them are:
    - BMP (RGB)
    - PNG (grayscale, RGBA, RGB interlaced)
    - JPEG (RGB)
    - GIF (1-frame RGB)
  Since TFDS assumes that all images have the same number of channels, we
  convert all of them to RGB.

  Args:
    fobj: File object to read from.
    session: TF session used to decode the images.
    filename: Filename of the original image in the archive.

  Returns:
    Numpy array with shape (height, width, channels).
  """

  buf = fobj.read()
  image = tfds.core.lazy_imports.cv2.imdecode(
      np.fromstring(buf, dtype=np.uint8), flags=3)  # Note: Converts to RGB.
  if image is None:
    logging.warning(
        "Image %s could not be decoded by OpenCV, falling back to TF", filename)
    try:
      image = tf.image.decode_image(buf, channels=3)
      image = session.run(image)
    except tf.errors.InvalidArgumentError:
      logging.fatal("Image %s could not be decoded by Tensorflow", filename)

  # The GIF images contain a single frame.
  if len(image.shape) == 4:  # rank=4 -> rank=3
    image = image.reshape(image.shape[1:])

  return image 
Example #27
Source File: abstract_model.py    From tensor2robot with Apache License 2.0
def default_init_from_checkpoint_fn(checkpoint,
                                    allow_partial_restore = False):
  """init_from_checkpoint_fn that can be used to init a model from a checkpoint.

  Args:
    checkpoint: String pointing to path of TF checkpoint.
    allow_partial_restore: If True, we allow partial restore, otherwise we raise
      an error if a variable cannot be restored.

  Raises:
    A ValueError if a variable(s) is missing and partial restore is not
    explicitly enabled.
  """
  logging.info('Initializing model weights from %s', checkpoint)
  reader = tf.train.load_checkpoint(checkpoint)
  variables_to_restore = contrib_framework.get_variables()
  assignment_map = {}
  for v in variables_to_restore:
    op_name = v.op.name
    if reader.has_tensor(op_name):
      logging.info('Loading variable %s from checkpoint', op_name)
      assignment_map[op_name] = v
    elif allow_partial_restore:
      logging.warning('Variable %s is not in the checkpoint, skipping.',
                      op_name)
    else:
      raise ValueError('Attempting to restore variable {} which is '
                       'not in the checkpoint.'.format(op_name))

  tf.train.init_from_checkpoint(checkpoint, assignment_map) 
Example #28
Source File: wmt.py    From datasets with Apache License 2.0
def _parse_hindencorp(path):
  with tf.io.gfile.GFile(path) as f:
    for line_id, line in enumerate(f):
      split_line = line.split("\t")
      if len(split_line) != 5:
        logging.warning("Skipping invalid HindEnCorp line: %s", line)
        continue
      yield line_id, {
          "en": split_line[3].strip(),
          "hi": split_line[4].strip()
      } 
Example #29
Source File: gng_impl.py    From loaner with Apache License 2.0
def load_constants_from_storage(self):
    """Attempts to load constants from Google Cloud Storage."""
    try:
      constants = self._storage_api.get_blob(
          self._config.constants_storage_path,
          self._config.bucket,
      )
    except storage.NotFoundError as err:
      logging.error('Constants were not found in storage: %s', err)
    else:
      for name in self._constants.keys():
        try:
          self._constants[name].value = constants[name]
        except ValueError:
          logging.warning(
              'The value %r for %r stored in Google Cloud Storage does not meet'
              ' the requirements. Using the default value...',
              constants[name], name)
        except KeyError:
          logging.info(
              'The key %r was not found in the stored constants, this may be '
              'because a new constant was added since your most recent '
              'configuration. To resolve run `configure` in the main menu.',
              name) 
Example #30
Source File: xsum.py    From datasets with Apache License 2.0
def _generate_examples(self, split_ids=None, path=None):
    """Yields examples."""
    missing = 0
    total_num = len(split_ids)
    for i in split_ids:
      filename = os.path.join(path, i + ".data")
      if tf.io.gfile.exists(filename):
        with tf.io.gfile.GFile(filename) as f:
          text = "".join([
              line for line in f.readlines()
              if line not in _REMOVE_LINES and line.strip()
          ])
          # Each file follows below format:
          # [XSUM]URL[XSUM]
          # http://somelink
          #
          # [XSUM]INTRODUCTION[XSUM]
          # some intro
          #
          # [XSUM]RESTBODY[XSUM]
          # text line.
          # another text line.
          # "another text line."
          segs = text.split("[XSUM]")
          yield i, {_DOCUMENT: segs[6].strip(), _SUMMARY: segs[4].strip()}
      else:
        missing += 1
        logging.info("id %s missing.", i)
    if missing:
      logging.warning("%d out of %d examples are missing.", missing, total_num)