Python tarfile.open() Examples

The following are 30 code examples of tarfile.open(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tarfile, or try the search function.
Example #1
Source File: device.py    From Paradrop with Apache License 2.0 10 votes vote down vote up
def create(ctx):
    """
    Install a chute from the working directory.
    """
    # NOTE(review): docstring left verbatim; it is presumably surfaced as
    # command help text -- confirm before rewording.
    #
    # Packs every file under the current directory into an uncompressed tar
    # held in a temporary file, POSTs it to "<base_url>/chutes/" and then
    # invokes `watch` with the change_id taken from the JSON response.
    url = "{}/chutes/".format(ctx.obj['base_url'])
    headers = {'Content-Type': 'application/x-tar'}

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in working directory.")

    with tempfile.TemporaryFile() as temp:
        # The context manager finalizes the archive on success and releases
        # it if adding a file raises part-way through.
        with tarfile.open(fileobj=temp, mode="w") as tar:
            for dirName, _subdirs, fileList in os.walk('.'):
                for fname in fileList:
                    path = os.path.join(dirName, fname)
                    # normpath drops the leading "./" from the member name.
                    tar.add(path, arcname=os.path.normpath(path))

        # Rewind so the upload starts at the beginning of the archive.
        temp.seek(0)
        res = router_request("POST", url, headers=headers, data=temp)
        data = res.json()
        ctx.invoke(watch, change_id=data['change_id'])
Example #2
Source File: notify.py    From wechat-alfred-workflow with MIT License 7 votes vote down vote up
def convert_image(inpath, outpath, size):
    """Resize an image by running the ``sips`` command-line tool.

    Args:
        inpath (str): Path of source file.
        outpath (str): Path to destination file.
        size (int): Width and height of destination image in pixels.

    Raises:
        RuntimeError: Raised if ``sips`` exits with non-zero status.
    """
    argv = [b'sips', b'-z', str(size), str(size), inpath, b'--out', outpath]

    # All output from sips is discarded; stderr is folded into stdout.
    with open(os.devnull, 'w') as sink:
        status = subprocess.call(argv, stdout=sink, stderr=subprocess.STDOUT)

    if status == 0:
        return
    raise RuntimeError('sips exited with %d' % status)
Example #3
Source File: dataset_utils.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def download_and_uncompress_tarball(tarball_url, dataset_dir):
  """Downloads the `tarball_url` and uncompresses it locally.

  The archive is saved as `dataset_dir/<last component of the URL>` and then
  extracted into `dataset_dir`. Download progress is written to stdout.

  Args:
    tarball_url: The URL of a gzip-compressed tarball file.
    dataset_dir: The directory where the archive is stored and extracted.
  """
  filename = tarball_url.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  def _progress(count, block_size, total_size):
    # Report hook for urlretrieve: rewrites a single percentage line.
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()
  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  # Close the archive deterministically instead of leaving the open handle
  # to the garbage collector.
  # NOTE(review): extractall trusts member paths; use only on trusted archives.
  with tarfile.open(filepath, 'r:gz') as tar:
    tar.extractall(dataset_dir)
Example #4
Source File: persistence.py    From multibootusb with GNU General Public License v2.0 6 votes vote down vote up
def detect_missing_tools(distro):
    """Check that the tools needed for resize2fs-based persistence can start.

    Returns None when `distro` is not in `creator_dict`, when its creator is
    not `create_persistence_using_resize2fs`, or when both e2fsck and
    resize2fs could be launched. Otherwise returns a message naming the tool
    that could not be started.
    """
    tools_dir = os.path.join('data', 'tools')
    if platform.system() == 'Windows':
        # NOTE(review): _7zip_exe is resolved but not used in this function.
        _7zip_exe = gen.resource_path(
            os.path.join(tools_dir, '7zip', '7z.exe'))
        e2fsck_exe = gen.resource_path(os.path.join(tools_dir, 'cygwin', 'e2fsck.exe'))
        resize2fs_exe = gen.resource_path(os.path.join(tools_dir, 'cygwin', 'resize2fs.exe'))
    else:
        _7zip_exe = '7z'
        e2fsck_exe = 'e2fsck'
        resize2fs_exe = 'resize2fs'
    if distro not in creator_dict or \
       creator_dict[distro][0] is not create_persistence_using_resize2fs:
        return None
    try:
        # The null device receives the children's stdout/stderr, so it must be
        # opened for writing (the default mode is read-only).
        with open(os.devnull, 'w') as devnull:
            for tool in [e2fsck_exe, resize2fs_exe]:
                # Only the ability to launch the tool matters; its exit
                # status is ignored.
                p = subprocess.Popen([tool], stdout=devnull, stderr=devnull)
                p.communicate()
    except FileNotFoundError:  # Windows
        return "'%s.exe' is not installed or not available for use." % tool
    except OSError:            # Linux
        return "'%s' is not installed or not available for use." % tool
    return None
Example #5
Source File: node.py    From Paradrop with Apache License 2.0 6 votes vote down vote up
def update_chute(ctx, directory):
    """
    Install a new version of the chute from the working directory.

    Install the files in the current directory as a chute on the node.
    The directory must contain a paradrop.yaml file.  The entire directory
    will be copied to the node for installation.
    """
    # NOTE(review): docstring left verbatim; it is presumably surfaced as
    # command help text -- confirm before rewording.
    #
    # This changes the process-wide working directory; every relative path
    # below is resolved against `directory`.
    os.chdir(directory)

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in chute directory.")

    with open('paradrop.yaml', 'r') as source:
        config = yaml.safe_load(source)

    if 'name' not in config:
        click.echo('Chute name is not defined in paradrop.yaml.')
        return

    client = ctx.obj['client']
    with tempfile.TemporaryFile() as temp:
        # The context manager finalizes the archive on success and releases
        # it if adding a file raises part-way through.
        with tarfile.open(fileobj=temp, mode="w") as tar:
            for dirName, _subdirs, fileList in os.walk("."):
                for fname in fileList:
                    path = os.path.join(dirName, fname)
                    # normpath drops the leading "./" from the member name.
                    tar.add(path, arcname=os.path.normpath(path))

        # Rewind so the upload starts at the beginning of the archive.
        temp.seek(0)
        result = client.install_tar(temp, name=config['name'])
        ctx.invoke(watch_change_logs, change_id=result['change_id'])
Example #6
Source File: archive.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def __init__(self, default_compression="gz", logfile=None, verbose=True):
        """
        Constructor

        Validates the arguments, forwards them to the parent constructor,
        registers `tarfile.open` under the 'tar' key of `self._open` and
        stores the default compression.

        @param default_compression: default compression used for files
        @type default_compression: str | unicode
        @param logfile: file handler or file path to a log file
        @type logfile: file | io.FileIO | StringIO.StringIO | basestring
        @param verbose: Not verbose means that only warnings and errors will be passed to stream
        @type verbose: bool

        @return: None
        @rtype: None
        """
        assert logfile is None or isinstance(logfile, basestring) or self.is_stream(logfile)
        # The message names the argument actually being checked.
        assert isinstance(default_compression, basestring), "default_compression must be string"
        assert isinstance(verbose, bool), "verbose must be true or false"
        # self._open is read before the parent constructor runs, so it is
        # presumably a class-level mapping of compression name -> opener.
        assert default_compression.lower() in self._open, "Unknown compression: '{}'".format(default_compression)

        super(Archive, self).__init__(label="Archive", default_compression=default_compression, logfile=logfile, verbose=verbose)

        self._open['tar'] = tarfile.open
        self._default_compression = default_compression
Example #7
Source File: style_transfer.py    From fine-lm with MIT License 6 votes vote down vote up
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Download and unpack the corpus, then iterate over the text pairs.

    The archive URL is the first entry returned by `self.dataset_url`. After
    extraction into `tmp_dir`, the "<tag>.modern" file is used as source and
    "<tag>.original" as target, where tag is "train" for the training split
    and "dev" otherwise.
    """
    dataset = self.dataset_url(dataset_split)

    if dataset_split == problem.DatasetSplit.TRAIN:
      tag = "train"
    else:
      tag = "dev"

    url = dataset[0][0]
    archive_name = os.path.basename(url)
    archive_path = os.path.join(tmp_dir, archive_name)
    generator_utils.maybe_download(tmp_dir, archive_name, url)

    # Only gzip-compressed archives need an explicit decompression mode.
    if archive_path.endswith("gz"):
      tar_mode = "r:gz"
    else:
      tar_mode = "r"
    with tarfile.open(archive_path, tar_mode) as archive:
      archive.extractall(tmp_dir)

    if self.vocab_type == text_problems.VocabType.SUBWORD:
      generator_utils.get_or_generate_vocab(
          data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
          self.vocab_data_files())

    return text_problems.text2text_txt_iterator(
        os.path.join(tmp_dir, tag + ".modern"),
        os.path.join(tmp_dir, tag + ".original"))
Example #8
Source File: cnn_dailymail.py    From fine-lm with MIT License 6 votes vote down vote up
def write_raw_text_to_files(all_files, urls_path, tmp_dir, is_training):
  """Write stories and summaries to paired .source/.target files in tmp_dir.

  Writes "cnndm.train" when `is_training` is true. Otherwise writes
  "cnndm.dev" and then "cnndm.test", the latter from a downloaded URL list.
  """

  def write_to_file(all_files, urls_path, tmp_dir, filename):
    # One story per line in <filename>.source, its summary on the same line
    # number in <filename>.target.
    story_path = os.path.join(tmp_dir, filename + ".source")
    summary_path = os.path.join(tmp_dir, filename + ".target")
    with io.open(story_path, "w") as fstory, \
        io.open(summary_path, "w") as fsummary:
      for example in example_generator(all_files, urls_path, sum_token=True):
        story, summary = _story_summary_split(example)
        fstory.write(story + "\n")
        fsummary.write(summary + "\n")

  if is_training:
    filename = "cnndm.train"
  else:
    filename = "cnndm.dev"
  tf.logging.info("Writing %s" % filename)
  write_to_file(all_files, urls_path, tmp_dir, filename)

  if is_training:
    return

  test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt",
                                                  _TEST_URLS)
  filename = "cnndm.test"
  tf.logging.info("Writing %s" % filename)
  write_to_file(all_files, test_urls_path, tmp_dir, filename)
Example #9
Source File: babi_qa.py    From fine-lm with MIT License 6 votes vote down vote up
def _prepare_babi_data(tmp_dir, data_dir):
  """Downloads and extracts the dataset.

  Creates `data_dir` if it does not exist, downloads the archive into
  `tmp_dir` and extracts it there.

  Args:
    tmp_dir: temp directory to download and extract the dataset
    data_dir: The base directory where data and vocab files are stored.

  Returns:
    tmp_dir: temp directory containing the raw data.
  """
  if not tf.gfile.Exists(data_dir):
    tf.gfile.MakeDirs(data_dir)

  # TODO(dehghani@): find a solution for blocking user-agent (download)
  file_path = generator_utils.maybe_download(tmp_dir, _TAR, _URL)
  # The context manager closes the archive even if extraction raises.
  with tarfile.open(file_path) as tar:
    tar.extractall(tmp_dir)

  return tmp_dir
Example #10
Source File: bair_robot_pushing.py    From fine-lm with MIT License 6 votes vote down vote up
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Download and extract the dataset, then yield one dict per frame.

    Each yielded dict has the keys "frame_number" (wrapped in a list),
    "frame", "state" and "action", as produced by `self.parse_frames` for the
    files of the train or test sub-directory.
    """
    path = generator_utils.maybe_download(
        tmp_dir, os.path.basename(DATA_URL), DATA_URL)

    # The context manager closes the archive even if extraction raises.
    with tarfile.open(path) as tar:
      tar.extractall(tmp_dir)

    if dataset_split == problem.DatasetSplit.TRAIN:
      base_dir = os.path.join(tmp_dir, "softmotion30_44k/train/*")
    else:
      base_dir = os.path.join(tmp_dir, "softmotion30_44k/test/*")

    filenames = tf.gfile.Glob(base_dir)
    for frame_number, frame, state, action in self.parse_frames(filenames):
      yield {
          "frame_number": [frame_number],
          "frame": frame,
          "state": state,
          "action": action,
      }
Example #11
Source File: common_voice.py    From fine-lm with MIT License 6 votes vote down vote up
def _collect_data(directory):
  """Traverses directory collecting input and target files.

  Every "*.csv" file directly inside `directory` is read as a transcript:
  the first row is skipped as a header, and the first two columns of each
  remaining row are taken as the media file name and its label.

  Args:
   directory: base path to extracted audio and transcripts.
  Returns:
   list of (media_name, media_filepath, label) tuples
  """
  data_files = []
  transcripts = [
      filename for filename in os.listdir(directory)
      if filename.endswith(".csv")
  ]
  for transcript in transcripts:
    transcript_path = os.path.join(directory, transcript)
    with open(transcript_path, "r") as transcript_file:
      transcript_reader = csv.reader(transcript_file)
      # Skip headers. The builtin next() works on Python 2 and 3; the default
      # keeps an empty transcript from raising StopIteration.
      next(transcript_reader, None)
      for transcript_line in transcript_reader:
        media_name, label = transcript_line[0:2]
        filename = os.path.join(directory, media_name)
        data_files.append((media_name, filename, label))
  return data_files
Example #12
Source File: librispeech.py    From fine-lm with MIT License 6 votes vote down vote up
def _collect_data(directory, input_ext, transcription_ext):
  """Walks `directory` and maps each transcript entry to its media file.

  A file counts as a transcript when `transcription_ext` occurs anywhere in
  its name. Each transcript line is "<media_base> <label>", split on the
  first space.

  Returns:
    dict keyed by the media path without extension (for "/path/to/datafile.wav"
    the key is "/path/to/datafile"); each value is the tuple
    (media_base, media_filepath, label).
  """
  collected = {}
  for dirpath, _, names in os.walk(directory):
    for name in names:
      if transcription_ext not in name:
        continue
      with open(os.path.join(dirpath, name), "r") as handle:
        for raw_line in handle:
          media_base, label = raw_line.strip().split(" ", 1)
          key = os.path.join(dirpath, media_base)
          # A media file must not be listed twice.
          assert key not in collected
          media_path = os.path.join(
              dirpath, "%s.%s"%(media_base, input_ext))
          collected[key] = (media_base, media_path, label)
  return collected
Example #13
Source File: download.py    From nmp_qc with MIT License 6 votes vote down vote up
def download_figshare(file_name, file_ext, dir_path='./', change_name = None):
    """Download a figshare file into `dir_path` and unpack it if needed.

    '.zip' and '.tar.bz2' downloads are extracted and the archive is then
    deleted; any other download is kept as a plain file.

    Args:
        file_name: figshare file identifier, appended to the download URL and
            used as the local file name.
        file_ext: extension that selects how the download is handled.
        dir_path: directory that receives the download.
        change_name: optional name. For archives it is the sub-directory of
            `dir_path` to extract into; for other files it is the name the
            download is renamed to.
    """
    prepare_data_dir(dir_path)
    url = 'https://ndownloader.figshare.com/files/' + file_name
    wget.download(url, out=dir_path)
    file_path = os.path.join(dir_path, file_name)

    if file_ext in ('.zip', '.tar.bz2'):
        if change_name is None:
            target_dir = dir_path
        else:
            target_dir = os.path.join(dir_path, change_name)
        # Context managers close the archive even if extraction raises.
        if file_ext == '.zip':
            with zipfile.ZipFile(file_path, 'r') as archive:
                archive.extractall(target_dir)
        else:
            with tarfile.open(file_path, 'r:bz2') as archive:
                archive.extractall(target_dir)
        os.remove(file_path)
    elif change_name is not None:
        os.rename(file_path, os.path.join(dir_path, change_name))

# Download QM9 dataset 
Example #14
Source File: node.py    From Paradrop with Apache License 2.0 6 votes vote down vote up
def import_ssh_key(ctx, path, user):
    """
    Add an authorized key from a public key file.

    PATH must be a path to a public key file, which corresponds to
    a private key that SSH can use for authentication. Typically,
    ssh-keygen will place the public key in "~/.ssh/id_rsa.pub".
    """
    # NOTE(review): docstring left verbatim; it is presumably surfaced as
    # command help text -- confirm before rewording.
    #
    # Returns None without contacting the node when the file holds a private
    # key; otherwise returns the result of client.add_ssh_key.
    client = ctx.obj['client']
    with open(path, 'r') as source:
        key_string = source.read().strip()

    # Raw string avoids the invalid "\w" escape warning. The key-type word is
    # optional so PKCS#8 files ("-----BEGIN PRIVATE KEY-----") are caught too.
    match = re.search(r"-----BEGIN (?:\w+ )?PRIVATE KEY-----", key_string)
    if match is not None:
        print("The path ({}) contains a private key.".format(path))
        print("Please provide the path to your public key.")
        return None

    result = client.add_ssh_key(key_string, user=user)
    if result is not None:
        print("Added public key from {}".format(path))

    return result
Example #15
Source File: device.py    From Paradrop with Apache License 2.0 6 votes vote down vote up
def reconfigure(ctx):
    """
    Reconfigure the chute without rebuilding.
    """
    # Sends the 'config' section of paradrop.yaml (an empty dict when the
    # section is absent) to "<chute_url>/config" with a PUT request, then
    # invokes `watch` on the returned change_id.
    url = ctx.obj['chute_url'] + "/config"

    if not os.path.exists("paradrop.yaml"):
        raise Exception("No paradrop.yaml file found in working directory.")

    with open("paradrop.yaml", "r") as source:
        config = yaml.safe_load(source).get('config', {})

    response = router_request("PUT", url, json=config)
    change = response.json()
    ctx.invoke(watch, change_id=change['change_id'])
Example #16
Source File: rooter.py    From ToonRooter with MIT License 6 votes vote down vote up
def write_payload(self):
        """Stream the payload tarball to the device through `self._port`.

        Starts a "base64 -d | tar zxf -" pipeline on the remote side, sends
        the base64-encoded tarball from `self.create_payload_tar()`, deletes
        the local tarball and finally sends an EOT character.
        """
        port = self._port
        tar_path = self.create_payload_tar()

        log.debug(port.read_until("/ # "))
        port.write("base64 -d | tar zxf -\n")
        port.flush()
        #(tarr, tarw) = os.pipe()
        #tar = tarfile.open(mode='w|gz', fileobj=tarw)
        #tar.add("payload/patch_toon.sh")

        log.info("Transferring payload")
        # The tarball is binary data: text mode could translate newlines or
        # attempt to decode it, so read it as bytes.
        with open(tar_path, 'rb') as f:
            base64.encode(f, port)

        os.remove(tar_path)

        port.flush()
        port.reset_input_buffer()
        # "\x04" is EOT (Ctrl-D); presumably it ends the remote pipeline's stdin.
        port.write("\x04")
        port.flush()
Example #17
Source File: data.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def get_caltech101_data():
    """Download the Caltech-101 archive and extract it under "data".

    Extraction is skipped when both the training and the test folder already
    exist.

    Returns:
        (training_path, testing_path): the two dataset folders inside "data".
    """
    url = "https://s3.us-east-2.amazonaws.com/mxnet-public/101_ObjectCategories.tar.gz"
    dataset_name = "101_ObjectCategories"
    data_folder = "data"
    # Derive both folders once from dataset_name instead of repeating the
    # literal names in the existence check.
    training_path = os.path.join(data_folder, dataset_name)
    testing_path = os.path.join(data_folder, "{}_test".format(dataset_name))
    if not os.path.isdir(data_folder):
        os.makedirs(data_folder)
    tar_path = mx.gluon.utils.download(url, path=data_folder)
    if not (os.path.isdir(training_path) and os.path.isdir(testing_path)):
        # The context manager closes the archive even if extraction raises.
        with tarfile.open(tar_path, "r:gz") as tar:
            tar.extractall(data_folder)
        print('Data extracted')
    return training_path, testing_path
Example #18
Source File: super_resolution.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def resolve(ctx):
    """Run the network on the image at `opt.resolve_img` and save the result.

    Only the Y channel of the YCbCr image is passed through `net`; the Cb and
    Cr channels are resized with bicubic interpolation to the output size.
    The merged image is converted to RGB and written to 'resolved.png'.
    """
    from PIL import Image
    # Parameters are loaded on a single context when a list is given.
    if isinstance(ctx, list):
        ctx = [ctx[0]]
    net.load_parameters('superres.params', ctx=ctx)

    source_img = Image.open(opt.resolve_img).convert('YCbCr')
    luma, chroma_b, chroma_r = source_img.split()

    # Two leading axes are added in front of the 2-D luma plane.
    luma_nd = mx.nd.expand_dims(mx.nd.array(luma), axis=0)
    net_input = mx.nd.expand_dims(luma_nd, axis=0)
    upscaled = mx.nd.reshape(net(net_input), shape=(-3, -2)).asnumpy()
    upscaled = upscaled.clip(0, 255)
    out_y = Image.fromarray(np.uint8(upscaled[0]), mode='L')

    out_cb = chroma_b.resize(out_y.size, Image.BICUBIC)
    out_cr = chroma_r.resize(out_y.size, Image.BICUBIC)
    merged = Image.merge('YCbCr', [out_y, out_cb, out_cr])

    merged.convert('RGB').save('resolved.png')
Example #19
Source File: datasets.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def _get_data(self):
        """Download the image and label files and load them into memory.

        Sets `self._data` to an nd.array reshaped to (len(label), 28, 28, 1)
        and `self._label` to an int32 numpy array.
        """
        data_spec, label_spec = (
            (self._train_data, self._train_label) if self._train
            else (self._test_data, self._test_label))

        # Each spec is indexed as (file name, sha1 hash).
        namespace = 'gluon/dataset/'+self._namespace
        data_file = download(_get_repo_file_url(namespace, data_spec[0]),
                             path=self._root,
                             sha1_hash=data_spec[1])
        label_file = download(_get_repo_file_url(namespace, label_spec[0]),
                              path=self._root,
                              sha1_hash=label_spec[1])

        with gzip.open(label_file, 'rb') as stream:
            # Consume the 8-byte header; the unpacked values are not used.
            struct.unpack(">II", stream.read(8))
            labels = np.frombuffer(stream.read(), dtype=np.uint8).astype(np.int32)

        with gzip.open(data_file, 'rb') as stream:
            # Consume the 16-byte header; the unpacked values are not used.
            struct.unpack(">IIII", stream.read(16))
            pixels = np.frombuffer(stream.read(), dtype=np.uint8)
            pixels = pixels.reshape(len(labels), 28, 28, 1)

        self._data = nd.array(pixels, dtype=pixels.dtype)
        self._label = labels
Example #20
Source File: datasets.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def _get_data(self):
        """Ensure the batch files exist locally, then load the chosen split.

        The archive is downloaded and extracted into `self._root` when any
        train or test file is missing or fails its sha1 check. The batches of
        the selected split are concatenated into `self._data` (an nd.array)
        and `self._label`.
        """
        def _is_intact(name, sha1):
            # True when the file exists under the root and matches its hash.
            path = os.path.join(self._root, name)
            return os.path.exists(path) and check_sha1(path, sha1)

        expected = self._train_data + self._test_data
        if not all(_is_intact(name, sha1) for name, sha1 in expected):
            namespace = 'gluon/dataset/'+self._namespace
            filename = download(_get_repo_file_url(namespace, self._archive_file[0]),
                                path=self._root,
                                sha1_hash=self._archive_file[1])

            with tarfile.open(filename) as tar:
                tar.extractall(self._root)

        data_files = self._train_data if self._train else self._test_data
        batches = [self._read_batch(os.path.join(self._root, name))
                   for name, _ in data_files]
        data, label = zip(*batches)
        data = np.concatenate(data)
        label = np.concatenate(label)

        self._data = nd.array(data, dtype=data.dtype)
        self._label = label
Example #21
Source File: wmt_utils.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def get_wmt_enfr_dev_set(directory):
  """Download the WMT en-fr dev set to directory unless it's there.

  Args:
    directory: directory that receives the archive and the extracted files.

  Returns:
    Path prefix of the dev set inside `directory`; the data files are this
    prefix plus ".fr" and ".en".
  """
  dev_name = "newstest2013"
  dev_path = os.path.join(directory, dev_name)
  if not (tf.gfile.Exists(dev_path + ".fr") and
          tf.gfile.Exists(dev_path + ".en")):
    dev_file = maybe_download(directory, "dev-v2.tgz", _WMT_ENFR_DEV_URL)
    # Parenthesized single-argument form is valid on Python 2 and Python 3.
    print("Extracting tgz file %s" % dev_file)
    with tarfile.open(dev_file, "r:gz") as dev_tar:
      fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
      en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
      fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
      en_dev_file.name = dev_name + ".en"
      dev_tar.extract(fr_dev_file, directory)
      dev_tar.extract(en_dev_file, directory)
  return dev_path
Example #22
Source File: test_image.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def test_image_detiter(self):
        """Iterate ImageDetIter built from an in-memory list and a .lst file."""
        def _drain(iterator):
            # Consume every batch; only successful iteration is checked.
            for _batch in iterator:
                pass

        im_list = [_generate_objects() + [x] for x in TestImage.IMAGES]
        det_iter = mx.image.ImageDetIter(2, (3, 300, 300), imglist=im_list, path_root='')
        for _ in range(3):
            _drain(det_iter)
            det_iter.reset()

        val_iter = mx.image.ImageDetIter(2, (3, 300, 300), imglist=im_list, path_root='')
        det_iter = val_iter.sync_label_shape(det_iter)

        # test file list
        fname = './data/test_imagedetiter.lst'
        im_list = [[k] + _generate_objects() + [x] for k, x in enumerate(TestImage.IMAGES)]
        with open(fname, 'w') as f:
            # One tab-separated record per line.
            for record in im_list:
                f.write('\t'.join(str(field) for field in record) + '\n')

        det_iter = mx.image.ImageDetIter(2, (3, 400, 400), path_imglist=fname,
            path_root='')
        _drain(det_iter)
Example #23
Source File: input.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def extract_mnist_data(filename, num_images, image_size, pixel_depth):
  """
  Extract the images into a 4D tensor [image index, y, x, channels].

  Values are rescaled from [0, 255] down to [-0.5, 0.5].

  The decoded array is cached next to the input as `filename + ".npy"` and
  loaded from that cache on later calls.
  """
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      # Skip the 16-byte header that precedes the pixel data.
      bytestream.read(16)
      buf = bytestream.read(image_size * image_size * num_images)
      data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
      data = (data - (pixel_depth / 2.0)) / pixel_depth
      data = data.reshape(num_images, image_size, image_size, 1)
      # np.save appends the ".npy" suffix to the given file name.
      np.save(filename, data)
      return data
  else:
    # .npy is a binary format, so the cache is opened in binary mode; text
    # mode would hand np.load decoded text instead of bytes.
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:
      return np.load(file_obj)
Example #24
Source File: cifar10.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def maybe_download_and_extract():
  """Download and extract the tarball from Alex's website.

  The archive is stored in `FLAGS.data_dir`. The download is skipped when the
  archive file already exists, and extraction is skipped when the
  'cifar-10-batches-bin' directory already exists.
  """
  dest_directory = FLAGS.data_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      # Report hook for urlretrieve: rewrites a single percentage line.
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
          float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
  if not os.path.exists(extracted_dir_path):
    # Close the archive deterministically instead of leaving the open handle
    # to the garbage collector.
    with tarfile.open(filepath, 'r:gz') as tar:
      tar.extractall(dest_directory)
Example #25
Source File: download_and_convert_cifar10.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def _download_and_uncompress_dataset(dataset_dir):
  """Downloads cifar10 and uncompresses it locally.

  Nothing is downloaded or extracted when the archive file already exists in
  `dataset_dir`.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = _DATA_URL.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      # Report hook for urlretrieve: rewrites a single percentage line.
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
          filename, float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # Close the archive deterministically instead of leaving the open handle
    # to the garbage collector.
    with tarfile.open(filepath, 'r:gz') as tar:
      tar.extractall(dataset_dir)
Example #26
Source File: ptb.py    From fine-lm with MIT License 6 votes vote down vote up
def _build_vocab(filename, vocab_path, vocab_size):
  """Reads a file to build a vocabulary of `vocab_size` most common words.

   The vocabulary is sorted by occurrence count and has one word per line.
   Ties are broken by sorting the words themselves in ascending order.
   Originally from:
   https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py

  Args:
    filename: file to read list of words from.
    vocab_path: path where to save the vocabulary.
    vocab_size: size of the vocabulary to generate.
  """
  data = _read_words(filename)
  counter = collections.Counter(data)
  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
  # Taking the words directly (rather than unpacking zip(*count_pairs)) also
  # works when there are no words at all; an empty vocabulary is written.
  words = [word for word, _ in count_pairs[:vocab_size]]
  with open(vocab_path, "w") as f:
    f.write("\n".join(words))
Example #27
Source File: input.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def extract_mnist_labels(filename, num_images):
  """
  Extract the labels into a vector of int32 label IDs.

  The decoded array is cached next to the input as `filename + ".npy"` and
  loaded from that cache on later calls.
  """
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      # Skip the 8-byte header that precedes the label bytes.
      bytestream.read(8)
      buf = bytestream.read(1 * num_images)
      labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
      # np.save appends the ".npy" suffix to the given file name.
      np.save(filename, labels)
    return labels
  else:
    # .npy is a binary format, so the cache is opened in binary mode; text
    # mode would hand np.load decoded text instead of bytes.
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:
      return np.load(file_obj)
Example #28
Source File: wmt_utils.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def gunzip_file(gz_path, new_path):
  """Unzips from gz_path into new_path.

  Args:
    gz_path: path of the gzip-compressed input file.
    new_path: path of the decompressed output file; overwritten if present.
  """
  # Function-scope import keeps this block self-contained.
  import shutil
  # Parenthesized single-argument form is valid on Python 2 and Python 3.
  print("Unpacking %s to %s" % (gz_path, new_path))
  with gzip.open(gz_path, "rb") as gz_file:
    with open(new_path, "wb") as new_file:
      # Copy in fixed-size chunks: binary data has no meaningful lines, and a
      # long run without a newline would otherwise be read as one huge line.
      shutil.copyfileobj(gz_file, new_file)
Example #29
Source File: device.py    From Paradrop with Apache License 2.0 5 votes vote down vote up
def add(ctx, path):
    """
    Add an authorized key from a file.
    """
    # POSTs the stripped file contents as {'key': ...} to
    # "<sshkeys_url>/<sshkeys_user>" and prints the key echoed back in the
    # response when the request succeeded.
    url = '{sshkeys_url}/{sshkeys_user}'.format(**ctx.obj)
    with open(path, 'r') as source:
        payload = {'key': source.read().strip()}

        response = router_request("POST", url, json=payload, dump=False)
        if not response.ok:
            return
        reply = response.json()
        print("Added: " + reply.get('key', ''))
Example #30
Source File: cifar.py    From fine-lm with MIT License 5 votes vote down vote up
def _get_cifar(directory, url):
  """Download and extract CIFAR to directory unless it is there.

  Args:
    directory: directory that receives the archive and its extracted content.
    url: URL of the gzip-compressed tarball; its basename is the file name.
  """
  filename = os.path.basename(url)
  path = generator_utils.maybe_download(directory, filename, url)
  # Close the archive deterministically instead of leaving the open handle
  # to the garbage collector.
  with tarfile.open(path, "r:gz") as tar:
    tar.extractall(directory)