Python gzip.open() Examples
The following are 29 code examples of gzip.open(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the gzip module, or try the search function.
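Before the examples, here is a minimal standalone sketch of the two common calling patterns for gzip.open(): binary mode (the default, 'rb'/'wb', which yields bytes) and text mode ('rt'/'wt', which yields str and accepts an encoding). The file name data.txt.gz is only a placeholder.

import gzip

# Text mode: gzip.open handles encoding and newlines itself.
with gzip.open('data.txt.gz', 'wt', encoding='utf-8') as f:
    f.write('hello, gzip\n')

with gzip.open('data.txt.gz', 'rt', encoding='utf-8') as f:
    print(f.read())        # 'hello, gzip'

# Binary mode (the default): read and write raw bytes.
with gzip.open('data.txt.gz', 'rb') as f:
    print(f.read()[:5])    # b'hello'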
Example #1
Source File: utils.py From deep-learning-note with MIT License | 9 votes |
def parse_data(path, dataset, flatten):
    if dataset != 'train' and dataset != 't10k':
        raise NameError('dataset must be train or t10k')

    label_file = os.path.join(path, dataset + '-labels-idx1-ubyte')
    with open(label_file, 'rb') as file:
        _, num = struct.unpack(">II", file.read(8))
        labels = np.fromfile(file, dtype=np.int8)  # int8
        new_labels = np.zeros((num, 10))
        new_labels[np.arange(num), labels] = 1

    img_file = os.path.join(path, dataset + '-images-idx3-ubyte')
    with open(img_file, 'rb') as file:
        _, num, rows, cols = struct.unpack(">IIII", file.read(16))
        imgs = np.fromfile(file, dtype=np.uint8).reshape(num, rows, cols)  # uint8
        imgs = imgs.astype(np.float32) / 255.0

    if flatten:
        imgs = imgs.reshape([num, -1])
    return imgs, new_labels
Example #2
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def save_csv(filename, dat, index=False, **kw):
    '''
    save_csv(filename, d) writes a pandas dataframe d to a CSV file with the given name. If pandas
      cannot be loaded, then an error is raised. If d is not a dataframe, to_dataframe() is called
      on it. All optional arguments are passed along to the pandas.DataFrame.to_csv function.
    '''
    import pandas
    from neuropythy.util import to_dataframe
    d = to_dataframe(dat)
    # Note: gzip.open is used for all three suffixes, so files named .bz2 or
    # .lzma are still gzip-compressed here.
    if any(filename.endswith(s) for s in ('.gz', '.bz2', '.lzma')):
        with gzip.open(filename, 'wt', newline='') as fl:
            d.to_csv(fl, index=index, **kw)
    else:
        with open(filename, 'wt') as fl:
            d.to_csv(fl, index=index, **kw)
    return dat
Example #3
Source File: ggtnn_train.py From gated-graph-transformer-network with MIT License | 6 votes |
def assemble_batch(story_fns, num_answer_words, format_spec):
    stories = []
    for sfn in story_fns:
        with gzip.open(sfn, 'rb') as f:
            cvtd_story, _, _, _ = pickle.load(f)
            stories.append(cvtd_story)
    sents, graphs, queries, answers = zip(*stories)
    cvtd_sents = np.array(sents, np.int32)
    cvtd_queries = np.array(queries, np.int32)
    max_ans_len = max(len(a) for a in answers)
    cvtd_answers = np.stack([convert_answer(answer, num_answer_words, format_spec, max_ans_len)
                             for answer in answers])
    num_new_nodes, new_node_strengths, new_node_ids, next_edges = zip(*graphs)
    num_new_nodes = np.stack(num_new_nodes)
    new_node_strengths = np.stack(new_node_strengths)
    new_node_ids = np.stack(new_node_ids)
    next_edges = np.stack(next_edges)
    return cvtd_sents, cvtd_queries, cvtd_answers, num_new_nodes, new_node_strengths, new_node_ids, next_edges
Example #4
Source File: input.py From DOTA_models with Apache License 2.0 | 6 votes |
def extract_mnist_data(filename, num_images, image_size, pixel_depth):
    """
    Extract the images into a 4D tensor [image index, y, x, channels].

    Values are rescaled from [0, 255] down to [-0.5, 0.5].
    """
    # if not os.path.exists(file):
    if not tf.gfile.Exists(filename + ".npy"):
        with gzip.open(filename) as bytestream:
            bytestream.read(16)
            buf = bytestream.read(image_size * image_size * num_images)
            data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
            data = (data - (pixel_depth / 2.0)) / pixel_depth
            data = data.reshape(num_images, image_size, image_size, 1)
            np.save(filename, data)
            return data
    else:
        with tf.gfile.Open(filename + ".npy", mode='r') as file_obj:
            return np.load(file_obj)
Example #5
Source File: input_data.py From IntroToDeepLearning with MIT License | 6 votes |
def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError(
                'Invalid magic number %d in MNIST image file: %s' %
                (magic, filename))
        num_images = _read32(bytestream)
        rows = _read32(bytestream)
        cols = _read32(bytestream)
        buf = bytestream.read(rows * cols * num_images)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        data = data.reshape(num_images, rows, cols, 1)
        return data
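The _read32 helper used above is defined elsewhere in the same source file and is not shown here. In the TensorFlow MNIST tutorial code this file appears to be derived from, it reads one big-endian unsigned 32-bit integer from the stream; a sketch along those lines (an assumption, not quoted from this project):

import numpy

def _read32(bytestream):
    # MNIST files store header fields as big-endian uint32 values.
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]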
Example #6
Source File: simplify_nq_data.py From natural-questions with Apache License 2.0 | 6 votes |
def main(_):
    """Runs `text_utils.simplify_nq_example` over all shards of a split.

    Prints simplified examples to a single gzipped file in the same directory
    as the input shards.
    """
    split = os.path.basename(FLAGS.data_dir)
    outpath = os.path.join(FLAGS.data_dir,
                           "simplified-nq-{}.jsonl.gz".format(split))
    with gzip.open(outpath, "wb") as fout:
        num_processed = 0
        start = time.time()
        for inpath in glob.glob(os.path.join(FLAGS.data_dir, "nq-*-??.jsonl.gz")):
            print("Processing {}".format(inpath))
            with gzip.open(inpath, "rb") as fin:
                for l in fin:
                    utf8_in = l.decode("utf8", "strict")
                    utf8_out = json.dumps(
                        text_utils.simplify_nq_example(json.loads(utf8_in))) + u"\n"
                    fout.write(utf8_out.encode("utf8"))
                    num_processed += 1
                    if not num_processed % 100:
                        print("Processed {} examples in {}.".format(
                            num_processed, time.time() - start))
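A side note on modes in this example: the output file is opened with "wb" and each line is encoded to UTF-8 by hand. Since gzip.open also accepts text modes, an equivalent write (a sketch with a stand-in record, not the project's code) can let the file object do the encoding:

import gzip
import json

record = {"example_id": 0}  # stand-in for one simplified example
with gzip.open("simplified.jsonl.gz", "wt", encoding="utf8") as fout:
    fout.write(json.dumps(record) + "\n")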
Example #7
Source File: input_helpers.py From deep-siamese-text-similarity with MIT License | 6 votes |
def loadW2V(self, emb_path, type="bin"):
    print("Loading W2V data...")
    num_keys = 0
    if type == "textgz":
        # this seems faster than gensim non-binary load
        for line in gzip.open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    elif type == "text":  # elif, so "textgz" does not fall through to the binary loader below
        # this seems faster than gensim non-binary load
        for line in open(emb_path):
            l = line.strip().split()
            st = l[0].lower()
            self.pre_emb[st] = np.asarray(l[1:])
        num_keys = len(self.pre_emb)
    else:
        self.pre_emb = Word2Vec.load_word2vec_format(emb_path, binary=True)
        self.pre_emb.init_sims(replace=True)
        num_keys = len(self.pre_emb.vocab)
    print("loaded word2vec len ", num_keys)
    gc.collect()
Example #8
Source File: log-parser.py From aws-waf-security-automations with Apache License 2.0 | 6 votes |
def write_output(bucket_name, key_name, output_key_name, outstanding_requesters):
    logging.getLogger().debug('[write_output] Start')
    try:
        current_data = '/tmp/' + key_name.split('/')[-1] + '_LOCAL.json'
        with open(current_data, 'w') as outfile:
            json.dump(outstanding_requesters, outfile)
        s3 = boto3.client('s3')
        s3.upload_file(current_data, bucket_name, output_key_name,
                       ExtraArgs={'ContentType': "application/json"})
        remove(current_data)
    except Exception as e:
        logging.getLogger().error("[write_output] \tError to write output file")
        logging.getLogger().error(e)
    logging.getLogger().debug('[write_output] End')
Example #9
Source File: input_helpers.py From deep-siamese-text-similarity with MIT License | 6 votes |
def dumpValidation(self, x1_text, x2_text, y, shuffled_index, dev_idx, i):
    print("dumping validation " + str(i))
    x1_shuffled = x1_text[shuffled_index]
    x2_shuffled = x2_text[shuffled_index]
    y_shuffled = y[shuffled_index]
    x1_dev = x1_shuffled[dev_idx:]
    x2_dev = x2_shuffled[dev_idx:]
    y_dev = y_shuffled[dev_idx:]
    del x1_shuffled
    del y_shuffled
    with open('validation.txt' + str(i), 'w') as f:
        for text1, text2, label in zip(x1_dev, x2_dev, y_dev):
            f.write(str(label) + "\t" + text1 + "\t" + text2 + "\n")
    del x1_dev
    del y_dev

# Data Preparation
# ==================================================
Example #10
Source File: input_helpers.py From deep-siamese-text-similarity with MIT License | 6 votes |
def getTsvData(self, filepath):
    print("Loading training data from " + filepath)
    x1 = []
    x2 = []
    y = []
    # positive samples from file
    for line in open(filepath):
        l = line.strip().split("\t")
        if len(l) < 2:
            continue
        if random() > 0.5:
            x1.append(l[0].lower())
            x2.append(l[1].lower())
        else:
            x1.append(l[1].lower())
            x2.append(l[0].lower())
        y.append(int(l[2]))
    return np.asarray(x1), np.asarray(x2), np.asarray(y)
Example #11
Source File: wmt_utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def get_wmt_enfr_dev_set(directory):
    """Download the WMT en-fr training corpus to directory unless it's there."""
    dev_name = "newstest2013"
    dev_path = os.path.join(directory, dev_name)
    if not (tf.gfile.Exists(dev_path + ".fr") and tf.gfile.Exists(dev_path + ".en")):
        dev_file = maybe_download(directory, "dev-v2.tgz", _WMT_ENFR_DEV_URL)
        print("Extracting tgz file %s" % dev_file)
        with tarfile.open(dev_file, "r:gz") as dev_tar:
            fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
            en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
            fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
            en_dev_file.name = dev_name + ".en"
            dev_tar.extract(fr_dev_file, directory)
            dev_tar.extract(en_dev_file, directory)
    return dev_path
Example #12
Source File: dataset_tool.py From disentangling_conditional_gans with MIT License | 6 votes |
def create_mnist(tfrecord_dir, mnist_dir):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    # Each IDX file starts with a header (16 bytes for images: magic, count,
    # rows, cols; 8 bytes for labels: magic, count), skipped via offset=.
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#----------------------------------------------------------------------------
Example #13
Source File: dataset_tool.py From disentangling_conditional_gans with MIT License | 6 votes |
def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255

    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#----------------------------------------------------------------------------
Example #14
Source File: dataset_tool.py From disentangling_conditional_gans with MIT License | 6 votes |
def create_cifar100(tfrecord_dir, cifar100_dir):
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'])
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (50000,) and labels.dtype == np.int32
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#----------------------------------------------------------------------------
Example #15
Source File: dataset_tool.py From disentangling_conditional_gans with MIT License | 6 votes |
def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121):
    print('Loading CelebA from "%s"' % celeba_dir)
    glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png')
    image_filenames = sorted(glob.glob(glob_pattern))
    expected_images = 202599
    if len(image_filenames) != expected_images:
        error('Expected to find %d images' % expected_images)

    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            img = np.asarray(PIL.Image.open(image_filenames[order[idx]]))
            assert img.shape == (218, 178, 3)
            img = img[cy - 64 : cy + 64, cx - 64 : cx + 64]
            img = img.transpose(2, 0, 1)  # HWC => CHW
            tfr.add_image(img)

#----------------------------------------------------------------------------
Example #16
Source File: download_and_convert_mnist.py From DOTA_models with Apache License 2.0 | 6 votes |
def _extract_labels(filename, num_labels):
    """Extract the labels into a vector of int64 label IDs.

    Args:
      filename: The path to an MNIST labels file.
      num_labels: The number of labels in the file.

    Returns:
      A numpy array of shape [number_of_labels]
    """
    print('Extracting labels from: ', filename)
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        buf = bytestream.read(1 * num_labels)
        labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
    return labels
Example #17
Source File: download_and_convert_mnist.py From DOTA_models with Apache License 2.0 | 6 votes |
def _extract_images(filename, num_images):
    """Extract the images into a numpy array.

    Args:
      filename: The path to an MNIST images file.
      num_images: The number of images in the file.

    Returns:
      A numpy array of shape [number_of_images, height, width, channels].
    """
    print('Extracting images from: ', filename)
    with gzip.open(filename) as bytestream:
        bytestream.read(16)
        buf = bytestream.read(
            _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
        data = np.frombuffer(buf, dtype=np.uint8)
        data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
    return data
Example #18
Source File: test_utils.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
@contextmanager  # from contextlib; needed for the "with" usage shown in the docstring
def discard_stderr():
    """
    Discards error output of a routine if invoked as:

        with discard_stderr():
            ...
    """
    with open(os.devnull, 'w') as bit_bucket:
        try:
            stderr_fileno = sys.stderr.fileno()
            old_stderr = os.dup(stderr_fileno)
            try:
                os.dup2(bit_bucket.fileno(), stderr_fileno)
                yield
            finally:
                os.dup2(old_stderr, stderr_fileno)
        except AttributeError:
            # On some systems stderr is not a real file descriptor but a
            # virtual pipeline that cannot be duplicated.
            yield
Example #19
Source File: datasets.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    if any(not os.path.exists(path) or not check_sha1(path, sha1)
           for path, sha1 in ((os.path.join(self._root, name), sha1)
                              for name, sha1 in self._train_data + self._test_data)):
        namespace = 'gluon/dataset/' + self._namespace
        filename = download(_get_repo_file_url(namespace, self._archive_file[0]),
                            path=self._root,
                            sha1_hash=self._archive_file[1])
        with tarfile.open(filename) as tar:
            tar.extractall(self._root)

    if self._train:
        data_files = self._train_data
    else:
        data_files = self._test_data
    data, label = zip(*(self._read_batch(os.path.join(self._root, name))
                        for name, _ in data_files))
    data = np.concatenate(data)
    label = np.concatenate(label)

    self._data = nd.array(data, dtype=data.dtype)
    self._label = label
Example #20
Source File: create_joint_gs.py From CAMISIM with Apache License 2.0 | 6 votes |
def create_gsa_mapping(path, metadata, sample_name, shuffle):
    """
    Creates the binning gold standard/gsa mapping
    """
    to_genome = name_to_genome(metadata)
    gsa_path = os.path.join(path, "anonymous_gsa.fasta")
    count = 0
    if not os.path.exists(gsa_path):
        gsa_path = os.path.join(path, "anonymous_gsa.fasta.gz")  # if zipped
        with gzip.open(gsa_path, 'r') as gsa:
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with gzip.open(gsa_path, 'r') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata,
                                         sample_name, count, shuffle)
    else:
        with open(gsa_path, 'r') as gsa:
            for line in gsa:
                if line.startswith('>'):
                    count += 1
        with open(gsa_path, 'r') as gsa:
            gsa_temp = shuffle_anonymize(gsa, path, to_genome, metadata,
                                         sample_name, count, shuffle)
    os.rename(gsa_temp, gsa_path)
Example #21
Source File: get_genomes.py From CAMISIM with Apache License 2.0 | 6 votes |
def read_genomes_list(genomes_path, additional_file=None):
    genomes_map = {}
    total_genomes = 0
    if additional_file is not None:
        with open(additional_file, 'r') as add:
            for line in add:
                ncbi_id, sci_name, path, novelty = line.strip().split('\t')
                if ncbi_id in genomes_map:
                    genomes_map[ncbi_id][1].append(path)
                else:
                    genomes_map[ncbi_id] = (sci_name, [path], novelty)  # this might not be a http path
                total_genomes += 1
    with open(genomes_path, 'r') as genomes:
        for line in genomes:
            ncbi_id, sci_name, ftp = line.strip().split('\t')
            http = ftp.replace("ftp://", "http://")  # not using ftp address but http (proxies)
            if ncbi_id in genomes_map:
                genomes_map[ncbi_id][1].append(http)
            else:
                genomes_map[ncbi_id] = (sci_name, [http], 'known_strain')  # sci_name is always the same for same taxid (?)
            total_genomes += 1
    return genomes_map, total_genomes
Example #22
Source File: get_genomes.py From CAMISIM with Apache License 2.0 | 6 votes |
def download_genome(genome, out_path):
    genome_path = os.path.join(out_path, "genomes")
    out_name = genome.rstrip().split('/')[-1]
    http_address = os.path.join(genome, out_name + "_genomic.fna.gz")
    opened = urllib2.urlopen(http_address)
    out = os.path.join(genome_path, out_name + ".fa")
    tmp_out = os.path.join(genome_path, out_name + "tmp.fa")
    out_gz = out + ".gz"
    with open(out_gz, 'wb') as outF:
        outF.write(opened.read())
    gf = gzip.open(out_gz)
    new_out = open(tmp_out, 'wb')
    new_out.write(gf.read())
    gf.close()
    os.remove(out_gz)
    new_out.close()
    split_by_N(tmp_out, out)
    return out
Example #23
Source File: datasets.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    if self._train:
        data, label = self._train_data, self._train_label
    else:
        data, label = self._test_data, self._test_label

    namespace = 'gluon/dataset/' + self._namespace
    data_file = download(_get_repo_file_url(namespace, data[0]),
                         path=self._root,
                         sha1_hash=data[1])
    label_file = download(_get_repo_file_url(namespace, label[0]),
                          path=self._root,
                          sha1_hash=label[1])

    with gzip.open(label_file, 'rb') as fin:
        struct.unpack(">II", fin.read(8))
        label = np.frombuffer(fin.read(), dtype=np.uint8).astype(np.int32)

    with gzip.open(data_file, 'rb') as fin:
        struct.unpack(">IIII", fin.read(16))
        data = np.frombuffer(fin.read(), dtype=np.uint8)
        data = data.reshape(len(label), 28, 28, 1)

    self._data = nd.array(data, dtype=data.dtype)
    self._label = label
Example #24
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def load_string(filename, to=None):
    '''
    load_string(filename) loads the given file as a string. The optional argument to can be set to
      'lines' to load a list of lines or to 'bytes' to load the data as a byte-string.
    '''
    if to is None: to = 'string'
    to = to.lower()
    if to in ['string', 'str', 'text', 'txt', 't', 's']:
        with open(filename, 'r') as fl:
            return fl.read()
    elif to in ['binary', 'bytes', 'b', 'raw']:
        with open(filename, 'rb') as fl:
            return fl.read()
    elif to in ['lines', 'l']:
        with open(filename, 'r') as fl:
            return fl.read().splitlines()
Example #25
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def load_json(filename, to='auto'):
    '''
    load_json(filename) yields the object represented by the json file or stream object filename.

    The optional argument to may be set to None to indicate that the JSON data should be returned
      verbatim rather than parsed by neuropythy's denormalize system.
    '''
    from neuropythy.util import denormalize as denorm
    if pimms.is_str(filename):
        try:
            with gzip.open(filename, 'rt') as fl:
                dat = json.load(fl)
        except Exception:
            with open(filename, 'rt') as fl:
                dat = json.load(fl)
    else:
        dat = json.load(filename)
        filename = '<stream>'
    if to is None:
        return dat
    elif to == 'auto':
        return denorm(dat)
    else:
        raise ValueError('unrecognized to option: %s' % to)
Example #26
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def save_json(filename, obj, normalize=True):
    '''
    save_json(filename, obj) writes the given object to the given filename (or stream) in a
      normalized JSON format.

    The optional argument normalize (default True) may be set to False to prevent the object from
      being run through neuropythy's normalize system.
    '''
    from neuropythy.util import normalize as norm
    dat = norm(obj) if normalize else obj
    if pimms.is_str(filename):
        jsonstr = json.dumps(dat)
        if any(filename.endswith(s) for s in ('.gz', '.bz2', '.lzma')):
            with gzip.open(filename, 'wt') as fl:
                fl.write(jsonstr)
        else:
            with open(filename, 'wt') as fl:
                fl.write(jsonstr)
    else:
        json.dump(dat, filename)
    return filename
Example #27
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def save_tsv(filename, dat, sep='\t', index=False, **kw):
    '''
    save_tsv(filename, d) writes a pandas dataframe d to a TSV file with the given name. If pandas
      cannot be loaded, then an error is raised. If d is not a dataframe, to_dataframe() is called
      on it. All optional arguments are passed along to the pandas.DataFrame.to_csv function.

    Note that this function is identical to save_csv() except that it has a default sep value of
      '\t' instead of ','.
    '''
    import pandas
    from neuropythy.util import to_dataframe
    d = to_dataframe(dat)
    if any(filename.endswith(s) for s in ('.gz', '.bz2', '.lzma')):
        with gzip.open(filename, 'wt', newline='') as fl:
            d.to_csv(fl, sep=sep, index=index, **kw)
    else:
        with open(filename, 'wt') as fl:
            d.to_csv(fl, sep=sep, index=index, **kw)
    return dat

# Nifti!
Example #28
Source File: app.py From svviz with MIT License | 6 votes |
def saveState(dataHub):
    import pickle as pickle
    import gzip
    pickle.dump(dataHub, gzip.open(dataHub.args.save_state, "wb"))
    logging.warn("^"*20 + " saving state to pickle and exiting " + "^"*20)
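svviz presumably restores this pickle elsewhere; as a generic counterpart (load_state and its argument are hypothetical names, not part of the project above), reading it back is a small wrapper around gzip.open and pickle.load:

import gzip
import pickle

def load_state(path):
    # Reverse of saveState above: read a gzip-compressed pickle back in.
    # Only unpickle files you trust; pickle can execute arbitrary code.
    with gzip.open(path, 'rb') as f:
        return pickle.load(f)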
Example #29
Source File: capsulenet.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def read_data(label_url, image_url):
    with gzip.open(download_data(label_url)) as flbl:
        magic, num = struct.unpack(">II", flbl.read(8))
        label = np.fromstring(flbl.read(), dtype=np.int8)
    with gzip.open(download_data(image_url), 'rb') as fimg:
        magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
    return label, image
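One caveat about this last example: np.fromstring is deprecated for binary input in modern NumPy and emits a DeprecationWarning; np.frombuffer is the drop-in replacement for this read-then-parse pattern, as several of the examples above already demonstrate. The same function with that substitution (a sketch; download_data is the project's helper and is not shown here):

import gzip
import struct
import numpy as np

def read_data(label_url, image_url):
    # Identical to the example above, with np.frombuffer replacing np.fromstring.
    with gzip.open(download_data(label_url)) as flbl:
        magic, num = struct.unpack(">II", flbl.read(8))
        label = np.frombuffer(flbl.read(), dtype=np.int8)
    with gzip.open(download_data(image_url), 'rb') as fimg:
        magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        image = np.frombuffer(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
    return label, image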