Python gzip.open() Examples

The following are code examples for showing how to use gzip.open(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: Coulomb   Author: DynamoDS   File: data_files_to_sessions.py    MIT License 8 votes vote down vote up
def flush(session_maps):
    """Append every buffered line to the gzip file of its session.

    Args:
        session_maps: mapping of session id -> list of serialized JSON
            lines. Each line must decode to an object whose "SessionID"
            equals the key it is stored under.

    The target path of each session comes from ``ensure_have_session``.
    ``log``, ``sessionIDSet`` and ``newSessionIDSet`` are read from module
    scope (presumably maintained by the caller -- confirm).
    """
    lns_count = sum(len(sessions_lst) for sessions_lst in session_maps.values())

    log ("Flushing lines/sessions: " + str(lns_count) + " / " + str(len(session_maps.keys())))

    for session_id in sorted(session_maps.keys()):
        log ("Flushing session: " + session_id)
        sessionPath = ensure_have_session(session_id)
        log ("Session path: " + sessionPath)

        # Close the stream deterministically: flush() alone leaves the file
        # handle open and the gzip member unfinished until garbage collection.
        with gzip.open(sessionPath, 'ab') as o:
            for ln in session_maps[session_id]:
                # Sanity check that the line was bucketed under the right session.
                assert (json.loads(ln)["SessionID"] == session_id)
                o.write(ln)
        log ("Flushing complete for: " + session_id)

    log ("Flushing complete. Total sessions:\t" + str(len(sessionIDSet)) + "\tTotal new sessions:\t" + str(len(newSessionIDSet)))
Example 2
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 7 votes vote down vote up
def test_stream_padding(self):
        # Test for bug #1543303.
        # Create an archive with no members, read back its (decompressed)
        # bytes and check that it holds exactly RECORDSIZE NUL bytes.
        tar = tarfile.open(tmpname, self.mode)
        tar.close()

        if self.mode.endswith("gz"):
            fobj = gzip.GzipFile(tmpname)
            try:
                data = fobj.read()
            finally:
                fobj.close()
        elif self.mode.endswith("bz2"):
            dec = bz2.BZ2Decompressor()
            # Read through a context manager so the handle is not leaked.
            with open(tmpname, "rb") as fobj:
                data = fobj.read()
            data = dec.decompress(data)
            self.assertTrue(len(dec.unused_data) == 0,
                    "found trailing data")
        else:
            with open(tmpname, "rb") as fobj:
                data = fobj.read()

        self.assertTrue(data.count("\0") == tarfile.RECORDSIZE,
                         "incorrect zero padding")
Example 3
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_extract_hardlink(self):
        # Test hardlink extraction (e.g. bug #857297).
        # The regular file is extracted first; the link members extracted
        # afterwards must carry the same content checksum (md5_regtype).
        with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar:
            tar.extract("ustar/regtype", TEMPDIR)
            self.addCleanup(os.remove, os.path.join(TEMPDIR, "ustar/regtype"))

            for member in ("ustar/lnktype", "ustar/symtype"):
                tar.extract(member, TEMPDIR)
                extracted = os.path.join(TEMPDIR, member)
                self.addCleanup(os.remove, extracted)
                with open(extracted, "rb") as f:
                    content = f.read()
                self.assertEqual(md5sum(content), md5_regtype)
Example 4
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: Close the internal file object in the TarFile
        # constructor in case of an error. For the test we rely on
        # the fact that opening an empty file raises a ReadError.
        empty = os.path.join(TEMPDIR, "empty")
        # Create the empty file and close it right away instead of leaving
        # the handle to the garbage collector.
        with open(empty, "wb") as fobj:
            fobj.write("")

        try:
            # Allocate without running __init__ so the instance is still
            # reachable after the constructor raises.
            tar = object.__new__(tarfile.TarFile)
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                self.assertTrue(tar.fileobj.closed)
            else:
                self.fail("ReadError not raised")
        finally:
            os.remove(empty)
Example 5
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_compare_members(self):
        # Step through a freshly opened archive (tar1) and self.tar (tar2)
        # member by member and require both to yield the same file data.
        tar1 = tarfile.open(tarname, encoding="iso8859-1")
        try:
            tar2 = self.tar

            while True:
                t1 = tar1.next()
                t2 = tar2.next()
                if t1 is None:
                    break
                self.assertTrue(t2 is not None, "stream.next() failed.")

                if t2.islnk() or t2.issym():
                    # Links are expected to be rejected by tar2.extractfile().
                    self.assertRaises(tarfile.StreamError, tar2.extractfile, t2)
                    continue

                v1 = tar1.extractfile(t1)
                v2 = tar2.extractfile(t2)
                if v1 is None:
                    continue
                self.assertTrue(v2 is not None, "stream.extractfile() failed")
                self.assertTrue(v1.read() == v2.read(), "stream extraction failed")
        finally:
            # Close the reference archive even when an assertion fails.
            tar1.close()
Example 6
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_pax_global_headers(self):
        # Each pax member must report the expected user/group names and the
        # same VENDOR.umlauts pax header value.
        tar = tarfile.open(tarname, encoding="iso8859-1")
        umlauts = u"�������"
        expectations = (
            ("pax/regtype1", "foo", "bar"),
            ("pax/regtype2", "", "bar"),
            ("pax/regtype3", "tarfile", "tarfile"),
        )

        for member, uname, gname in expectations:
            tarinfo = tar.getmember(member)
            self.assertEqual(tarinfo.uname, uname)
            self.assertEqual(tarinfo.gname, gname)
            self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), umlauts)
Example 7
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_link_size(self):
        # A hard link to an already recorded file must be reported with
        # size 0. Skipped silently on platforms without os.link.
        if hasattr(os, "link"):
            link = os.path.join(TEMPDIR, "link")
            target = os.path.join(TEMPDIR, "link_target")
            with open(target, "wb") as fobj:
                fobj.write("aaa")
            os.link(target, link)
            try:
                tar = tarfile.open(tmpname, self.mode)
                try:
                    # Record the link target in the inodes list.
                    tar.gettarinfo(target)
                    tarinfo = tar.gettarinfo(link)
                    self.assertEqual(tarinfo.size, 0)
                finally:
                    # The archive was previously left open.
                    tar.close()
            finally:
                os.remove(target)
                os.remove(link)
Example 8
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_exclude(self):
        # Add a directory holding three files while excluding every regular
        # file; only the directory entry itself may end up in the archive.
        tempdir = os.path.join(TEMPDIR, "exclude")
        os.mkdir(tempdir)
        try:
            for name in ("foo", "bar", "baz"):
                name = os.path.join(tempdir, name)
                open(name, "wb").close()

            exclude = os.path.isfile

            tar = tarfile.open(tmpname, self.mode, encoding="iso8859-1")
            try:
                # Passing exclude= is expected to emit a DeprecationWarning.
                with test_support.check_warnings(("use the filter argument",
                                                  DeprecationWarning)):
                    tar.add(tempdir, arcname="empty_dir", exclude=exclude)
            finally:
                tar.close()

            tar = tarfile.open(tmpname, "r")
            try:
                self.assertEqual(len(tar.getmembers()), 1)
                self.assertEqual(tar.getnames()[0], "empty_dir")
            finally:
                # The reading archive was previously never closed.
                tar.close()
        finally:
            shutil.rmtree(tempdir)
Example 9
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def _test_pathname(self, path, cmp_path=None, dir=False):
        # Archive an empty file (or an empty directory when `dir` is true)
        # under the member name `path`, read the first member back and
        # compare its stored name with `cmp_path`, or with `path` using "/"
        # separators when `cmp_path` is not given.
        source = os.path.join(TEMPDIR, "foo")
        if dir:
            os.mkdir(source)
        else:
            open(source, "w").close()

        writer = tarfile.open(tmpname, self.mode)
        writer.add(source, arcname=path)
        writer.close()

        reader = tarfile.open(tmpname, "r")
        member = reader.next()
        reader.close()

        if dir:
            os.rmdir(source)
        else:
            os.remove(source)

        expected = cmp_path or path.replace(os.sep, "/")
        self.assertEqual(member.name, expected)
Example 10
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_cwd(self):
        # Test adding the current working directory.
        # Every stored name must be "." or start with "./".
        cwd = os.getcwd()
        os.chdir(TEMPDIR)
        try:
            open("foo", "w").close()

            tar = tarfile.open(tmpname, self.mode)
            try:
                tar.add(".")
            finally:
                tar.close()

            tar = tarfile.open(tmpname, "r")
            try:
                for t in tar:
                    self.assertTrue(t.name == "." or t.name.startswith("./"))
            finally:
                # Close the reader even when a name check fails.
                tar.close()
        finally:
            os.chdir(cwd)
Example 11
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def _test(self, name, link=None):
        # Write one member called `name` (a hard link to `link` when given)
        # in GNU format, check the archive offset against _calc_size(), then
        # read the member back and compare name and linkname.
        tarinfo = tarfile.TarInfo(name)
        if link:
            tarinfo.linkname = link
            tarinfo.type = tarfile.LNKTYPE

        tar = tarfile.open(tmpname, "w")
        try:
            tar.format = tarfile.GNU_FORMAT
            tar.addfile(tarinfo)

            v1 = self._calc_size(name, link)
            v2 = tar.offset
            self.assertTrue(v1 == v2, "GNU longname/longlink creation failed")
        finally:
            tar.close()

        tar = tarfile.open(tmpname)
        try:
            member = tar.next()
            self.assertIsNotNone(member,
                    "unable to read longname member")
            self.assertEqual(tarinfo.name, member.name,
                    "unable to read longname member")
            self.assertEqual(tarinfo.linkname, member.linkname,
                    "unable to read longname member")
        finally:
            # The reading archive was previously never closed.
            tar.close()
Example 12
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_pax_extended_header(self):
        # The fields from the pax header have priority over the
        # TarInfo.
        pax_headers = {u"path": u"foo", u"uid": u"123"}

        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
        try:
            t = tarfile.TarInfo()
            t.name = u"���"     # non-ASCII
            t.uid = 8**8        # too large
            t.pax_headers = pax_headers
            tar.addfile(t)
        finally:
            tar.close()

        tar = tarfile.open(tmpname, encoding="iso8859-1")
        try:
            t = tar.getmembers()[0]
            self.assertEqual(t.pax_headers, pax_headers)
            self.assertEqual(t.name, "foo")
            self.assertEqual(t.uid, 123)
        finally:
            # The reading archive was previously never closed.
            tar.close()
Example 13
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_unicode_filename_error(self):
        # With encoding="ascii" and errors="strict", non-ASCII names must be
        # rejected with UnicodeError; a non-unicode str name is only
        # rejected for PAX_FORMAT.
        tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
        try:
            tarinfo = tarfile.TarInfo()

            tarinfo.name = "���"
            if self.format == tarfile.PAX_FORMAT:
                self.assertRaises(UnicodeError, tar.addfile, tarinfo)
            else:
                tar.addfile(tarinfo)

            tarinfo.name = u"���"
            self.assertRaises(UnicodeError, tar.addfile, tarinfo)

            tarinfo.name = "foo"
            tarinfo.uname = u"���"
            self.assertRaises(UnicodeError, tar.addfile, tarinfo)
        finally:
            # The archive was previously never closed.
            tar.close()
Example 14
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def _test_partial_input(self, mode):
        # Feed tarfile.open() every prefix of a bz2-compressed header. The
        # file object below raises AssertionError when it is read again
        # after end-of-data without a seek in between.
        class EofGuardStringIO(StringIO.StringIO):
            hit_eof = False

            def read(self, n):
                if self.hit_eof:
                    raise AssertionError("infinite loop detected in tarfile.open()")
                self.hit_eof = self.pos == self.len
                return StringIO.StringIO.read(self, n)

            def seek(self, *args):
                # Seeking re-arms the guard.
                self.hit_eof = False
                return StringIO.StringIO.seek(self, *args)

        data = bz2.compress(tarfile.TarInfo("foo").tobuf())
        for end in range(len(data) + 1):
            truncated = data[:end]
            try:
                tarfile.open(fileobj=EofGuardStringIO(truncated), mode=mode)
            except tarfile.ReadError:
                # ReadErrors are expected for truncated input.
                pass
Example 15
Project: natural-questions   Author: google-research-datasets   File: simplify_nq_data.py    Apache License 2.0 6 votes vote down vote up
def main(_):
  """Simplifies all shards of a split into one gzipped JSONL file.

  Every line of every `nq-*-??.jsonl.gz` shard in `FLAGS.data_dir` is run
  through `text_utils.simplify_nq_example` and written, one JSON object per
  line, to `simplified-nq-<split>.jsonl.gz` in the same directory. Progress
  is printed every 100 examples.
  """
  data_dir = FLAGS.data_dir
  split = os.path.basename(data_dir)
  outpath = os.path.join(data_dir, "simplified-nq-{}.jsonl.gz".format(split))
  shard_pattern = os.path.join(data_dir, "nq-*-??.jsonl.gz")
  with gzip.open(outpath, "wb") as fout:
    num_processed = 0
    start = time.time()
    for inpath in glob.glob(shard_pattern):
      print("Processing {}".format(inpath))
      with gzip.open(inpath, "rb") as fin:
        for raw_line in fin:
          example = json.loads(raw_line.decode("utf8", "strict"))
          simplified = text_utils.simplify_nq_example(example)
          serialized = json.dumps(simplified) + u"\n"
          fout.write(serialized.encode("utf8"))
          num_processed += 1
          if num_processed % 100 == 0:
            elapsed = time.time() - start
            print("Processed {} examples in {}.".format(num_processed,
                                                        elapsed))
Example 16
Project: deep-siamese-text-similarity   Author: dhwajraj   File: input_helpers.py    MIT License 6 votes vote down vote up
def loadW2V(self,emb_path, type="bin"):
        """Load pre-trained word embeddings into ``self.pre_emb``.

        Args:
            emb_path: path of the embedding file.
            type: "textgz" for a gzip-compressed text file, "text" for a
                plain text file, anything else for the binary word2vec
                format loaded through ``Word2Vec``.

        For the text formats each line is split on whitespace: the first
        token (lower-cased) is the key and the remaining tokens are stored
        as a numpy array. Blank lines are skipped.
        """
        print("Loading W2V data...")
        num_keys = 0
        if type in ("textgz", "text"):
            # The branches used to be `if / if / else`, so "textgz" input
            # also fell through to the binary loader below.
            opener = gzip.open if type == "textgz" else open
            # this seems faster than gensim non-binary load
            with opener(emb_path) as emb_file:
                for line in emb_file:
                    l = line.strip().split()
                    if not l:
                        continue
                    st = l[0].lower()
                    self.pre_emb[st] = np.asarray(l[1:])
            num_keys = len(self.pre_emb)
        else:
            self.pre_emb = Word2Vec.load_word2vec_format(emb_path,binary=True)
            self.pre_emb.init_sims(replace=True)
            num_keys = len(self.pre_emb.vocab)
        print("loaded word2vec len ", num_keys)
        gc.collect()
Example 17
Project: deep-siamese-text-similarity   Author: dhwajraj   File: input_helpers.py    MIT License 6 votes vote down vote up
def getTsvData(self, filepath):
        """Read sentence pairs and labels from a tab-separated file.

        Each usable line has at least three columns: text1, text2 and an
        integer label. Lines with fewer columns are skipped. The two texts
        are lower-cased and assigned to the first or second output at
        random.

        Returns:
            Tuple ``(x1, x2, y)`` of numpy arrays with one entry per
            accepted line.
        """
        print("Loading training data from "+filepath)
        x1=[]
        x2=[]
        y=[]
        # positive samples from file
        with open(filepath) as tsv_file:
            for line in tsv_file:
                l=line.strip().split("\t")
                # Three columns are read below; the previous `< 2` check let
                # two-column rows through and crashed on l[2].
                if len(l)<3:
                    continue
                if random() > 0.5:
                    x1.append(l[0].lower())
                    x2.append(l[1].lower())
                else:
                    x1.append(l[1].lower())
                    x2.append(l[0].lower())
                y.append(int(l[2]))
        return np.asarray(x1),np.asarray(x2),np.asarray(y)
Example 18
Project: deep-siamese-text-similarity   Author: dhwajraj   File: input_helpers.py    MIT License 6 votes vote down vote up
def dumpValidation(self,x1_text,x2_text,y,shuffled_index,dev_idx,i):
        """Write the validation slice of the shuffled data to a text file.

        The three inputs are reordered with ``shuffled_index``; everything
        from position ``dev_idx`` onward is written to
        ``'validation.txt' + str(i)`` as ``label<TAB>text1<TAB>text2`` lines.
        """
        print("dumping validation "+str(i))
        x1_dev = x1_text[shuffled_index][dev_idx:]
        x2_dev = x2_text[shuffled_index][dev_idx:]
        y_dev = y[shuffled_index][dev_idx:]
        with open('validation.txt'+str(i),'w') as out:
            for first, second, label in zip(x1_dev, x2_dev, y_dev):
                out.write("\t".join((str(label), first, second)) + "\n")
        del x1_dev
        del y_dev
    
    # Data Preparation
    # ================================================== 
Example 19
Project: Coulomb   Author: DynamoDS   File: uses_list_at_level.py    MIT License 5 votes vote down vote up
def updateResultFile():
    """Rewrite the result file at ``outPath`` from ``dtWsCount``.

    Writes a header row followed by one row per key of ``dtWsCount`` in
    sorted order; each value is indexed with ``False`` and ``True`` to fill
    the "No L@L" and "L@L" columns. Both names come from module scope.
    """
    # The context manager closes the file (flushing all rows); the handle
    # used to stay open after the function returned.
    with open(outPath, 'w') as outF:
        outF.write("Date, No L@L, L@L\n")
        for k in sorted(dtWsCount):
            v = dtWsCount[k]
            outF.write(k + ", " + str(v[False]) + ", " + str(v[True]) + "\n")
Example 20
Project: Coulomb   Author: DynamoDS   File: extract_sessions_buffered.py    MIT License 5 votes vote down vote up
def flush(outpath, session_maps):
    """Append buffered session lines to per-session gzip files.

    Args:
        outpath: root output directory. Each session file lives in a
            sub-folder named after the first three characters of its id.
        session_maps: mapping of session id -> list of lines to append.

    Also appends the paths buffered in ``completedInputFiles_buffer`` to the
    file at ``completedInputListPath`` and clears the buffer. That buffer,
    ``completedInputFiles``, ``existing_sessions_path``, ``sessionIDSet``
    and ``newSessionIDSet`` are module-level state defined elsewhere.
    """
    lns_count = sum(len(sessions_lst) for sessions_lst in session_maps.values())

    log ("Flushing lines/sessions: " + str(lns_count) + " / " + str(len(session_maps.keys())))

    for session in session_maps.keys():
        # Verify that the folders exist. The `outpath` argument is used
        # here; the body used to read a global `outPath` and ignore it.
        sessions_folder_path = join(outpath, session[0:3])
        if sessions_folder_path not in existing_sessions_path:
            if not os.path.exists(sessions_folder_path):
                os.makedirs(sessions_folder_path)
            existing_sessions_path.add(sessions_folder_path)

        sessionPath = join(sessions_folder_path, session + ".gz")
        if not os.path.exists(sessionPath):
            newSessionIDSet.add(session)

        # Close each gzip stream deterministically instead of only flushing.
        with gzip.open(sessionPath, 'a') as o:
            for ln in session_maps[session]:
                o.write(ln)

        # After the first session the buffer is empty, so later iterations
        # append nothing here.
        with open(completedInputListPath, 'a') as f:
            for filePath in completedInputFiles_buffer:
                completedInputFiles.add(filePath)
                f.write(filePath + "\n")
        completedInputFiles_buffer.clear()

    log ("Flushing complete. Total sessions:\t" + str(len(sessionIDSet)) + "\tTotal new sessions:\t" + str(len(newSessionIDSet)))
Example 21
Project: Coulomb   Author: DynamoDS   File: session_merger.py    MIT License 5 votes vote down vote up
def countLinesInGzipFile(path):
  """Return the number of lines in the gzip-compressed file at `path`."""
  with gzip.open(path) as stream:
    return sum(1 for _ in stream)
Example 22
Project: pyblish-win   Author: pyblish   File: fileinput.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def hook_compressed(filename, mode):
    """Open `filename` with an opener chosen by its extension.

    ".gz" files go through gzip.open, ".bz2" files through bz2.BZ2File and
    everything else through the built-in open. The compression modules are
    imported only when needed.
    """
    _, ext = os.path.splitext(filename)
    if ext == '.gz':
        import gzip
        opener = gzip.open
    elif ext == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
Example 23
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def setUp(self):
        # Open the archive named by self.tarname with self.mode and the
        # iso8859-1 encoding, and expose it to the tests as self.tar.
        options = {"mode": self.mode, "encoding": "iso8859-1"}
        self.tar = tarfile.open(self.tarname, **options)
Example 24
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_fileobj_readlines(self):
        # readlines() on the object returned by extractfile() must match
        # readlines() on the member extracted to disk.
        self.tar.extract("ustar/regtype", TEMPDIR)
        tarinfo = self.tar.getmember("ustar/regtype")
        # Read the extracted copy through a context manager so the handle
        # is closed; it used to stay open.
        with open(os.path.join(TEMPDIR, "ustar/regtype"), "rU") as fobj1:
            lines1 = fobj1.readlines()
        fobj2 = self.tar.extractfile(tarinfo)

        lines2 = fobj2.readlines()
        self.assertTrue(lines1 == lines2,
                "fileobj.readlines() failed")
        self.assertTrue(len(lines2) == 114,
                "fileobj.readlines() failed")
        self.assertTrue(lines2[83] ==
                "I will gladly admit that Python is not the fastest running scripting language.\n",
                "fileobj.readlines() failed")
Example 25
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def setUp(self):
        # Open the archive named by self.tarname with self.mode and expose
        # it to the tests as self.tar.
        archive = tarfile.open(self.tarname, mode=self.mode)
        self.tar = archive
Example 26
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_empty_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # An archive written without members must open without a ReadError
        # and list no members. Note that an empty tar archive is not the
        # same as an empty file!
        write_mode = self.mode.replace("r", "w")
        tarfile.open(tmpname, write_mode).close()
        try:
            tar = tarfile.open(tmpname, self.mode)
            tar.getnames()
        except tarfile.ReadError:
            self.fail("tarfile.open() failed on empty archive")
        else:
            self.assertListEqual(tar.getmembers(), [])
Example 27
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_null_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # A zero-byte file is not an empty archive: opening it must raise
        # ReadError, both with the mode under test and with the default.
        open(tmpname, "wb").close()
        for open_args in ((tmpname, self.mode), (tmpname,)):
            self.assertRaises(tarfile.ReadError, tarfile.open, *open_args)
Example 28
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_non_existent_tarfile(self):
        # Test for issue11513: prevent non-existent gzipped tarfiles raising
        # multiple exceptions.
        # Modes containing "|" are expected to raise OSError, all others
        # IOError; either way errno must be ENOENT.
        if '|' in self.mode:
            exctype = OSError
        else:
            exctype = IOError
        with self.assertRaisesRegexp(exctype, "xxx") as caught:
            tarfile.open("xxx", self.mode)
        self.assertEqual(caught.exception.errno, errno.ENOENT)
Example 29
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_no_name_argument(self):
        # Without a name argument the archive name must be the absolute
        # path of the file object's name.
        with open(self.tarname, "rb") as fobj:
            tar = tarfile.open(fileobj=fobj, mode=self.mode)
            try:
                self.assertEqual(tar.name, os.path.abspath(fobj.name))
            finally:
                # Both the archive and the file used to be left open.
                tar.close()
Example 30
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_no_name_attribute(self):
        # A file object without a "name" attribute must give an archive
        # whose name is None.
        with open(self.tarname, "rb") as source:
            data = source.read()
        fobj = StringIO.StringIO(data)
        self.assertRaises(AttributeError, getattr, fobj, "name")
        tar = tarfile.open(fileobj=fobj, mode=self.mode)
        self.assertEqual(tar.name, None)
Example 31
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_empty_name_attribute(self):
        # A file object whose "name" is the empty string must give an
        # archive whose name is None.
        with open(self.tarname, "rb") as source:
            data = source.read()
        fobj = StringIO.StringIO(data)
        fobj.name = ""
        tar = tarfile.open(fileobj=fobj, mode=self.mode)
        self.assertEqual(tar.name, None)
Example 32
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_illegal_mode_arg(self):
        # Each unsupported mode string must make self.taropen raise a
        # ValueError whose message matches 'mode must be '.
        open(tmpname, 'wb').close()
        self.addCleanup(os.unlink, tmpname)
        for bad_mode in ('q', 'rw', ''):
            with self.assertRaisesRegexp(ValueError, 'mode must be '):
                self.taropen(tmpname, bad_mode)
Example 33
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_fileobj_with_offset(self):
        # Skip the first member and store values from the second member
        # of the testtar.
        tar = tarfile.open(self.tarname, mode=self.mode)
        try:
            tar.next()
            t = tar.next()
            name = t.name
            offset = t.offset
            data = tar.extractfile(t).read()
        finally:
            tar.close()

        # Open the testtar and seek to the offset of the second member.
        if self.mode.endswith(":gz"):
            _open = gzip.GzipFile
        elif self.mode.endswith(":bz2"):
            _open = bz2.BZ2File
        else:
            _open = open
        fobj = _open(self.tarname, "rb")
        try:
            fobj.seek(offset)

            # Test if the tarfile starts with the second member.
            # tarfile.open is called directly rather than through the
            # already closed instance.
            tar = tarfile.open(self.tarname, mode="r:", fileobj=fobj)
            try:
                t = tar.next()
                self.assertEqual(t.name, name)
                # Read to the end of fileobj and test if seeking back to the
                # beginning works.
                tar.getmembers()
                self.assertEqual(tar.extractfile(t).read(), data,
                        "seek back did not work")
            finally:
                tar.close()
        finally:
            # The underlying file object used to be left open.
            fobj.close()
Example 34
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_extractall(self):
        # Test if extractall() correctly restores directory permissions
        # and times (see issue1735).
        tar = tarfile.open(tarname, encoding="iso8859-1")
        try:
            directories = [t for t in tar if t.isdir()]
            tar.extractall(TEMPDIR, directories)
            for tarinfo in directories:
                path = os.path.join(TEMPDIR, tarinfo.name)
                if sys.platform != "win32":
                    # Win32 has no support for fine grained permissions.
                    # 0o777 is the octal spelling accepted by Python 2.6+
                    # and Python 3 alike.
                    self.assertEqual(tarinfo.mode & 0o777, os.stat(path).st_mode & 0o777)
                self.assertEqual(tarinfo.mtime, os.path.getmtime(path))
        finally:
            # Close the archive even when a comparison fails.
            tar.close()
Example 35
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_parallel_iteration(self):
        # Issue #16601: Restarting iteration over tarfile continued
        # from where it left off.
        # Two iterators over one archive must yield the same members.
        with tarfile.open(self.tarname) as archive:
            pairs = zip(archive, archive)
            for first, second in pairs:
                self.assertEqual(first.offset, second.offset)
                self.assertEqual(first.name, second.name)
Example 36
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _testfunc_fileobj(self, name, mode):
        # Open `name` through an explicit file object with `mode`; a
        # ReadError fails the test.
        try:
            # Close both the archive and the file object; they used to be
            # left to the garbage collector.
            with open(name, "rb") as fobj:
                tarfile.open(name, mode, fileobj=fobj).close()
        except tarfile.ReadError:
            self.fail()
Example 37
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _test_modes(self, testfunc):
        # Run `testfunc(name, mode)` for every mode that must open each
        # test archive, and check that mismatching archive/mode pairs
        # raise ReadError. The gzip and bz2 parts run only when the
        # corresponding module object is truthy.
        for mode in ("r", "r:", "r:*", "r|", "r|*"):
            testfunc(tarname, mode)

        if gzip:
            for name, mode in ((tarname, "r:gz"), (tarname, "r|gz"),
                               (gzipname, "r:"), (gzipname, "r|")):
                self.assertRaises(tarfile.ReadError, tarfile.open, name, mode=mode)

            for mode in ("r", "r:*", "r:gz", "r|*", "r|gz"):
                testfunc(gzipname, mode)

        if bz2:
            for name, mode in ((tarname, "r:bz2"), (tarname, "r|bz2"),
                               (bz2name, "r:"), (bz2name, "r|")):
                self.assertRaises(tarfile.ReadError, tarfile.open, name, mode=mode)

            for mode in ("r", "r:*", "r:bz2", "r|*", "r|bz2"):
                testfunc(bz2name, mode)
Example 38
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_detect_stream_bz2(self):
        # Originally, tarfile's stream detection looked for the string
        # "BZh91" at the start of the file. This is incorrect because
        # the '9' represents the blocksize (900kB). If the file was
        # compressed using another blocksize autodetection fails.
        with open(tarname, "rb") as source:
            raw = source.read()

        # Compress with blocksize 100kB, the file starts with "BZh11".
        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as compressed:
            compressed.write(raw)

        self._testfunc_file(tmpname, "r|*")
Example 39
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_find_pax_umlauts(self):
        # Look up the pax member with a non-ASCII name and check its size
        # and checksum through _test_member.
        archive = tarfile.open(self.tarname, mode=self.mode, encoding="iso8859-1")
        self.tar = archive
        member = self.tar.getmember("pax/umlauts-�������")
        self._test_member(member, size=7011, chksum=md5_regtype)
Example 40
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_truncated_longname(self):
        # Copy only the first 3 * 512 bytes starting at the long-name
        # member's offset; opening that fragment must raise ReadError.
        longname = self.subdir + "/" + "123/" * 125 + "longname"
        member = self.tar.getmember(longname)
        raw = self.tar.fileobj
        raw.seek(member.offset)
        fragment = raw.read(3 * 512)
        fobj = StringIO.StringIO(fragment)
        self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj)
Example 41
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_pax_number_fields(self):
        # All following number fields are read from the pax header.
        tar = tarfile.open(tarname, encoding="iso8859-1")
        try:
            tarinfo = tar.getmember("pax/regtype4")
            self.assertEqual(tarinfo.size, 7011)
            self.assertEqual(tarinfo.uid, 123)
            self.assertEqual(tarinfo.gid, 123)
            self.assertEqual(tarinfo.mtime, 1041808783.0)
            self.assertEqual(type(tarinfo.mtime), float)
            self.assertEqual(float(tarinfo.pax_headers["atime"]), 1041808783.0)
            self.assertEqual(float(tarinfo.pax_headers["ctime"]), 1041808783.0)
        finally:
            # The archive was previously never closed.
            tar.close()
Example 42
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_fileobj_no_close(self):
        # Closing an archive must not close a file object supplied by the
        # caller, and dropping the archive afterwards must not change it.
        buf = StringIO.StringIO()
        archive = tarfile.open(fileobj=buf, mode=self.mode)
        archive.addfile(tarfile.TarInfo("foo"))
        archive.close()
        self.assertTrue(buf.closed is False, "external fileobjs must never closed")
        # Issue #20238: Incomplete gzip output with mode="w:gz"
        snapshot = buf.getvalue()
        del archive
        test_support.gc_collect()
        self.assertFalse(buf.closed)
        self.assertEqual(snapshot, buf.getvalue())
Example 43
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_100_char_name(self):
        # The name field in a tar header stores strings of at most 100 chars.
        # If a string is shorter than 100 chars it has to be padded with '\0',
        # which implies that a string of exactly 100 chars is stored without
        # a trailing '\0'.
        name = "0123456789" * 10
        tar = tarfile.open(tmpname, self.mode)
        try:
            t = tarfile.TarInfo(name)
            tar.addfile(t)
        finally:
            tar.close()

        tar = tarfile.open(tmpname)
        try:
            self.assertTrue(tar.getnames()[0] == name,
                    "failed to store 100 char filename")
        finally:
            # Close the reader even when the assertion fails.
            tar.close()
Example 44
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_tar_size(self):
        # Test for bug #1013882.
        # After adding a small file the archive on disk must not be empty.
        tar = tarfile.open(tmpname, self.mode)
        try:
            path = os.path.join(TEMPDIR, "file")
            with open(path, "wb") as fobj:
                fobj.write("aaa")
            tar.add(path)
        finally:
            # Close the archive even if creating or adding the file fails.
            tar.close()
        self.assertTrue(os.path.getsize(tmpname) > 0,
                "tarfile is empty")

    # The test_*_size tests test for bug #1167128. 
Example 45
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_directory_size(self):
        # The TarInfo built for a directory must report a size of zero.
        path = os.path.join(TEMPDIR, "directory")
        os.mkdir(path)
        try:
            tar = tarfile.open(tmpname, self.mode)
            try:
                tarinfo = tar.gettarinfo(path)
                self.assertEqual(tarinfo.size, 0)
            finally:
                # The archive used to be left open; close it on every path.
                tar.close()
        finally:
            os.rmdir(path)
Example 46
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_symlink_size(self):
        # The TarInfo built for a symbolic link must report a size of zero.
        # Nothing is checked on platforms that lack os.symlink.
        if hasattr(os, "symlink"):
            path = os.path.join(TEMPDIR, "symlink")
            os.symlink("link_target", path)
            try:
                tar = tarfile.open(tmpname, self.mode)
                try:
                    tarinfo = tar.gettarinfo(path)
                    self.assertEqual(tarinfo.size, 0)
                finally:
                    # The archive used to be left open; close it on every path.
                    tar.close()
            finally:
                os.remove(path)
Example 47
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_add_self(self):
        # Test for #1257255.
        dstname = os.path.abspath(tmpname)

        tar = tarfile.open(tmpname, self.mode)
        try:
            self.assertTrue(tar.name == dstname, "archive name must be absolute")

            tar.add(dstname)
            self.assertTrue(tar.getnames() == [], "added the archive to itself")

            # Repeat the attempt from inside TEMPDIR; restore the working
            # directory even if add() raises so later tests are unaffected.
            cwd = os.getcwd()
            os.chdir(TEMPDIR)
            try:
                tar.add(dstname)
            finally:
                os.chdir(cwd)
            self.assertTrue(tar.getnames() == [], "added the archive to itself")
        finally:
            # The archive used to be left open; close it on every path.
            tar.close()
Example 48
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_extractall_symlinks(self):
        # extractall() must cope with an archive holding a regular file and a
        # symlink to it when both already exist in the destination directory.
        workdir = os.path.join(TEMPDIR, "testsymlinks")
        archive_path = os.path.join(TEMPDIR, "testsymlinks.tar")
        os.mkdir(workdir)
        try:
            regular = os.path.join(workdir, 'source')
            link = os.path.join(workdir, 'symlink')
            with open(regular, 'w') as handle:
                handle.write('something\n')
            os.symlink(regular, link)
            archive = tarfile.open(archive_path, 'w')
            for member in (regular, link):
                archive.add(member, arcname=os.path.basename(member))
            archive.close()
            # Unpack on top of the directory that still holds the originals;
            # this should not raise OSError: [Errno 17] File exists
            archive = tarfile.open(archive_path, 'r')
            try:
                archive.extractall(path=workdir)
            except OSError:
                self.fail("extractall failed with symlinked files")
            finally:
                archive.close()
        finally:
            os.unlink(archive_path)
            shutil.rmtree(workdir)
Example 49
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_extractall_broken_symlinks(self):
        # extractall() must cope with an archived symlink whose target has
        # been deleted while the link itself still exists on disk.
        workdir = os.path.join(TEMPDIR, "testsymlinks")
        archive_path = os.path.join(TEMPDIR, "testsymlinks.tar")
        os.mkdir(workdir)
        try:
            regular = os.path.join(workdir, 'source')
            link = os.path.join(workdir, 'symlink')
            with open(regular, 'w') as handle:
                handle.write('something\n')
            os.symlink(regular, link)
            # Only the link is archived, not the file it points to.
            archive = tarfile.open(archive_path, 'w')
            archive.add(link, arcname=os.path.basename(link))
            archive.close()
            # Deleting the target leaves the on-disk link dangling.
            os.unlink(regular)
            # Unpack on top of the directory that still holds the link;
            # this should not raise OSError: [Errno 17] File exists
            archive = tarfile.open(archive_path, 'r')
            try:
                archive.extractall(path=workdir)
            except OSError:
                self.fail("extractall failed with broken symlinked files")
            finally:
                archive.close()
        finally:
            os.unlink(archive_path)
            shutil.rmtree(workdir)
Example 50
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_extractall_hardlinks(self):
        # extractall() must cope with an archive holding a regular file and a
        # hard link to it when both already exist in the destination directory.
        workdir = os.path.join(TEMPDIR, "testsymlinks")
        archive_path = os.path.join(TEMPDIR, "testsymlinks.tar")
        os.mkdir(workdir)
        try:
            regular = os.path.join(workdir, 'source')
            link = os.path.join(workdir, 'symlink')
            with open(regular, 'w') as handle:
                handle.write('something\n')
            # A hard link this time, despite the "symlink" file name.
            os.link(regular, link)
            archive = tarfile.open(archive_path, 'w')
            for member in (regular, link):
                archive.add(member, arcname=os.path.basename(member))
            archive.close()
            # Unpack on top of the directory that still holds the originals;
            # this should not raise OSError: [Errno 17] File exists
            archive = tarfile.open(archive_path, 'r')
            try:
                archive.extractall(path=workdir)
            except OSError:
                self.fail("extractall failed with linked files")
            finally:
                archive.close()
        finally:
            os.unlink(archive_path)
            shutil.rmtree(workdir)
Example 51
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_file_mode(self):
        # Test for issue #8464: Create files with correct
        # permissions.
        if os.path.exists(tmpname):
            os.remove(tmpname)

        # Octal literals use the 0o prefix: same values as before, but valid
        # on Python 2.6+ and Python 3 (the bare "0022" form is Python 2 only).
        original_umask = os.umask(0o022)
        try:
            tar = tarfile.open(tmpname, self.mode)
            tar.close()
            # Keep only the permission bits of the freshly created archive.
            mode = os.stat(tmpname).st_mode & 0o777
            self.assertEqual(mode, 0o644, "wrong file permissions")
        finally:
            # Restore the process-wide umask whatever the outcome.
            os.umask(original_umask)
Example 52
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_issue13639(self):
        # Opening an archive through a unicode path must not raise
        # UnicodeDecodeError.
        try:
            unicode_name = unicode(tmpname, sys.getfilesystemencoding())
            archive = tarfile.open(unicode_name, self.mode)
            with archive:
                pass
        except UnicodeDecodeError:
            self.fail("_Stream failed to write unicode filename")
Example 53
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def setUp(self):
        # Fixture: a small file "foo", a hard link "bar" to it, and an archive
        # opened for writing to which "foo" has already been added.
        self.foo = os.path.join(TEMPDIR, "foo")
        self.bar = os.path.join(TEMPDIR, "bar")

        # The context manager closes the file even if write() raises.
        with open(self.foo, "wb") as fobj:
            fobj.write("foo")

        os.link(self.foo, self.bar)

        self.tar = tarfile.open(tmpname, "w")
        self.tar.add(self.foo)
Example 54
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_pax_global_header(self):
        # Round-trip a set of global pax headers through a PAX_FORMAT archive
        # and check that they are read back unchanged and as unicode objects.
        # NOTE(review): the non-ASCII literals below look mis-encoded in this
        # copy of the file -- confirm against the upstream source.
        pax_headers = {
                u"foo": u"bar",
                u"uid": u"0",
                u"mtime": u"1.23",
                u"test": u"���",
                u"���": u"test"}

        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
                pax_headers=pax_headers)
        tar.addfile(tarfile.TarInfo("test"))
        tar.close()

        # Test if the global header was written correctly.
        tar = tarfile.open(tmpname, encoding="iso8859-1")
        self.assertEqual(tar.pax_headers, pax_headers)
        # The first member must expose the same headers as the archive.
        self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)

        # Test if all the fields are unicode.
        for key, val in tar.pax_headers.iteritems():
            self.assertTrue(type(key) is unicode)
            self.assertTrue(type(val) is unicode)
            if key in tarfile.PAX_NUMBER_FIELDS:
                # Fields listed in PAX_NUMBER_FIELDS must be accepted by the
                # callable stored for them there.
                try:
                    tarfile.PAX_NUMBER_FIELDS[key](val)
                except (TypeError, ValueError):
                    self.fail("unable to convert pax header field")
        # NOTE(review): the archive opened for reading is never closed here.
Example 55
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _test_unicode_filename(self, encoding):
        # Store a member under a unicode name using `encoding`, then check that
        # reading the archive back yields that name as a non-unicode string
        # equal to the original encoded with `encoding`.
        # NOTE(review): the non-ASCII literal below looks mis-encoded in this
        # copy of the file -- confirm against the upstream source.
        tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
        name = u"���"
        tar.addfile(tarfile.TarInfo(name))
        tar.close()

        tar = tarfile.open(tmpname, encoding=encoding)
        self.assertTrue(type(tar.getnames()[0]) is not unicode)
        self.assertEqual(tar.getmembers()[0].name, name.encode(encoding))
        tar.close()
Example 56
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_unicode_argument(self):
        # With an explicit encoding every textual header field of every
        # member must still come back as a plain str.
        archive = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
        for member in archive:
            for field in ("name", "linkname", "uname", "gname"):
                self.assertTrue(type(getattr(member, field)) is str)
        archive.close()
Example 57
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _create_unicode_name(self, name):
        # Write a one-member archive whose pax "path" header is set to `name`.
        archive = tarfile.open(tmpname, "w", format=self.format)
        member = tarfile.TarInfo()
        member.pax_headers["path"] = name
        archive.addfile(member)
        archive.close()
Example 58
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_error_handlers(self):
        # Test if the unicode error handlers work correctly for characters
        # that cannot be expressed in a given encoding.
        # NOTE(review): the non-ASCII literals below look mis-encoded in this
        # copy of the file -- confirm against the upstream source.
        self._create_unicode_name(u"���")

        # Each pair is (errors handler, member name expected when the archive
        # is read with encoding="ascii").
        for handler, name in (("utf-8", u"���".encode("utf8")),
                    ("replace", "???"), ("ignore", "")):
            tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                    errors=handler)
            self.assertEqual(tar.getnames()[0], name)
            # NOTE(review): the archives opened in this loop are never closed.

        # With errors="strict" opening the archive must raise instead.
        self.assertRaises(UnicodeError, tarfile.open, tmpname,
                encoding="ascii", errors="strict")
Example 59
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_error_handler_utf8(self):
        # Create a pathname that has one component representable using
        # iso8859-1 and the other only in iso8859-15.
        # NOTE(review): the non-ASCII literals below look mis-encoded in this
        # copy of the file -- confirm against the upstream source.
        self._create_unicode_name(u"���/�")

        tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                errors="utf-8")
        # Expected name: the first component as a byte string, followed by the
        # UTF-8 encoding of the second component.
        self.assertEqual(tar.getnames()[0], "���/" + u"�".encode("utf8"))
        # NOTE(review): the archive is never closed here.
Example 60
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _add_testfile(self, fileobj=None):
        # Open the test archive in append mode (optionally through `fileobj`)
        # and add a member named "bar" without passing any file data.
        archive = tarfile.open(self.tarname, "a", fileobj=fileobj)
        member = tarfile.TarInfo("bar")
        archive.addfile(member)
        archive.close()
Example 61
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _create_testtar(self, mode="w:"):
        # Build the archive under test: copy the member "ustar/regtype" from
        # the reference archive into self.tarname, renamed to "foo" and
        # written with `mode`.
        src = tarfile.open(tarname, encoding="iso8859-1")
        try:
            t = src.getmember("ustar/regtype")
            t.name = "foo"
            f = src.extractfile(t)
            try:
                tar = tarfile.open(self.tarname, mode)
                try:
                    tar.addfile(t, f)
                finally:
                    tar.close()
            finally:
                # The extracted file object used to be left open.
                f.close()
        finally:
            # The reference archive used to be left open as well.
            src.close()
Example 62
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_empty(self):
        # Start from a freshly written archive with no members, append the
        # test member and run the shared check.
        empty_archive = tarfile.open(self.tarname, "w:")
        empty_archive.close()
        self._add_testfile()
        self._test()
Example 63
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_fileobj(self):
        # Appending must also work when the archive is supplied as an
        # in-memory file object instead of a path.
        self._create_testtar()
        # Read the archive as binary and close it straight away: the previous
        # text-mode open() was never closed and can alter binary data on
        # platforms that translate newlines.
        with open(self.tarname, "rb") as fobj:
            data = fobj.read()
        fobj = StringIO.StringIO(data)
        self._add_testfile(fobj)
        # Rewind so the check reads the archive from its start.
        fobj.seek(0)
        self._test(names=["foo", "bar"], fileobj=fobj)
Example 64
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_append_gz(self):
        # Opening in append mode must be refused once a gzip-compressed
        # archive has been written.
        self._create_testtar("w:gz")
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(tmpname, "a")
Example 65
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_append_bz2(self):
        # Opening in append mode must be refused once a bzip2-compressed
        # archive has been written.
        self._create_testtar("w:bz2")
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(tmpname, "a")

    # Append mode is supposed to fail if the tarfile to append to
    # does not end with a zero block. 
Example 66
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def _test_error(self, data):
        # Write `data` as the raw archive content and check that appending to
        # it is rejected with ReadError.
        # The context manager guarantees the bytes are flushed and the handle
        # closed before the archive is reopened, instead of relying on
        # garbage collection to do it.
        with open(self.tarname, "wb") as fobj:
            fobj.write(data)
        self.assertRaises(tarfile.ReadError, self._add_testfile)
Example 67
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_closed(self):
        # Entering the context of a TarFile that has already been closed
        # is expected to raise IOError.
        archive = tarfile.open(tarname)
        archive.close()
        with self.assertRaises(IOError):
            with archive:
                pass
Example 68
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_exception(self):
        # An IOError raised inside the with-block must propagate unchanged,
        # and the archive must be closed afterwards.
        with self.assertRaises(Exception) as caught:
            with tarfile.open(tarname) as archive:
                raise IOError
        self.assertIsInstance(caught.exception, IOError,
                              "wrong exception raised in context manager")
        self.assertTrue(archive.closed, "context manager failed")
Example 69
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_no_eof(self):
        # __exit__() must not write end-of-archive blocks if an
        # exception was raised.
        try:
            with tarfile.open(tmpname, "w") as tar:
                raise Exception
        except Exception:
            # Swallow only the exception raised above; a bare "except:" would
            # also hide KeyboardInterrupt and SystemExit.
            pass
        self.assertEqual(os.path.getsize(tmpname), 0,
                "context manager wrote an end-of-archive block")
        self.assertTrue(tar.closed, "context manager failed")
Example 70
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_eof(self):
        # Leaving the with-block without an error must close the archive,
        # which writes the end-of-archive blocks and so makes the file
        # non-empty.
        archive = tarfile.open(tmpname, "w")
        with archive:
            pass
        archive_size = os.path.getsize(tmpname)
        self.assertNotEqual(archive_size, 0,
                "context manager wrote no end-of-archive block")
Example 71
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_fileobj(self):
        # Test that __exit__() did not close the external file
        # object.
        fobj = open(tmpname, "wb")
        try:
            try:
                with tarfile.open(fileobj=fobj, mode="w") as tar:
                    raise Exception
            except Exception:
                # Swallow only the exception raised above; a bare "except:"
                # would also hide KeyboardInterrupt and SystemExit.
                pass
            self.assertFalse(fobj.closed, "external file object was closed")
            self.assertTrue(tar.closed, "context manager failed")
        finally:
            # Close the external file even when one of the assertions fails.
            fobj.close()
Example 72
Project: deep-siamese-text-similarity   Author: dhwajraj   File: input_helpers.py    MIT License 5 votes vote down vote up
def getTsvDataCharBased(self, filepath):
        """Build a training set of text pairs from a tab-separated file.

        Every line with at least two tab-separated fields yields one positive
        pair (label 1) made of its first two fields, lower-cased and placed in
        random order.  Negative pairs (label 0) are then generated by pairing
        every collected text with the entry at the same position of a random
        permutation of all collected texts.

        Args:
            filepath: path of the tab-separated input file.

        Returns:
            Tuple ``(x1, x2, y)`` of numpy arrays: the two text columns and
            the labels, positives first.
        """
        print("Loading training data from "+filepath)
        x1 = []
        x2 = []
        y = []
        # positive samples from file; the context manager closes the file,
        # which the previous bare open() never did
        with open(filepath) as tsv_file:
            for line in tsv_file:
                fields = line.strip().split("\t")
                if len(fields) < 2:
                    continue
                first, second = fields[0].lower(), fields[1].lower()
                # Randomise which side of the pair each text ends up on.
                if random() > 0.5:
                    x1.append(first)
                    x2.append(second)
                else:
                    x1.append(second)
                    x2.append(first)
                y.append(1)
        # generate random negative samples
        combined = np.asarray(x1 + x2)
        shuffle_indices = np.random.permutation(np.arange(len(combined)))
        combined_shuff = combined[shuffle_indices]
        # extend() replaces the former xrange() index loop, which only runs on
        # Python 2; the appended elements are the same.
        x1.extend(combined)
        x2.extend(combined_shuff)
        y.extend([0] * len(combined))
        return np.asarray(x1), np.asarray(x2), np.asarray(y)
Example 73
Project: deep-siamese-text-similarity   Author: dhwajraj   File: input_helpers.py    MIT License 5 votes vote down vote up
def getTsvTestData(self, filepath):
        """Load labelled text pairs from a tab-separated file.

        Each line with at least three tab-separated fields is read as
        ``label <TAB> text1 <TAB> text2``; shorter lines are skipped.  Both
        texts are lower-cased and the label is converted with ``int``.

        Args:
            filepath: path of the tab-separated input file.

        Returns:
            Tuple ``(x1, x2, y)`` of numpy arrays: first texts, second texts
            and integer labels.

        Raises:
            ValueError: if a label field cannot be converted to ``int``.
        """
        print("Loading testing/labelled data from "+filepath)
        x1 = []
        x2 = []
        y = []
        # The context manager closes the file, which the previous bare open()
        # never did.
        with open(filepath) as tsv_file:
            for line in tsv_file:
                fields = line.strip().split("\t")
                if len(fields) < 3:
                    continue
                x1.append(fields[1].lower())
                x2.append(fields[2].lower())
                y.append(int(fields[0]))
        return np.asarray(x1), np.asarray(x2), np.asarray(y)
Example 74
Project: Rackfocus   Author: Antrikshy   File: compilation.py    MIT License 5 votes vote down vote up
def fetch_datasets(self):
        """Download the file of every DatasetModel subclass into working_dir.

        Each direct subclass is instantiated, asked for its file name and
        download URL, and the response body is copied to that file name inside
        ``self.working_dir``.
        """
        for model_cls in DatasetModel.__subclasses__():
            dataset = model_cls()
            destination = os.path.join(self.working_dir, dataset.get_file_name())
            source_url = dataset.get_download_url()
            print("Downloading dataset: {}".format(destination))
            with urllib.request.urlopen(source_url) as response:
                with open(destination, 'wb') as out:
                    # Stream the body to disk without reading it all at once.
                    shutil.copyfileobj(response, out)
Example 75
Project: fs_image   Author: facebookincubator   File: yum_dnf_from_snapshot.py    MIT License 4 votes vote down vote up
def _prepare_versionlock_dir(yum_dnf: YumDnf, list_path: Path) -> Path:
    '''
    This prepares a directory containing:
      - the versionlock plugin code (see the Buck target for its provenance)
      - the plugin configuration
      - the actual list of locked versions

    This directory is used by `YumDnfConfIsolator.isolate_main` to tell
    `yum` / `dnf` to use the plugin.
    '''
    # NOTE(review): the body yields the directory once, so this is presumably
    # wrapped by a context-manager decorator that is outside this view --
    # confirm at the definition site.
    with temp_dir() as d:
        # Plugin configuration; `locklist` points at the list file written
        # below inside the same directory.
        vl_conf = textwrap.dedent(f'''\
            [main]
            enabled = 1
            locklist = {d.decode()}/versionlock.list
        ''')
        with open(d / 'versionlock.conf', 'w') as outf:
            outf.write(vl_conf)

        # `dnf` and `yum` expect different formats, so we parse our own.
        template = {
            YumDnf.yum: '{e}:{n}-{v}-{r}.{a}',
            YumDnf.dnf: '{n}-{e}:{v}-{r}.{a}',
        }[yum_dnf]
        with open(list_path) as rf, open(d / 'versionlock.list', 'w') as wf:
            for l in rf:
                # Every input line must hold exactly five tab-separated
                # fields.  The last field keeps the line's trailing newline
                # (if any), and no newline is added when writing.
                e, n, v, r, a = l.split('\t')
                wf.write(template.format(e=e, n=n, v=v, r=r, a=a))

        # Unpack the gzip-compressed plugin resource into `versionlock.py`.
        with importlib.resources.path(
            'rpm', f'{yum_dnf.value}_versionlock.gz',
        ) as p, gzip.open(p) as rf, open(d / 'versionlock.py', 'wb') as wf:
            wf.write(rf.read())

        yield d

        # Clean up, making sure that there are no new files.

        # Comparing the contents of the plugin & its list is too much effort
        with open(d / 'versionlock.conf') as infile:
            assert infile.read() == vl_conf

        # Apart from an optional compiled `versionlock.pyc`, the directory
        # must hold exactly the three files created above.
        assert (set(os.listdir(d)) - {b'versionlock.pyc'}) == {
            b'versionlock.conf', b'versionlock.list', b'versionlock.py',
        }, os.listdir(d)
Example 76
Project: Coulomb   Author: DynamoDS   File: sessions_to_sorted_deduped_sessions.py    MIT License 4 votes vote down vote up
def sort_blob(blob_name, sorted_blob_name, temp_path):
    """Download a gzipped session blob, de-duplicate and sort it, re-upload it.

    The blob is downloaded into ``temp_path``, its lines are de-duplicated,
    parsed as JSON and sorted by their integer ``"MicroTime"`` value.  The
    result is written as gzipped JSON lines and uploaded as
    ``sorted_blob_name``.  Both temporary files are removed afterwards.

    Args:
        blob_name: name of the source blob in SESSIONS_BUCKET.
        sorted_blob_name: name under which the sorted blob is uploaded.
        temp_path: local directory used for the temporary files.

    Returns:
        True when the sorted blob was uploaded, False when the input was
        skipped because its lines exceeded MAX_MEM.
    """
    log("Sorting: {} => {}".format(blob_name, sorted_blob_name))

    blob_session_name = blob_name.split('/')[-1]
    blob_session_name_sorted = sorted_blob_name.split('/')[-1]

    blob_path_to_proc = os.path.join(temp_path, blob_session_name)
    out_path = os.path.join(temp_path, blob_session_name_sorted)

    log ("Downloading {} => {}".format(blob_name, blob_path_to_proc))
    download_blob(SESSIONS_BUCKET, blob_name, blob_path_to_proc)

    data_set = set()
    byte_counter = 0
    skip_file = False

    # The context manager closes the input before it is removed below; the
    # handle used to stay open for the rest of the function.
    with gzip.open(blob_path_to_proc) as f:
        for ln in f:
            data_set.add(ln)
            # Sum of per-line object sizes (duplicates included), used as the
            # memory guard.
            byte_counter += sys.getsizeof(ln)

            if byte_counter > MAX_MEM:
                skip_file = True
                log ("Skipped large file: " + blob_path_to_proc)
                break

    if skip_file:
        os.remove(blob_path_to_proc)
        return False

    data = [json.loads(ln) for ln in data_set]
    data.sort(key=lambda x: int(x["MicroTime"]))

    # Closing through the context manager flushes the output and releases the
    # handle even if a write fails.
    with gzip.open(out_path, 'w') as sortedF:
        for d in data:
            sortedF.write((json.dumps(d) + "\n").encode('utf-8'))

    log("Sorted: {} => {}".format(blob_session_name, blob_session_name_sorted))
    upload_blob(SESSIONS_BUCKET, out_path, sorted_blob_name)

    log("About to remove: {}".format(blob_path_to_proc))
    os.remove(blob_path_to_proc)

    log("About to remove: {}".format(out_path))
    os.remove(out_path)

    return True
Example 77
Project: pyblish-win   Author: pyblish   File: fileinput.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def readline(self):
        # Return the next line from the sequence of input files, or "" once
        # no current file and no further file names are left.  Lines are
        # served from self._buffer; when it runs dry the next chunk (or the
        # next file) is loaded and the method calls itself again.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            # Buffer exhausted: fall through and refill it below.
            pass
        else:
            # Fast path: hand out the buffered line and advance the counters.
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        if not self._file:
            # No file is currently open: move on to the next name, if any.
            if not self._files:
                return ""
            self._filename = self._files[0]
            self._files = self._files[1:]
            self._filelineno = 0
            self._file = None
            self._isstdin = False
            self._backupfilename = 0
            if self._filename == '-':
                # "-" selects standard input.
                self._filename = '<stdin>'
                self._file = sys.stdin
                self._isstdin = True
            else:
                if self._inplace:
                    # In-place mode: the original is renamed to a backup and
                    # read from there, while sys.stdout is redirected to a new
                    # file created under the original name.
                    self._backupfilename = (
                        self._filename + (self._backup or os.extsep+"bak"))
                    # Remove a stale backup, ignoring the error if none exists.
                    try: os.unlink(self._backupfilename)
                    except os.error: pass
                    # The next few lines may raise IOError
                    os.rename(self._filename, self._backupfilename)
                    self._file = open(self._backupfilename, self._mode)
                    try:
                        perm = os.fstat(self._file.fileno()).st_mode
                    except OSError:
                        # Mode unavailable: create the output file without
                        # copying the permission bits.
                        self._output = open(self._filename, "w")
                    else:
                        # Create the output file with the mode bits read from
                        # the backup.
                        fd = os.open(self._filename,
                                     os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                                     perm)
                        self._output = os.fdopen(fd, "w")
                        # Best effort: apply the same bits via chmod where it
                        # is available; failures are ignored.
                        try:
                            if hasattr(os, 'chmod'):
                                os.chmod(self._filename, perm)
                        except OSError:
                            pass
                    # Remember the real stdout before redirecting it.
                    self._savestdout = sys.stdout
                    sys.stdout = self._output
                else:
                    # This may raise IOError
                    if self._openhook:
                        self._file = self._openhook(self._filename, self._mode)
                    else:
                        self._file = open(self._filename, self._mode)
        # Refill the buffer from the current file, passing self._bufsize as
        # the size hint.
        self._buffer = self._file.readlines(self._bufsize)
        self._bufindex = 0
        if not self._buffer:
            # Nothing more to read from this file: advance to the next one.
            self.nextfile()
        # Recursive call
        return self.readline()
Example 78
Project: pyblish-win   Author: pyblish   File: fileinput.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def hook_encoded(encoding):
    """Return an open hook that opens files as text decoded with `encoding`.

    The returned callable takes ``(filename, mode)``, drops the 'U' and 'b'
    flags from the mode (falling back to 'r' when nothing is left) and opens
    the file through ``io.open`` with ``newline=''``.
    """
    import io

    def openhook(filename, mode):
        text_mode = mode
        for flag in ('U', 'b'):
            text_mode = text_mode.replace(flag, '')
        if not text_mode:
            text_mode = 'r'
        return io.open(filename, text_mode, encoding=encoding, newline='')

    return openhook
Example 79
Project: pyblish-win   Author: pyblish   File: test_tarfile.py    GNU Lesser General Public License v3.0 4 votes vote down vote up
def test_fileobj_seek(self):
        # Check seek()/tell()/read()/readline()/readlines() on the file object
        # returned by extractfile(), using the member extracted to disk as the
        # reference data.
        self.tar.extract("ustar/regtype", TEMPDIR)
        fobj = open(os.path.join(TEMPDIR, "ustar/regtype"), "rb")
        data = fobj.read()
        fobj.close()

        tarinfo = self.tar.getmember("ustar/regtype")
        fobj = self.tar.extractfile(tarinfo)

        # Read everything once so the first seek(0) starts from a position
        # other than the beginning; the value itself is not used.
        text = fobj.read()
        fobj.seek(0)
        self.assertTrue(0 == fobj.tell(),
                     "seek() to file's start failed")
        # whence=0: absolute position.
        fobj.seek(2048, 0)
        self.assertTrue(2048 == fobj.tell(),
                     "seek() to absolute position failed")
        # whence=1: relative to the current position, backwards then forwards.
        fobj.seek(-1024, 1)
        self.assertTrue(1024 == fobj.tell(),
                     "seek() to negative relative position failed")
        fobj.seek(1024, 1)
        self.assertTrue(2048 == fobj.tell(),
                     "seek() to positive relative position failed")
        s = fobj.read(10)
        self.assertTrue(s == data[2048:2058],
                     "read() after seek failed")
        # whence=2: relative to the end of the file.
        fobj.seek(0, 2)
        self.assertTrue(tarinfo.size == fobj.tell(),
                     "seek() to file's end failed")
        self.assertTrue(fobj.read() == "",
                     "read() at file's end did not return empty string")
        fobj.seek(-tarinfo.size, 2)
        self.assertTrue(0 == fobj.tell(),
                     "relative seek() to file's start failed")
        # readlines() must give the same result when repeated from the same
        # offset.
        fobj.seek(512)
        s1 = fobj.readlines()
        fobj.seek(512)
        s2 = fobj.readlines()
        self.assertTrue(s1 == s2,
                     "readlines() after seek failed")
        # tell() must advance by exactly the length of the line just read.
        fobj.seek(0)
        self.assertTrue(len(fobj.readline()) == fobj.tell(),
                     "tell() after readline() failed")
        fobj.seek(512)
        self.assertTrue(len(fobj.readline()) + 512 == fobj.tell(),
                     "tell() after seek() and readline() failed")
        # read() after readline() must return the remainder of the data.
        fobj.seek(0)
        line = fobj.readline()
        self.assertTrue(fobj.read() == data[len(line):],
                     "read() after readline() failed")
        fobj.close()

    # Test if symbolic and hard links are resolved by extractfile().  The
    # test link members each point to a regular member whose data is
    # supposed to be exported. 
Example 80
Project: pybench   Author: pentschev   File: benchmark_ml.py    Apache License 2.0 4 votes vote down vote up
def load_data(nrows, ncols, cached, train_split=1.0, label_col=None):
    """Sample rows of a gzip-compressed NumPy array into pandas DataFrames.

    The array stored in ``cached`` is read with ``np.load``, ``nrows`` rows
    are drawn at random (with replacement) and at most ``ncols`` feature
    columns are kept.  Columns are named ``fea0``, ``fea1``, ...

    Args:
        nrows: number of rows to sample.
        ncols: maximum number of feature columns to keep.
        cached: path of the gzip-compressed array file.
        train_split: fraction of the sampled rows used for the training part.
        label_col: index of the label column, or ``None`` if there is none.

    Returns:
        If ``train_split < 1.0`` and ``label_col`` is given, a dict with the
        DataFrames ``"X_train"``, ``"X_test"``, ``"y_train"`` and
        ``"y_test"``; otherwise a single DataFrame with the sampled features.

    Raises:
        FileNotFoundError: if ``cached`` does not exist.
    """
    import gzip
    import os

    import numpy as np
    import pandas as pd

    train_rows = int(nrows * train_split)

    if not os.path.exists(cached):
        # throws FileNotFoundError error if mortgage dataset is not present
        raise FileNotFoundError(
            "Please download the required dataset or check the path"
        )

    with gzip.open(cached) as f:
        X = np.load(f)

    def to_frame(arr):
        # One "fea<i>" column per column of the 2-D array.
        return pd.DataFrame(
            {"fea%d" % i: arr[:, i] for i in range(arr.shape[1])}
        )

    # randint()'s upper bound is exclusive, so the row count itself is passed;
    # the former "shape[0] - 1" could never select the last row and failed on
    # a one-row array.
    rindices = np.random.randint(0, X.shape[0], nrows)

    if train_split < 1.0 and label_col is not None:
        # Take the labels BEFORE dropping the label column; slicing after the
        # drop picked up the wrong column.
        y = X[:, label_col : label_col + 1][rindices]
        feature_cols = [i for i in range(X.shape[1]) if i != label_col]
        X = X[:, feature_cols][rindices, :ncols]
        return {
            "X_train": to_frame(X[0:train_rows]),
            "X_test": to_frame(X[train_rows:]),
            "y_train": to_frame(y[0:train_rows]),
            "y_test": to_frame(y[train_rows:]),
        }

    return to_frame(X[rindices, :ncols])