import os from threading import local import unittest import pycurl from twisted.internet.defer import FirstError from twisted.python.compat import unicode from landscape.lib import testing from landscape.lib.fetch import ( fetch, fetch_async, fetch_many_async, fetch_to_files, url_to_filename, HTTPCodeError, PyCurlError) class CurlStub(object): def __init__(self, result=None, infos=None, error=None): self.result = result self.infos = infos if self.infos is None: self.infos = {pycurl.HTTP_CODE: 200} self.options = {} self.performed = False self.error = error def getinfo(self, what): if what in self.infos: return self.infos[what] raise RuntimeError("Stub doesn't know about %d info" % what) def setopt(self, option, value): if isinstance(value, unicode): raise AssertionError("setopt() doesn't accept unicode values") if self.performed: raise AssertionError("setopt() can't be called after perform()") self.options[option] = value def perform(self): if self.error: raise self.error if self.performed: raise AssertionError("Can't perform twice") self.options[pycurl.WRITEFUNCTION](self.result) self.performed = True class CurlManyStub(object): def __init__(self, url_results): self.curls = {} for url in url_results: result = url_results[url] if not isinstance(result, tuple): body = result http_code = 200 else: body = result[0] http_code = result[1] self.curls[url] = CurlStub( result=body, infos={pycurl.HTTP_CODE: http_code}) # Use thread local storage to keep the current CurlStub since # CurlManyStub is passed to multiple threads, but the state needs to be # local. self._local = local() self._local.current = None def getinfo(self, what): if not self._local.current.performed: raise AssertionError("getinfo() can't be called before perform()") result = self._local.current.getinfo(what) self._local.current = None return result def setopt(self, option, value): if option == pycurl.URL: # This seems a bit weird, but the curls have a str as key and we # want to keep it like this. But when we set the value for pycurl # option we already have the encoded bytes object, that's why we # have to decode again. self._local.current = self.curls[value.decode('ascii')] self._local.current.setopt(option, value) def perform(self): self._local.current.perform() class Any(object): def __eq__(self, other): return True class FetchTest(testing.FSTestCase, testing.TwistedTestCase, unittest.TestCase): def test_basic(self): curl = CurlStub(b"result") result = fetch("http://example.com", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_post(self): curl = CurlStub(b"result") result = fetch("http://example.com", post=True, curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.POST: True, pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_post_data(self): curl = CurlStub(b"result") result = fetch("http://example.com", post=True, data="data", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options[pycurl.READFUNCTION](), b"data") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.POST: True, pycurl.POSTFIELDSIZE: 4, pycurl.READFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_cainfo(self): curl = CurlStub(b"result") result = fetch("https://example.com", cainfo="cainfo", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"https://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.CAINFO: b"cainfo", pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_cainfo_on_http(self): curl = CurlStub(b"result") result = fetch("http://example.com", cainfo="cainfo", curl=curl) self.assertEqual(result, b"result") self.assertTrue(pycurl.CAINFO not in curl.options) def test_headers(self): curl = CurlStub(b"result") result = fetch("http://example.com", headers={"a": "1", "b": "2"}, curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.HTTPHEADER: ["a: 1", "b: 2"], pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_timeouts(self): curl = CurlStub(b"result") result = fetch("http://example.com", connect_timeout=5, total_timeout=30, curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 5, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 30, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_unicode(self): """ The L{fetch} function converts the C{url} parameter to C{bytes} before passing it to curl. """ curl = CurlStub(b"result") result = fetch(u"http://example.com", curl=curl) self.assertEqual(result, b"result") self.assertEqual(curl.options[pycurl.URL], b"http://example.com") self.assertTrue(isinstance(curl.options[pycurl.URL], bytes)) def test_non_200_result(self): curl = CurlStub(b"result", {pycurl.HTTP_CODE: 404}) try: fetch("http://example.com", curl=curl) except HTTPCodeError as error: self.assertEqual(error.http_code, 404) self.assertEqual(error.body, b"result") else: self.fail("HTTPCodeError not raised") def test_http_error_str(self): self.assertEqual(str(HTTPCodeError(501, "")), "Server returned HTTP code 501") def test_http_error_repr(self): self.assertEqual(repr(HTTPCodeError(501, "")), "<HTTPCodeError http_code=501>") def test_pycurl_error(self): curl = CurlStub(error=pycurl.error(60, "pycurl error")) try: fetch("http://example.com", curl=curl) except PyCurlError as error: self.assertEqual(error.error_code, 60) self.assertEqual(error.message, "pycurl error") else: self.fail("PyCurlError not raised") def test_pycurl_insecure(self): curl = CurlStub(b"result") result = fetch("http://example.com/get-ca-cert", curl=curl, insecure=True) self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com/get-ca-cert", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.SSL_VERIFYPEER: False, pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) def test_pycurl_error_str(self): self.assertEqual(str(PyCurlError(60, "pycurl error")), "Error 60: pycurl error") def test_pycurl_error_repr(self): self.assertEqual(repr(PyCurlError(60, "pycurl error")), "<PyCurlError args=(60, 'pycurl error')>") def test_pycurl_follow_true(self): curl = CurlStub(b"result") result = fetch("http://example.com", curl=curl, follow=True) self.assertEqual(result, b"result") self.assertEqual(1, curl.options[pycurl.FOLLOWLOCATION]) def test_pycurl_follow_false(self): curl = CurlStub(b"result") result = fetch("http://example.com", curl=curl, follow=False) self.assertEqual(result, b"result") self.assertNotIn(pycurl.FOLLOWLOCATION, curl.options.keys()) def test_pycurl_user_agent(self): """If provided, the user-agent is set in the request.""" curl = CurlStub(b"result") result = fetch( "http://example.com", curl=curl, user_agent="user-agent") self.assertEqual(result, b"result") self.assertEqual(b"user-agent", curl.options[pycurl.USERAGENT]) def test_pycurl_proxy(self): """If provided, the proxy is set in the request.""" curl = CurlStub(b"result") proxy = "http://my.little.proxy" result = fetch("http://example.com", curl=curl, proxy=proxy) self.assertEqual(b"result", result) self.assertEqual(proxy.encode('ascii'), curl.options[pycurl.PROXY]) def test_create_curl(self): curls = [] def pycurl_Curl(): curl = CurlStub(b"result") curls.append(curl) return curl Curl = pycurl.Curl try: pycurl.Curl = pycurl_Curl result = fetch("http://example.com") curl = curls[0] self.assertEqual(result, b"result") self.assertEqual(curl.options, {pycurl.URL: b"http://example.com", pycurl.FOLLOWLOCATION: 1, pycurl.MAXREDIRS: 5, pycurl.CONNECTTIMEOUT: 30, pycurl.LOW_SPEED_LIMIT: 1, pycurl.LOW_SPEED_TIME: 600, pycurl.NOSIGNAL: 1, pycurl.WRITEFUNCTION: Any(), pycurl.DNS_CACHE_TIMEOUT: 0, pycurl.ENCODING: b"gzip,deflate"}) finally: pycurl.Curl = Curl def test_async_fetch(self): curl = CurlStub(b"result") d = fetch_async("http://example.com/", curl=curl) def got_result(result): self.assertEqual(result, b"result") return d.addCallback(got_result) def test_async_fetch_with_error(self): curl = CurlStub(b"result", {pycurl.HTTP_CODE: 501}) d = fetch_async("http://example.com/", curl=curl) def got_error(failure): self.assertEqual(failure.value.http_code, 501) self.assertEqual(failure.value.body, b"result") return failure d.addErrback(got_error) self.assertFailure(d, HTTPCodeError) return d def test_fetch_many_async(self): """ L{fetch_many_async} retrieves multiple URLs, and returns a C{DeferredList} firing its callback when all the URLs have successfully completed. """ url_results = {"http://good/": b"good", "http://better/": b"better"} def callback(result, url): self.assertIn(result, url_results.values()) self.assertIn(url, url_results) url_results.pop(url) def errback(failure, url): self.fail() curl = CurlManyStub(url_results) d = fetch_many_async(url_results.keys(), callback=callback, errback=errback, curl=curl) def completed(result): self.assertEqual(url_results, {}) return d.addCallback(completed) def test_fetch_many_async_with_error(self): """ L{fetch_many_async} aborts as soon as one URL fails. """ url_results = {"http://right/": b"right", "http://wrong/": (b"wrong", 501), "http://impossible/": b"impossible"} failed_urls = [] def errback(failure, url): failed_urls.append(url) self.assertEqual(failure.value.body, b"wrong") self.assertEqual(failure.value.http_code, 501) return failure curl = CurlManyStub(url_results) urls = ["http://right/", "http://wrong/", "http://impossible/"] result = fetch_many_async(urls, callback=None, errback=errback, curl=curl) def check_failure(failure): self.assertTrue(isinstance(failure.subFailure.value, HTTPCodeError)) self.assertEqual(failed_urls, ["http://wrong/"]) self.assertFailure(result, FirstError) return result.addCallback(check_failure) def test_url_to_filename(self): """ L{url_to_filename} extracts the filename part of an URL, optionally prepending a directory path to it. """ self.assertEqual(url_to_filename("http://some/file"), "file") self.assertEqual(url_to_filename("http://some/file/"), "file") self.assertEqual(url_to_filename("http://some/file", directory="dir"), os.path.join("dir", "file")) def test_fetch_to_files(self): """ L{fetch_to_files} fetches a list of URLs and save their content in the given directory. """ url_results = {"http://good/file": b"file", "http://even/better-file": b"better-file"} directory = self.makeDir() curl = CurlManyStub(url_results) result = fetch_to_files(url_results.keys(), directory, curl=curl) def check_files(ignored): for url, result in url_results.items(): filename = url.rstrip("/").split("/")[-1] fd = open(os.path.join(directory, filename), 'rb') self.assertEqual(fd.read(), result) fd.close() result.addCallback(check_files) return result def test_fetch_to_files_with_trailing_slash(self): """ L{fetch_to_files} discards trailing slashes from the final component of the given URLs when saving them as files. """ directory = self.makeDir() curl = CurlStub(b"data") result = fetch_to_files(["http:///with/slash/"], directory, curl=curl) def check_files(ignored): os.path.exists(os.path.join(directory, "slash")) result.addCallback(check_files) return result def test_fetch_to_files_with_errors(self): """ L{fetch_to_files} optionally logs an error message as soon as one URL fails, and aborts. """ url_results = {"http://im/right": b"right", "http://im/wrong": (b"wrong", 404), "http://im/not": b"not"} directory = self.makeDir() messages = [] logger = (lambda message: messages.append(message)) curl = CurlManyStub(url_results) result = fetch_to_files(url_results.keys(), directory, logger=logger, curl=curl) def check_messages(failure): self.assertEqual(len(messages), 1) self.assertEqual(messages[0], "Couldn't fetch file from http://im/wrong " "(Server returned HTTP code 404)") messages.pop() def check_files(ignored): self.assertEqual(messages, []) self.assertFalse(os.path.exists(os.path.join(directory, "wrong"))) result.addErrback(check_messages) result.addCallback(check_files) return result def test_fetch_to_files_with_non_existing_directory(self): """ The deferred list returned by L{fetch_to_files} results in a failure if the destination directory doesn't exist. """ url_results = {"http://im/right": b"right"} directory = "i/dont/exist/" curl = CurlManyStub(url_results) result = fetch_to_files(url_results.keys(), directory, curl=curl) def check_error(failure): error = str(failure.value.subFailure.value) self.assertEqual(error, ("[Errno 2] No such file or directory: " "'i/dont/exist/right'")) self.assertFalse(os.path.exists(os.path.join(directory, "right"))) result.addErrback(check_error) return result