Python scrapy.item.Item() Examples
The following are 5 code examples of scrapy.item.Item().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scrapy.item, or try the search function.
Example #1
Source File: EuropythonSpyder.py From Learning-Python-Networking-Second-Edition with MIT License | 6 votes |
def main():
    """Configure a crawler process, run EuropythonSpyder in it and print each item.

    A handler is connected to the ``signals.item_passed`` signal so that
    every item delivered through that signal is printed. Logging is
    disabled through the settings, and start/stop markers are printed
    around the call that starts the crawler.
    """

    def catch_item(sender, item, **kwargs):
        # Signal handler: parameter names are kept as-is because the
        # dispatcher may deliver its arguments by keyword.
        print("Item extracted:", item)

    # Register the handler before anything is crawled.
    dispatcher.connect(catch_item, signal=signals.item_passed)

    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/43.0.2357.134 Safari/537.36"
    )
    settings = Settings()
    settings.set("USER_AGENT", user_agent)
    settings.set("LOG_ENABLED", False)

    # Build the crawler process from the settings prepared above.
    from scrapy.crawler import CrawlerProcess

    process = CrawlerProcess(settings)

    # Tell the process which spider to run.
    process.crawl(EuropythonSpyder())

    print("STARTING ENGINE")
    process.start()  # start the crawler, which runs the spider registered above
    print("ENGINE STOPPED")
Example #2
Source File: utils.py From scrapy-autounit with BSD 3-Clause "New" or "Revised" License | 6 votes |
def binary_check(fx_obj, cb_obj, encoding):
    """Return ``fx_obj`` converted to bytes wherever ``cb_obj`` calls for it.

    ``fx_obj`` is walked in parallel with ``cb_obj``; the type of
    ``cb_obj`` decides what happens to ``fx_obj`` at each level:

    * ``dict`` / ``Item``: every value of ``fx_obj`` is checked against
      the value stored under the same key in ``cb_obj``.
    * ``list``: elements are checked pairwise (``zip`` stops at the
      shorter of the two sequences).
    * ``Request``: the header names, header values and body of
      ``fx_obj`` are passed through ``to_bytes``; ``fx_obj`` is updated
      in place.
    * ``six.binary_type``: ``fx_obj`` is encoded with ``encoding``.

    Anything else is returned untouched.
    """
    if isinstance(cb_obj, (dict, Item)):
        checked = {}
        for name, fx_value in fx_obj.items():
            checked[name] = binary_check(fx_value, cb_obj[name], encoding)
        fx_obj = checked

    if isinstance(cb_obj, list):
        checked_elems = []
        for fx_elem, cb_elem in zip(fx_obj, cb_obj):
            checked_elems.append(binary_check(fx_elem, cb_elem, encoding))
        fx_obj = checked_elems

    if isinstance(cb_obj, Request):
        # Header names and every header value are converted to bytes.
        fx_obj['headers'] = {
            to_bytes(name, encoding): [to_bytes(v, encoding) for v in values]
            for name, values in fx_obj['headers'].items()
        }
        fx_obj['body'] = to_bytes(fx_obj['body'], encoding)

    if isinstance(cb_obj, six.binary_type):
        fx_obj = fx_obj.encode(encoding)

    return fx_obj
Example #3
Source File: file.py From openslack-crawler with Apache License 2.0 | 6 votes |
def another_process_item(self, result, item, info):
    """Custom process_item step that deals with an Item or a Request result.

    An ``Item`` is returned unchanged. A ``Request`` is turned into
    download requests via ``self._process_request``; the resulting
    ``DeferredList`` is chained to ``self.item_completed`` and then back
    to this method, and the deferred is returned.

    Raises ``AssertionError`` when ``result`` is neither type, or
    ``NofilesDrop`` in that same situation when assertions are disabled.
    """
    assert isinstance(result, (Item, Request)), (
        "WoaiduBookFile pipeline' item_completed must return Item or Request, got %s"
        % (type(result))
    )

    if isinstance(result, Item):
        return result

    if not isinstance(result, Request):
        # Only reachable when assertions are stripped (e.g. ``python -O``).
        raise NofilesDrop

    downloads = [self._process_request(req, info) for req in arg_to_iter(result)]
    pending = DeferredList(downloads, consumeErrors=1)
    pending.addCallback(self.item_completed, item, info)
    # XXX: The same item may be returned several times, depending on how
    # often the download URL failed. That is acceptable: once an error is
    # raised the item is no longer processed by later pipeline components,
    # and when every URL download has failed the item (whose book_file or
    # book_file_url is empty) can simply be dropped.
    return pending.addCallback(self.another_process_item, item, info)
Example #4
Source File: __init__.py From learn_python3_spider with MIT License | 5 votes |
def _get_item_field_attr(self, field_name, key, default=None):
    """Look up ``key`` in the metadata of field ``field_name`` of ``self.item``.

    Returns ``default`` when ``self.item`` is not an ``Item`` instance or
    when the field's metadata has no entry for ``key``.
    """
    if not isinstance(self.item, Item):
        # Only Item instances are queried for per-field metadata here.
        return default
    field_meta = self.item.fields[field_name]
    return field_meta.get(key, default)
Example #5
Source File: __init__.py From learn_python3_spider with MIT License | 5 votes |
def _get_item_field_attr(self, field_name, key, default=None):
    """Return the ``key`` attribute of the item field named ``field_name``.

    The lookup goes through ``self.item.fields`` and is only attempted
    when ``self.item`` is an ``Item``; in every other case, and when the
    field has no such attribute, ``default`` is returned.
    """
    return (
        self.item.fields[field_name].get(key, default)
        if isinstance(self.item, Item)
        else default
    )