Python scrapy.item.Field() Examples

The following are 6 code examples of scrapy.item.Field(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scrapy.item, or try the search function.
Example #1
Source File: item.py    From scrapy-jsonschema with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __new__(mcs, class_name, bases, attrs):
        """Create the class and derive its item fields from its JSON schema.

        The schema is read from the ``jsonschema`` entry of ``attrs``. When
        the new class has a truthy ``merge_schema`` attribute, the
        ``jsonschema`` of every base that defines one is merged in through
        ``_merge_schema`` and the result is stored back on the class.

        One ``Field()`` is registered for each name returned by
        ``cls.get_top_level_property_names(schema)``, and every key of the
        schema's ``patternProperties`` is compiled into
        ``cls.pattern_properties``.

        Raises:
            ValueError: if the resulting schema is empty or missing.
        """
        cls = super(JsonSchemaMeta, mcs).__new__(mcs, class_name, bases, attrs)
        fields = {}
        schema = attrs.get('jsonschema', {})
        if cls.merge_schema:
            # priority: left to right
            for base in bases:
                base_schema = getattr(base, 'jsonschema', None)
                if base_schema:
                    schema = _merge_schema(schema, base_schema)
            setattr(cls, 'jsonschema', schema)
        if not schema:
            raise ValueError(
                '{} must contain "jsonschema" attribute'.format(cls.__name__)
            )
        cls.validator = cls._get_validator(schema)
        cls.validator.check_schema(schema)
        for k in cls.get_top_level_property_names(schema):
            fields[k] = Field()
        # Copy before updating so the mapping inherited from the bases is
        # not mutated in place.
        cls.fields = cls.fields.copy()
        cls.fields.update(fields)

        pattern_properties = schema.get('patternProperties', {})
        # Compare by value: ``is not`` against a string literal tests object
        # identity, which depends on interning and is flagged with a
        # SyntaxWarning on Python 3.8+.
        cls.pattern_properties = [
            re.compile(p)
            for p in pattern_properties
            if p != 'additionalProperties'
        ]
        return cls 
Example #2
Source File: item.py    From scrapy-jsonschema with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __setitem__(self, key, value):
        """Store ``value`` under ``key`` if the item accepts that field.

        A key already present in ``self.fields`` is stored directly. A key
        that is not declared but matches one of the compiled
        ``self.pattern_properties`` gets a new ``Field()`` registered first.

        Raises:
            KeyError: if the key is neither declared nor matched by a pattern.
        """
        if key not in self.fields:
            matches_pattern = any(
                pattern.match(key) for pattern in self.pattern_properties
            )
            if not matches_pattern:
                raise KeyError(
                    "%s does not support field: %s"
                    % (self.__class__.__name__, key)
                )
            # Register the dynamically matched key before storing the value.
            self.fields[key] = Field()
        self._values[key] = value
Example #3
Source File: __init__.py    From scrapy-djangoitem with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __new__(mcs, class_name, bases, attrs):
        """Create the class and mirror its Django model's fields as item fields.

        The class always receives its own copy of ``fields``. When
        ``django_model`` is truthy, every model field that is not
        ``auto_created`` has its name appended to ``_model_fields`` and gets
        a ``Field()`` in ``fields`` unless one is already declared there.
        """
        item_cls = super(DjangoItemMeta, mcs).__new__(
            mcs, class_name, bases, attrs
        )
        item_cls.fields = item_cls.fields.copy()

        if not item_cls.django_model:
            return item_cls

        item_cls._model_fields = []
        item_cls._model_meta = item_cls.django_model._meta
        for model_field in item_cls._model_meta.fields:
            if model_field.auto_created:
                continue
            # Explicitly declared item fields take precedence over the model's.
            if model_field.name not in item_cls.fields:
                item_cls.fields[model_field.name] = Field()
            item_cls._model_fields.append(model_field.name)
        return item_cls
Example #4
Source File: scrapy_pagestorage.py    From scrapy-pagestorage with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def process_spider_output(self, response, result, spider):
        """Yield each entry of ``result``, tagging items with the page id.

        The id is ``request_fingerprint(response.request)``. ``DictItem``
        instances get a ``_cached_page_id`` field declared and its value set,
        plain dicts get the key set directly, and anything else passes
        through untouched. Any ``Exception`` raised during iteration is
        handed to ``self.process_spider_exception`` and then re-raised.
        """
        page_id_key = "_cached_page_id"
        page_id = request_fingerprint(response.request)
        try:
            for entry in result:
                if isinstance(entry, DictItem):
                    # Declare the field and write the value straight into the
                    # item's underlying storage.
                    entry.fields[page_id_key] = Field()
                    entry._values[page_id_key] = page_id
                elif isinstance(entry, dict):
                    entry[page_id_key] = page_id
                yield entry
        except Exception as error:
            self.process_spider_exception(response, error, spider)
            raise
Example #5
Source File: items.py    From OpenScraper with MIT License 5 votes vote down vote up
def __init__(self, datamodel_list, *args, **kwargs ) : 
	"""Initialise the item and attach one field per name in ``datamodel_list``.

	Args:
		datamodel_list: iterable of field names; each one receives a fresh
			``scrapy.Field()`` stored in the instance ``__dict__``.
		*args, **kwargs: forwarded unchanged to the parent ``__init__``.
	"""
	# Single-argument parenthesised form: valid on Python 3 (where the old
	# ``print`` statement is a SyntaxError) and emits the same text on Python 2.
	print("::: GenericItem - datamodel_list :  %s" % (datamodel_list,))
	super(GenericItem, self).__init__(*args, **kwargs)

	for field in datamodel_list : 
		self.__dict__[field] = scrapy.Field()


### cf : https://github.com/scrapy/scrapy/issues/398 
Example #6
Source File: items.py    From OpenScraper with MIT License 5 votes vote down vote up
def create_item_class(class_name, fields_list):
	"""Build and return a new ``DictItem`` subclass.

	The class is named ``str(class_name)`` and its ``fields`` attribute maps
	every name in ``fields_list`` to a fresh ``Field()``.
	"""
	declared_fields = {name: Field() for name in fields_list}
	class_attrs = {'fields': declared_fields}
	return type(str(class_name), (DictItem,), class_attrs)