Python lxml.etree.ProcessingInstruction() Examples

The following are 23 code examples of lxml.etree.ProcessingInstruction(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lxml.etree , or try the search function .
Example #1
Source File: sax.py    From learn_python3_spider with MIT License 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #2
Source File: sax.py    From lambda-text-extractor with Apache License 2.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #3
Source File: sax.py    From lambda-text-extractor with Apache License 2.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #4
Source File: sax.py    From stopstalk-deployment with MIT License 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #5
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #6
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #7
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #8
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 6 votes vote down vote up
def saxify(self):
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument() 
Example #9
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #10
Source File: sax.py    From lambda-text-extractor with Apache License 2.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #11
Source File: sax.py    From lambda-text-extractor with Apache License 2.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #12
Source File: markup.py    From ChemDataExtractor with MIT License 5 votes vote down vote up
def _is_inline(self, element):
        """Return True if an element is inline."""
        if element.tag not in {etree.Comment, etree.ProcessingInstruction} and element.tag.lower() in self.inline_elements:
            return True
        return False 
Example #13
Source File: markup.py    From ChemDataExtractor with MIT License 5 votes vote down vote up
def _parse_element_r(self, el, specials, refs, id=None, element_cls=Paragraph):
        """Recursively parse HTML/XML element and its children into a list of Document elements."""
        elements = []
        if el.tag in {etree.Comment, etree.ProcessingInstruction}:
            return []
        # if el in refs:
        #     return [element_cls('', references=refs[el])]
        if el in specials:
            return specials[el]
        id = el.get('id', id)
        references = refs.get(el, [])
        if el.text is not None:
            elements.append(element_cls(six.text_type(el.text), id=id, references=references))
        elif references:
            elements.append(element_cls('', id=id, references=references))
        for child in el:
            # br is a special case - technically inline, but we want to split
            if child.tag not in {etree.Comment, etree.ProcessingInstruction} and child.tag.lower() == 'br':
                elements.append(element_cls(''))

            child_elements = self._parse_element_r(child, specials=specials, refs=refs, id=id, element_cls=element_cls)
            if (self._is_inline(child) and len(elements) > 0 and len(child_elements) > 0 and
                    isinstance(elements[-1], (Text, Sentence)) and isinstance(child_elements[0], (Text, Sentence)) and
                    type(elements[-1]) == type(child_elements[0])):
                elements[-1] += child_elements.pop(0)
            elements.extend(child_elements)
            if child.tail is not None:
                if self._is_inline(child) and len(elements) > 0 and isinstance(elements[-1], element_cls):
                    elements[-1] += element_cls(six.text_type(child.tail), id=id)
                else:
                    elements.append(element_cls(six.text_type(child.tail), id=id))
        return elements 
Example #14
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #15
Source File: sax.py    From stopstalk-deployment with MIT License 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #16
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #17
Source File: sax.py    From learn_python3_spider with MIT License 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #18
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 5 votes vote down vote up
def processingInstruction(self, target, data):
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi) 
Example #19
Source File: sax.py    From stopstalk-deployment with MIT License 4 votes vote down vote up
def _recursive_saxify(self, element, prefixes):
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        if element.tail:
            content_handler.characters(element.tail) 
Example #20
Source File: sax.py    From learn_python3_spider with MIT License 4 votes vote down vote up
def _recursive_saxify(self, element, parent_nsmap):
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            tail = element.tail
            if tail:
                content_handler.characters(tail)
            return

        element_nsmap = element.nsmap
        new_prefixes = []
        if element_nsmap != parent_nsmap:
            # There have been updates to the namespace
            for prefix, ns_uri in element_nsmap.items():
                if parent_nsmap.get(prefix) != ns_uri:
                    new_prefixes.append( (prefix, ns_uri) )

        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = self._build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
                    preferred_prefix=None, is_attribute=True)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = self._build_qname(
            ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS(
            (ns_uri, local_name), qname, sax_attributes)
        text = element.text
        if text:
            content_handler.characters(text)
        for child in element:
            self._recursive_saxify(child, element_nsmap)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        tail = element.tail
        if tail:
            content_handler.characters(tail) 
Example #21
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 4 votes vote down vote up
def _recursive_saxify(self, element, prefixes):
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        if element.tail:
            content_handler.characters(element.tail) 
Example #22
Source File: sax.py    From lambda-text-extractor with Apache License 2.0 4 votes vote down vote up
def _recursive_saxify(self, element, prefixes):
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        if element.tail:
            content_handler.characters(element.tail) 
Example #23
Source File: sax.py    From aws-lambda-lxml with GNU General Public License v3.0 4 votes vote down vote up
def _recursive_saxify(self, element, prefixes):
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        if element.tail:
            content_handler.characters(element.tail)