Python lxml.etree.tounicode() Examples

The following are 27 code examples of lxml.etree.tounicode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lxml.etree, or try the search function.
Example #1
Source File: test_formatting.py    From xmldiff with MIT License 6 votes vote down vote up
def test_do_undo_element_double_format(self):
        # Round-trips a <p> element holding nested <u>/<b> formatting tags
        # through PlaceholderMaker.do_element() and undo_element().
        maker = formatting.PlaceholderMaker(['p'], ['b', 'u'])

        # Formatting tags get replaced, and the content remains
        source = u'<p>This is <u>doubly <b>formatted</b></u> text.</p>'
        node = etree.fromstring(source)
        maker.do_element(node)

        expected_text = (
            u'This is \ue006doubly \ue008formatted\ue007'
            u'\ue005 text.')
        self.assertEqual(node.text, expected_text)

        # Undoing the replacement must give back the original markup.
        maker.undo_element(node)
        self.assertEqual(etree.tounicode(node), source)
Example #2
Source File: main.py    From python-examples with MIT License 6 votes vote down vote up
def execute(url):
    """Print the child texts of every ``pozycja`` element under the root.

    The document at *url* is parsed with ``etree.parse``. For each direct
    child of the root whose tag is ``pozycja``, a list containing the
    ``text`` of that child's own sub-elements is printed.

    :param url: source handed unchanged to ``etree.parse``.
    """
    root = etree.parse(url).getroot()

    for tag in root:
        if tag.tag == 'pozycja':
            # The tag has already been checked by the enclosing ``if``, so
            # the comprehension needs no filter of its own.
            print([subtag.text for subtag in tag])
Example #3
Source File: base.py    From PySIGNFe with GNU Lesser General Public License v2.1 6 votes vote down vote up
def _le_xml(self, arquivo):
        """Load XML into ``self._xml`` from markup, a file path or a tree.

        :param arquivo: ``None``, a string, or an object accepted by
            ``etree.tounicode``. A string containing ``<`` is treated as XML
            markup; any other string is treated as the path of a file to
            read.
        :returns: ``False`` when *arquivo* is ``None``, otherwise ``True``
            once ``self._xml`` has been set.
        """
        if arquivo is None:
            return False

        # Anything that is not already text is serialized first, so from
        # here on ``arquivo`` is always a string.
        if not isinstance(arquivo, basestring):
            arquivo = etree.tounicode(arquivo)

        if NAMESPACE_NFSE in arquivo:
            arquivo = por_acentos(arquivo)

        if u'<' in arquivo:
            txt = arquivo
        else:
            # No markup in the string: treat it as a file name.  The context
            # manager closes the handle even if reading fails.
            with open(arquivo) as arq:
                txt = arq.read()

        self._xml = etree.fromstring(tira_abertura(txt))
        return True
Example #4
Source File: tests.py    From yournextrepresentative with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_all_basic_feed_with_one_item(self):
        # Fetches the basic Atom feed and compares its pretty-printed form
        # with the document expected for the first stored ResultEvent.
        feed_response = self.app.get('/results/all-basic.atom')
        actual_xml = etree.tounicode(
            etree.XML(feed_response.content), pretty_print=True)

        event = ResultEvent.objects.first()
        template = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb">
  <title>Election results from example.com</title>
  <link href="http://example.com/" rel="alternate"/>
  <link href="http://example.com/results/all-basic.atom" rel="self"/>
  <id>http://example.com/</id>
  <updated>{updated}</updated>
  <entry>
    <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title>
    <link href="http://example.com/#{item_id}" rel="alternate"/>
    <published>{updated}</published>
    <updated>{updated}</updated>
    <author>
      <name>john</name>
    </author>
    <id>http://example.com/#{item_id}</id>
    <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary>
  </entry>
</feed>
'''
        expected_xml = template.format(
            updated=rfc3339_date(event.created),
            space_separated=event.created.strftime("%Y-%m-%d %H:%M:%S"),
            item_id=event.id,
        )
        self.compare_xml(expected_xml, actual_xml)
Example #5
Source File: rpc_response.py    From voltha with Apache License 2.0 5 votes vote down vote up
def build_yang_response(self, root, request, yang_options=None,
                            custom_rpc=False):
        """Convert *root* to YANG XML and build the XML response from it.

        Returns whatever ``self.build_xml_response`` returns.  If any step
        raises, the failure is logged, ``self.rpc_response`` is flagged as an
        error carrying ``ncerror.BadMsg(request)``, and ``None`` is returned.
        """
        try:
            self.custom_rpc = custom_rpc
            yang_xml = self.to_yang_xml(root, request, yang_options,
                                        custom_rpc)
            pretty_xml = etree.tounicode(yang_xml, pretty_print=True)
            log.info('yang-xml', yang_xml=pretty_xml)
            return self.build_xml_response(request, yang_xml, custom_rpc)
        except Exception:
            log.exception('error-building-yang-response', request=request,
                          xml=etree.tostring(root))
            self.rpc_response.is_error = True
            self.rpc_response.node = ncerror.BadMsg(request)
            return None
Example #6
Source File: nc_protocol_handler.py    From voltha with Apache License 2.0 5 votes vote down vote up
def send_custom_rpc_reply(self, rpc_reply, origmsg):
        """Wrap the children of *rpc_reply* in an rpc-reply element and send it.

        The envelope takes its attributes from *origmsg* and its namespace
        map from *rpc_reply*.  The pretty-printed result is logged and passed
        to ``self.send_message``.
        """
        envelope = etree.Element(qmap(C.NC) + C.RPC_REPLY,
                                 attrib=origmsg.attrib,
                                 nsmap=rpc_reply.nsmap)
        try:
            envelope.extend(rpc_reply.getchildren())
        except AttributeError:
            # Fall back to extending with the object itself.
            envelope.extend(rpc_reply)
        serialized = etree.tounicode(envelope, pretty_print=True)
        log.info("Custom-RPC-Reply", reply=serialized)
        self.send_message(serialized)
Example #7
Source File: nc_protocol_handler.py    From voltha with Apache License 2.0 5 votes vote down vote up
def send_rpc_reply(self, rpc_reply, origmsg):
        """Wrap *rpc_reply* in an rpc-reply element and send it.

        The envelope takes both its attributes and its namespace map from
        *origmsg*.  The pretty-printed result is logged and passed to
        ``self.send_message``.
        """
        envelope = etree.Element(qmap(C.NC) + C.RPC_REPLY,
                                 attrib=origmsg.attrib,
                                 nsmap=origmsg.nsmap)
        try:
            # Bare attribute access used as a probe: objects exposing
            # ``getchildren`` are appended whole, anything else is treated
            # as an iterable of elements.
            rpc_reply.getchildren
            envelope.append(rpc_reply)
        except AttributeError:
            envelope.extend(rpc_reply)
        serialized = etree.tounicode(envelope, pretty_print=True)
        log.info("RPC-Reply", reply=serialized)
        self.send_message(serialized)
Example #8
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #9
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #10
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #11
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #12
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #13
Source File: error.py    From voltha with Apache License 2.0 5 votes vote down vote up
def get_xml_reply(self):
        """Return ``self.reply`` serialized to a unicode string."""
        serialized = etree.tounicode(self.reply)
        return serialized
Example #14
Source File: dataset.py    From calamari with Apache License 2.0 5 votes vote down vote up
def store(self, extension):
        """Write the held PageXML documents to disk.

        If ``self._last_page_id`` is set, only that page is written (through
        ``self._store_page``) and the marker is cleared.  Otherwise every
        entry of ``self.xmlfiles`` / ``self.pages`` is written.

        :param extension: suffix appended to each path after its existing
            extensions have been removed by ``split_all_ext``.
        """
        # Local import: ``io.open`` accepts ``encoding`` on Python 2 and 3.
        import io

        if self._last_page_id:
            self._store_page(extension, self._last_page_id)
            self._last_page_id = None
        else:
            for xml, page in tqdm(zip(self.xmlfiles, self.pages), desc="Writing PageXML files", total=len(self.xmlfiles)):
                # tounicode() emits no XML declaration, so the file must be
                # UTF-8 (the XML default) rather than the platform encoding.
                with io.open(split_all_ext(xml)[0] + extension, 'w', encoding='utf-8') as f:
                    f.write(etree.tounicode(page.getroottree()))
Example #15
Source File: tests.py    From yournextrepresentative with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_all_feed_with_one_item(self):
        # Fetches the full Atom feed (with extra data) and compares its
        # pretty-printed form with the document expected for the first
        # stored ResultEvent.
        feed_response = self.app.get('/results/all.atom')
        actual_xml = etree.tounicode(
            etree.XML(feed_response.content), pretty_print=True)

        event = ResultEvent.objects.first()
        template = '''<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb">
  <title>Election results from example.com (with extra data)</title>
  <link href="http://example.com/" rel="alternate"/>
  <link href="http://example.com/results/all.atom" rel="self"/>
  <id>http://example.com/</id>
  <updated>{updated}</updated>
  <entry>
    <title>Tessa Jowell (Labour Party) won in Member of Parliament for Dulwich and West Norwood</title>
    <link href="http://example.com/#{item_id}" rel="alternate"/>
    <published>{updated}</published>
    <updated>{updated}</updated>
    <author>
      <name>john</name>
    </author>
    <id>http://example.com/#{item_id}</id>
    <summary type="html">A example.com volunteer recorded at {space_separated} that Tessa Jowell (Labour Party) won the ballot in Member of Parliament for Dulwich and West Norwood, quoting the source 'Seen on the BBC news').</summary>
    <post_id>65808</post_id>
    <winner_person_id>4322</winner_person_id>
    <winner_person_name>Tessa Jowell</winner_person_name>
    <winner_party_id>party:53</winner_party_id>
    <winner_party_name>Labour Party</winner_party_name>
    <user_id>{user_id}</user_id>
    <post_name>Member of Parliament for Dulwich and West Norwood</post_name>
    <information_source>Seen on the BBC news</information_source>
    <parlparse_id>uk.org.publicwhip/person/123456</parlparse_id>
  </entry>
</feed>
'''
        expected_xml = template.format(
            updated=rfc3339_date(event.created),
            space_separated=event.created.strftime("%Y-%m-%d %H:%M:%S"),
            item_id=event.id,
            user_id=self.user.id,
        )
        self.compare_xml(expected_xml, actual_xml)
Example #16
Source File: test_patch.py    From xmldiff with MIT License 5 votes vote down vote up
def _test(self, start, action, end):
        """Apply *action* to the parsed *start* XML and compare with *end*.

        The document is patched in place through
        ``self.patcher.handle_action`` and its serialized form must equal
        *end*.
        """
        document = etree.fromstring(start)
        self.patcher.handle_action(action, document)
        serialized = etree.tounicode(document)
        self.assertEqual(serialized, end)
Example #17
Source File: test_formatting.py    From xmldiff with MIT License 5 votes vote down vote up
def test_do_element(self):
        # Exercises PlaceholderMaker.do_element()/undo_element() with <p> as
        # the text tag and <b> as the only formatting tag.
        maker = formatting.PlaceholderMaker(['p'], ['b'])

        # Formatting tags get replaced, and the content remains
        original = u'<p>This is a tag with <b>formatted</b> text.</p>'
        node = etree.fromstring(original)
        maker.do_element(node)
        self.assertEqual(
            etree.tounicode(node),
            u'<p>This is a tag with \ue006formatted\ue005 text.</p>')

        # Undoing restores the original markup.
        maker.undo_element(node)
        self.assertEqual(etree.tounicode(node), original)

        # Non formatting tags get replaced with content
        node = etree.fromstring(
            u'<p>This is a tag with <foo>formatted</foo> text.</p>')
        maker.do_element(node)
        self.assertEqual(
            etree.tounicode(node),
            u'<p>This is a tag with \ue007 text.</p>')

        # Single formatting tags still get two placeholders.
        node = etree.fromstring(u'<p>This is a <b/> with <foo/> text.</p>')
        maker.do_element(node)
        self.assertEqual(
            etree.tounicode(node),
            u'<p>This is a \ue009\ue008 with \ue00a text.</p>')
Example #18
Source File: formatting.py    From xmldiff with MIT License 5 votes vote down vote up
def render(self, result):
        """Serialize *result* to unicode, honouring ``self.pretty_print``."""
        pretty = self.pretty_print
        return etree.tounicode(result, pretty_print=pretty)
Example #19
Source File: formatting.py    From xmldiff with MIT License 5 votes vote down vote up
def get_placeholder(self, element, ttype, close_ph):
        """Return the placeholder character for an element/type combination.

        Placeholders are keyed by the serialized element together with
        *ttype* and *close_ph*.  A known key returns the stored character;
        otherwise ``self.placeholder`` is incremented, the new character is
        recorded in both lookup tables and returned.
        """
        key = (etree.tounicode(element), ttype, close_ph)
        placeholder = self.tag2placeholder.get(key)
        if placeholder is None:
            # First time this combination is seen: allocate the next code
            # point and register it in both directions.
            self.placeholder += 1
            placeholder = six.unichr(self.placeholder)
            self.placeholder2tag[placeholder] = PlaceholderEntry(
                element, ttype, close_ph)
            self.tag2placeholder[key] = placeholder
        return placeholder
Example #20
Source File: main.py    From xmldiff with MIT License 5 votes vote down vote up
def patch_file(actions, tree):
    """Patch an XML file with a diff and return the result as unicode.

    *tree* is handed to ``etree.parse``.  *actions* is either a string,
    taken to be the name of a file holding the diff, or an object with a
    ``read()`` method that yields the diff text.
    """
    document = etree.parse(tree)

    if not isinstance(actions, six.string_types):
        # We assume it's a stream
        diff_text = actions.read()
    else:
        # It's a string, so it's a filename
        with open(actions) as f:
            diff_text = f.read()

    parsed_actions = patch.DiffParser().parse(diff_text)
    patched = patch_tree(parsed_actions, document)
    return etree.tounicode(patched)
Example #21
Source File: main.py    From xmldiff with MIT License 5 votes vote down vote up
def patch_text(actions, tree):
    """Patch an XML string with a diff string and return unicode XML.

    *tree* is parsed with ``etree.fromstring`` and *actions* with
    ``patch.DiffParser``; the patched tree is serialized before returning.
    """
    document = etree.fromstring(tree)
    parsed_actions = patch.DiffParser().parse(actions)
    patched = patch_tree(parsed_actions, document)
    return etree.tounicode(patched)
Example #22
Source File: content.py    From xiachufang-api with Apache License 2.0 5 votes vote down vote up
def clean_steps(self, nodes):
        """Convert step nodes into a list of step dictionaries.

        For each node, in order, the result holds a dict with:

        * ``step`` - 1-based position of the node;
        * ``desc`` - the serialized ``<p>`` child with ``<p>`` tags removed
          and ``<br>`` tags turned into newlines, stripped of surrounding
          whitespace;
        * ``img`` - the ``src`` attribute of the ``<img>`` child, or ``''``
          when the node has no ``<img>``.

        :param nodes: iterable of elements, each expected to have a ``<p>``
            child.
        :returns: list of dicts as described above.
        """
        # Raw strings: ``\s`` in a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        # HTML tag <p/>
        re_p = re.compile(r'</?p[^>]*>')
        # HTML tag <br/>
        re_br = re.compile(r'<br\s*?/?>')

        steps = []
        for idx, node in enumerate(nodes, start=1):
            markup = etree.tounicode(node.find('p')).strip()
            img = node.find('img')  # looked up once, reused below
            steps.append({
                'step': idx,
                'desc': re_br.sub('\n', re_p.sub('', markup)).strip(),
                'img': img.get('src') if img is not None else '',
            })
        return steps
Example #23
Source File: dataset.py    From calamari with Apache License 2.0 5 votes vote down vote up
def _store_page(self, extension, page_id):
        """Write the page registered under *page_id* to disk.

        The page is found at the index *page_id* has in ``self.xmlfiles``.
        The output path is *page_id* with its extensions removed by
        ``split_all_ext`` and *extension* appended.
        """
        # Local import: ``io.open`` accepts ``encoding`` on Python 2 and 3.
        import io

        page = self.pages[self.xmlfiles.index(page_id)]
        # tounicode() emits no XML declaration, so the file must be UTF-8
        # (the XML default) rather than the platform encoding.
        with io.open(split_all_ext(page_id)[0] + extension, 'w', encoding='utf-8') as f:
            f.write(etree.tounicode(page.getroottree()))
Example #24
Source File: bmi_wrapper.py    From indra with BSD 2-Clause "Simplified" License 4 votes vote down vote up
def make_repository_component(self):
        """Return an XML string representing this BMI in a workflow.

        This description is required by EMELI to discover and load models.

        Returns
        -------
        xml : str
            String serialized XML representation of the component in the
            model repository.
        """
        component = etree.Element('component')

        def add_child(tag, text):
            # Create <tag>text</tag> and attach it to the component root.
            child = etree.Element(tag)
            child.text = text
            component.append(child)

        add_child('comp_name', self.model.name)
        add_child('module_path', os.getcwd())
        add_child('module_name', self.model.name)
        add_child('class_name', 'model_class')
        add_child('model_name', self.model.name)
        add_child('language', 'python')
        add_child('version', self.get_attribute('version'))
        add_child('author', self.get_attribute('author_name'))
        add_child('help_url', 'http://github.com/sorgerlab/indra')

        # The remaining entries use their own tag name as their text.
        for tag in ('cfg_template', 'time_step_type', 'time_units',
                    'grid_type', 'description', 'comp_type', 'uses_types'):
            add_child(tag, tag)

        return etree.tounicode(component, pretty_print=True)
Example #25
Source File: agenda.py    From legco-watch with MIT License 4 votes vote down vote up
def __init__(self, elements, english=True):
        """Parse one agenda question from its sequence of elements.

        Sets ``number``, ``asker`` and ``type`` from the first element,
        ``replier`` from one of the last two elements, and ``body`` from the
        HTML of the elements in between.

        :param elements: elements making up the question; the first one must
            provide ``text_content()``.
        :param english: selects ``QUESTION_PATTERN_E`` when true, otherwise
            ``QUESTION_PATTERN_C``.
        """
        self._elements = elements

        # Get the asker
        text = elements[0].text_content().strip()
        pattern = QUESTION_PATTERN_E if english else QUESTION_PATTERN_C
        match = re.match(pattern, text)
        if match is not None:
            self.number = match.group(1)
            self.asker = match.group(2)
            # Get question type
            # Can be oral or written.  Could also be urgent, but have not yet seen how these are
            # indicated
            if text.startswith('*'):
                self.type = self.QTYPE_WRITTEN
            else:
                self.type = self.QTYPE_ORAL
        else:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            logger.warning(u'Could not find asker of question in element: {}'.format(text))
            self.number = None
            self.asker = None
            self.type = None

        # Get the responder
        # If the question is the last question, then there may be a note
        # that begins with an asterisk that says which questions were
        # for written reply
        # As a heuristic, just search the last two elements, and keep track
        # of which is the last index of the body of the question

        # In other cases, if there is more than one public officer to reply, then
        # the list of public officers could be split across two elements.  See, for example,
        # the agenda from June 18, 2014, question 1
        ending_index = -2
        for e in elements[-2:]:
            text = e.text_content().strip()
            match = re.search(AgendaQuestion.RESPONDER_PATTERN, text)
            if match is not None:
                self.replier = match.group(1)
                break
            ending_index += 1
        else:
            logger.warning(u'Could not find responder of question in element: {}'.format(text))
            self.replier = None

        # Store the rest of the elements into the body as html
        # NOTE(review): when two elements are scanned and neither matches,
        # ending_index ends at 0 and elements[1:0] is empty, so the body
        # becomes '' - confirm this is intended.
        self.body = ''.join([etree.tounicode(xx, method='html') for xx in elements[1:ending_index]])
Example #26
Source File: conftest.py    From docxcompose with MIT License 4 votes vote down vote up
def pytest_assertrepr_compare(config, op, left, right):
    """Explain a failed ``==`` between two ``ComparableDocument`` objects.

    Returns a list of explanation lines, or ``None`` when the comparison is
    not between two ``ComparableDocument`` instances with ``==`` or when
    there is nothing to report.  If the part names differ, the extra parts
    on each side are listed; otherwise a unified diff of every unequal XML
    part is produced, with binary parts only named.
    """
    is_document_comparison = (
        isinstance(left, ComparableDocument)
        and isinstance(right, ComparableDocument) and op == "==")
    if not is_document_comparison:
        return None

    left.post_compare_failed(right)
    right.post_compare_failed(left)

    if left.has_neq_partnames:
        only_right = [
            name for name in right.partnames if name not in left.partnames]
        only_left = [
            name for name in left.partnames if name not in right.partnames]

        lines = ['documents contain same parts']
        if right.doc is None:
            lines.append('Right document is None')
        if left.doc is None:
            lines.append('Left document is None')
        if only_left:
            lines.append('Left contains extra parts {}'.format(
                ', '.join(only_left)))
        if only_right:
            lines.append('Right contains extra parts {}'.format(
                ', '.join(only_right)))
        return lines

    report = []
    for left_part, right_part in left.neq_parts:
        name = left_part.partname

        if not name.endswith('.xml'):
            # Non-XML parts cannot be diffed as text; just name them.
            report.append('Binary parts differ {}'.format(name))
            report.append('')
            continue

        left_xml = etree.tounicode(
            etree.fromstring(left_part.blob), pretty_print=True)
        right_xml = etree.tounicode(
            etree.fromstring(right_part.blob), pretty_print=True)

        report.extend(unified_diff(
            left_xml.splitlines(),
            right_xml.splitlines(),
            fromfile=name,
            tofile=name))
        report.append('')

    if not report:
        return None

    unequal_names = [pair[0].partname for pair in left.neq_parts]
    report.insert(
        0, 'document parts are equal. Not equal parts: {}'.format(
            ', '.join(unequal_names)))
    return report
Example #27
Source File: test_formatting.py    From xmldiff with MIT License 4 votes vote down vote up
def test_rml_bug(self):
        # Runs do_tree() on one document, then undo_tree() on a second
        # document that reuses the generated placeholders and contains
        # <insert> elements, and checks the restored markup.
        etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS)
        before_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <ref>4</ref>.
      <u><b>At Will Employment</b></u>
      .\u201cText\u201d
    </para>
  </section>
</document>"""
        maker = formatting.PlaceholderMaker(
            text_tags=('para',), formatting_tags=('b', 'u', 'i',))
        maker.do_tree(etree.fromstring(before_diff))
        after_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert>\ue005</insert>.
      \ue007\ue009At Will Employment\ue008\ue006
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""

        # The diff formatting will find some text to insert.
        delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS
        for placeholder in (u'\ue006', u'\ue007'):
            maker.placeholder2tag[placeholder].element.attrib[delete_attrib] = ''

        restored = etree.fromstring(after_diff)
        maker.undo_tree(restored)
        expected = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert><ref>4</ref></insert>.
      <u diff:delete-format=""><b>At Will Employment</b></u>
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""
        self.assertEqual(etree.tounicode(restored), expected)