Python bs4.CData() Examples

The following are 11 code examples of bs4.CData(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bs4, or try the search function.
Example #1
Source File: update.py    From mftp with GNU General Public License v3.0 6 votes vote down vote up
def check_companies(session, sessionData):
    """Fetch the companies listing, parse each row and pass the result on.

    The page at ``ERP_COMPANIES_URL`` is parsed with ``html.parser``; every
    ``<row>`` element is turned into a dict with the keys ``name``,
    ``name_link``, ``job``, ``job_link``, ``description_link``,
    ``start_date`` and ``end_date``.  The collected list is handed to
    ``handle_companies_diff``.

    Args:
        session: object whose ``get(url, **kwargs)`` returns a response
            exposing ``.text``.
        sessionData: not used by this function.

    Raises:
        IndexError: if a row has fewer than 11 CDATA sections, or one of the
            parsed cells contains no ``<a>`` element.
        KeyError: if an expected ``title``/``onclick`` attribute is missing.
    """
    r = session.get(ERP_COMPANIES_URL, **req_args)

    companies_list = bs(r.text, 'html.parser')
    companies = []
    for row in companies_list.find_all('row'):
        company = {}
        # Build a real list: the cells are accessed by position below, and
        # filter() only returns an indexable list on Python 2 (on Python 3 it
        # is a lazy iterator and cds[0] would raise TypeError).
        cds = [x for x in row.find_all(text=True) if isinstance(x, CData)]

        # Each of these CDATA cells holds an HTML fragment whose first <a>
        # carries the data in its attributes.
        a = bs(cds[0].string, 'html.parser').find_all('a')[0]
        company['name'], company['name_link'] = a.attrs['title'], a.attrs['onclick']

        a = bs(cds[3].string, 'html.parser').find_all('a')[0]
        company['job'], company['job_link'] = a.attrs['title'], a.attrs['onclick']

        a = bs(cds[7].string, 'html.parser').find_all('a')[0]
        company['description_link'] = a.attrs['onclick']

        # The date cells are stored as the CData objects themselves.
        company['start_date'], company['end_date'] = cds[9], cds[10]
        companies.append(company)

    handle_companies_diff(companies)
Example #2
Source File: css_match.py    From soupsieve with MIT License 5 votes vote down vote up
def is_cdata(obj):
    """Return True when `obj` is an instance of `bs4.CData`."""
    cdata_type = bs4.CData
    return isinstance(obj, cdata_type)
Example #3
Source File: css_match.py    From soupsieve with MIT License 5 votes vote down vote up
def is_special_string(obj):
    """Return True when `obj` is a bs4 Comment, Declaration, CData,
    ProcessingInstruction or Doctype instance."""
    special_types = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
        bs4.Doctype,
    )
    return isinstance(obj, special_types)
Example #4
Source File: css_match.py    From plugin.git.browser with GNU General Public License v3.0 5 votes vote down vote up
def is_special_string(obj):
    """Return True when `obj` is a bs4 Comment, Declaration, CData or
    ProcessingInstruction instance.

    Note: `bs4.Doctype` is not part of this check.
    """
    import bs4

    special_types = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
    )
    return isinstance(obj, special_types)
Example #5
Source File: css_match.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def is_cdata(obj):
    """Tell whether `obj` is a `bs4.CData` instance."""
    import bs4

    result = isinstance(obj, bs4.CData)
    return result
Example #6
Source File: css_match.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def is_special_string(obj):
    """Tell whether `obj` is an instance of any of the bs4 classes Comment,
    Declaration, CData, ProcessingInstruction or Doctype."""
    import bs4

    candidates = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
        bs4.Doctype,
    )
    return isinstance(obj, candidates)
Example #7
Source File: css_match.py    From Tautulli with GNU General Public License v3.0 5 votes vote down vote up
def is_cdata(obj):
    """Check `obj` against `bs4.CData` with `isinstance`."""
    import bs4

    expected_type = bs4.CData
    return isinstance(obj, expected_type)
Example #8
Source File: css_match.py    From Tautulli with GNU General Public License v3.0 5 votes vote down vote up
def is_special_string(obj):
    """Check `obj` against bs4's Comment, Declaration, CData,
    ProcessingInstruction and Doctype classes with `isinstance`."""
    import bs4

    matches = isinstance(
        obj,
        (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype),
    )
    return matches
Example #9
Source File: update.py    From mftp with GNU General Public License v3.0 4 votes vote down vote up
def check_notices(session, sessionData):
    """Fetch the notice listing, build a dict per notice and pass them on.

    For each of the first ``NUM_NOTICES_DIFFED`` ``<row>`` elements a dict is
    built with ``subject``, ``company``, ``text`` and ``time``; when the row
    links a download, ``attachment_url``, ``attachment_raw`` and
    ``attachment_md5`` are added as well.  The list is handed to
    ``handle_notices_diff``.

    Rows whose notice link has no ``onclick`` attribute, or whose ``onclick``
    does not match the expected ``ViewNotice("…","…")`` form, are reported as
    poorly formatted and skipped.

    Args:
        session: object whose ``get(url, **kwargs)`` returns a response
            exposing ``.text``, ``.raw`` and ``.iter_content``.
        sessionData: not used by this function.
    """
    # The first response is discarded; presumably the noticeboard page must be
    # visited before the notices URL will answer -- TODO confirm.
    r = session.get(ERP_NOTICEBOARD_URL, **req_args)
    r = session.get(ERP_NOTICES_URL, **req_args)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("ERP and TNP login completed!")

    notices_list = bs(r.text, 'html.parser')
    rows = notices_list.find_all('row')

    print("Total number of notices fetched: %d" % len(rows))

    notices = []
    # Only the first NUM_NOTICES_DIFFED rows are examined.
    for row in rows[:NUM_NOTICES_DIFFED]:
        notice = {}

        # Build a real list: the cells are accessed by position below, and
        # filter() is only indexable on Python 2.
        cds = [x for x in row.find_all(text=True) if isinstance(x, CData)]

        notice['subject'] = cds[2].string
        notice['company'] = cds[3].string

        a = bs(cds[4].string, 'html.parser').find_all('a')[0]
        try:
            m = re.search(r'ViewNotice\("(.+?)","(.+?)"\)', a.attrs['onclick'])
        except KeyError:
            m = None
        # re.search returns None on no match; treat that like a missing
        # onclick instead of failing on m.group() below.
        if m is None:
            print("Poorly formatted notice found")
            continue
        year, id_ = m.group(1), m.group(2)
        content = bs(session.get(ERP_NOTICE_CONTENT_URL % (year, id_)).text, 'html.parser')
        content_div = content.find_all('div', {'id': 'printableArea'})[0]
        notice['text'] = content_div.decode_contents(formatter='html')
        notice['time'] = cds[6].string

        a = bs(cds[7].string, 'html.parser').find_all('a')[0]
        if a.attrs['title'] == 'Download':
            onclick = a.attrs['onclick']
            m = re.search(r'TPNotice\("(.+)"\)', onclick)
            notice['attachment_url'] = ERP_ATTACHMENT_URL + m.group(1)
            r = session.get(notice['attachment_url'], stream=True)
            r.raw.decode_content = True
            hash_ = hashlib.md5()
            # Collect the chunks and join once rather than growing a bytes
            # object with += on every iteration.
            chunks = []
            for chunk in r.iter_content(4096):
                chunks.append(chunk)
                hash_.update(chunk)
            notice['attachment_raw'] = b"".join(chunks)
            notice['attachment_md5'] = hash_.hexdigest()

        notices.append(notice)

    handle_notices_diff(notices)
Example #10
Source File: css_match.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 4 votes vote down vote up
def is_cdata(obj):
    """Return whether `obj` passes an `isinstance` check for `bs4.CData`."""
    import bs4

    target = bs4.CData
    is_match = isinstance(obj, target)
    return is_match
Example #11
Source File: css_match.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 4 votes vote down vote up
def is_special_string(obj):
    """Return whether `obj` passes an `isinstance` check for any of bs4's
    Comment, Declaration, CData, ProcessingInstruction or Doctype classes."""
    import bs4

    kinds = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
        bs4.Doctype,
    )
    return isinstance(obj, kinds)