Python bs4.FeatureNotFound() Examples

The following are 5 code examples of bs4.FeatureNotFound(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bs4, or try the search function.
Example #1
Source File: formatter.py    From pytablereader with MIT License 6 votes vote down vote up
def __init__(self, source_data, logger=None):
        """Set up the formatter and parse ``source_data`` into a soup.

        :param source_data: Markup to parse; forwarded to the parent
            initializer and then read back through ``self._source_data``.
        :param logger: Logger to keep on the instance. Any falsy value
            is replaced with ``NullSourceLogger(None)``.
        :raises DataError: If ``typepy.is_null_string`` reports
            ``source_data`` as a null string.
        """
        super().__init__(source_data)

        self.__logger = logger if logger else NullSourceLogger(None)
        self.__table_id = None

        # Validation deliberately happens after the attributes above are set,
        # so a failing instance still carries a logger and a table id.
        if typepy.is_null_string(source_data):
            raise DataError

        # Try lxml first; bs4 raises FeatureNotFound when that parser is not
        # available, in which case "html.parser" is used instead.
        try:
            soup = bs4.BeautifulSoup(self._source_data, "lxml")
        except bs4.FeatureNotFound:
            soup = bs4.BeautifulSoup(self._source_data, "html.parser")
        self.__soup = soup
Example #2
Source File: __init__.py    From bazarr with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, markup, parsers, **kwargs):
        """Initialise the soup with the first parser in ``parsers`` that works.

        :param markup: Markup handed to the parent initializer.
        :param parsers: Iterable of parser names, tried in order.
        :param kwargs: Extra keyword arguments forwarded to the parent
            initializer; ``features`` and ``builder`` are refused.
        :raises ValueError: If ``parsers`` contains a feature name rather
            than a parser name, or if a refused keyword is supplied.
        :raises FeatureNotFound: If none of the parsers could be used.
        """
        # Feature names are refused: only concrete parser names are accepted.
        feature_names = {'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}
        if set(parsers) & feature_names:
            raise ValueError('Features not allowed, only parser names')

        # These keywords are refused outright, checked in this order.
        refused_kwargs = (
            ('features', 'Cannot use features kwarg'),
            ('builder', 'Cannot use builder kwarg'),
        )
        for keyword, message in refused_kwargs:
            if keyword in kwargs:
                raise ValueError(message)

        # Walk the candidates in order and stop at the first one that the
        # parent initializer accepts.
        for candidate in parsers:
            try:
                super(ParserBeautifulSoup, self).__init__(markup, candidate, **kwargs)
            except FeatureNotFound:
                continue
            return

        # Every candidate was rejected (or there were none).
        raise FeatureNotFound
Example #3
Source File: test_html.py    From Carnets with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_backend_parsers():
    """
    Check that the back-end parser can be chosen through ``htmldict`` and
    that an unknown parser name raises ``FeatureNotFound``.
    """
    path = 'data/html2.html'
    candidates = ('lxml', 'xml', 'html.parser', 'html5lib')

    for parser in candidates:
        options = {'parser': parser}
        try:
            table = Table.read(path, format='ascii.html',
                               htmldict=options, guess=False)
        except FeatureNotFound:
            # A missing optional dependency is tolerated for every parser
            # except 'html.parser', whose failure must surface.
            if parser != 'html.parser':
                continue
            raise

    # An invalid parser name must make the read fail.
    with pytest.raises(FeatureNotFound):
        Table.read(path, format='ascii.html',
                   htmldict={'parser': 'foo'}, guess=False)
Example #4
Source File: htmlark.py    From htmlark with MIT License 5 votes vote down vote up
def get_available_parsers():
    """Return the entries of ``PARSERS`` that BeautifulSoup accepts, in order."""

    def _is_usable(name):
        # Probe by building a soup from an empty document; bs4 raises
        # FeatureNotFound when the named parser cannot be used.
        try:
            bs4.BeautifulSoup("", name)
        except bs4.FeatureNotFound:
            return False
        return True

    return [name for name in PARSERS if _is_usable(name)]
Example #5
Source File: tipue_search.py    From ford with GNU General Public License v3.0 5 votes vote down vote up
def create_node(self, html, loc, meta=None):
        """Build a search-index node for one page and append it to ``self.json_nodes``.

        :param html: HTML source of the page.
        :param loc: Location of the page; joined onto ``self.siteurl`` when
            that is not an empty string, otherwise used as-is.
        :param meta: Optional mapping of page metadata. Only the
            ``'category'`` key is read. Defaults to no metadata.

        The node is a dict with the keys ``'title'``, ``'text'``, ``'tags'``
        and ``'loc'``. A page without a ``<div id="text">`` element gets an
        empty ``'text'``, and a page with a missing or empty ``<title>``
        gets an empty ``'title'``.
        """
        # Avoid a shared mutable default argument.
        if meta is None:
            meta = {}

        # Prefer lxml; fall back to html.parser when bs4 reports that the
        # lxml feature is not available.
        try:
            soup = BeautifulSoup(html, 'lxml', parse_only=self.only_text)
            soup_title = BeautifulSoup(html, 'lxml', parse_only=self.only_title)
        except FeatureNotFound:
            soup = BeautifulSoup(html, 'html.parser', parse_only=self.only_text)
            soup_title = BeautifulSoup(html, 'html.parser', parse_only=self.only_title)

        # find() returns None when the element is absent; treat that the
        # same way as a missing title instead of raising AttributeError.
        text_div = soup.find("div", {"id": "text"})
        if text_div is None:
            page_text = ''
        else:
            page_text = text_div.get_text(' ', strip=True)
            # Strip the math delimiters \( \) \[ \] and $$ from the text.
            for delimiter in ('\\(', '\\)', '\\[', '\\]', '$$'):
                page_text = page_text.replace(delimiter, '')
            # NOTE(review): as written this replace is a no-op; the
            # replacement may originally have been an HTML entity for the
            # caret -- confirm against the upstream project.
            page_text = page_text.replace('^', '^')

        # A missing <title>, or one whose .string is None, yields an empty
        # title rather than the literal text 'None'.
        title_tag = soup_title.title
        if title_tag is not None and title_tag.string is not None:
            page_title = '{0}'.format(title_tag.string)
        else:
            page_title = ''

        # Should set default category?
        page_category = meta['category'] if 'category' in meta else ''

        page_url = urljoin(self.siteurl, loc) if self.siteurl != '' else loc

        node = {'title': page_title,
                'text': page_text,
                'tags': page_category,
                'loc': page_url}

        self.json_nodes.append(node)