Python bs4.__version__() Examples

The following are 17 code examples of bs4.__version__(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bs4 , or try the search function .
Example #1
Source File: test_html.py    From Computable with MIT License 6 votes vote down vote up
def _skip_if_none_of(module_names):
    if isinstance(module_names, string_types):
        _skip_if_no(module_names)
        if module_names == 'bs4':
            import bs4
            if bs4.__version__ == LooseVersion('4.2.0'):
                raise nose.SkipTest("Bad version of bs4: 4.2.0")
    else:
        not_found = [module_name for module_name in module_names if not
                     _have_module(module_name)]
        if set(not_found) & set(module_names):
            raise nose.SkipTest("{0!r} not found".format(not_found))
        if 'bs4' in module_names:
            import bs4
            if bs4.__version__ == LooseVersion('4.2.0'):
                raise nose.SkipTest("Bad version of bs4: 4.2.0") 
Example #2
Source File: test_html.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _skip_if_none_of(module_names):
    if isinstance(module_names, string_types):
        _skip_if_no(module_names)
        if module_names == 'bs4':
            import bs4
            if bs4.__version__ == LooseVersion('4.2.0'):
                pytest.skip("Bad version of bs4: 4.2.0")
    else:
        not_found = [module_name for module_name in module_names if not
                     _have_module(module_name)]
        if set(not_found) & set(module_names):
            pytest.skip("{0!r} not found".format(not_found))
        if 'bs4' in module_names:
            import bs4
            if bs4.__version__ == LooseVersion('4.2.0'):
                pytest.skip("Bad version of bs4: 4.2.0") 
Example #3
Source File: utils.py    From MechanicalSoup with MIT License 5 votes vote down vote up
def mock_post(mocked_adapter, url, expected, reply='Success!'):
    def text_callback(request, context):
        # Python 2's parse_qsl doesn't like None argument
        query = parse_qsl(request.text) if request.text else []
        # In bs4 4.7.0+, CSS selectors return elements in page order,
        # but did not in earlier versions.
        if StrictVersion(bs4.__version__) >= StrictVersion('4.7.0'):
            assert query == expected
        else:
            assert sorted(query) == sorted(expected)
        return reply

    mocked_adapter.register_uri('POST', url, text=text_callback) 
Example #4
Source File: diagnose.py    From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International 5 votes vote down vote up
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a)) 
Example #5
Source File: diagnose.py    From Tautulli with GNU General Public License v3.0 5 votes vote down vote up
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a)) 
Example #6
Source File: diagnose.py    From bazarr with GNU General Public License v3.0 5 votes vote down vote up
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a)) 
Example #7
Source File: diagnose.py    From MIA-Dictionary-Addon with GNU General Public License v3.0 5 votes vote down vote up
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a)) 
Example #8
Source File: test_html.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_bs4_version_fails():
    _skip_if_none_of(('bs4', 'html5lib'))
    import bs4
    if bs4.__version__ == LooseVersion('4.2.0'):
        tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH,
                                                                 "spam.html"),
                         flavor='bs4') 
Example #9
Source File: html.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
                         'are {valid}'
                         .format(invalid=flavor, valid=valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found, please install it")
        if not _HAS_BS4:
            raise ImportError(
                "BeautifulSoup4 (bs4) not found, please install it")
        import bs4
        if LooseVersion(bs4.__version__) <= LooseVersion('4.2.0'):
            raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                             "is required")

    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
    return _valid_parsers[flavor] 
Example #10
Source File: html.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
                         'are {valid}'
                         .format(invalid=flavor, valid=valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found, please install it")
        if not _HAS_BS4:
            raise ImportError(
                "BeautifulSoup4 (bs4) not found, please install it")
        import bs4
        if LooseVersion(bs4.__version__) <= LooseVersion('4.2.0'):
            raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                             "is required")

    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
    return _valid_parsers[flavor] 
Example #11
Source File: diagnose.py    From B.E.N.J.I. with MIT License 5 votes vote down vote up
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))
    
    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a)) 
Example #12
Source File: test_html.py    From Computable with MIT License 5 votes vote down vote up
def test_bs4_version_fails():
    _skip_if_none_of(('bs4', 'html5lib'))
    import bs4
    if bs4.__version__ == LooseVersion('4.2.0'):
        tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH,
                                                                 "spam.html"),
                         flavor='bs4') 
Example #13
Source File: html.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
                         'are {valid}'
                         .format(invalid=flavor, valid=valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found, please install it")
        if not _HAS_BS4:
            raise ImportError(
                "BeautifulSoup4 (bs4) not found, please install it")
        import bs4
        if LooseVersion(bs4.__version__) <= LooseVersion('4.2.0'):
            raise ValueError("A minimum version of BeautifulSoup 4.2.1 "
                             "is required")

    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
    return _valid_parsers[flavor] 
Example #14
Source File: html.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
                         'are {valid}'
                         .format(invalid=flavor, valid=valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found, please install it")
        if not _HAS_BS4:
            raise ImportError(
                "BeautifulSoup4 (bs4) not found, please install it")
        import bs4
        if bs4.__version__ == LooseVersion('4.2.0'):
            raise ValueError("You're using a version"
                             " of BeautifulSoup4 (4.2.0) that has been"
                             " known to cause problems on certain"
                             " operating systems such as Debian. "
                             "Please install a version of"
                             " BeautifulSoup4 != 4.2.0, both earlier"
                             " and later releases will work.")
    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
    return _valid_parsers[flavor] 
Example #15
Source File: html.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('{invalid!r} is not a valid flavor, valid flavors '
                         'are {valid}'
                         .format(invalid=flavor, valid=valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found, please install it")
        if not _HAS_BS4:
            raise ImportError(
                "BeautifulSoup4 (bs4) not found, please install it")
        import bs4
        if bs4.__version__ == LooseVersion('4.2.0'):
            raise ValueError("You're using a version"
                             " of BeautifulSoup4 (4.2.0) that has been"
                             " known to cause problems on certain"
                             " operating systems such as Debian. "
                             "Please install a version of"
                             " BeautifulSoup4 != 4.2.0, both earlier"
                             " and later releases will work.")
    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found, please install it")
    return _valid_parsers[flavor] 
Example #16
Source File: diagnose.py    From B.E.N.J.I. with MIT License 4 votes vote down vote up
def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    for name in basic_parsers:
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print((
                "I noticed that %s is not installed. Installing it may help." %
                name))

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
        except ImportError as e:
            print (
                "lxml is not installed or couldn't be imported.")


    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print("Found html5lib version %s" % html5lib.__version__)
        except ImportError as e:
            print (
                "html5lib is not installed or couldn't be imported.")

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        with open(data) as fp:
            data = fp.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
        return
    print()

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception as e:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80) 
Example #17
Source File: html.py    From Computable with MIT License 4 votes vote down vote up
def _parser_dispatch(flavor):
    """Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : str
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    valid_parsers = list(_valid_parsers.keys())
    if flavor not in valid_parsers:
        raise ValueError('%r is not a valid flavor, valid flavors are %s' %
                         (flavor, valid_parsers))

    if flavor in ('bs4', 'html5lib'):
        if not _HAS_HTML5LIB:
            raise ImportError("html5lib not found please install it")
        if not _HAS_BS4:
            raise ImportError("bs4 not found please install it")
        if bs4.__version__ == LooseVersion('4.2.0'):
            raise ValueError("You're using a version"
                             " of BeautifulSoup4 (4.2.0) that has been"
                             " known to cause problems on certain"
                             " operating systems such as Debian. "
                             "Please install a version of"
                             " BeautifulSoup4 != 4.2.0, both earlier"
                             " and later releases will work.")
    else:
        if not _HAS_LXML:
            raise ImportError("lxml not found please install it")
    return _valid_parsers[flavor]