org.htmlcleaner.DomSerializer Java Examples

The following examples show how to use org.htmlcleaner.DomSerializer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MagnetWServiceModelImp.java    From AndroidMagnetSearch with Apache License 2.0 4 votes vote down vote up
/**
 * Downloads a search-result page and extracts one {@link MagnetInfo} per row.
 *
 * <p>The page body is fetched with Jsoup, cleaned with HtmlCleaner, converted to a W3C DOM and
 * then queried with the supplied XPath expressions. {@code group} is evaluated against the
 * document and selects the rows; every other expression is evaluated relative to one row.
 *
 * <p>Rows whose text is empty, or that have no magnet node or no name node, are skipped rather
 * than aborting the whole page. The size, count and hot nodes are optional; when one is absent
 * the corresponding field is simply left unset.
 *
 * @param rootUrl passed to {@code transformDetailUrl} together with the name link's href
 * @param url     passed to {@code transformUrl} together with keyword, sort and page
 * @param keyword search keyword
 * @param sort    sort option
 * @param page    page number
 * @param group   XPath selecting the result rows
 * @param magnet  XPath (relative to a row) selecting the node whose text is the magnet link
 * @param name    XPath (relative to a row) selecting the name node; its {@code href}
 *                attribute, when present, is used for the detail URL
 * @param size    XPath (relative to a row) selecting the size node
 * @param count   XPath (relative to a row) selecting the count node
 * @param hot     XPath (relative to a row) selecting the hot node
 * @return the extracted entries in document order; never {@code null}
 * @throws IOException              if the page cannot be fetched
 * @throws XPathExpressionException if one of the XPath expressions cannot be evaluated
 */
public List<MagnetInfo> parser(String rootUrl, String url, String keyword,String sort, int page, String group, String magnet, String name, String size, String count,String hot) throws IOException, XPathExpressionException, ParserConfigurationException, XPatherException {
    String newUrl = transformUrl(url, keyword, sort, page);
    String html = Jsoup.connect(newUrl).get().body().html();

    XPath xPath = XPathFactory.newInstance().newXPath();
    TagNode tagNode = new HtmlCleaner().clean(html);
    Document dom = new DomSerializer(new CleanerProperties()).createDOM(tagNode);

    NodeList result = (NodeList) xPath.evaluate(group, dom, XPathConstants.NODESET);
    List<MagnetInfo> infos = new ArrayList<MagnetInfo>();
    for (int i = 0; i < result.getLength(); i++) {
        Node node = result.item(i);
        if (node == null || StringUtil.isEmpty(node.getTextContent().trim())) {
            continue;
        }

        // Magnet link and name are mandatory: a row lacking either (e.g. a header or an ad
        // row matched by the group expression) is skipped instead of throwing an NPE.
        Node magnetNote = (Node) xPath.evaluate(magnet, node, XPathConstants.NODE);
        Node nameNote = (Node) xPath.evaluate(name, node, XPathConstants.NODE);
        if (magnetNote == null || nameNote == null) {
            continue;
        }

        MagnetInfo info = new MagnetInfo();

        // Magnet link
        info.setMagnet(transformMagnet(magnetNote.getTextContent()));

        // Name
        String nameValue = nameNote.getTextContent();
        info.setName(nameValue);

        // Detail URL: getAttributes() is null for non-element nodes and getNamedItem()
        // is null when the attribute is missing, so both are checked.
        NamedNodeMap nameAttributes = nameNote.getAttributes();
        Node hrefNote = nameAttributes == null ? null : nameAttributes.getNamedItem("href");
        if (hrefNote != null) {
            info.setDetailUrl(transformDetailUrl(rootUrl, hrefNote.getTextContent()));
        }

        // Size (optional)
        Node sizeNote = (Node) xPath.evaluate(size, node, XPathConstants.NODE);
        if (sizeNote != null) {
            String sizeValue = sizeNote.getTextContent();
            info.setFormatSize(sizeValue);
            info.setSize(transformSize(sizeValue));
        }

        // Count (optional)
        Node countNote = (Node) xPath.evaluate(count, node, XPathConstants.NODE);
        if (countNote != null) {
            info.setCount(countNote.getTextContent());
        }

        // Hot (optional)
        Node hotNote = (Node) xPath.evaluate(hot, node, XPathConstants.NODE);
        if (hotNote != null) {
            info.setHot(hotNote.getTextContent());
        }

        // Extra information derived from the name
        info.setResolution(transformResolution(nameValue));

        infos.add(info);
    }
    return infos;
}
 
Example #2
Source File: MagnetWServiceModelImp.java    From AndroidDownload with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches one page of search results and converts each matched row into a {@link MagnetInfo}.
 *
 * <p>Processing steps: build the request URL with {@code transformUrl}, download the body with
 * Jsoup, clean it with HtmlCleaner, serialize it to a W3C DOM, then evaluate {@code group}
 * against the document to obtain the rows. The remaining XPath arguments are evaluated with
 * each row as the context node.
 *
 * <p>A row is ignored when its trimmed text is empty or when the {@code magnet} or
 * {@code name} expression matches nothing in it. A missing {@code href} on the name node only
 * leaves the detail URL unset. Size, count and hot are optional.
 *
 * @param rootUrl forwarded to {@code transformDetailUrl} with the href of the name node
 * @param url     forwarded to {@code transformUrl}
 * @param keyword forwarded to {@code transformUrl}
 * @param sort    forwarded to {@code transformUrl}
 * @param page    forwarded to {@code transformUrl}
 * @param group   XPath that selects the rows from the document
 * @param magnet  row-relative XPath for the magnet-link node
 * @param name    row-relative XPath for the name node
 * @param size    row-relative XPath for the size node
 * @param count   row-relative XPath for the count node
 * @param hot     row-relative XPath for the hot node
 * @return list of parsed entries, possibly empty
 * @throws IOException              when the HTTP request fails
 * @throws XPathExpressionException when an XPath expression cannot be evaluated
 */
public List<MagnetInfo> parser(String rootUrl, String url, String keyword,String sort, int page, String group, String magnet, String name, String size, String count,String hot) throws IOException, XPathExpressionException, ParserConfigurationException, XPatherException {
    String requestUrl = transformUrl(url, keyword, sort, page);
    String html = Jsoup.connect(requestUrl).get().body().html();

    XPath xPath = XPathFactory.newInstance().newXPath();
    TagNode cleaned = new HtmlCleaner().clean(html);
    Document dom = new DomSerializer(new CleanerProperties()).createDOM(cleaned);

    NodeList rows = (NodeList) xPath.evaluate(group, dom, XPathConstants.NODESET);
    List<MagnetInfo> infos = new ArrayList<MagnetInfo>();
    for (int i = 0; i < rows.getLength(); i++) {
        Node row = rows.item(i);
        if (row == null) {
            continue;
        }
        if (StringUtil.isEmpty(row.getTextContent().trim())) {
            continue;
        }

        // Without a magnet node the entry is useless; skip the row instead of crashing.
        Node magnetNode = (Node) xPath.evaluate(magnet, row, XPathConstants.NODE);
        if (magnetNode == null) {
            continue;
        }
        // The name is needed both for display and for deriving the resolution below.
        Node nameNode = (Node) xPath.evaluate(name, row, XPathConstants.NODE);
        if (nameNode == null) {
            continue;
        }

        MagnetInfo info = new MagnetInfo();
        info.setMagnet(transformMagnet(magnetNode.getTextContent()));

        String nameValue = nameNode.getTextContent();
        info.setName(nameValue);

        // Only element nodes carry attributes, and the href itself may be missing.
        NamedNodeMap attrs = nameNode.getAttributes();
        if (attrs != null) {
            Node href = attrs.getNamedItem("href");
            if (href != null) {
                info.setDetailUrl(transformDetailUrl(rootUrl, href.getTextContent()));
            }
        }

        // Size: stored both as the raw text and as the value returned by transformSize.
        Node sizeNode = (Node) xPath.evaluate(size, row, XPathConstants.NODE);
        if (sizeNode != null) {
            String sizeText = sizeNode.getTextContent();
            info.setFormatSize(sizeText);
            info.setSize(transformSize(sizeText));
        }

        // Count
        Node countNode = (Node) xPath.evaluate(count, row, XPathConstants.NODE);
        if (countNode != null) {
            info.setCount(countNode.getTextContent());
        }

        // Hot
        Node hotNode = (Node) xPath.evaluate(hot, row, XPathConstants.NODE);
        if (hotNode != null) {
            info.setHot(hotNode.getTextContent());
        }

        // Additional information derived from the name
        info.setResolution(transformResolution(nameValue));

        infos.add(info);
    }
    return infos;
}