Java Code Examples for org.jsoup.nodes.Element#hasAttr()

The following examples show how to use org.jsoup.nodes.Element#hasAttr(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Aw22Rule06031.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Evaluates the title attribute ({@code TITLE_ATTR}) of a link element.
 *
 * @param sspHandler the handler forwarded to the title pertinence checker
 * @param el the element whose title attribute is examined
 * @param linkText the link text the title is compared with, ignoring case
 * @return {@link TestSolution#NOT_APPLICABLE} when the title attribute is
 *         absent, blank, or equal (ignoring case) to {@code linkText};
 *         otherwise the solution held by the test solution handler once
 *         {@code titlePertinenceElementChecker} has checked the element
 */
private TestSolution testTitleAttributeLink(
        SSPHandler sspHandler,
        Element el,
        String linkText) {
    // A missing attribute is modelled as null so that the blank test below
    // covers the "no title" and "empty title" cases in one go.
    final String title = el.hasAttr(TITLE_ATTR) ? el.attr(TITLE_ATTR) : null;
    final boolean nothingToCheck = StringUtils.isBlank(title)
            || StringUtils.equalsIgnoreCase(title, linkText);
    if (nothingToCheck) {
        return TestSolution.NOT_APPLICABLE;
    }

    final TestSolutionHandler solutionHolder = new TestSolutionHandlerImpl();
    final ElementHandler<Element> singleElement = new ElementHandlerImpl(el);
    titlePertinenceElementChecker.check(sspHandler, singleElement, solutionHolder);
    return solutionHolder.getTestSolution();
}
 
Example 2
Source File: Rgaa32016Rule060303.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Evaluates the title attribute ({@code TITLE_ATTR}) of a link element.
 *
 * @param sspHandler the handler forwarded to the title pertinence checker
 * @param el the element whose title attribute is examined
 * @param linkText the link text the title is compared with, ignoring case
 * @return {@link TestSolution#NOT_APPLICABLE} when the title attribute is
 *         absent, blank, or equal (ignoring case) to {@code linkText};
 *         otherwise the solution held by the test solution handler once
 *         {@code titlePertinenceElementChecker} has checked the element
 */
private TestSolution testTitleAttributeLink(
        SSPHandler sspHandler,
        Element el,
        String linkText) {
    // A missing attribute is modelled as null so that the blank test below
    // covers the "no title" and "empty title" cases in one go.
    final String title = el.hasAttr(TITLE_ATTR) ? el.attr(TITLE_ATTR) : null;
    final boolean nothingToCheck = StringUtils.isBlank(title)
            || StringUtils.equalsIgnoreCase(title, linkText);
    if (nothingToCheck) {
        return TestSolution.NOT_APPLICABLE;
    }

    final TestSolutionHandler solutionHolder = new TestSolutionHandlerImpl();
    final ElementHandler<Element> singleElement = new ElementHandlerImpl(el);
    titlePertinenceElementChecker.check(sspHandler, singleElement, solutionHolder);
    return solutionHolder.getTestSolution();
}
 
Example 3
Source File: Rgaa32016Rule110102.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Links each input element of {@code inputElementHandler} to its enclosing
 * form in {@code inputFormMap}.
 * <p>
 * Only inputs that carry none of the {@code TITLE_ATTR},
 * {@code ARIA_LABEL_ATTR} and {@code ARIA_LABELLEDBY_ATTR} attributes are
 * considered. For each of them the ancestors are walked upwards until an
 * element whose tag name equals {@code FORM_ELEMENT} is found; the input is
 * then added to the handler stored for that form, which is created on first
 * use. Inputs without such an ancestor are left out of the map.
 */
private void putInputElementHandlerIntoTheMap () {
    for (Element el : inputElementHandler.get()) {
        if (el.hasAttr(TITLE_ATTR)
                || el.hasAttr(ARIA_LABEL_ATTR)
                || el.hasAttr(ARIA_LABELLEDBY_ATTR)) {
            continue;
        }
        Element tmpElement = el.parent();
        // parent() yields null once the top of the tree has been passed (or
        // for a detached element); the null check ends the walk cleanly for
        // inputs that are not nested in any form.
        while (tmpElement != null
                && StringUtils.isNotBlank(tmpElement.tagName())) {
            if (tmpElement.tagName().equals(FORM_ELEMENT)) {
                if (inputFormMap.containsKey(tmpElement)) {
                    inputFormMap.get(tmpElement).add(el);
                } else {
                    ElementHandler<Element> inputElement = new ElementHandlerImpl();
                    inputElement.add(el);
                    inputFormMap.put(tmpElement, inputElement);
                }
                break;
            }
            tmpElement = tmpElement.parent();
        }
    }
}
 
Example 4
Source File: M2DocHTMLParser.java    From M2Doc with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Sets the unordered list numbering.
 * <p>
 * The bullet symbol is chosen from the element's {@code TYPE_ATTR}
 * attribute: "square" and "circle" select their own symbols, anything else
 * (including a missing attribute) selects the disc symbol.
 * 
 * @param context
 *            the {@link Context}
 * @param element
 *            the unordered list {@link Element}
 */
private void setUnorderedListNumbering(Context context, Element element) {
    // Disc is the fallback for a missing, "disc" or unrecognised type.
    String bullet = DISC_SYMBOL;
    if (element.hasAttr(TYPE_ATTR)) {
        final String requestedType = element.attr(TYPE_ATTR);
        if ("square".equals(requestedType)) {
            bullet = SQUARE_SYMBOL;
        } else if ("circle".equals(requestedType)) {
            bullet = CIRCLE_SYMBOL;
        }
    }

    if (context.numbering == null) {
        createNumbering(context);
    }
    context.numberingLevel = incrementNumberingLevel(
            context.numbering,
            context.numberingLevel,
            STNumberFormat.BULLET,
            1,
            bullet,
            false);
}
 
Example 5
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Determines the implied URL of an element and returns the raw
 * {@code href} value that was found.
 *
 * @param elem the element to inspect
 * @return the {@code href} of {@code elem} itself when it is an
 *         {@code a} or {@code area} with that attribute; otherwise the
 *         {@code href} of the single {@code a} (checked first) or single
 *         {@code area} child that has no root class; {@code null} when
 *         neither rule applies
 */
private String parseImpliedUrlRelative(Element elem) {
    // Rule 1: a.h-x[href] or area.h-x[href] -> use that [href].
    final String ownTag = elem.tagName();
    final boolean isLinkLike = "a".equals(ownTag) || "area".equals(ownTag);
    if (isLinkLike && elem.hasAttr("href")) {
        return elem.attr("href");
    }

    // Rule 2: .h-x>a[href]:only-of-type:not[.h-*], then the same for area.
    for (String childTag : Arrays.asList("a", "area")) {
        final Elements sameTagChildren = filterByTag(elem.children(), childTag);
        if (sameTagChildren.size() != 1) {
            continue;
        }
        final Element onlyChild = sameTagChildren.first();
        if (hasRootClass(onlyChild) || !onlyChild.hasAttr("href")) {
            continue;
        }
        return onlyChild.attr("href");
    }

    return null;
}
 
Example 6
Source File: StockToStockWithCompanyInfoMapper.java    From XueQiuSuperSpider with MIT License 6 votes vote down vote up
/**
 * Rebuilds {@code industryMap} from the anchors found on the page addressed
 * by {@code URLMapper.COMPREHENSIVE_PAGE}.
 * <p>
 * The anchors are located by a fixed position path below the second
 * element of class "second-nav". Each anchor that has both a
 * {@code title} and an {@code href} contributes one {@code Industry},
 * keyed by its title.
 *
 * @throws Exception if the page cannot be requested; an out-of-range
 *         position on the path also surfaces as an exception
 */
private void initMap() throws Exception {
    industryMap = new HashMap<>();

    final URL pageUrl = new URL(URLMapper.COMPREHENSIVE_PAGE.toString());
    final Document page = Jsoup.parse(request(pageUrl));

    final Elements anchors = page.getElementsByClass("second-nav")
            .get(1).children()
            .get(3).children()
            .get(3).children()
            .select("a");

    for (Element anchor : anchors) {
        if (anchor.hasAttr("title") && anchor.hasAttr("href")) {
            final String link = anchor.attr("href");
            final String industryName = anchor.attr("title");
            industryMap.put(industryName, new Industry(industryName, link));
        }
    }
}
 
Example 7
Source File: Aw22Rule06034.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Evaluates the title attribute ({@code TITLE_ATTR}) of a link element.
 *
 * @param sspHandler the handler forwarded to the title pertinence checker
 * @param el the element whose title attribute is examined
 * @param linkText the link text the title is compared with, ignoring case
 * @return {@link TestSolution#NOT_APPLICABLE} when the title attribute is
 *         absent, blank, or equal (ignoring case) to {@code linkText};
 *         otherwise the solution held by the test solution handler once
 *         {@code titlePertinenceElementChecker} has checked the element
 */
private TestSolution testTitleAttributeLink(
        SSPHandler sspHandler,
        Element el,
        String linkText) {
    // A missing attribute is modelled as null so that the blank test below
    // covers the "no title" and "empty title" cases in one go.
    final String title = el.hasAttr(TITLE_ATTR) ? el.attr(TITLE_ATTR) : null;
    final boolean nothingToCheck = StringUtils.isBlank(title)
            || StringUtils.equalsIgnoreCase(title, linkText);
    if (nothingToCheck) {
        return TestSolution.NOT_APPLICABLE;
    }

    final TestSolutionHandler solutionHolder = new TestSolutionHandlerImpl();
    final ElementHandler<Element> singleElement = new ElementHandlerImpl(el);
    titlePertinenceElementChecker.check(sspHandler, singleElement, solutionHolder);
    return solutionHolder.getTestSolution();
}
 
Example 8
Source File: Page.java    From WebCollector with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the values of the given attribute from every element of the
 * page that matches the given CSS selector.
 * For example, {@code attrs("img[src]", "abs:src")} returns the links of
 * all images in the page.
 *
 * @param cssSelector the CSS selector choosing the elements
 * @param attrName the name of the attribute to read
 * @return the attribute values of the matching elements that have the
 *         attribute, in the iteration order of the selection
 */
public ArrayList<String> attrs(String cssSelector, String attrName) {
    final ArrayList<String> values = new ArrayList<String>();
    for (Element matched : select(cssSelector)) {
        if (!matched.hasAttr(attrName)) {
            continue;
        }
        values.add(matched.attr(attrName));
    }
    return values;
}
 
Example 9
Source File: Rgaa30Rule090102.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the inherited selection, then removes every selected element whose
 * {@code aria-level} attribute is present but does not match
 * {@code PATTERN}. Elements without the attribute are kept.
 *
 * @param sspHandler the handler passed to the inherited selection
 */
@Override
protected void select(SSPHandler sspHandler) {
    super.select(sspHandler);
    for (Iterator<Element> it = getElements().get().iterator(); it.hasNext();) {
        final Element candidate = it.next();
        final boolean invalidLevel = candidate.hasAttr("aria-level")
                && !PATTERN.matcher(candidate.attr("aria-level")).matches();
        if (invalidLevel) {
            it.remove();
        }
    }
}
 
Example 10
Source File: SankakuComplexRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Fetches the page that follows {@code doc}.
 * <p>
 * The next URL is read, as an absolute URL, from the
 * {@code next-page-url} attribute of the first {@code div.pagination}
 * element.
 *
 * @param doc the current page
 * @return the next page, requested with the ripper's cookies
 * @throws IOException with message "No more pages" when the document has
 *         no pagination element, the element has no {@code next-page-url}
 *         attribute, or the next URL contains "page=26"; also propagated
 *         from the HTTP request itself
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    Element pagination = doc.select("div.pagination").first();
    // first() returns null when nothing matched; that simply means there is
    // no further page, so fall through to the IOException below.
    if (pagination != null && pagination.hasAttr("next-page-url")) {
        String nextPage = pagination.attr("abs:next-page-url");
        // Only logged in users can see past page 25
        // Trying to rip page 26 will throw a no images found error
        if (!nextPage.contains("page=26")) {
            LOGGER.info("Getting next page: " + nextPage);
            return Http.url(nextPage).cookies(cookies).get();
        }
    }
    throw new IOException("No more pages");
}
 
Example 11
Source File: Elements.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 Get an attribute value from the first matched element that has the attribute.
 @param attributeKey The attribute key.
 @return The attribute value from the first matched element that has the attribute. If no elements were matched
 (isEmpty() == true), or if none of the elements have the attribute, returns an empty string.
 @see #hasAttr(String)
 */
public String attr(String attributeKey) {
    String found = "";
    for (Element candidate : this) {
        if (candidate.hasAttr(attributeKey)) {
            // The first element carrying the attribute wins; stop looking.
            found = candidate.attr(attributeKey);
            break;
        }
    }
    return found;
}
 
Example 12
Source File: NatalieMuRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Extracts the full-size image URLs of an article gallery page.
 * <p>
 * Every {@code .NA_articleGallery span} with a {@code style} attribute is
 * searched for a background-image URL containing "list_thumb_inbox". The
 * URL is completed with a scheme/host when it is protocol-relative or
 * root-relative, and the thumbnail marker is replaced by "xlarge".
 *
 * @param page the gallery page
 * @return the distinct image URLs in document order; collection stops
 *         after the first URL in test mode, or as soon as the ripper is
 *         stopped
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> imageURLs = new ArrayList<>();
    // The expression is identical for every span, so compile it once
    // instead of on each loop iteration.
    final Pattern thumbnailPattern = Pattern.compile(
            "background-image: url\\((.*list_thumb_inbox.*)\\);", Pattern.CASE_INSENSITIVE);
    //select all album thumbnails
    for (Element span : page.select(".NA_articleGallery span")) {
        if (!span.hasAttr("style")) {
            continue;
        }
        String style = span.attr("style").trim();

        Matcher m = thumbnailPattern.matcher(style);
        if (m.find()) {
            String imgUrl = m.group(1);
            if (imgUrl.startsWith("//")) {
                imgUrl = "http:" + imgUrl;
            }
            // Not reached for the "//" case above: by then the URL starts
            // with "http:".
            if (imgUrl.startsWith("/")) {
                imgUrl = "http://" + this.url.getHost() + imgUrl;
            }
            //convert thumbnail url into fullsize url
            imgUrl = imgUrl.replace("list_thumb_inbox","xlarge");
            // Don't download the same URL twice
            if (imageURLs.contains(imgUrl)) {
                LOGGER.debug("Already attempted: " + imgUrl);
                continue;
            }
            imageURLs.add(imgUrl);
            if (isThisATest()) {
                break;
            }
        }

        if (isStopped()) {
            break;
        }
    }
    return imageURLs;
}
 
Example 13
Source File: Rgaa30Rule010204.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Sorts SVG elements into the handlers matching the patterns detected on
 * them. One element may land in several handlers.
 * 
 * @param svgElements the elements to examine
 * @param svgElementsWithoutRoleImage receives the elements whose
 *        {@code ROLE_ATTR} value is not "img" (ignoring case)
 * @param ariaAttrOnSvgOrChild receives the elements that carry one of the
 *        {@code ARIA_LABEL_ATTR}, {@code ARIA_LABELLEDBY_ATTR} or
 *        {@code ARIA_DESCRIBEDBY_ATTR} attributes, or for which the
 *        combined aria CSS-like query selects something
 * @param svgElementsWithDescOrTitleChild receives the elements for which
 *        the combined not-empty title/desc CSS-like query selects something
 * @param titleAttrOnSvgOrChild receives the elements that have the
 *        {@code TITLE_ELEMENT} attribute, or for which {@code [title]}
 *        selects something
 * @param wellFormedSvgElements receives the elements on which none of the
 *        patterns above was detected; may be {@code null}, in which case
 *        such elements are not collected
 */
private void extractMalformedPatternDetectedElements (
        ElementHandler<Element> svgElements,
        ElementHandler<Element> svgElementsWithoutRoleImage,
        ElementHandler<Element> ariaAttrOnSvgOrChild,
        ElementHandler<Element> svgElementsWithDescOrTitleChild,
        ElementHandler<Element> titleAttrOnSvgOrChild,
        ElementHandler<Element> wellFormedSvgElements) {
    final String ariaQuery =
            ARIA_DESCRIBEDBY_CSS_LIKE_QUERY+","+ ARIA_LABEL_CSS_LIKE_QUERY+","+ARIA_LABELLEDBY_CSS_LIKE_QUERY;
    final String descOrTitleQuery =
            NOT_EMPTY_ARIA_TITLE_CSS_LIKE_QUERY+","+NOT_EMPTY_ARIA_DESC_CSS_LIKE_QUERY;

    for (Element svg : svgElements.get()) {
        final boolean lacksImgRole =
                !StringUtils.equalsIgnoreCase(svg.attr(ROLE_ATTR), "img");
        if (lacksImgRole) {
            svgElementsWithoutRoleImage.add(svg);
        }

        final boolean hasAria = svg.hasAttr(ARIA_LABEL_ATTR)
                || svg.hasAttr(ARIA_LABELLEDBY_ATTR)
                || svg.hasAttr(ARIA_DESCRIBEDBY_ATTR)
                || !svg.select(ariaQuery).isEmpty();
        if (hasAria) {
            ariaAttrOnSvgOrChild.add(svg);
        }

        final boolean hasDescOrTitle = !svg.select(descOrTitleQuery).isEmpty();
        if (hasDescOrTitle) {
            svgElementsWithDescOrTitleChild.add(svg);
        }

        final boolean hasTitleAttr = svg.hasAttr(TITLE_ELEMENT)
                || !svg.select("[title]").isEmpty();
        if (hasTitleAttr) {
            titleAttrOnSvgOrChild.add(svg);
        }

        final boolean anyPattern =
                lacksImgRole || hasAria || hasDescOrTitle || hasTitleAttr;
        if (!anyPattern && wellFormedSvgElements != null) {
            wellFormedSvgElements.add(svg);
        }
    }
}
 
Example 14
Source File: Elements.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 Get an attribute value from the first matched element that has the attribute.
 @param attributeKey The attribute key.
 @return The attribute value from the first matched element that has the attribute. If no elements were matched
 (isEmpty() == true), or if none of the elements have the attribute, returns an empty string.
 @see #hasAttr(String)
 */
public String attr(String attributeKey) {
    String found = "";
    for (Element candidate : this) {
        if (candidate.hasAttr(attributeKey)) {
            // The first element carrying the attribute wins; stop looking.
            found = candidate.attr(attributeKey);
            break;
        }
    }
    return found;
}
 
Example 15
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Determines the implied name of an element.
 * <p>
 * The rules are tried in order: the element's own {@code alt}
 * (img/area) or {@code title} (abbr); the same attributes on an only
 * child; the same attributes on an only grandchild. For the img/area
 * rules on a child or grandchild, that node must not have a root class.
 * When no rule applies, the trimmed text of the element is used.
 *
 * @param elem the element to inspect
 * @return the implied name
 */
private String parseImpliedName(Element elem) {
    // if img.h-x[alt] or area.h-x[alt] then use that alt for name.
    // The tag test is parenthesised so the [alt] requirement applies to
    // img as well as area.
    if (("img".equals(elem.tagName()) || "area".equals(elem.tagName()))
            && elem.hasAttr("alt")) {
        return elem.attr("alt");
    }
    // else if abbr.h-x[title] then use that title for name
    if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) {
        return elem.attr("title");
    }

    Elements children = elem.children();
    if (children.size() == 1) {
        Element child = children.first();
        // else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name
        // else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name
        if (!hasRootClass(child)
                && ("img".equals(child.tagName()) || "area".equals(child.tagName()))
                && child.hasAttr("alt")) {
            return child.attr("alt");
        }
        // else if .h-x>abbr:only-child[title] then use that abbr title for name
        if ("abbr".equals(child.tagName()) && child.hasAttr("title")) {
            return child.attr("title");
        }

        Elements grandChildren = child.children();
        if (grandChildren.size() == 1) {
            Element grandChild = grandChildren.first();
            // else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name
            // else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name
            if (!hasRootClass(grandChild)
                    && ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName()))
                    && grandChild.hasAttr("alt")) {
                return grandChild.attr("alt");
            }
            // else if .h-x>:only-child>abbr:only-child[title] use that abbr title for name
            // (the attribute tested must be the one that is returned)
            if ("abbr".equals(grandChild.tagName()) && grandChild.hasAttr("title")) {
                return grandChild.attr("title");
            }
        }
    }

    // else use the textContent of the .h-x for name,
    // with leading & trailing white-space dropped via trim()
    return elem.text().trim();
}
 
Example 16
Source File: Evaluator.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reports whether {@code element} has the attribute named by {@code key}.
 * The {@code root} argument is not consulted by this check.
 *
 * @param root unused here
 * @param element the element to test
 * @return the result of {@code element.hasAttr(key)}
 */
@Override
public boolean matches(Element root, Element element) {
    return element.hasAttr(key);
}
 
Example 17
Source File: Evaluator.java    From jsoup-learning with MIT License 4 votes vote down vote up
/**
 * Reports whether {@code element} has the attribute named by {@code key}.
 * The {@code root} argument is not consulted by this check.
 *
 * @param root unused here
 * @param element the element to test
 * @return the result of {@code element.hasAttr(key)}
 */
@Override
public boolean matches(Element root, Element element) {
    return element.hasAttr(key);
}
 
Example 18
Source File: Evaluator.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reports whether {@code element} has the attribute named by {@code key}
 * with a value that equals {@code value}, ignoring case, once the
 * attribute value has been trimmed. The {@code root} argument is not
 * consulted.
 *
 * @param root unused here
 * @param element the element to test
 * @return {@code true} when the attribute is present and its trimmed
 *         value matches
 */
@Override
public boolean matches(Element root, Element element) {
    if (!element.hasAttr(key)) {
        return false;
    }
    final String actual = element.attr(key).trim();
    return value.equalsIgnoreCase(actual);
}
 
Example 19
Source File: UntisInfoParser.java    From substitution-schedule-parser with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Reads the {@code rowspan} of a table cell.
 *
 * @param cell the cell element
 * @return the integer value of the {@code rowspan} attribute, or 1 when
 *         the attribute is absent or is not a valid integer
 */
private int getRowspan(Element cell) {
    if (!cell.hasAttr("rowspan")) {
        return 1;
    }
    try {
        // parseInt avoids the Integer boxing/unboxing round-trip of
        // valueOf; trimming tolerates padded values such as " 2 ".
        return Integer.parseInt(cell.attr("rowspan").trim());
    } catch (NumberFormatException ignored) {
        // A malformed rowspan in scraped markup is treated like a missing
        // one instead of aborting the whole parse.
        return 1;
    }
}
 
Example 20
Source File: ThechiveRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
 * Extracts the image and gif URLs of a gallery page.
 *
 * @param doc the gallery page
 * @return for every {@code <img>} found in the CHIVE_GALLERY_ITEMS
 *         script(s), its {@code data-gifsrc} value when present and its
 *         {@code src} value otherwise, with any query string removed
 */
private List<String> getUrlsFromThechive(Document doc) {
    /*
     * The image urls are stored in a <script> tag of the document. This script
     * contains a single array var by name CHIVE_GALLERY_ITEMS.
     * 
     * We grab all the <img> tags from the particular script, combine them in a
     * string, parse it, and grab all the img/gif urls.
     * 
     */
    List<String> result = new ArrayList<>();
    Elements scripts = doc.getElementsByTag("script");

    for (Element script : scripts) {
        String data = script.data();

        if (!data.contains("CHIVE_GALLERY_ITEMS")) {
            continue;
        }

        /*
         * We add all the <img/> tags in a single StringBuilder and parse as HTML for
         * easy sorting of img/ gifs.
         */
        StringBuilder allImgTags = new StringBuilder();
        Matcher matcher = imagePattern.matcher(data);
        while (matcher.find()) {
            // Unescape '\' from the img tags, which also unescapes the img url.
            // A literal replace is enough here; no regex is needed.
            allImgTags.append(matcher.group(0).replace("\\", ""));
        }

        // Now we parse and sort links.
        Document imgDoc = Jsoup.parse(allImgTags.toString());
        Elements imgs = imgDoc.getElementsByTag("img");
        for (Element img : imgs) {
            if (img.hasAttr("data-gifsrc")) {
                // For gifs.
                result.add(img.attr("data-gifsrc"));
            } else {
                // For jpeg images.
                result.add(img.attr("src"));
            }
        }
    }

    // Strip all GET parameters from the links (such as quality, width, height)
    // to get the original image. Links without a '?' are kept unchanged:
    // indexOf returns -1 for them, which substring would reject.
    result.replaceAll(s -> {
        int queryStart = s.indexOf('?');
        return queryStart >= 0 ? s.substring(0, queryStart) : s;
    });

    return result;
}