Java Code Examples for org.jsoup.nodes.Element#setBaseUri()

The following examples show how to use org.jsoup.nodes.Element#setBaseUri(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SpiderCheckThread.java    From sitemonitoring-production with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Scans the anchors of an HTML page and records every link that should be
 * followed by the spider check.
 *
 * <p>Each {@code <a>} element gets {@code referer} as its base URI so that
 * its {@code href} can be resolved to an absolute URL. A resolved URL is
 * stored in {@code allPages} (URL as key, {@code referer} as value) only
 * when it is non-empty, is not a {@code mailto:} link, is not rejected by
 * {@code SinglePageCheckService.ignoreUrl}, starts with the URL of the
 * current check and differs from {@code referer}.
 *
 * <p>If the {@code abort} flag is set, {@code "aborted"} is appended as a
 * message and processing stops.
 *
 * @param referer  URL of the page being scanned; used as base URI for its links
 * @param htmlPage HTML source of the page
 * @param allPages map receiving the accepted links, keyed by URL
 */
protected void findUrls(String referer, String htmlPage, Map<String, String> allPages) {
	log.debug("find urls on this web page: " + referer);
	if (abort) {
		appendMessage("aborted");
		return;
	}
	for (Element anchor : Jsoup.parse(htmlPage).select("a")) {
		// The flag is re-checked per link so a long page can be abandoned midway.
		if (abort) {
			appendMessage("aborted");
			break;
		}
		anchor.setBaseUri(referer);
		String url = anchor.absUrl("href").trim();
		log.debug("spider check found url: " + url);

		// Filters are applied in this exact order; each one short-circuits the rest.
		if (url.isEmpty() || url.startsWith("mailto:")) {
			continue;
		}
		if (SinglePageCheckService.ignoreUrl(url, check.getDoNotFollowUrls())) {
			continue;
		}
		if (!url.startsWith(check.getUrl())) {
			continue;
		}
		if (url.equals(referer)) {
			continue;
		}
		log.debug("spider check put to all pages url: " + url);
		allPages.put(url, referer);
	}
}
 
Example 2
Source File: BootstrapHandler.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Appends an error panel to the body of {@code document} when the dev mode
 * handler reports failed output.
 *
 * <p>Nothing happens when there is no dev mode handler or when it has no
 * failed output. Otherwise a {@code div} with class {@code v-system-error}
 * is created that removes itself from its parent when clicked and shows the
 * output inside a {@code <pre>} element, with lines containing
 * {@code ERROR} wrapped in {@code <b>}.
 *
 * @param document the document whose body receives the error panel
 */
protected static void showWebpackErrors(Document document) {
    DevModeHandler handler = DevModeHandler.getDevModeHandler();
    if (handler == null) {
        return;
    }
    String failedOutput = handler.getFailedOutput();
    if (failedOutput == null) {
        return;
    }

    // Make error lines more prominent
    String highlighted = failedOutput.replaceAll("(ERROR.+?\n)", "<b>$1</b>");

    // NOTE(review): the output is concatenated into the markup as-is, so any
    // '<' or '&' it contains is parsed as HTML - confirm this is intended.
    String panelHtml = "<h3 style=\"display:inline;\">Webpack Error</h3>"
            + "<h6 style=\"display:inline; padding-left:10px;\">Click to close</h6>"
            + "<pre>" + highlighted + "</pre>";

    Element panel = document.createElement("div");
    panel.setBaseUri("");
    panel.attr("class", "v-system-error");
    panel.attr("onclick", "this.parentElement.removeChild(this)");
    panel.html(panelHtml);
    document.body().appendChild(panel);
}
 
Example 3
Source File: SpiderCheckThread.java    From sitemonitoring-production with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Collects the links of an HTML page that the spider check should visit.
 *
 * <p>Every {@code <a>} element of the parsed page is given {@code referer}
 * as base URI, and its {@code href} is resolved to an absolute, trimmed URL.
 * The URL is put into {@code allPages} (mapped to {@code referer}) when it
 * is non-empty, does not start with {@code mailto:}, is not ignored
 * according to {@code SinglePageCheckService.ignoreUrl}, starts with the
 * URL of the current check and is not equal to {@code referer}.
 *
 * <p>When the {@code abort} flag is set, the message {@code "aborted"} is
 * appended and the method stops processing.
 *
 * @param referer  URL of the scanned page; base URI for resolving its links
 * @param htmlPage HTML content of the scanned page
 * @param allPages target map of accepted URLs to the page that referenced them
 */
protected void findUrls(String referer, String htmlPage, Map<String, String> allPages) {
	log.debug("find urls on this web page: " + referer);
	if (abort) {
		appendMessage("aborted");
		return;
	}
	for (Element anchor : Jsoup.parse(htmlPage).select("a")) {
		// Checked on every iteration so the scan can be cancelled part-way.
		if (abort) {
			appendMessage("aborted");
			break;
		}
		anchor.setBaseUri(referer);
		String url = anchor.absUrl("href").trim();
		log.debug("spider check found url: " + url);

		// Guard clauses keep the original evaluation order of the filters.
		if (url.isEmpty() || url.startsWith("mailto:")) {
			continue;
		}
		if (SinglePageCheckService.ignoreUrl(url, check.getDoNotFollowUrls())) {
			continue;
		}
		if (!url.startsWith(check.getUrl())) {
			continue;
		}
		if (url.equals(referer)) {
			continue;
		}
		log.debug("spider check put to all pages url: " + url);
		allPages.put(url, referer);
	}
}
 
Example 4
Source File: FileHandlerHTMLImpl.java    From openbd-core with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Walks every {@code a[href]} element of the document and adds its link
 * target, with any {@code #fragment} removed, to {@code anchors}.
 *
 * <p>The target is read through the {@code abs:href} attribute key. When
 * {@code baseUri} is not {@code null} it is first set on the link,
 * presumably so that relative targets can be resolved against it.
 *
 * @param doc     the parsed document to scan for links
 * @param baseUri base URI to set on each link, or {@code null} to leave the
 *                links' existing base URI untouched
 */
private void setAnchors( Document doc, String baseUri ){
	for ( Element anchor : doc.select("a[href]") ) {
		if ( baseUri != null ) {
			anchor.setBaseUri(baseUri);
		}

		String target = anchor.attr("abs:href");

		// Drop the fragment so only the part before '#' is recorded.
		int fragmentStart = target.indexOf("#");
		if ( fragmentStart != -1 ) {
			target = target.substring( 0, fragmentStart );
		}

		anchors.add( target );
	}
}