Java Code Examples for org.jsoup.nodes.Document#body()

The following examples show how to use org.jsoup.nodes.Document#body(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: NClientV2   File: LoginWebView.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites the fetched HTML before showing it: the first element with class {@code lead}
 * gets the text "Tested", the first {@code <form>} is renamed to a {@code <div>}, a script
 * defining {@code intercept()} is inserted in front of it, and the first submit control
 * inside it is wired to call {@code intercept()} on click. The resulting document is then
 * Base64-encoded and loaded with the base URL from {@code Utility.getBaseUrl()}.
 *
 * @param url  URL the HTML was fetched from (not read by this method)
 * @param html raw HTML of the fetched page
 */
@Override
public void fetchUrl(String url, String html) {
    Document page = Jsoup.parse(html);
    Element pageBody = page.body();
    Element loginForm = pageBody.getElementsByTag("form").first();
    pageBody.getElementsByClass("lead").first().text("Tested");
    loginForm.tagName("div");

    // Script that collects the form fields and passes them to Interceptor.intercept(...).
    // NOTE(review): Interceptor is presumably a JavaScript bridge object registered elsewhere.
    String interceptScript = "<script>\n" +
            "document.getElementsByClassName('lead')[0].innerHTML='test';\n" +
            "alert('test');\n" +
            "function intercept(){\n" +
            "    password=document.getElementById('id_password').value;\n" +
            "    email=document.getElementById('id_username_or_email').value;\n" +
            "    token=document.getElementsByName('csrfmiddlewaretoken')[0].value;\n" +
            "    captcha=document.getElementById('g-recaptcha-response').value;\n" +
            "     Interceptor.intercept(email,password,token,captcha);\n" +
            "}\n" +
            "</script>";
    loginForm.before(interceptScript);

    Element submitControl = loginForm.getElementsByAttributeValue("type", "submit").first();
    submitControl.attr("onclick", "intercept()");

    removeFetcher(fetcher);

    byte[] rewrittenHtml = page.outerHtml().getBytes();
    String encodedHtml = Base64.encodeToString(rewrittenHtml, Base64.NO_PADDING);
    loadDataWithBaseURL(Utility.getBaseUrl(), encodedHtml, "text/html", "base64", null);
}
 
Example 2
Source Project: flow   File: BootstrapHandlerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initialises a second UI, builds its bootstrap page and checks the page body:
 * it must have exactly two child nodes and its first child element must be
 * {@code <noscript>}.
 */
@Test
public void renderUI() throws IOException {
    TestUI secondUi = new TestUI();
    initUI(testUI);
    secondUi.getInternals().setSession(session);
    VaadinRequest initRequest = createVaadinRequest();
    secondUi.doInit(initRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    secondUi.getInternals().setContextRoot(contextRootRelativePath(request));
    BootstrapContext context = new BootstrapContext(
            initRequest, null, session, secondUi, this::contextRootRelativePath);

    Element pageBody = pageBuilder.getBootstrapPage(context).body();

    assertEquals(2, pageBody.childNodeSize());
    assertEquals("noscript", pageBody.child(0).tagName());
}
 
Example 3
/**
 * Parses markup that has no explicit html/head/body tags and checks the structure
 * the parser builds: meta, link and title land in the head, the paragraph in the body.
 */
@Test public void createsDocumentStructure() {
    Document parsed = Jsoup.parse(
            "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headEl = parsed.head();
    Element bodyEl = parsed.body();

    // root node: contains html node
    assertEquals(1, parsed.children().size());
    // html node: head and body
    assertEquals(2, parsed.child(0).children().size());
    assertEquals(3, headEl.children().size());
    assertEquals(1, bodyEl.children().size());

    assertEquals("keywords", headEl.getElementsByTag("meta").get(0).attr("name"));
    assertEquals(0, bodyEl.getElementsByTag("meta").size());
    assertEquals("jsoup", parsed.title());
    assertEquals("Hello world", bodyEl.text());
    assertEquals("Hello world", bodyEl.children().get(0).text());
}
 
Example 4
Source Project: astor   File: SelectorTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises the {@code :containsData(...)} selector against script, style and comment
 * content, using queries whose letter case differs from the data in the markup.
 */
@Test @MultiLocaleTest public void containsData() {
    Document parsed = Jsoup.parse(
            "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>");
    Element root = parsed.body();

    Elements anyFunction = root.select(":containsData(function)");
    Elements scriptFunction = root.select("script:containsData(function)");
    Elements spanComments = root.select("span:containsData(comments)");
    Elements anyLetterO = root.select(":containsData(o)");
    Elements styleItem = root.select("style:containsData(ITEM)");

    // body and script
    assertEquals(2, anyFunction.size());
    assertEquals(1, scriptFunction.size());
    assertEquals(anyFunction.last(), scriptFunction.first());
    assertEquals("<script>FUNCTION</script>", scriptFunction.outerHtml());

    assertEquals(1, spanComments.size());
    assertEquals("span", spanComments.first().tagName());

    assertEquals(3, anyLetterO.size());
    assertEquals("body", anyLetterO.first().tagName());
    assertEquals("script", anyLetterO.get(1).tagName());
    assertEquals("span", anyLetterO.get(2).tagName());

    assertEquals(1, styleItem.size());
}
 
Example 5
/**
 * Parses markup that has no explicit html/head/body tags and checks the structure
 * the parser builds: meta, link and title land in the head, the paragraph in the body.
 */
@Test public void createsDocumentStructure() {
    Document parsed = Jsoup.parse(
            "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headEl = parsed.head();
    Element bodyEl = parsed.body();

    // root node: contains html node
    assertEquals(1, parsed.children().size());
    // html node: head and body
    assertEquals(2, parsed.child(0).children().size());
    assertEquals(3, headEl.children().size());
    assertEquals(1, bodyEl.children().size());

    assertEquals("keywords", headEl.getElementsByTag("meta").get(0).attr("name"));
    assertEquals(0, bodyEl.getElementsByTag("meta").size());
    assertEquals("jsoup", parsed.title());
    assertEquals("Hello world", bodyEl.text());
    assertEquals("Hello world", bodyEl.children().get(0).text());
}
 
Example 6
Source Project: astor   File: SelectorTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises the {@code :containsData(...)} selector against script, style and comment
 * content, using queries whose letter case differs from the data in the markup.
 */
@Test @MultiLocaleTest public void containsData() {
    Document parsed = Jsoup.parse(
            "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>");
    Element root = parsed.body();

    Elements anyFunction = root.select(":containsData(function)");
    Elements scriptFunction = root.select("script:containsData(function)");
    Elements spanComments = root.select("span:containsData(comments)");
    Elements anyLetterO = root.select(":containsData(o)");
    Elements styleItem = root.select("style:containsData(ITEM)");

    // body and script
    assertEquals(2, anyFunction.size());
    assertEquals(1, scriptFunction.size());
    assertEquals(anyFunction.last(), scriptFunction.first());
    assertEquals("<script>FUNCTION</script>", scriptFunction.outerHtml());

    assertEquals(1, spanComments.size());
    assertEquals("span", spanComments.first().tagName());

    assertEquals(3, anyLetterO.size());
    assertEquals("body", anyLetterO.first().tagName());
    assertEquals("script", anyLetterO.get(1).tagName());
    assertEquals("span", anyLetterO.get(2).tagName());

    assertEquals(1, styleItem.size());
}
 
Example 7
Source Project: emotional_analysis   File: IpProxy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Requests {@code url} with a fixed set of browser-like headers, parses the response and
 * reads proxy IP/port pairs out of a node found at a fixed position in the page body.
 *
 * NOTE(review): the request carries a hard-coded session cookie; confirm it is meant to be
 * committed to source.
 *
 * @param url page to fetch the proxy list from
 * @return the proxy entries read from the page
 * @throws Exception if the request fails, the page does not have the expected node layout,
 *                   or a port is not a valid integer
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	ArrayList<IpEntity> proxies = new ArrayList<>();

	String userAgent =
			"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36";
	String cookie =
			"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/";

	Response response = Jsoup.connect(url)
			.header("User-Agent", userAgent)
			.header("Cache-Control", "max-age=60")
			.header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
			.header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com")
			.header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie", cookie)
			.method(Method.GET)
			.ignoreContentType(true)
			.timeout(2099999999)
			.execute();

	Element pageBody = response.parse().body();
	// The list of rows sits at a fixed position in the page's node tree.
	List<Node> rows = pageBody.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();

	// Put the first 10 proxy IPs into the list.
	// NOTE(review): the loop actually reads 15 rows (child indices 2, 4, ..., 30).
	for (int row = 1; row <= 15; row++) {
		IpEntity entry = new IpEntity();
		List<Node> cells = rows.get(row * 2).childNodes();
		String ip = cells.get(3).childNode(0).toString();
		int port = Integer.parseInt(cells.get(5).childNode(0).toString());
		entry.setIp(ip);
		entry.setPort(port);
		proxies.add(entry);
	}
	return proxies;
}
 
Example 8
Source Project: ShareBox   File: BaseSoup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code mHtml}, stores its head and body in {@code mHeader} / {@code mBody}, and
 * hands both to {@code parse(...)} together with the result map.
 *
 * @param arg arguments stored in {@code mArguments} before parsing
 * @return the {@code mValues} map, created here if it did not exist yet
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;

    // Create the result map on first use; later calls keep using the same map.
    if (null == mValues) {
        mValues = new HashMap<>();
    }

    Document parsed = Jsoup.parse(mHtml);
    mHeader = parsed.head();
    mBody = parsed.body();
    parse(parsed, mHeader, mBody, mValues);

    return mValues;
}
 
Example 9
Source Project: springboot-admin   File: JsoupUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the inner HTML of the {@code <body>} of the given markup.
 *
 * @param html markup to parse
 * @return the body's inner HTML; the input itself is returned unchanged when
 *         {@code StringUtils.isNotBlank(html)} is false or no body is available
 */
public static String getBodyHtml(String html) {
	if (!StringUtils.isNotBlank(html)) {
		return html;
	}
	Document parsed = Jsoup.parse(html);
	if (parsed == null) {
		return html;
	}
	Element bodyEl = parsed.body();
	if (bodyEl == null) {
		return html;
	}
	return bodyEl.html();
}
 
Example 10
Source Project: baleen   File: RemoveEmptyText.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code removeEmpty} on the document body repeatedly until it returns {@code true}.
 *
 * @param document document whose body is processed in place
 */
@Override
public void manipulate(Document document) {
  final Element root = document.body();

  // All the work happens inside removeEmpty; loop until it reports true.
  boolean finished;
  do {
    finished = removeEmpty(root);
  } while (!finished);
}
 
Example 11
Source Project: zeppelin   File: ZeppelinRDisplay.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the given HTML (pretty-printing disabled) and picks a display based on the body:
 * <ol>
 *   <li>no {@code <p>} element: the body HTML is returned as-is as an HTML display;</li>
 *   <li>none of {@code <img}, {@code <script}, {@code "%html "}, {@code "%table "},
 *       {@code "%img "} present: text display;</li>
 *   <li>contains {@code %table}: table display;</li>
 *   <li>contains {@code %img}: image display;</li>
 *   <li>otherwise: HTML display using {@code imageWidth}.</li>
 * </ol>
 *
 * @param html       markup to render
 * @param imageWidth passed through to {@code htmlDisplay}
 * @return the display chosen by the rules above
 */
public static RDisplay render(String html, String imageWidth) {
    Document parsed = Jsoup.parse(html);
    parsed.outputSettings().prettyPrint(false);
    Element body = parsed.body();

    if (body.getElementsByTag("p").isEmpty()) {
        return new RDisplay(body.html(), Type.HTML, Code.SUCCESS);
    }

    String markup = body.html();

    // Note the trailing space in the "%..." markers checked here, unlike the checks below.
    boolean hasRichMarker = markup.contains("<img")
        || markup.contains("<script")
        || markup.contains("%html ")
        || markup.contains("%table ")
        || markup.contains("%img ");
    if (!hasRichMarker) {
        return textDisplay(body);
    }

    if (markup.contains("%table")) {
        return tableDisplay(body);
    }
    if (markup.contains("%img")) {
        return imgDisplay(body);
    }
    return htmlDisplay(body, imageWidth);
}
 
Example 12
Source Project: astor   File: Parser.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses an HTML fragment and places the resulting nodes into the {@code body} of a
 * freshly created shell Document.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri base URI of the document (i.e. original fetch location), used to resolve relative URLs
 *
 * @return a Document with an empty head and the parsed fragment in its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element bodyEl = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, bodyEl, baseUri);

    // Work on an array snapshot: the node list gets modified when nodes are re-parented.
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // Detach from last to first; the node at index 0 is not detached by this loop.
    int idx = snapshot.length - 1;
    while (idx > 0) {
        snapshot[idx].remove();
        idx--;
    }

    for (int k = 0; k < snapshot.length; k++) {
        bodyEl.appendChild(snapshot[k]);
    }
    return shell;
}
 
Example 13
Source Project: astor   File: Parser.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses an HTML fragment and places the resulting nodes into the {@code body} of a
 * freshly created shell Document.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri base URI of the document (i.e. original fetch location), used to resolve relative URLs
 *
 * @return a Document with an empty head and the parsed fragment in its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element bodyEl = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, bodyEl, baseUri);

    // Work on an array snapshot: the node list gets modified when nodes are re-parented.
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // Detach from last to first; the node at index 0 is not detached by this loop.
    int idx = snapshot.length - 1;
    while (idx > 0) {
        snapshot[idx].remove();
        idx--;
    }

    for (int k = 0; k < snapshot.length; k++) {
        bodyEl.appendChild(snapshot[k]);
    }
    return shell;
}
 
Example 14
/**
 * Checks that an HTML comment following an {@code <img>} becomes a sibling node in the
 * body with its text preserved verbatim, and that the paragraph after it is still parsed.
 */
@Test public void parsesComments() {
    Document parsed = Jsoup.parse(
            "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>");
    Element bodyEl = parsed.body();

    // comment should not be sub of img, as it's an empty tag
    Comment secondNode = (Comment) bodyEl.childNode(1);
    assertEquals(" <table><tr><td></table> ", secondNode.getData());

    Element paragraph = bodyEl.child(1);
    TextNode paragraphText = (TextNode) paragraph.childNode(0);
    assertEquals("Hello", paragraphText.getWholeText());
}
 
Example 15
Source Project: jsoup-learning   File: Parser.java    License: MIT License 5 votes vote down vote up
/**
 * Parses an HTML fragment and appends the resulting nodes to the {@code body} of a
 * freshly created shell Document.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri base URI of the document (i.e. original fetch location), used to resolve relative URLs
 *
 * @return a Document with an empty head and the parsed fragment in its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element bodyEl = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, bodyEl, baseUri);

    // Iterate over an array snapshot: the node list gets modified when nodes are re-parented.
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);
    for (int k = 0; k < snapshot.length; k++) {
        bodyEl.appendChild(snapshot[k]);
    }
    return shell;
}
 
Example 16
Source Project: firing-range   File: TagServlet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the {@code q} parameter as an HTML body fragment and passes the resulting
 * elements to {@code handleRequest}, together with an allowed tag and an allowed
 * attribute taken from the request path ({@code /<tag>} or {@code /<tag>/<attribute>}).
 *
 * <p>Responds with a 400 error when {@code q} is missing or yields no elements. The
 * synthetic {@code body} element is dropped from the element list unless {@code q}
 * itself contains the text "body".
 *
 * @param request  incoming request; reads parameter {@code q} and the path info
 * @param response response the error or result is written to
 * @throws IOException declared by the servlet API
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
  String q = request.getParameter("q");
  if (q == null) {
    Responses.sendError(response, "Missing q parameter", 400);
    return;
  }

  Document doc = Jsoup.parseBodyFragment(q);
  Element body = doc.body();
  Elements elements = body.getAllElements();
  if (!q.contains("body")) {
    elements.remove(body);
  }

  if (elements.isEmpty()) {
    Responses.sendError(response, "Invalid input, no tags", 400);
    return;
  }

  String allowedTag = "";
  String allowedAttribute = "";
  String rawPathInfo = request.getPathInfo();
  if (rawPathInfo != null) {
    // Drop the leading "/" and split into path segments.
    String[] segments = rawPathInfo.substring(1).split("/");
    // String.split drops trailing empty strings, so paths such as "/a/" or "//" produce
    // fewer than two segments; fall back to "" instead of indexing past the array end.
    allowedTag = segments.length > 0 ? segments[0] : "";
    allowedAttribute = segments.length > 1 ? segments[1] : "";
  }
  handleRequest(elements, response, allowedTag, allowedAttribute);
}
 
Example 17
Source Project: rebuild   File: SMSender.java    License: GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads the resource file {@code locales/email_zh-CN.html}, parses it as UTF-8 HTML and
 * returns its body.
 *
 * @return the {@code <body>} element of the parsed template
 * @throws IOException if the template file cannot be read
 */
protected static Element getMailTemplate() throws IOException {
	File templateFile = SysConfiguration.getFileOfRes("locales/email_zh-CN.html");
	return Jsoup.parse(templateFile, "utf-8").body();
}
 
Example 18
Source Project: baleen   File: AbstractHtmlConsumer.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes the given JCas out as an HTML file.
 *
 * <p>The head receives an optional stylesheet link (when {@code css} is set), a
 * {@code <meta charset="utf-8">}, and one meta entry per document-annotation property and
 * per {@code Metadata} annotation. A {@code documentTitle} metadata entry (key compared
 * case-insensitively) also becomes the document title. The body is filled by
 * {@code writeBody}.
 *
 * <p>The file is written as UTF-8 so that its bytes match the charset declared in the
 * head, rather than depending on the platform default charset.
 *
 * @param jCas the CAS to export
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final File f = getFileName(jCas);
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  final Document doc =
      Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  final Element head = doc.head();

  if (!Strings.isNullOrEmpty(css)) {
    final Element cssLink = head.appendElement("link");
    cssLink.attr("rel", "stylesheet");
    cssLink.attr("href", css);
  }

  final Element charset = head.appendElement("meta");
  charset.attr("charset", "utf-8");

  appendMeta(head, "document.type", da.getDocType());
  appendMeta(head, "document.sourceUri", da.getSourceUri());
  appendMeta(head, "externalId", da.getHash());

  appendMeta(head, "document.classification", da.getDocumentClassification());
  appendMeta(
      head,
      "document.caveats",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
  appendMeta(
      head,
      "document.releasability",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

  // Copy every Metadata annotation into the head; the last documentTitle entry wins.
  String title = null;
  for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
    appendMeta(head, md.getKey(), md.getValue());
    if ("documentTitle".equalsIgnoreCase(md.getKey())) {
      title = md.getValue();
    }
  }

  if (!Strings.isNullOrEmpty(title)) {
    doc.title(title);
  }

  final Element body = doc.body();

  writeBody(jCas, body);

  try {
    // Must agree with the <meta charset="utf-8"> emitted above.
    FileUtils.writeStringToFile(f, doc.html(), java.nio.charset.StandardCharsets.UTF_8);
  } catch (final IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
Example 19
Source Project: something.apk   File: ThreadPageRequest.java    License: MIT License 4 votes vote down vote up
/**
 * Converts a parsed thread page into a {@code ThreadPage}: reads paging, bookmark, title,
 * forum and thread ids from the document, parses the posts, and renders header, posts and
 * footer through the {@code MustCache} templates.
 *
 * @param document                 parsed thread page
 * @param showImages               passed through to {@code parsePosts}
 * @param showAvatars              passed through to {@code parsePosts}
 * @param hidePreviouslyReadImages passed through to {@code parsePosts}
 * @param jumpToPost               post id to jump to; values {@code <= 0} mean no explicit post
 * @param redirectedUrl            URL the request was redirected to, may be empty or null;
 *                                 its fragment selects the post to jump to
 * @return the rendered page together with its paging and thread metadata
 */
public static ThreadPage processThreadPage(Document document, boolean showImages, boolean showAvatars, boolean hidePreviouslyReadImages, long jumpToPost, String redirectedUrl){
    ArrayList<HashMap<String, String>> posts = new ArrayList<HashMap<String, String>>();

    int currentPage, maxPage = 1, threadId, forumId, unread;
    String jumpToId = jumpToPost > 0 ? "#post"+jumpToPost : null;

    String ptiFragment = null;
    if(!TextUtils.isEmpty(redirectedUrl)){
        Uri url = Uri.parse(redirectedUrl);
        ptiFragment = url.getFragment();
        // Plain equality: String.matches would interpret the fragment as a regular
        // expression and throw a NullPointerException when the URL has no fragment.
        if("lastpost".equals(ptiFragment)){
            ptiFragment = null;
            jumpToId = "#lastpost";
        }
    }


    Element pages = document.getElementsByClass("pages").first();
    currentPage = FastUtils.safeParseInt(pages.getElementsByAttribute("selected").attr("value"), 1);
    Element lastPage = pages.getElementsByTag("option").last();
    if(lastPage != null){
        maxPage = FastUtils.safeParseInt(lastPage.attr("value"), 1);
    }

    boolean bookmarked = document.getElementsByClass("unbookmark").size() > 0;

    String threadTitle = TextUtils.htmlEncode(document.getElementsByClass("bclast").first().text());

    // Forum and thread ids are carried as data attributes on <body>.
    Element body = document.body();
    forumId = Integer.parseInt(body.attr("data-forum"));
    threadId = Integer.parseInt(body.attr("data-thread"));

    // Replying is allowed unless the forum is an archive or the first thread bar
    // shows the "forum closed" image.
    Elements threadbars = document.getElementsByClass("threadbar");
    boolean canReply = !Constants.isArchiveForum(forumId) && threadbars.first().getElementsByAttributeValueContaining("src", "images/forum-closed.gif").size() == 0;

    unread = parsePosts(document, posts, showImages, showAvatars, hidePreviouslyReadImages, ptiFragment, canReply, currentPage == maxPage, forumId);

    StringBuilder builder = new StringBuilder(2048);

    int previouslyRead = posts.size()-unread;

    HashMap<String, String> headerArgs = new HashMap<String, String>();
    headerArgs.put("jumpToPostId", jumpToId);
    headerArgs.put("fontSize", SomePreferences.fontSize);
    headerArgs.put("theme", getTheme(forumId));
    // Only set when the page mixes already-read and unread posts.
    headerArgs.put("previouslyRead", previouslyRead > 0 && unread > 0 ? previouslyRead+" Previous Post"+(previouslyRead > 1 ? "s":"") : null);
    MustCache.applyHeaderTemplate(builder, headerArgs);

    for(HashMap<String, String> post : posts){
        MustCache.applyPostTemplate(builder, post);
    }

    MustCache.applyFooterTemplate(builder, null);

    ThreadItem cachedThread = ThreadManager.getThread(threadId);
    if(cachedThread != null){
        cachedThread.updateUnreadCount(currentPage, maxPage, SomePreferences.threadPostPerPage);
    }

    return new ThreadPage(builder.toString(), currentPage, maxPage, threadId, forumId, threadTitle, -unread, bookmarked, canReply);

}
 
Example 20
Source Project: firing-range   File: Expression.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Parses the {@code q} parameter as an HTML body fragment and echoes back, via
 * {@code Responses.sendXssed}, every element that passes a simple attribute filter.
 *
 * <p>An element is dropped when any of its attributes has a key starting with "on", a key
 * equal to "href" or "src", or a "style" key whose value contains "expression" (all
 * compared in lower case). Responds with a 400 error when {@code q} is missing or yields
 * no elements besides the body.
 *
 * @param request  incoming request; reads parameter {@code q}
 * @param response response the error or result is written to
 * @throws IOException declared by the servlet API
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
  String q = request.getParameter("q");
  if (q == null) {
    Responses.sendError(response, "Missing q parameter", 400);
    return;
  }

  Element fragmentBody = Jsoup.parseBodyFragment(q).body();
  Elements candidates = fragmentBody.getAllElements();
  candidates.remove(fragmentBody);
  if (candidates.isEmpty()) {
    Responses.sendError(response, "Invalid input, no tags", 400);
    return;
  }

  StringBuilder output = new StringBuilder();
  for (Element candidate : candidates) {
    boolean rejected = false;

    for (Attribute attr : candidate.attributes()) {
      String key = attr.getKey().toLowerCase();

      boolean blockedKey = key.startsWith("on") || key.equals("href") || key.equals("src");
      boolean styleExpression =
          key.equals("style") && attr.getValue().toLowerCase().contains("expression");

      if (blockedKey || styleExpression) {
        rejected = true;
      }
    }

    if (!rejected) {
      output.append(candidate.toString());
    }
  }
  Responses.sendXssed(response, output.toString());
}