Java Code Examples for org.jsoup.Jsoup.parseBodyFragment()

The following are Java code examples showing how to use parseBodyFragment() of the org.jsoup.Jsoup class. You can vote up the examples you like. Your votes will be used in our system to surface more good examples.
+ Save this method
Example 1
Project: ZhihuQuestionsSpider   File: ParseRegularUtil.java   View Source Code Vote up 6 votes
/**
 * Parses one page of the Zhihu topic listing and queues follow-up requests.
 *
 * <p>The page content is read as a JSON object whose "msg" array holds HTML fragments.
 * When the array is empty the result is marked as skipped and nothing is queued.
 * Otherwise, for each fragment, the first {@code a[target]} inside the first
 * {@code div.item} is located and a GET request for its "/newest" listing is queued.
 * Finally a POST request for the next page (current offset + 20) is queued.
 *
 * @param page   the fetched page; its request must carry the "topic_id" and "offset" attachments
 * @param result collector that receives the skip flag and the follow-up requests
 */
public static void parseZhihuTopics1(Page page, Result result) {
    String json = page.getContent();
    JSONObject object = JSON.parseObject(json);
    JSONArray array = object.getJSONArray("msg");
    if (array.size() == 0) {
        result.setSkip(true);
        return;
    }
    for (int i = 0; i < array.size(); i++) {
        String topicStr = array.getString(i);
        Document doc = Jsoup.parseBodyFragment(topicStr);
        // Elements.first() returns null when nothing matches; skip malformed fragments
        // instead of aborting the whole page with a NullPointerException.
        Element item = doc.body().select("div.item").first();
        if (item == null) {
            continue;
        }
        Element a = item.select("a[target]").first();
        if (a == null) {
            continue;
        }
        String href = "https://www.zhihu.com" + a.attr("href") + "/newest";
        result.addRequest(new Request(href, HttpMethod.GET));
    }

    // The next offset is needed both in the POST parameters and as a request attachment;
    // compute it once.
    Integer nextOffset = Integer.valueOf(((Integer) page.getRequest().getAddch("offset")) + 20);

    Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
    JSONObject object1 = new JSONObject();
    object1.put("topic_id", page.getRequest().getAddch("topic_id"));
    object1.put("offset", nextOffset);
    object1.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");
    request.addParame("method", "next")
            .addParame("params", object1.toJSONString())
            .addAttach("offset", nextOffset)
            .addAttach("topic_id", page.getRequest().getAddch("topic_id"));
    result.addRequest(request);
}
 
Example 2
Project: yadaframework   File: YadaWebUtil.java   View Source Code Vote up 6 votes
/**
 * Sanitizes HTML so that only a fixed set of tags survives: the simple-text tags
 * (b, em, i, strong, u) plus br, cite, p, img, li, ul, ol, sup, sub and s.
 * The "style" attribute is kept on p; "src", "style" and "class" are kept on img;
 * "href" and "target" are kept on a, but only when "a" is passed in {@code extraTags}.
 *
 * @param content html content
 * @param extraTags any other tags that you may want to keep, e. g. "a"
 * @return the inner HTML of the cleaned document body
 */
public String cleanContent(String content, String ... extraTags) {
	// Whitelist.simpleText() allows only simple text formatting: b, em, i, strong, u.
	// All other HTML (tags and attributes) is removed unless added below.
	Whitelist whitelist = Whitelist.simpleText()
		.addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
		.addTags(extraTags)
		.addAttributes("p", "style") // Needed for right and left alignment
		.addAttributes("img", "src", "style", "class");

	boolean anchorsRequested = Arrays.asList(extraTags).contains("a");
	if (anchorsRequested) {
		whitelist.addAttributes("a", "href", "target");
	}

	Document unsafeDocument = Jsoup.parseBodyFragment(content, "");
	Document sanitized = new Cleaner(whitelist).clean(unsafeDocument);
	// Does not escape utf-8 characters
	sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
	return sanitized.body().html();
}
 
Example 3
Project: q-mail   File: UriParserTestHelper.java   View Source Code Vote up 5 votes
/**
 * Asserts that the given markup contains at least one anchor and that the first
 * anchor's text and "href" attribute both equal {@code expected}.
 *
 * @param expected the URI expected as both link text and link target
 * @param actual   the markup to inspect
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Element firstAnchor = Jsoup.parseBodyFragment(actual.toString())
            .select("a")
            .first();

    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}
 
Example 4
Project: q-mail   File: UriParserTestHelper.java   View Source Code Vote up 5 votes
/**
 * Asserts that the first anchor in the given markup has {@code expected} as both its
 * text and its "href" attribute, then delegates to
 * {@code assertAnchorElementIsSoleContent} with the parsed document and that anchor.
 *
 * @param expected the URI expected as both link text and link target
 * @param actual   the markup to inspect
 */
public static void assertLinkOnly(String expected, StringBuffer actual) {
    final Document parsed = Jsoup.parseBodyFragment(actual.toString());
    final Element link = parsed.select("a").first();

    assertNotNull("No <a> element found", link);
    assertEquals(expected, link.text());
    assertEquals(expected, link.attr("href"));

    assertAnchorElementIsSoleContent(parsed, link);
}
 
Example 5
Project: docx4j-template   File: XHTMLDocumentHandler.java   View Source Code Vote up 5 votes
/**
 * Parses HTML into a Jsoup {@code Document}, using the base URI read from the
 * {@code Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI} property.
 *
 * <p>Calls {@code Jsoup.parseBodyFragment(html, baseUri)} when {@code fragment} is true,
 * and {@code Jsoup.parse(html, baseUri)} otherwise.
 *
 * @param html     the HTML text to parse
 * @param fragment whether {@code html} should be parsed as a body fragment
 * @return the parsed document
 */
@Override
public Document handle(String html, boolean fragment) throws IOException {
	// Look up the Jsoup base URI setting, passing "" as the second argument
	final String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");

	// Convert the html into a Document with Jsoup and hand it back
	if (fragment) {
		return Jsoup.parseBodyFragment(html, baseUri);
	}
	return Jsoup.parse(html, baseUri);
}
 
Example 6
Project: gitplex-mit   File: DefaultMarkdownManager.java   View Source Code Vote up 5 votes
/**
 * Cleans the given HTML with {@code whiteList} and then applies every transformer in
 * {@code htmlTransformers} to the cleaned document.
 *
 * @param html the HTML body fragment to post-process
 * @return the inner HTML of the cleaned and transformed document body
 */
@Override
public String postProcess(String html) {
	// A faked base URI is supplied, otherwise all relative urls will be stripped out
	Document parsed = Jsoup.parseBodyFragment(html, "http://localhost/sanitize");
	Document sanitized = new Cleaner(whiteList).clean(parsed);

	for (HtmlTransformer transformer : htmlTransformers) {
		transformer.transform(sanitized);
	}

	return sanitized.body().html();
}
 
Example 7
Project: nifi-nars   File: GetWebpage.java   View Source Code Vote up 5 votes
/**
 * Uses Jsoup to convert from HTML to XHTML: the input is parsed as a body fragment,
 * the document is switched to XML output syntax with the given charset, and the
 * serialized document is encoded with that same charset.
 *
 * @param html    the HTML text to convert
 * @param charset the charset used both for the output settings and for byte encoding
 * @return the serialized document as bytes
 */
private byte[] formatToXHtml(String html, Charset charset) {
    final Document parsed = Jsoup.parseBodyFragment(html);
    parsed.outputSettings()
            .syntax(Document.OutputSettings.Syntax.xml)
            .charset(charset);

    final String serialized = parsed.toString();
    return serialized.getBytes(charset);
}
 
Example 8
Project: Cypher   File: EventListItemPresenter.java   View Source Code Vote up 4 votes
/**
 * Parses the text as an HTML body fragment, disables pretty-printing on the resulting
 * document, and passes the document body to {@code parseFormattedMessageNode} together
 * with a new, empty {@code LinkedList}.
 *
 * @param text the markup to parse
 * @throws IllegalArgumentException declared by the original signature; not thrown directly here
 */
private void generateFormattedTextObjects(String text) throws IllegalArgumentException {
	final Document parsed = Jsoup.parseBodyFragment(text);

	// Replace the document's output settings with a fresh instance that has pretty-print off
	final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
	parsed.outputSettings(rawOutput);

	parseFormattedMessageNode(parsed.body(), new LinkedList<>());
}
 
Example 9
Project: camunda-bpm-swagger   File: HtmlDocumentInterpreter.java   View Source Code Vote up 4 votes
/**
 * Returns the text of an HTML block: the block's characters are parsed as an HTML
 * body fragment and the text of the resulting document is returned.
 *
 * @param node the HTML block to read
 * @return the text of the parsed document
 */
String getText(final HtmlBlock node) {
  final String rawHtml = node.getChars().toString();
  return Jsoup.parseBodyFragment(rawHtml).text();
}
 
Example 10
Project: camunda-bpm-swagger   File: HtmlDocumentInterpreter.java   View Source Code Vote up 4 votes
/**
 * Converts an HTML table contained in the given block into parameter descriptions,
 * keyed by parameter id.
 *
 * <p>Column positions are taken from the {@code th} cells of the first row. If the first
 * row has no header cells, the name is assumed to be in column 0 and the remaining
 * columns are guessed from the number of {@code td} cells (2: name, description;
 * 3: name, type, description). Every row with at least two {@code td} cells produces
 * one entry.
 *
 * @param htmlBlock the block holding the table markup
 * @return a mutable map from parameter id to its description; empty when the block
 *         contains no table rows
 */
private Map<String, ParameterDescription> htmlNodeToMap(final HtmlBlock htmlBlock) {
  final String htmlBlockBody = prepareHTML(htmlBlock);
  final Document document = Jsoup.parseBodyFragment(htmlBlockBody);
  final Elements trs = document.select("tr");
  final HashMap<String, ParameterDescription> result = new HashMap<>();
  if (trs.isEmpty()) {
    // No rows at all: trs.get(0) below would throw IndexOutOfBoundsException.
    return result;
  }

  Integer nameIdx = null;
  Integer descriptionIdx = null;
  Integer typeIdx = null;
  Integer requiredIdx = null;
  final Elements ths = trs.get(0).select("th");

  if(ths.size() == 0) {
    // Workaround for missing table header
    nameIdx = 0;
    switch(trs.get(0).select("td").size()) {
    case 2:
      descriptionIdx = 1;
      break;
    case 3:
      typeIdx = 1;
      descriptionIdx = 2;
      break;
    }
  }
  for (int i = 0; i < ths.size(); i++) {
    final Element element = ths.get(i);
    switch(element.text()) {
    case "Name":
    case "Code":
    case "Form Part Name":
      nameIdx = i;
      break;
    case "Description":
      descriptionIdx = i;
      break;
    case "Media type":
    case "Type":
    case "Content Type":
    case "Value":
      typeIdx = i;
      break;
    case "Required?":
      requiredIdx = i;
      break;
    default:
      log.debug("Fieldname unknown: " + element.text());
      break;
    }
  }
  for (final Element tr : trs) {
    final Elements tds = tr.select("td");
    if (tds.size() >= 2) {
      final ParameterDescription.ParameterDescriptionBuilder builder = ParameterDescription.builder();
      // A row may have fewer cells than the header has columns; ignore indexes that are
      // out of range for this row instead of failing with IndexOutOfBoundsException.
      Optional.ofNullable(nameIdx).filter(idx -> idx < tds.size()).map(tds::get).map(Element::text).ifPresent(builder::id);
      Optional.ofNullable(descriptionIdx).filter(idx -> idx < tds.size()).map(tds::get).map(Element::text).ifPresent(builder::description);
      Optional.ofNullable(typeIdx).filter(idx -> idx < tds.size()).map(tds::get).map(Element::text).ifPresent(builder::type);
      Optional.ofNullable(requiredIdx).filter(idx -> idx < tds.size()).map(tds::get).map(Element::text).map(o -> o.equals("Yes")).ifPresent(builder::required);
      final ParameterDescription parameterDescription = builder.build();
      result.put(parameterDescription.getId(), parameterDescription);
    }
  }
  return result;
}