Java Code Examples for org.jsoup.Jsoup#parseBodyFragment()

The following examples show how to use org.jsoup.Jsoup#parseBodyFragment(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
/**
 * Converts a body fragment with a converter built from a single {@code MapOnlyP} instance and
 * checks the resulting JCas document text and the {@code Paragraph} annotations found in it.
 *
 * @throws UIMAException declared by the test signature
 */
@Test
public void testWithSimpleMapper() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  DocumentToJCasConverter converter =
      new DocumentToJCasConverter(Collections.singletonList(new MapOnlyP()));

  Document doc = Jsoup.parseBodyFragment("<p>Hello</p><pre>Something\nFormatted</pre>");

  converter.apply(doc, jCas);

  assertEquals("HelloSomething\nFormatted", jCas.getDocumentText());

  // Expected value goes first so that a failure message reports expected/actual the right way round.
  Collection<Paragraph> select = JCasUtil.select(jCas, Paragraph.class);
  assertEquals(1, select.size());
  Paragraph p = select.iterator().next();
  assertEquals("Hello", p.getCoveredText());
}
 
Example 2
/**
 * Extracts the full path names of uploaded files referenced by links in rich-text content.
 * @param html rich-text content
 * @param item item name used to build the expected {@code file/<item>/} link prefix
 * @return map of trimmed link href (full file path) to the link's text (file name);
 *         empty when {@code html} is blank or no link matches
 */
public Map<String,String> analysisFullFileName(String html,String item){
	// key: full file path name, value: file name
	Map<String,String> pathToName = new HashMap<String,String>();
	if(StringUtils.isBlank(html)){
		return pathToName;
	}

	String expectedPrefix = "file/"+item+"/";
	for (Element anchor : Jsoup.parseBodyFragment(html).select("a[href]")) {
		String href = anchor.attr("href");
		if(href == null){
			continue;
		}
		String trimmedHref = href.trim();
		if("".equals(trimmedHref)){
			continue;
		}
		// NOTE(review): the prefix test runs on the untrimmed href while the key is trimmed.
		if(StringUtils.startsWithIgnoreCase(href, expectedPrefix)){
			pathToName.put(trimmedHref, anchor.text());
		}
	}

	return pathToName;
}
 
Example 3
Source Project: jinjava   File: BatchFilterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders the "batch-filter" template with six items and expects two table rows of three
 * cells each, filled in order with the item values.
 */
@Test
public void batchFilterNoBackfill() {
  Map<String, Object> context = ImmutableMap.of(
    "items",
    (Object) Lists.newArrayList("1", "2", "3", "4", "5", "6")
  );
  // Expected cell text, row by row.
  String[][] expectedRows = {
    { "1", "2", "3" },
    { "4", "5", "6" }
  };

  Document dom = Jsoup.parseBodyFragment(render("batch-filter", context));
  assertThat(dom.select("tr")).hasSize(expectedRows.length);

  Elements trs = dom.select("tr");
  for (int row = 0; row < expectedRows.length; row++) {
    String[] expectedCells = expectedRows[row];
    assertThat(trs.get(row).select("td")).hasSize(expectedCells.length);
    for (int col = 0; col < expectedCells.length; col++) {
      assertThat(trs.get(row).select("td").get(col).text()).isEqualTo(expectedCells[col]);
    }
  }
}
 
Example 4
/**
 * Processes the basic-property detail include template for {@code STRING_PROP_WITH_MAX_LEN}
 * and checks the generated form group and the attributes of its input element, including
 * {@code ng-maxlength}.
 */
@Test
public void testGenerateBasicStringPropertyWithMaxlength() throws Exception
{
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, STRING_PROP_WITH_MAX_LEN);

   String templatePath = Deployments.BASE_PACKAGE_PATH + Deployments.BASIC_PROPERTY_DETAIL_INCLUDE;
   Resource<URL> templateResource = resourceFactory.create(getClass().getResource(templatePath));
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   // The wrapping form group must carry a non-empty ng-class attribute.
   Elements formGroup = parsed.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements input = parsed.select("div.form-group input");
   assertThat(input.attr("id"), equalTo("fullName"));
   assertThat(input.attr("type"), equalTo("text"));
   String expectedModel = StringUtils.camelCase(ENTITY_NAME) + "." + "fullName";
   assertThat(input.attr("ng-model"), equalTo(expectedModel));
   assertThat(input.attr("ng-maxlength"), equalTo("100"));
}
 
Example 5
Source Project: flow   File: Html.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the given markup, requires it to contain exactly one top-level element, and then
 * sets this component's element, attributes and inner HTML from that element.
 *
 * @param outerHtml markup expected to contain a single top-level element (text nodes are ignored)
 * @throws IllegalArgumentException if the markup has zero or more than one top-level element
 */
private void setOuterHtml(String outerHtml) {
    Document parsed = Jsoup.parseBodyFragment(outerHtml);
    org.jsoup.nodes.Element body = parsed.body();
    int topLevelCount = body.children().size();

    if (topLevelCount != 1) {
        StringBuilder error = new StringBuilder(
                "HTML must contain exactly one top level element (ignoring text nodes). Found ")
                        .append(topLevelCount);
        if (topLevelCount > 1) {
            // List the offending tags to make the message actionable.
            error.append(" elements with the tag names ")
                    .append(body.children().stream()
                            .map(org.jsoup.nodes.Element::tagName)
                            .collect(Collectors.joining(", ")));
        }
        throw new IllegalArgumentException(error.toString());
    }

    org.jsoup.nodes.Element root = body.child(0);

    Component.setElement(this, new Element(root.tagName()));
    root.attributes().forEach(this::setAttribute);

    // Disable pretty printing before serializing the inner HTML.
    parsed.outputSettings().prettyPrint(false);
    setInnerHtml(root.html());
}
 
Example 6
Source Project: jinjava   File: BatchFilterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders the "batch-filter" template with four items and expects two table rows of three
 * cells each, where the two cells after the last item contain "foo".
 */
@Test
public void batchFilterFillMissing() {
  Map<String, Object> context = ImmutableMap.of(
    "items",
    (Object) Lists.newArrayList("1", "2", "3", "4")
  );
  // Expected cell text, row by row.
  String[][] expectedRows = {
    { "1", "2", "3" },
    { "4", "foo", "foo" }
  };

  Document dom = Jsoup.parseBodyFragment(render("batch-filter", context));
  assertThat(dom.select("tr")).hasSize(expectedRows.length);

  Elements trs = dom.select("tr");
  for (int row = 0; row < expectedRows.length; row++) {
    String[] expectedCells = expectedRows[row];
    assertThat(trs.get(row).select("td")).hasSize(expectedCells.length);
    for (int col = 0; col < expectedCells.length; col++) {
      assertThat(trs.get(row).select("td").get(col).text()).isEqualTo(expectedCells[col]);
    }
  }
}
 
Example 7
/**
 * Processes the basic-property detail include template for {@code BASIC_STRING_PROP} and
 * checks the generated form group plus the id, type and ng-model of its input element.
 */
@Test
public void testGenerateBasicStringProperty() throws Exception
{
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, BASIC_STRING_PROP);

   URL templateUrl = getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.BASIC_PROPERTY_DETAIL_INCLUDE);
   Resource<URL> templateResource = resourceFactory.create(templateUrl);
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   // The wrapping form group must carry a non-empty ng-class attribute.
   Elements formGroup = parsed.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements input = parsed.select("div.form-group input");
   assertThat(input.attr("id"), equalTo("fullName"));
   assertThat(input.attr("type"), equalTo("text"));
   String expectedModel = StringUtils.camelCase(ENTITY_NAME) + "." + "fullName";
   assertThat(input.attr("ng-model"), equalTo(expectedModel));
}
 
Example 8
/**
 * Processes the basic-property detail include template for {@code STRING_PROP_WITH_MIN_LEN}
 * and checks the generated form group and the attributes of its input element, including
 * {@code ng-minlength}.
 */
@Test
public void testGenerateBasicStringPropertyWithMinlength() throws Exception
{
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, STRING_PROP_WITH_MIN_LEN);

   String templatePath = Deployments.BASE_PACKAGE_PATH + Deployments.BASIC_PROPERTY_DETAIL_INCLUDE;
   Resource<URL> templateResource = resourceFactory.create(getClass().getResource(templatePath));
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   // The wrapping form group must carry a non-empty ng-class attribute.
   Elements formGroup = parsed.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements input = parsed.select("div.form-group input");
   assertThat(input.attr("id"), equalTo("fullName"));
   assertThat(input.attr("type"), equalTo("text"));
   String expectedModel = StringUtils.camelCase(ENTITY_NAME) + "." + "fullName";
   assertThat(input.attr("ng-model"), equalTo(expectedModel));
   assertThat(input.attr("ng-minlength"), equalTo("5"));
}
 
Example 9
/**
 * Processes the search-form input template for {@code BOOLEAN_PROP} and checks that the
 * form group contains a select element with the expected id and ng-model.
 */
@Test
public void testGenerateBasicBooleanProperty() throws Exception {
    Map<String, Object> model = TestHelpers.createInspectionResultWrapper(ENTITY_NAME, BOOLEAN_PROP);

    URL templateUrl = getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
    String rendered = template.process(model);
    Document parsed = Jsoup.parseBodyFragment(rendered);
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = parsed.select("div.form-group");
    assertThat(formGroup, notNullValue());

    // The select must be a direct child of the col-sm-10 wrapper inside the form group.
    Elements selectInput = formGroup.select("div.col-sm-10 > select");
    assertThat(selectInput.attr("id"), equalTo("optForMail"));
    String expectedModel = "search" + "." + "optForMail";
    assertThat(selectInput.attr("ng-model"), equalTo(expectedModel));
}
 
Example 10
Source Project: baleen   File: RemoveEmptyTextTest.java    License: Apache License 2.0 5 votes vote down vote up
/** A paragraph that contains text must still be present after {@code m.manipulate}. */
@Test
public void testNonEmpty() {
  Document doc = Jsoup.parseBodyFragment("<p>Hello</p>");

  m.manipulate(doc);

  boolean noParagraphsLeft = doc.body().select("p").isEmpty();
  assertFalse(noParagraphsLeft);
}
 
Example 11
Source Project: tieba-api   File: TieBaApi.java    License: MIT License 5 votes vote down vote up
/**
 * Fetches the index page of a Tieba forum and collects the thread ids (tid) listed on it.
 * A thread's link is https://tieba.baidu.com/p/ + tid.
 *
 * @param tbName forum identifier, sent as the {@code kw} query parameter
 * @param replyNum optional filter: when non-null, only threads whose reply count equals this
 *                 value are returned (e.g. 0 to find threads with no replies yet);
 *                 when null, no reply-count filtering is applied
 * @return list of thread ids; empty when the request does not return HTTP 200 with a body,
 *         when the thread list is not found in the page, or when an exception is logged
 */
public List<String> getIndexTList(String tbName, Integer replyNum){
	List<String> list = new ArrayList<String>();
	try {
		// NOTE(review): tbName is concatenated into the URL without encoding - confirm callers pass a URL-safe value.
		HttpResponse response = hk.execute(Constants.TIEBA_GET_URL + "/f?kw=" + tbName + "&fr=index");
		String result = EntityUtils.toString(response.getEntity());
		if(StrKit.notBlank(result) && response.getStatusLine().getStatusCode() == 200){
			Document doc_thread = Jsoup.parse(result);
			// Locate the block that holds the thread list; it is absent when the page layout differs.
			Element threadList = doc_thread.getElementById("pagelet_html_frs-list/pagelet/thread_list");
			if(threadList == null){
				logger.error("thread list container not found on index page of " + tbName);
				return list;
			}
			// The thread markup is wrapped in an HTML comment; strip the comment markers.
			String tcode = threadList.html()
							.replace("<!--", "")
							.replace("-->", "");
			// Parse the uncommented markup as a new body.
			Document doc = Jsoup.parseBodyFragment(tcode);
			Elements link  = doc.getElementsByAttributeValue("class", "j_th_tit "); // thread links (source of the tid)
			Elements data  = doc.getElementsByAttributeValueMatching("class", "j_thread_list.* clearfix"); // data-field: reply count, sticky flag
			// The two lists are matched by index; never read past the shorter one.
			int count = Math.min(link.size(), data.size());
			for (int i = 0; i < count; i++) {
				Element element = link.get(i);
				String dataField = data.get(i).attr("data-field");
				Object isTop = JsonKit.getInfo("is_top", dataField);
				if(isTop != null && ("1".equals(isTop.toString()) || "true".equals(isTop.toString()))){
					// Sticky threads are skipped.
					continue;
				}
				if(replyNum != null){
					// Only read the reply count when filtering; a missing count simply does not match.
					Integer reply = (Integer) JsonKit.getInfo("reply_num", dataField);
					if(!replyNum.equals(reply)){
						continue;
					}
				}
				// Drops the first three characters of the href (presumably the "/p/" prefix).
				list.add(element.attr("href").substring(3));
			}
		}
	} catch (Exception e) {
		logger.error(e.getMessage(), e);
	}
	return list;
}
 
Example 12
/**
 * A script whose commented body contains a split closing-tag string must come back from
 * the parser exactly as it went in.
 */
@Test public void handlesQuotesInCommentsInScripts() {
    // Input and expected output are the same markup, so build it once.
    String script = "<script>\n"
            + "  <!--\n"
            + "    document.write('</scr' + 'ipt>');\n"
            + "  // -->\n"
            + "</script>";

    Document parsed = Jsoup.parseBodyFragment(script);

    assertEquals(script, parsed.body().html());
}
 
Example 13
Source Project: jinjava   File: ForTagTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Interprets the "multiple-loop-vars" fixture against a two-entry dictionary and expects
 * two paragraph elements in the output.
 */
@Test
public void forLoopMultipleLoopVars() {
  Map<String, Object> dictionary = Maps.newHashMap();
  dictionary.put("foo", "one");
  dictionary.put("bar", 2L);
  context.put("the_dictionary", dictionary);

  TagNode loopNode = (TagNode) fixture("multiple-loop-vars");
  String rendered = tag.interpret(loopNode, interpreter);
  Document dom = Jsoup.parseBodyFragment(rendered);

  assertThat(dom.select("p")).hasSize(2);
}
 
Example 14
/**
 * Rewrites every {@code <video>} tag in rich-text content into a {@code <player>} tag whose
 * {@code url} attribute replaces the original {@code src} attribute.
 * @param html rich-text content
 * @param tagId topic tag id; -1 means the link is opened from the admin console and permissions are not checked
 * @param secret secret key; when blank, the original src is used as the url unchanged
 * @return the rewritten body HTML, or {@code html} itself when it is blank
 */
public String processVideoPlayer(String html,Long tagId,String secret){
	if(StringUtils.isBlank(html)){
		return html;
	}

	boolean hasSecret = secret != null && !"".equals(secret.trim());
	Document doc = Jsoup.parseBodyFragment(html);
	for (Element video : doc.select("video")) {
		// Remember the src attribute before removing it.
		String src = video.attr("src");
		video.removeAttr("src");
		// Turn the current tag into a <player> tag.
		video.tagName("player");

		// With a secret, the url is a redirect link built by SecureLink; otherwise the plain src.
		String url = hasSecret ? SecureLink.createVideoRedirectLink(src,tagId,secret) : src;
		video.attr("url",url);
	}
	// Output settings: prettyPrint (reformat), outline (force every tag onto its own line), indentAmount (indent width).
	doc.outputSettings().prettyPrint(false);
	return doc.body().html();
}
 
Example 15
Source Project: baleen   File: RemoveEmptyTextTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * After {@code m.manipulate} runs on a fragment with an empty paragraph, an empty div and a
 * paragraph containing text, exactly one paragraph must remain.
 */
@Test
public void testMixedEmpty() {

  Document doc = Jsoup.parseBodyFragment("<p></p><div></div><p>Hello</p>");
  m.manipulate(doc);

  // Expected value goes first so that a failure message reports expected/actual the right way round.
  assertEquals(1, doc.body().select("p").size());
}
 
Example 16
/** The body text of a plain paragraph must be unchanged after {@code m.manipulate}. */
@Test
public void testNoMarking() {
  Document doc = Jsoup.parseBodyFragment("<p>This is some text</p>");
  m.manipulate(doc);

  // Expected value goes first so that a failure message reports expected/actual the right way round.
  assertEquals("This is some text", doc.body().text());
}
 
Example 17
/**
 * Processes the search-results template for an entity with a single one-to-one property
 * ("voucher") and checks the generated table header, row repeater and result cell link.
 */
@Test
public void testGenerateOneToOneProperty() throws Exception
{
   String propertyName = "voucher";
   String pluralizedEntityName = "SampleEntities";

   // Metadata describing the one-to-one property.
   Map<String, String> voucher = new HashMap<String, String>();
   voucher.put("name", propertyName);
   voucher.put("label", StringUtils.uncamelCase(propertyName));
   voucher.put("type", "com.example.scaffoldtester.model.DiscountVoucher");
   voucher.put("one-to-one", "true");
   voucher.put("simpleType", "DiscountVoucher");
   voucher.put("optionLabel", "id");

   List<Map<String, ? extends Object>> allProperties = new ArrayList<Map<String, ? extends Object>>();
   allProperties.add(voucher);

   // Template model.
   Map<String, Object> model = new HashMap<String, Object>();
   model.put("entityName", "SampleEntity");
   model.put("pluralizedEntityName", pluralizedEntityName);
   model.put("entityId", propertyName);
   model.put("properties", allProperties);

   URL templateUrl = getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_RESULTS);
   Resource<URL> templateResource = resourceFactory.create(templateUrl);
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   Elements headerCells = parsed.select("table > thead > tr > th");
   assertThat(headerCells.size(), equalTo(1));
   assertThat(headerCells.text(), equalTo("Voucher"));

   Elements rows = parsed.select("table > tbody > tr");
   assertThat(rows.attr("ng-repeat"), containsString("result in filteredResults"));

   Elements cells = rows.select(" > td");
   assertThat(cells.size(), equalTo(1));
   String expectedHref = "#/" + pluralizedEntityName + "/edit/{{result.voucher}}";
   assertThat(cells.select("a").attr("href"), equalTo(expectedHref));
   assertThat(cells.select("a").text(), equalTo("{{result.voucher.id}}"));
}
 
Example 18
/**
 * Processes the search-results template for an entity with a single number property ("age")
 * and checks the generated table header, row repeater and result cell link.
 */
@Test
public void testGenerateBasicNumberProperty() throws Exception
{
   String propertyName = "age";
   String pluralizedEntityName = "SampleEntities";

   // Metadata describing the number property.
   Map<String, String> age = new HashMap<String, String>();
   age.put("name", propertyName);
   age.put("label", StringUtils.uncamelCase(propertyName));
   age.put("type", "number");

   List<Map<String, ? extends Object>> allProperties = new ArrayList<Map<String, ? extends Object>>();
   allProperties.add(age);

   // Template model.
   Map<String, Object> model = new HashMap<String, Object>();
   model.put("entityName", "SampleEntity");
   model.put("pluralizedEntityName", pluralizedEntityName);
   model.put("entityId", propertyName);
   model.put("properties", allProperties);

   URL templateUrl = getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_RESULTS);
   Resource<URL> templateResource = resourceFactory.create(templateUrl);
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   Elements headerCells = parsed.select("table > thead > tr > th");
   assertThat(headerCells.size(), equalTo(1));
   assertThat(headerCells.text(), equalTo("Age"));

   Elements rows = parsed.select("table > tbody > tr");
   assertThat(rows.attr("ng-repeat"), containsString("result in filteredResults"));

   Elements cells = rows.select(" > td");
   assertThat(cells.size(), equalTo(1));
   String expectedHref = "#/" + pluralizedEntityName + "/edit/{{result.age}}";
   assertThat(cells.select("a").attr("href"), equalTo(expectedHref));
   assertThat(cells.select("a").text(), equalTo("{{result.age}}"));
}
 
Example 19
/**
 * Processes the n-to-many property detail include template for {@code MANY_TO_MANY_PROP}
 * and checks the generated form group and the attributes of its select element.
 */
@Test
public void testGenerateManyToManyProperty() throws Exception
{
   String propertyName = "users";
   Map<String, Object> model = createInspectionResultWrapper(ENTITY_NAME, MANY_TO_MANY_PROP);

   URL templateUrl = getClass().getResource(
            Deployments.BASE_PACKAGE_PATH + Deployments.N_TO_MANY_PROPERTY_DETAIL_INCLUDE);
   Resource<URL> templateResource = resourceFactory.create(templateUrl);
   Template template = processorFactory.create(templateResource, FreemarkerTemplate.class);
   String rendered = template.process(model);
   Document parsed = Jsoup.parseBodyFragment(rendered);
   assertThat(rendered.trim(), not(equalTo("")));

   // The wrapping form group must carry a non-empty ng-class attribute.
   Elements formGroup = parsed.select("div.form-group");
   assertThat(formGroup, notNullValue());
   assertThat(formGroup.attr("ng-class"), not(equalTo("")));

   Elements widget = parsed.select("div.form-group > div.col-sm-10");
   assertThat(widget, notNullValue());

   Elements select = widget.select(" > select");
   assertThat(select.attr("id"), equalTo(propertyName));
   assertThat(select.attr("multiple"), notNullValue());
   assertThat(select.attr("ng-model"), equalTo(propertyName + "Selection"));

   // The ng-options iteration variable is the first letter of the property name.
   String loopVar = propertyName.substring(0, 1);
   String expectedOptions = loopVar + ".text for " + loopVar + " in " + propertyName
            + "SelectionList";
   assertThat(select.attr("ng-options"), equalTo(expectedOptions));
}
 
Example 20
/**
 * Parses the {@code <hide>} tags found in rich-text content.
 * @param html rich-text content
 * @return map of hide-tag type to its input value; for each hide type only the first value is kept
 */
public Map<Integer,Object> analysisHiddenTag(String html){
	// key: hide-tag type, value: the value entered for that tag
	Map<Integer,Object> inputValueMap = new HashMap<Integer,Object>();
	if(StringUtils.isBlank(html)){
		return inputValueMap;
	}

	for (Element hideTag : Jsoup.parseBodyFragment(html).select("hide")) {
		// hide-tag type
		String rawType = hideTag.attr("hide-type");
		// hide-tag input value
		String rawValue = hideTag.attr("input-value");

		if(rawType == null || "".equals(rawType.trim())){
			continue;
		}
		String type = rawType.trim();
		boolean hasValue = rawValue != null && !"".equals(rawValue.trim());

		// Visible after entering a password: keep the trimmed value as a String.
		if(type.equals(HideTagType.PASSWORD.getName().toString())){
			if(hasValue && inputValueMap.get(HideTagType.PASSWORD.getName()) == null){
				inputValueMap.put(HideTagType.PASSWORD.getName(), rawValue.trim());
			}
		}
		// Visible after commenting on the topic: no input value needed, just a flag.
		if(type.equals(HideTagType.COMMENT.getName().toString())){
			if(inputValueMap.get(HideTagType.COMMENT.getName()) == null){
				inputValueMap.put(HideTagType.COMMENT.getName(), true);
			}
		}
		// Visible once a grade is reached: the value is parsed as a long.
		if(type.equals(HideTagType.GRADE.getName().toString())){
			if(hasValue && inputValueMap.get(HideTagType.GRADE.getName()) == null){
				inputValueMap.put(HideTagType.GRADE.getName(), Long.parseLong(rawValue.trim()));
			}
		}
		// Visible after purchase with points: the value is parsed as a long.
		if(type.equals(HideTagType.POINT.getName().toString())){
			if(hasValue && inputValueMap.get(HideTagType.POINT.getName()) == null){
				inputValueMap.put(HideTagType.POINT.getName(), Long.parseLong(rawValue.trim()));
			}
		}
		// Visible after purchase with account balance: the value is parsed as a BigDecimal.
		if(type.equals(HideTagType.AMOUNT.getName().toString())){
			if(hasValue && inputValueMap.get(HideTagType.AMOUNT.getName()) == null){
				inputValueMap.put(HideTagType.AMOUNT.getName(), new BigDecimal(rawValue.trim()));
			}
		}
	}
	return inputValueMap;
}