org.jsoup.parser.Tag Java Examples

The following examples show how to use org.jsoup.parser.Tag. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NodeTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks {@code absUrl} resolution for a relative and an absolute attribute value
 * on elements built with an empty, a valid, and an unknown-protocol base URI.
 */
@Test
public void handlesBaseUri() {
    final String relKey = "relHref";
    final String absKey = "absHref";
    final String absValue = "http://bar/qux";

    Attributes sharedAttrs = new Attributes();
    sharedAttrs.put(relKey, "/foo");
    sharedAttrs.put(absKey, absValue);
    Tag anchor = Tag.valueOf("a");

    // Empty base URI: the relative value yields "", the absolute value is returned unchanged.
    Element withoutBase = new Element(anchor, "", sharedAttrs);
    assertEquals("", withoutBase.absUrl(relKey));
    assertEquals(absValue, withoutBase.absUrl(absKey));

    // Usable base URI: the relative value is joined onto the base; an absent attribute yields "".
    Element based = new Element(anchor, "http://foo/", sharedAttrs);
    assertEquals("http://foo/foo", based.absUrl(relKey));
    assertEquals(absValue, based.absUrl(absKey));
    assertEquals("", based.absUrl("noval"));

    // Base URI with an unknown protocol: only the already-absolute value is expected back.
    Element badBase = new Element(anchor, "wtf://no-such-protocol/", sharedAttrs);
    assertEquals(absValue, badBase.absUrl(absKey));
    assertEquals("", badBase.absUrl(relKey));
}
 
Example #2
Source File: GuxiaobeiPageHandler.java    From cetty with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the given elements into a fresh {@code div}.
 * Elements matching the excluded selectors are dropped; a {@code p} that contains an
 * {@code img} is replaced by the result of {@code buildFigure} after its {@code src}
 * attribute has been rewritten; everything else is appended unchanged.
 *
 * @param tempBody the candidate elements to copy into the article body
 * @return the newly created {@code div} holding the retained content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (final Element node : tempBody) {
        boolean excluded =
                !node.select("div.open-message,div.jp-relatedposts,div.article-social").isEmpty();
        if (excluded) {
            continue;
        }

        // Only paragraphs are inspected for an image; other tags are appended as they are.
        Element image = "p".equals(node.tagName()) ? node.select("img").first() : null;
        if (image == null) {
            container.appendChild(node);
            continue;
        }

        String source = image.attr("src");
        if (source.contains("data:image")) {
            // Inline data URI: take the value of data-src instead.
            source = image.attr("data-src");
        } else if (!source.contains("www.guxiaobei.com")) {
            // Host not present in the value: prefix it with the site origin.
            source = "http://www.guxiaobei.com" + source;
        }
        image.attr("src", source);
        container.appendChild(buildFigure(image));
    }
    return container;
}
 
Example #3
Source File: Waimaob2cPageHandler.java    From cetty with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the given elements into a fresh {@code div}.
 * Paragraphs carrying the registration text or the {@code post-copyright} class are
 * skipped; an element containing an {@code img} is replaced by the result of
 * {@code buildFigure}; every other element is appended unchanged.
 *
 * @param tempBody the candidate elements to copy into the article body
 * @return the newly created {@code div} holding the retained content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (Element node : tempBody) {
        if ("p".equals(node.tagName())) {
            boolean isRegisterPromo = node.select("p").text().contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!");
            boolean isCopyright = node.classNames().contains("post-copyright");
            if (isRegisterPromo || isCopyright) {
                continue;
            }
        }

        Element image = node.select("img").first();
        if (image == null) {
            container.appendChild(node);
        } else {
            container.appendChild(buildFigure(image));
        }
    }
    return container;
}
 
Example #4
Source File: CifnewsPageHandler.java    From cetty with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the article body as a fresh {@code div}.
 * The summary text is handed to {@code buildBlockquote} first; then each direct child of
 * {@code div.article-inner} is processed: children containing {@code div.fetch-present}
 * are skipped, children with a {@code p>img} become a figure (with a caption when the
 * image is directly followed by a {@code p} sibling), and the rest are appended unchanged.
 *
 * @param tempBody the elements the article content is selected from
 * @return the newly created {@code div} holding the assembled content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    String summary = tempBody.select("div.fetch-read>div.summary").text();
    buildBlockquote(summary, container);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            container.appendChild(child);
            continue;
        }

        // The figure is built before the sibling lookup, matching the original call order.
        Element figure = buildFigure(image);
        Element following = image.nextElementSibling();
        if (following != null && "p".equals(following.tagName())) {
            figure.appendChild(buildFigcaption(following.text()));
        }
        container.appendChild(figure);
    }
    return container;
}
 
Example #5
Source File: NodeTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies what {@code absUrl} returns for relative and absolute attribute values
 * when the element's base URI is empty, valid, or uses an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    Attributes attrs = new Attributes();
    attrs.put("relHref", "/foo");
    attrs.put("absHref", "http://bar/qux");
    Tag linkTag = Tag.valueOf("a");

    // Case 1: empty base URI.
    Element emptyBase = new Element(linkTag, "", attrs);
    assertEquals("", emptyBase.absUrl("relHref"));               // relative value gives ""
    assertEquals("http://bar/qux", emptyBase.absUrl("absHref")); // absolute value is returned

    // Case 2: valid base URI.
    Element validBase = new Element(linkTag, "http://foo/", attrs);
    assertEquals("http://foo/foo", validBase.absUrl("relHref")); // base + relative
    assertEquals("http://bar/qux", validBase.absUrl("absHref")); // absolute value wins
    assertEquals("", validBase.absUrl("noval"));                 // attribute not present

    // Case 3: base URI with an unknown protocol.
    Element brokenBase = new Element(linkTag, "wtf://no-such-protocol/", attrs);
    assertEquals("http://bar/qux", brokenBase.absUrl("absHref")); // absolute value still returned
    assertEquals("", brokenBase.absUrl("relHref"));               // relative value gives ""
}
 
Example #6
Source File: NodeTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Asserts the results of {@code absUrl} for one relative and one absolute attribute
 * across three elements that differ only in their base URI.
 */
@Test
public void handlesBaseUri() {
    final String relative = "relHref";
    final String absolute = "absHref";
    final String absoluteUrl = "http://bar/qux";

    Tag a = Tag.valueOf("a");
    Attributes attributes = new Attributes();
    attributes.put(relative, "/foo");
    attributes.put(absolute, absoluteUrl);

    // Empty base: relative lookup is expected to be "", absolute lookup the stored URL.
    Element baseless = new Element(a, "", attributes);
    assertEquals("", baseless.absUrl(relative));
    assertEquals(absoluteUrl, baseless.absUrl(absolute));

    // http base: relative lookup is expected to be joined with the base.
    Element httpBased = new Element(a, "http://foo/", attributes);
    assertEquals("http://foo/foo", httpBased.absUrl(relative));
    assertEquals(absoluteUrl, httpBased.absUrl(absolute));
    assertEquals("", httpBased.absUrl("noval"));

    // Unknown-protocol base: only the absolute lookup is expected to return a URL.
    Element oddBased = new Element(a, "wtf://no-such-protocol/", attributes);
    assertEquals(absoluteUrl, oddBased.absUrl(absolute));
    assertEquals("", oddBased.absUrl(relative));
}
 
Example #7
Source File: SimpleTextElementBuilderTest.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Exercises {@code buildTextFromElement} of {@code SimpleTextElementBuilder} on a div that
 * mixes padded text with two levels of nested div children, and compares the output with
 * the expected single-space-separated string.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    Element root = new Element(Tag.valueOf("div"), "");
    root.appendText("   text1   ");

    // First-level child, which itself receives a second-level child.
    Element firstLevel = new Element(Tag.valueOf("div"), "");
    firstLevel.text("   child element text   ");
    Element secondLevel = new Element(Tag.valueOf("div"), "");
    secondLevel.text("   child element text second level  ");
    firstLevel.appendChild(secondLevel);

    root.appendChild(firstLevel);
    root.appendText("   text2   ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(root);
    assertEquals("text1 child element text child element text second level text2", actual);
}
 
Example #8
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a new element with the same tag name and base URI as {@code sourceEl}, copying
 * only the attributes the whitelist reports as safe and then adding the whitelist's
 * enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element paired with the number of source attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element safe = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        kept.put(candidate);
    }

    // Enforced attributes are added after the copy loop.
    kept.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safe, dropped);
}
 
Example #9
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a filtered copy of {@code sourceEl}: same tag name and base URI, with only
 * whitelist-approved attributes plus the attributes the whitelist enforces for the tag.
 *
 * @param sourceEl the element whose tag and attributes are examined
 * @return an {@code ElementMeta} wrapping the copy and the count of discarded attributes
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String name = sourceEl.tagName();
    final Attributes cleanAttrs = new Attributes();
    final Element copy = new Element(Tag.valueOf(name), sourceEl.baseUri(), cleanAttrs);

    int rejected = 0;
    final Attributes originalAttrs = sourceEl.attributes();
    for (Attribute attr : originalAttrs) {
        boolean allowed = whitelist.isSafeAttribute(name, sourceEl, attr);
        if (allowed) {
            cleanAttrs.put(attr);
        } else {
            rejected++;
        }
    }

    final Attributes forced = whitelist.getEnforcedAttributes(name);
    cleanAttrs.addAll(forced);
    return new ElementMeta(copy, rejected);
}
 
Example #10
Source File: StructuralAnnotationsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Maps {@code article} elements with different {@code class} values through
 * {@code StructuralAnnotations} and checks the type of the first collected annotation.
 */
@Test
public void testArticle() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  // class attribute value -> annotation type expected for it
  final Map<String, Class<?>> typeByCssClass = new HashMap<>();
  typeByCssClass.put("Sheet", Sheet.class);
  typeByCssClass.put("Slide", Slide.class);
  typeByCssClass.put("Page", Page.class);
  typeByCssClass.put("Another", Page.class);

  for (final Map.Entry<String, Class<?>> entry : typeByCssClass.entrySet()) {
    final Class<?> expectedType = entry.getValue();

    final Element article = new Element(Tag.valueOf("article"), "");
    article.attr("class", entry.getKey());

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, article, collector);

    if (expectedType == null) {
      assertNull(collector.getAnnotations());
    } else {
      assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
  }
}
 
Example #11
Source File: SemanticHtmlTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Maps one element per listed tag name through {@code SemanticHtml} and checks the type
 * of the first collected annotation.
 */
@Test
public void testMain() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  SemanticHtml mapper = new SemanticHtml();

  // tag name -> annotation type expected for it
  Map<String, Class<?>> typeByTag = new HashMap<>();
  typeByTag.put("time", Temporal.class);
  typeByTag.put("meter", Quantity.class);
  typeByTag.put("dfn", Buzzword.class);
  typeByTag.put("address", Location.class);
  typeByTag.put("abbr", Buzzword.class);
  typeByTag.put("cite", DocumentReference.class);

  for (Map.Entry<String, Class<?>> entry : typeByTag.entrySet()) {
    Class<?> expectedType = entry.getValue();
    Element tagged = new Element(Tag.valueOf(entry.getKey()), "");

    AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, tagged, collector);

    if (expectedType == null) {
      assertNull(collector.getAnnotations());
    } else {
      assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
  }
}
 
Example #12
Source File: StructuralAnnotationsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Maps two anchors (one absolute href, one path-style href) through
 * {@code StructuralAnnotations} and checks that each yields a {@code Link} whose target
 * equals the href it was given.
 */
@Test
public void testLink() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
  absoluteAnchor.attr("href", "http://example.com");
  final Element pathAnchor = new Element(Tag.valueOf("a"), "");
  pathAnchor.attr("href", "/example.com");

  // Both anchors feed the same collector, in this order.
  final AnnotationCollector collector = new AnnotationCollector();
  mapper.map(jCas, absoluteAnchor, collector);
  mapper.map(jCas, pathAnchor, collector);

  Annotation first = collector.getAnnotations().get(0);
  assertTrue(first instanceof Link);
  assertEquals("http://example.com", ((Link) first).getTarget());

  Annotation second = collector.getAnnotations().get(1);
  assertTrue(second instanceof Link);
  assertEquals("/example.com", ((Link) second).getTarget());
}
 
Example #13
Source File: BootstrapHandler.java    From flow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the element used to carry CSS: a {@code link rel="stylesheet"} pointing at
 * {@code url} when one is given, otherwise an empty {@code style} element.
 * Both variants get their {@code type} attribute from {@code CSS_TYPE_ATTRIBUTE_VALUE}.
 *
 * @param url the stylesheet location, or {@code null} to get a {@code style} element
 * @return the created element
 */
private Element createStylesheetElement(String url) {
    if (url == null) {
        return new Element(Tag.valueOf("style"), "")
                .attr("type", CSS_TYPE_ATTRIBUTE_VALUE);
    }
    return new Element(Tag.valueOf("link"), "")
            .attr("rel", "stylesheet")
            .attr("type", CSS_TYPE_ATTRIBUTE_VALUE)
            .attr("href", url);
}
 
Example #14
Source File: ElementTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that {@code hasClass("ab")} is false for an element whose {@code class}
 * attribute value is {@code "abc "} (with a trailing space).
 */
@Test
public void whiteSpaceClassElement() {
    Attributes attributes = new Attributes();
    Element anchor = new Element(Tag.valueOf("a"), "", attributes);

    // The class value is written to the Attributes instance after it was handed to the element.
    attributes.put("class", "abc ");

    assertFalse(anchor.hasClass("ab"));
}
 
Example #15
Source File: CssTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks the {@code :root} selector: applied to {@code html} it is expected to match one
 * element tagged {@code html}; applied to the {@code body} selection, one tagged {@code body}.
 */
@Test
public void root() {
	Elements documentRoots = html.select(":root");
	assertEquals(1, documentRoots.size());
	Element documentRoot = documentRoots.get(0);
	assertNotNull(documentRoot);
	assertEquals(Tag.valueOf("html"), documentRoot.tag());

	Elements bodyRoots = html.select("body").select(":root");
	assertEquals(1, bodyRoots.size());
	Element bodyRoot = bodyRoots.get(0);
	assertNotNull(bodyRoot);
	assertEquals(Tag.valueOf("body"), bodyRoot.tag());
}
 
Example #16
Source File: ElementTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Exercises {@code insertChildren} with negative indexes: first moving existing
 * paragraphs from one div into another, then inserting newly created nodes, and checking
 * child counts, parents and sibling indexes after each step.
 */
@Test
public void insertChildrenAtPosition() {
    Document doc = Jsoup.parse("<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>");
    Element source = doc.select("div").get(0);
    Elements sourceParagraphs = source.select("p");
    Element target = doc.select("div").get(1);

    // Step 1: move the two paragraphs of the first div into the second one using index -1.
    assertEquals(2, target.childNodeSize());
    target.insertChildren(-1, sourceParagraphs);
    assertEquals(2, source.childNodeSize()); // moved two out
    assertEquals(4, target.childNodeSize());
    assertEquals(3, sourceParagraphs.get(1).siblingIndex()); // should be last

    // Step 2: insert three parentless nodes using index -2.
    Element firstSpan = new Element(Tag.valueOf("span"), "").text("Span1");
    Element secondSpan = new Element(Tag.valueOf("span"), "").text("Span2");
    TextNode trailingText = new TextNode("Text4", "");
    List<Node> fresh = new ArrayList<Node>();
    fresh.add(firstSpan);
    fresh.add(secondSpan);
    fresh.add(trailingText);

    assertNull(firstSpan.parent());
    target.insertChildren(-2, fresh);
    assertEquals(target, firstSpan.parent());
    assertEquals(7, target.childNodeSize());
    assertEquals(3, firstSpan.siblingIndex());
    assertEquals(4, secondSpan.siblingIndex());
    assertEquals(5, trailingText.siblingIndex());
}
 
Example #17
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks {@code after} with an element argument and with an HTML string argument by
 * comparing the resulting body HTML.
 */
@Test
public void after() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");

    // Element overload.
    doc.select("b").first().after(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><em>four</em> three</p>", afterElementInsert);

    // HTML string overload.
    doc.select("b").first().after("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <b>two</b><i>five</i><em>four</em> three</p>", afterHtmlInsert);
}
 
Example #18
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Inserts content after the {@code b} element twice (once as an element, once as HTML
 * text) and asserts the serialized body after each insertion.
 */
@Test
public void after() {
    final String markup = "<p>One <b>two</b> three</p>";
    Document parsed = Jsoup.parse(markup);

    Element inserted = new Element(Tag.valueOf("em"), "");
    inserted.appendText("four");

    parsed.select("b").first().after(inserted);
    assertEquals("<p>One <b>two</b><em>four</em> three</p>", parsed.body().html());

    // The second insertion is expected to land directly after <b>, ahead of the earlier <em>.
    parsed.select("b").first().after("<i>five</i>");
    assertEquals("<p>One <b>two</b><i>five</i><em>four</em> three</p>", parsed.body().html());
}
 
Example #19
Source File: JsoupCssInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces each element matched by {@code CSS_LINKS_SELECTOR} with a style element
 * holding the corresponding external CSS content, at the same position in the document.
 * Elements for which {@code isInlineModeAllowed} returns false, or for which no CSS entry
 * is found, are left untouched.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private static void internStyles(Document doc, List<ExternalCss> cssContents) {
	for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
		if (!isInlineModeAllowed(link, InlineModes.STYLE_ATTR)) {
			continue;
		}
		// Look up the CSS entry by the value of the element's href attribute.
		ExternalCss matched = getCss(cssContents, link.attr(HREF_ATTR));
		if (matched == null) {
			continue;
		}
		Element styleTag = new Element(Tag.valueOf(STYLE_TAG), "");
		styleTag.appendChild(new DataNode(getCssContent(matched)));
		link.replaceWith(styleTag);
	}
}
 
Example #20
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Finds elements, including and recursively under this element, with the specified tag name.
 * <p>
 * The name is lower-cased with {@link java.util.Locale#ROOT} so the result does not depend on
 * the JVM's default locale (under a Turkish default locale, for example, {@code "TITLE"}
 * would otherwise lower-case to {@code "tıtle"}), and is then trimmed.
 *
 * @param tagName The tag name to search for (case insensitively).
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // Locale-independent lower-casing; the no-argument toLowerCase() uses the default locale.
    tagName = tagName.toLowerCase(java.util.Locale.ROOT).trim();

    return Collector.collect(new Evaluator.Tag(tagName), this);
}
 
Example #21
Source File: AppShellSettings.java    From flow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an element with the given tag name, optional data content and attributes.
 *
 * @param tag the tag name of the element to create
 * @param content data to append as a {@code DataNode}; skipped when {@code null} or empty
 * @param attrs alternating attribute names and values; a trailing name without a value
 *        is not applied
 * @return the created element
 */
private static Element createElement(String tag, String content,
        String... attrs) {
    Element created = new Element(Tag.valueOf(tag), "");

    boolean hasContent = content != null && !content.isEmpty();
    if (hasContent) {
        created.appendChild(new DataNode(content));
    }

    // Consume the varargs two at a time: name at index, value at index + 1.
    int index = 0;
    while (index + 1 < attrs.length) {
        created.attr(attrs[index], attrs[index + 1]);
        index += 2;
    }
    return created;
}
 
Example #22
Source File: GithubDownLoadTests.java    From java_in_examples with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the given elements depth-first and prints the text of every element whose tag
 * satisfies {@code isHeader}.
 * <p>
 * NOTE(review): nothing is ever added to the returned list, and the lists returned by the
 * recursive calls are discarded, so the result is always empty — confirm whether
 * collecting {@code LinkContainer} entries was intended.
 *
 * @param elements the elements to traverse
 * @return a list that this method leaves empty
 */
private static List<LinkContainer> work(Elements elements) {
    List<LinkContainer> collected = new ArrayList<>(elements.size());
    for (Element current : elements) {
        if (isHeader(current.tag())) {
            String category = current.text();
            System.out.println(category);
        }
        // Recurse into the children; the returned list is not used.
        work(current.children());
    }
    return collected;
}
 
Example #23
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks the sibling accessors on elements that were never attached to a parent:
 * indexes and sizes are expected to be zero and the sibling lookups {@code null}.
 */
@Test
public void orphanNodeReturnsNullForSiblingElements() {
    Node orphanNode = new Element(Tag.valueOf("p"), "");
    Element orphanElement = new Element(Tag.valueOf("p"), "");

    // Node-level view.
    assertEquals(0, orphanNode.siblingIndex());
    assertEquals(0, orphanNode.siblingNodes().size());
    assertNull(orphanNode.previousSibling());
    assertNull(orphanNode.nextSibling());

    // Element-level view.
    assertEquals(0, orphanElement.siblingElements().size());
    assertNull(orphanElement.previousElementSibling());
    assertNull(orphanElement.nextElementSibling());
}
 
Example #24
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Asserts that a parentless {@code p} reports no siblings through both the {@code Node}
 * and the {@code Element} sibling APIs.
 */
@Test
public void orphanNodeReturnsNullForSiblingElements() {
    final Tag paragraph = Tag.valueOf("p");
    Node asNode = new Element(paragraph, "");
    Element asElement = new Element(paragraph, "");

    assertEquals(0, asNode.siblingIndex());
    assertEquals(0, asNode.siblingNodes().size());

    assertNull(asNode.previousSibling());
    assertNull(asNode.nextSibling());

    assertEquals(0, asElement.siblingElements().size());
    assertNull(asElement.previousElementSibling());
    assertNull(asElement.nextElementSibling());
}
 
Example #25
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks {@code before} with an element argument and with an HTML string argument by
 * comparing the resulting body HTML.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");

    // Element overload.
    doc.select("b").first().before(emphasis);
    String afterElementInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", afterElementInsert);

    // HTML string overload.
    doc.select("b").first().before("<i>five</i>");
    String afterHtmlInsert = doc.body().html();
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", afterHtmlInsert);
}
 
Example #26
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects every element, starting at this element and descending through its subtree,
 * whose tag matches the given name.
 *
 * @param tagName The tag name to search for (case insensitively); must not be empty.
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);

    // The name is passed through normalize() before it is handed to the evaluator.
    final String normalizedName = normalize(tagName);
    final Evaluator.Tag byTag = new Evaluator.Tag(normalizedName);
    return Collector.collect(byTag, this);
}
 
Example #27
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the elements in this element's subtree (this element included) that have the
 * specified tag name.
 *
 * @param tagName The tag name to search for (case insensitively); validated as non-empty.
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // Validation runs on the raw argument; only the normalized form reaches the evaluator.
    return Collector.collect(new Evaluator.Tag(normalize(tagName)), this);
}
 
Example #28
Source File: CssTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Asserts that {@code :root} selects exactly one element both on {@code html} and on the
 * {@code body} selection, tagged {@code html} and {@code body} respectively.
 */
@Test
public void root() {
	Elements fromDocument = html.select(":root");
	assertEquals(1, fromDocument.size());
	assertNotNull(fromDocument.get(0));
	Tag documentRootTag = fromDocument.get(0).tag();
	assertEquals(Tag.valueOf("html"), documentRootTag);

	Elements fromBody = html.select("body").select(":root");
	assertEquals(1, fromBody.size());
	assertNotNull(fromBody.get(0));
	Tag bodyRootTag = fromBody.get(0).tag();
	assertEquals(Tag.valueOf("body"), bodyRootTag);
}
 
Example #29
Source File: NodeTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Inserts content before the {@code b} element twice (once as an element, once as HTML
 * text) and asserts the serialized body after each insertion.
 */
@Test
public void before() {
    final String markup = "<p>One <b>two</b> three</p>";
    Document parsed = Jsoup.parse(markup);

    Element inserted = new Element(Tag.valueOf("em"), "");
    inserted.appendText("four");

    parsed.select("b").first().before(inserted);
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", parsed.body().html());

    // The second insertion is expected to land directly before <b>, after the earlier <em>.
    parsed.select("b").first().before("<i>five</i>");
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", parsed.body().html());
}
 
Example #30
Source File: BootstrapHandler.java    From flow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a script element with the given {@code type} and defer flag, adding a
 * {@code src} attribute only when a source URL is supplied.
 *
 * @param sourceUrl the script location, or {@code null} to omit the {@code src} attribute
 * @param defer the value passed for the defer attribute
 * @param type the value of the {@code type} attribute
 * @return the created script element
 */
private static Element createJavaScriptElement(String sourceUrl,
        boolean defer, String type) {
    final Element script = new Element(Tag.valueOf(SCRIPT_TAG), "")
            .attr("type", type)
            .attr(DEFER_ATTRIBUTE, defer);
    return sourceUrl == null ? script : script.attr("src", sourceUrl);
}