org.jsoup.parser.Tag Java Examples

The following examples show how to use org.jsoup.parser.Tag. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: cetty   Author: heyingcai   File: CifnewsPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh {@code div} and fills it from {@code tempBody}.
 *
 * <p>The text of {@code div.fetch-read>div.summary} is handed to {@code buildBlockquote}.
 * Every direct child of {@code div.article-inner} is then processed in order: children that
 * contain {@code div.fetch-present} are skipped, children holding a {@code p>img} are replaced
 * by the figure from {@code buildFigure} (with a caption taken from the image's next sibling
 * when that sibling is a {@code p}), and all other children are appended unchanged.
 *
 * @param tempBody the elements to read the article content from
 * @return the newly created {@code div} holding the assembled content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    buildBlockquote(tempBody.select("div.fetch-read>div.summary").text(), container);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            container.appendChild(child);
            continue;
        }

        // The sibling is looked up only after the figure has been built, as before.
        Element figure = buildFigure(image);
        Element following = image.nextElementSibling();
        if (following != null && following.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(following.text()));
        }
        container.appendChild(figure);
    }
    return container;
}
 
Example #2
Source Project: cetty   Author: heyingcai   File: Waimaob2cPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh {@code div} from the given elements.
 *
 * <p>A {@code p} element is dropped when its text contains the Shopify registration sentence or
 * when it carries the {@code post-copyright} class. Of the remaining elements, those containing
 * an {@code img} contribute the figure from {@code buildFigure}; the others are appended as is.
 *
 * @param tempBody the elements to copy into the article body
 * @return the newly created {@code div} holding the assembled content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (Element node : tempBody) {
        if (node.tagName().equals("p")) {
            String paragraphText = node.select("p").text();
            if (paragraphText.contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!")
                    || node.classNames().contains("post-copyright")) {
                continue;
            }
        }

        Element image = node.select("img").first();
        container.appendChild(image != null ? buildFigure(image) : node);
    }
    return container;
}
 
Example #3
Source Project: cetty   Author: heyingcai   File: GuxiaobeiPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a fresh {@code div} from the given elements.
 *
 * <p>Elements containing {@code div.open-message}, {@code div.jp-relatedposts} or
 * {@code div.article-social} are skipped. For a {@code p} that contains an {@code img}, the
 * image's {@code src} is rewritten and the figure from {@code buildFigure} is appended.
 * Everything else is appended unchanged.
 *
 * <p>The {@code src} rewrite works as follows: a value containing {@code data:image} is
 * replaced by the {@code data-src} attribute, a value without {@code www.guxiaobei.com} gets
 * {@code http://www.guxiaobei.com} prepended, and any other value is kept.
 *
 * @param tempBody the elements to copy into the article body
 * @return the newly created {@code div} holding the assembled content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element container = new Element(Tag.valueOf("div"), "");
    for (final Element node : tempBody) {
        if (!node.select("div.open-message,div.jp-relatedposts,div.article-social").isEmpty()) {
            continue;
        }

        // Only paragraphs are searched for an image.
        Element image = node.tagName().equals("p") ? node.select("img").first() : null;
        if (image == null) {
            container.appendChild(node);
            continue;
        }

        String rawSrc = image.attr("src");
        String resolvedSrc;
        if (rawSrc.contains("data:image")) {
            resolvedSrc = image.attr("data-src");
        } else if (rawSrc.contains("www.guxiaobei.com")) {
            resolvedSrc = rawSrc;
        } else {
            resolvedSrc = "http://www.guxiaobei.com" + rawSrc;
        }
        image.attr("src", resolvedSrc);

        container.appendChild(buildFigure(image));
    }
    return container;
}
 
Example #4
Source Project: Asqatasun   Author: Asqatasun   File: SimpleTextElementBuilderTest.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks that {@code SimpleTextElementBuilder.buildTextFromElement} returns the text of an
 * element and its two levels of nested children as one string with single spaces between
 * the parts.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    // Inner-most level.
    Element grandChild = new Element(Tag.valueOf("div"), "");
    grandChild.text("   child element text second level  ");

    // Middle level: own text first, then the nested child.
    Element child = new Element(Tag.valueOf("div"), "");
    child.text("   child element text   ");
    child.appendChild(grandChild);

    // Top level: text, child, text.
    Element parent = new Element(Tag.valueOf("div"), "");
    parent.appendText("   text1   ");
    parent.appendChild(child);
    parent.appendText("   text2   ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(parent);

    assertEquals("text1 child element text child element text second level text2", actual);
}
 
Example #5
Source Project: baleen   Author: dstl   File: SemanticHtmlTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * For each tag name in the expectation table, maps an empty element with that tag through
 * {@code SemanticHtml} and checks that the first collected annotation is an instance of the
 * expected class. An entry with a {@code null} class would require no annotations at all.
 */
@Test
public void testMain() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  SemanticHtml mapper = new SemanticHtml();

  Map<String, Class<?>> expectedByTag = new HashMap<>();
  expectedByTag.put("time", Temporal.class);
  expectedByTag.put("meter", Quantity.class);
  expectedByTag.put("dfn", Buzzword.class);
  expectedByTag.put("address", Location.class);
  expectedByTag.put("abbr", Buzzword.class);
  expectedByTag.put("cite", DocumentReference.class);

  for (Map.Entry<String, Class<?>> entry : expectedByTag.entrySet()) {
    String tagName = entry.getKey();
    Class<?> expectedType = entry.getValue();

    Element element = new Element(Tag.valueOf(tagName), "");
    AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, element, collector);

    if (expectedType == null) {
      assertNull(collector.getAnnotations());
      continue;
    }
    assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
  }
}
 
Example #6
Source Project: astor   Author: SpoonLabs   File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks what {@code absUrl} returns for a relative and an absolute attribute value when the
 * element has an empty base URI, a usable base URI, and a base URI with an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    Tag anchorTag = Tag.valueOf("a");
    Attributes linkAttrs = new Attributes();
    linkAttrs.put("relHref", "/foo");
    linkAttrs.put("absHref", "http://bar/qux");

    // Empty base: a relative value yields nothing, an absolute value is returned as is.
    Element baseless = new Element(anchorTag, "", linkAttrs);
    assertEquals("", baseless.absUrl("relHref"));
    assertEquals("http://bar/qux", baseless.absUrl("absHref"));

    // Usable base: relative values are resolved against it, absolute values win,
    // and an attribute that does not exist yields the empty string.
    Element based = new Element(anchorTag, "http://foo/", linkAttrs);
    assertEquals("http://foo/foo", based.absUrl("relHref"));
    assertEquals("http://bar/qux", based.absUrl("absHref"));
    assertEquals("", based.absUrl("noval"));

    // Base with an unknown protocol: only the absolute value can be returned.
    Element badBase = new Element(anchorTag, "wtf://no-such-protocol/", linkAttrs);
    assertEquals("http://bar/qux", badBase.absUrl("absHref"));
    assertEquals("", badBase.absUrl("relHref"));
}
 
Example #7
Source Project: baleen   Author: dstl   File: StructuralAnnotationsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps two {@code a} elements, one with an absolute and one with a root-relative {@code href},
 * and checks that each produces a {@code Link} annotation whose target equals the
 * {@code href} value.
 */
@Test
public void testLink() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
  absoluteAnchor.attr("href", "http://example.com");
  final Element relativeAnchor = new Element(Tag.valueOf("a"), "");
  relativeAnchor.attr("href", "/example.com");

  // Both anchors feed the same collector, so the annotations arrive in mapping order.
  final AnnotationCollector collector = new AnnotationCollector();
  mapper.map(jCas, absoluteAnchor, collector);
  mapper.map(jCas, relativeAnchor, collector);

  Annotation first = collector.getAnnotations().get(0);
  assertTrue(first instanceof Link);
  assertEquals("http://example.com", ((Link) first).getTarget());

  Annotation second = collector.getAnnotations().get(1);
  assertTrue(second instanceof Link);
  assertEquals("/example.com", ((Link) second).getTarget());
}
 
Example #8
Source Project: baleen   Author: dstl   File: StructuralAnnotationsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * For each class name in the expectation table, maps an {@code article} element carrying that
 * {@code class} attribute through {@code StructuralAnnotations} and checks that the first
 * collected annotation is an instance of the expected type. An entry with a {@code null} type
 * would require no annotations at all.
 */
@Test
public void testArticle() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  final Map<String, Class<?>> expectedByCssClass = new HashMap<>();
  expectedByCssClass.put("Sheet", Sheet.class);
  expectedByCssClass.put("Slide", Slide.class);
  expectedByCssClass.put("Page", Page.class);
  expectedByCssClass.put("Another", Page.class);

  for (final Map.Entry<String, Class<?>> entry : expectedByCssClass.entrySet()) {
    final Class<?> expectedType = entry.getValue();

    final Element article = new Element(Tag.valueOf("article"), "");
    article.attr("class", entry.getKey());

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, article, collector);

    if (expectedType == null) {
      assertNull(collector.getAnnotations());
      continue;
    }
    assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
  }
}
 
Example #9
Source Project: astor   Author: SpoonLabs   File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a new element with the same tag name and base URI as {@code sourceEl}. Only the
 * source attributes that the whitelist reports as safe are copied, and the whitelist's
 * enforced attributes for the tag are added afterwards.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of source attributes that were not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes keptAttrs = new Attributes();
    Element safeCopy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added last, after the filtered copy.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeCopy, dropped);
}
 
Example #10
Source Project: astor   Author: SpoonLabs   File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Produces a filtered copy of {@code sourceEl}: same tag name and base URI, but only the
 * attributes accepted by {@code whitelist.isSafeAttribute}, followed by the attributes the
 * whitelist enforces for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the copy paired with the count of rejected source attributes
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes acceptedAttrs = new Attributes();
    final Element copy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), acceptedAttrs);

    int rejectedCount = 0;
    for (Attribute attr : sourceEl.attributes()) {
        boolean safe = whitelist.isSafeAttribute(tagName, sourceEl, attr);
        if (safe) {
            acceptedAttrs.put(attr);
        } else {
            rejectedCount++;
        }
    }

    Attributes enforced = whitelist.getEnforcedAttributes(tagName);
    acceptedAttrs.addAll(enforced);

    return new ElementMeta(copy, rejectedCount);
}
 
Example #11
Source Project: astor   Author: SpoonLabs   File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises {@code absUrl} on three elements that share one attribute set but differ in their
 * base URI: empty, {@code http://foo/}, and a URI with an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    Tag aTag = Tag.valueOf("a");
    Attributes sharedAttrs = new Attributes();
    sharedAttrs.put("relHref", "/foo");
    sharedAttrs.put("absHref", "http://bar/qux");

    // No base URI: the relative value cannot be resolved, the absolute one is returned.
    Element emptyBase = new Element(aTag, "", sharedAttrs);
    assertEquals("", emptyBase.absUrl("relHref"));
    assertEquals("http://bar/qux", emptyBase.absUrl("absHref"));

    // Valid base URI: base + relative value, absolute value unchanged, unknown key is "".
    Element validBase = new Element(aTag, "http://foo/", sharedAttrs);
    assertEquals("http://foo/foo", validBase.absUrl("relHref"));
    assertEquals("http://bar/qux", validBase.absUrl("absHref"));
    assertEquals("", validBase.absUrl("noval"));

    // Unknown protocol in the base URI: the absolute value still works, the relative does not.
    Element unknownProtocolBase = new Element(aTag, "wtf://no-such-protocol/", sharedAttrs);
    assertEquals("http://bar/qux", unknownProtocolBase.absUrl("absHref"));
    assertEquals("", unknownProtocolBase.absUrl("relHref"));
}
 
Example #12
Source Project: astor   Author: SpoonLabs   File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Asserts the results of {@code absUrl} for a relative value ({@code /foo}) and an absolute
 * value ({@code http://bar/qux}) under three different base URIs.
 */
@Test
public void handlesBaseUri() {
    final Tag linkTag = Tag.valueOf("a");
    final Attributes hrefs = new Attributes();
    hrefs.put("relHref", "/foo");
    hrefs.put("absHref", "http://bar/qux");

    // Base URI "": nothing to resolve the relative value against.
    final Element withoutBase = new Element(linkTag, "", hrefs);
    assertEquals("", withoutBase.absUrl("relHref"));
    assertEquals("http://bar/qux", withoutBase.absUrl("absHref"));

    // Base URI "http://foo/": the relative value is joined to the base.
    final Element withHttpBase = new Element(linkTag, "http://foo/", hrefs);
    assertEquals("http://foo/foo", withHttpBase.absUrl("relHref"));
    assertEquals("http://bar/qux", withHttpBase.absUrl("absHref"));
    assertEquals("", withHttpBase.absUrl("noval")); // attribute is not present

    // Base URI with an unknown protocol: only the already-absolute value is returned.
    final Element withBrokenBase = new Element(linkTag, "wtf://no-such-protocol/", hrefs);
    assertEquals("http://bar/qux", withBrokenBase.absUrl("absHref"));
    assertEquals("", withBrokenBase.absUrl("relHref"));
}
 
Example #13
Source Project: astor   Author: SpoonLabs   File: NodeTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that {@code before} inserts content directly in front of the {@code b} element,
 * both when given a node and when given an HTML string.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    Element boldForNode = doc.select("b").first();
    boldForNode.before(emphasis);
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", doc.body().html());

    // Insert an HTML string.
    Element boldForHtml = doc.select("b").first();
    boldForHtml.before("<i>five</i>");
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", doc.body().html());
}
 
Example #14
Source Project: spring-boot   Author: h819   File: MyHtmlUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a new Element from the original Element: same tag name and base URI, with every
 * attribute of the source put into the new element's attribute set. Child nodes are not
 * copied here.
 *
 * @param sourceEl the element to rebuild
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    Attributes copiedAttrs = new Attributes();
    Element rebuilt = new Element(Tag.valueOf(sourceEl.tagName()), sourceEl.baseUri(), copiedAttrs);
    for (Attribute attr : sourceEl.attributes()) {
        copiedAttrs.put(attr);
    }
    return rebuilt;
}
 
Example #15
Source Project: Asqatasun   Author: Asqatasun   File: TextBelongsToBlackListCheckerTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Prepares the fixture: an empty {@code Elements} list, an empty {@code div} element, and
 * mocks for the collaborators. The nomenclature loader mock is set up to return the
 * nomenclature mock once for {@code BLACKLIST_NOM_NAME}.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain fixture objects.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
    mockNomenclature = createMock(Nomenclature.class);
    mockNomenclatureLoaderService = createMock(NomenclatureLoaderService.class);

    // Recorded expectation: exactly one lookup by the black-list code.
    expect(mockNomenclatureLoaderService.loadByCode(BLACKLIST_NOM_NAME))
            .andReturn(mockNomenclature)
            .once();
}
 
Example #16
Source Project: Asqatasun   Author: Asqatasun   File: TextEmptinessCheckerTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Prepares the fixture: an empty {@code Elements} list, an empty {@code div} element, and
 * mocks for the text element builder, SSP handler, test solution handler and process remark
 * service.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain fixture objects.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
 
Example #17
/**
 * Prepares the fixture: an empty {@code Elements} list, a {@code div} element whose
 * {@code AttributeStore.ALT_ATTR} attribute is {@code "test"}, and mocks for the
 * collaborators.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain fixture objects; the element carries the attribute under test.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");
    element.attr(AttributeStore.ALT_ATTR, "test");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
 
Example #18
/**
 * Resets the shared fixture before each test: creates new mocks for the collaborators and
 * starts from an empty {@code Elements} list and an attribute-less {@code div}.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);

    // Plain fixture objects.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");
}
 
Example #19
Source Project: astor   Author: SpoonLabs   File: ElementTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an element already stored in a {@code HashSet} is still found by
 * {@code contains} after a child has been appended to it.
 */
@Test
public void testHashcodeIsStableWithContentChanges() {
    Element rootElement = new Element(Tag.valueOf("root"), "");
    HashSet<Element> seen = new HashSet<Element>();
    seen.add(rootElement);

    // Change the element's content after it has been put into the set.
    Element child = new Element(Tag.valueOf("a"), "");
    rootElement.appendChild(child);

    assertTrue(seen.contains(rootElement));
}
 
Example #20
Source Project: Asqatasun   Author: Asqatasun   File: SimpleTextElementBuilderTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that {@code SimpleTextElementBuilder.buildTextFromElement} returns the text of a
 * {@code div} that contains only the text {@code "test"}.
 */
public void testBuildTextFromElement() {
    LOGGER.debug("buildTextFromElement");

    Element div = new Element(Tag.valueOf("div"), "");
    div.text("test");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(div);

    assertEquals("test", actual);
}
 
Example #21
Source Project: astor   Author: SpoonLabs   File: ElementTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Puts an element into a {@code HashSet}, appends a child to it, and asserts that the set
 * still reports the element as contained.
 */
@Test
public void testHashcodeIsStableWithContentChanges() {
    final Element parent = new Element(Tag.valueOf("root"), "");

    final HashSet<Element> elementSet = new HashSet<Element>();
    elementSet.add(parent);

    // The lookup below happens after the element's children have changed.
    parent.appendChild(new Element(Tag.valueOf("a"), ""));

    boolean stillPresent = elementSet.contains(parent);
    assertTrue(stillPresent);
}
 
Example #22
Source Project: Asqatasun   Author: Asqatasun   File: TextAttributeOfElementBuilderTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that {@code buildTextFromElement} returns {@code null} when the builder was created
 * with its no-argument constructor, even though the element has an
 * {@code AttributeStore.ALT_ATTR} attribute.
 */
public void testBuildTextFromElementWithTargettedAttributeNotSet() {
    LOGGER.debug("buildTextFromElementWithTargettedAttributeNotSet");

    Element div = new Element(Tag.valueOf("div"), "");
    div.attr(AttributeStore.ALT_ATTR, "test");

    TextAttributeOfElementBuilder builder = new TextAttributeOfElementBuilder();
    assertNull(builder.buildTextFromElement(div));
    // Disabled check kept from the original:
    // assertNull(instance.getAttributeName());
}
 
Example #23
Source Project: Asqatasun   Author: Asqatasun   File: TextAttributeOfElementBuilderTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that a builder created for {@code AttributeStore.ALT_ATTR} returns the value of that
 * attribute from the element.
 */
public void testBuildTextFromElementWithAttribute() {
    LOGGER.debug("buildTextFromElementWithAttribute");

    Element div = new Element(Tag.valueOf("div"), "");
    div.attr(AttributeStore.ALT_ATTR, "test");

    TextAttributeOfElementBuilder builder =
            new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR);
    String actual = builder.buildTextFromElement(div);

    assertEquals("test", actual);
    // Disabled check kept from the original:
    // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName());
}
 
Example #24
Source Project: Asqatasun   Author: Asqatasun   File: TextAttributeOfElementBuilderTest.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that a builder created for {@code AttributeStore.ALT_ATTR} returns {@code null} when
 * no attribute has been set on the element.
 */
public void testBuildTextFromElementWithAttributeMissing() {
    LOGGER.debug("buildTextFromElementWithAttributeMissing");

    Element bareDiv = new Element(Tag.valueOf("div"), "");
    TextAttributeOfElementBuilder builder =
            new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR);

    assertNull(builder.buildTextFromElement(bareDiv));
    // Disabled check kept from the original:
    // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName());
}
 
Example #25
Source Project: baleen   Author: dstl   File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code MarkupUtils.additionallyAnnotateAsType} records the type on the element:
 * the {@code types} attribute must equal the type name and {@code getTypes} must contain it.
 */
@Test
public void testAdditionallyAnnotateAsType() {
  Element e = new Element(Tag.valueOf("p"), "");
  MarkupUtils.additionallyAnnotateAsType(e, "testtype");

  // JUnit's contract is assertEquals(expected, actual); the expected literal goes first so a
  // failure message reports the two values the right way round.
  assertEquals("testtype", MarkupUtils.getAttribute(e, "types"));

  assertTrue(MarkupUtils.getTypes(e).contains("testtype"));
}
 
Example #26
Source Project: baleen   Author: dstl   File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code MarkupUtils.setAttribute} stores a value and that a second call with the
 * same key replaces it.
 *
 * <p>NOTE(review): the checks use the Java {@code assert} keyword, which is only evaluated
 * when the JVM runs with assertions enabled.
 */
@Test
public void testSetAttribute() {
  Element paragraph = new Element(Tag.valueOf("p"), "");

  // First write.
  MarkupUtils.setAttribute(paragraph, "key", "value");
  assert MarkupUtils.getAttribute(paragraph, "key").equals("value");

  // Second write to the same key.
  MarkupUtils.setAttribute(paragraph, "key", "value2");
  assert MarkupUtils.getAttribute(paragraph, "key").equals("value2");
}
 
Example #27
Source Project: baleen   Author: dstl   File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code MarkupUtils.addAttribute} accumulates values under one key: after adding
 * two values, both are found via {@code getAttribute} and via {@code getAttributes}.
 *
 * <p>NOTE(review): the checks use the Java {@code assert} keyword, which is only evaluated
 * when the JVM runs with assertions enabled.
 */
@Test
public void testAddAttribute() {
  Element paragraph = new Element(Tag.valueOf("p"), "");

  // After the first add only "value1" is expected.
  MarkupUtils.addAttribute(paragraph, "key", "value1");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value1");

  // After the second add both values must be visible through both accessors.
  MarkupUtils.addAttribute(paragraph, "key", "value2");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value1");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value2");
  assert MarkupUtils.getAttributes(paragraph, "key").contains("value1");
  assert MarkupUtils.getAttributes(paragraph, "key").contains("value2");
}
 
Example #28
Source Project: baleen   Author: dstl   File: DataAttributeMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that mapping a {@code p} element with no attributes leaves the collector's
 * annotations {@code null}.
 */
@Test
public void testElementWithoutTypes() {
  Element untypedParagraph = new Element(Tag.valueOf("p"), "");

  mapper.map(jCas, untypedParagraph, collector);

  assertNull(collector.getAnnotations());
}
 
Example #29
Source Project: baleen   Author: dstl   File: DataAttributeMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an element annotated with the {@code ProtectiveMarking} type, but with no
 * further attributes, is mapped to exactly one {@code ProtectiveMarking} annotation whose
 * classification is {@code null}.
 */
@Test
public void testElementWithTypeNoAttributes() {
  Element e = new Element(Tag.valueOf("p"), "");
  MarkupUtils.additionallyAnnotateAsType(
      e, "uk.gov.dstl.baleen.types.metadata.ProtectiveMarking");
  mapper.map(jCas, e, collector);

  assertEquals(1, collector.getAnnotations().size());
  ProtectiveMarking annotation = (ProtectiveMarking) collector.getAnnotations().get(0);
  // JUnit's contract is assertEquals(expected, actual); the expected value (null) goes first
  // so a failure message reports the two values the right way round.
  assertEquals(null, annotation.getClassification());
}
 
Example #30
Source Project: baleen   Author: dstl   File: StructuralAnnotationsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For every tag in the {@code expected} table, maps an empty element with that tag through
 * {@code StructuralAnnotations} and compares the collected annotations with the expected
 * classes.
 *
 * <p>When no classes are expected, the annotations must be {@code null} or empty. Otherwise
 * the number of annotations must match the number of expected classes, and each annotation
 * must be an instance of the class at the same index.
 */
@Test
public void testMap() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();

  final StructuralAnnotations sa = new StructuralAnnotations();

  for (final Map.Entry<Tag, Class<?>[]> e : expected.entrySet()) {
    final Element element = new Element(e.getKey(), "");
    final AnnotationCollector collector = new AnnotationCollector();
    sa.map(jCas, element, collector);

    final List<Annotation> annotations = collector.getAnnotations();

    final Class<?>[] classes = e.getValue();
    if (classes == null || classes.length == 0) {
      if (annotations != null) {
        assertTrue(annotations.isEmpty());
      }
    } else {
      // JUnit's contract is assertEquals(expected, actual); the expected count comes from the
      // table, so it goes first and a failure message reports the values the right way round.
      assertEquals(classes.length, annotations.size());
      for (int i = 0; i < classes.length; i++) {
        final Class<?> c = classes[i];
        assertTrue(c.isInstance(annotations.get(i)));
      }
    }
  }
}