Java Code Examples for org.jsoup.parser.Tag

The following examples show how to use org.jsoup.parser.Tag. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: cetty   Source File: CifnewsPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a fresh {@code div} from the selected page fragments.
 *
 * <p>The summary text is handed to {@code buildBlockquote} first. Then every direct child
 * of {@code div.article-inner} is visited: children containing {@code div.fetch-present}
 * are dropped, children holding a {@code p>img} are replaced by a figure (captioned with
 * the text of the image's following {@code p} sibling when there is one), and all other
 * children are appended unchanged.
 *
 * @param tempBody the elements selected from the source page
 * @return the newly built article body element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    final String summary = tempBody.select("div.fetch-read>div.summary").text();
    buildBlockquote(summary, articleBody);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        final Element image = child.select("p>img").first();
        if (image == null) {
            articleBody.appendChild(child);
            continue;
        }

        // The figure is built before the sibling lookup, matching the original call order.
        final Element figure = buildFigure(image);
        final Element next = image.nextElementSibling();
        if (next != null && next.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(next.text()));
        }
        articleBody.appendChild(figure);
    }
    return articleBody;
}
 
Example 2
Source Project: cetty   Source File: Waimaob2cPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a fresh {@code div} from the given elements.
 *
 * <p>A {@code p} element is skipped when its text contains the registration promo sentence
 * or when it carries the {@code post-copyright} class. Any remaining element that contains
 * an {@code img} is replaced by the result of {@code buildFigure}; everything else is
 * appended as is.
 *
 * @param tempBody the elements selected from the source page
 * @return the newly built article body element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");
    for (Element node : tempBody) {
        if (node.tagName().equals("p")) {
            final String paragraphText = node.select("p").text();
            final boolean isRegisterPromo = paragraphText.contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!");
            final boolean isCopyright = node.classNames().contains("post-copyright");
            if (isRegisterPromo || isCopyright) {
                continue;
            }
        }

        final Element image = node.select("img").first();
        if (image == null) {
            articleBody.appendChild(node);
        } else {
            articleBody.appendChild(buildFigure(image));
        }
    }
    return articleBody;
}
 
Example 3
Source Project: cetty   Source File: GuxiaobeiPageHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a fresh {@code div} from the given elements.
 *
 * <p>Elements containing {@code div.open-message}, {@code div.jp-relatedposts} or
 * {@code div.article-social} are dropped. For a {@code p} that holds an image, the image's
 * {@code src} is rewritten (a {@code data:image} source is replaced by {@code data-src};
 * a source not mentioning {@code www.guxiaobei.com} gets that host prefixed) and the
 * image is wrapped via {@code buildFigure}. All other elements are appended unchanged.
 *
 * @param tempBody the elements selected from the source page
 * @return the newly built article body element
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");
    for (final Element node : tempBody) {
        if (!node.select("div.open-message,div.jp-relatedposts,div.article-social").isEmpty()) {
            continue;
        }

        // Only paragraphs are inspected for images.
        Element image = null;
        if (node.tagName().equals("p")) {
            image = node.select("img").first();
        }
        if (image == null) {
            articleBody.appendChild(node);
            continue;
        }

        final String rawSrc = image.attr("src");
        final String resolvedSrc;
        if (rawSrc.contains("data:image")) {
            resolvedSrc = image.attr("data-src");
        } else if (!rawSrc.contains("www.guxiaobei.com")) {
            resolvedSrc = "http://www.guxiaobei.com" + rawSrc;
        } else {
            resolvedSrc = rawSrc;
        }
        image.attr("src", resolvedSrc);

        articleBody.appendChild(buildFigure(image));
    }
    return articleBody;
}
 
Example 4
/**
 * Checks buildTextFromElement of SimpleTextElementBuilder on an element that mixes
 * padded text nodes with two levels of nested child elements.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    // Outer element: leading text, then the nested children, then trailing text.
    Element parent = new Element(Tag.valueOf("div"), "");
    parent.appendText("   text1   ");

    Element firstLevel = new Element(Tag.valueOf("div"), "");
    firstLevel.text("   child element text   ");

    Element secondLevel = new Element(Tag.valueOf("div"), "");
    secondLevel.text("   child element text second level  ");
    firstLevel.appendChild(secondLevel);

    parent.appendChild(firstLevel);
    parent.appendText("   text2   ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    assertEquals(
            "text1 child element text child element text second level text2",
            builder.buildTextFromElement(parent));
}
 
Example 5
Source Project: baleen   Source File: SemanticHtmlTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps a bare element for each listed tag name through SemanticHtml and checks the type
 * of the first collected annotation.
 */
@Test
public void testMain() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  SemanticHtml mapper = new SemanticHtml();

  // Tag name -> class the first collected annotation must be an instance of.
  Map<String, Class<?>> expectedMain = new HashMap<>();
  expectedMain.put("time", Temporal.class);
  expectedMain.put("meter", Quantity.class);
  expectedMain.put("dfn", Buzzword.class);
  expectedMain.put("address", Location.class);
  expectedMain.put("abbr", Buzzword.class);
  expectedMain.put("cite", DocumentReference.class);

  for (Map.Entry<String, Class<?>> entry : expectedMain.entrySet()) {
    Class<?> expectedType = entry.getValue();
    Element element = new Element(Tag.valueOf(entry.getKey()), "");

    AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, element, collector);

    if (expectedType == null) {
      // A null mapping means no annotations are expected at all.
      assertNull(collector.getAnnotations());
    } else {
      assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
  }
}
 
Example 6
Source Project: astor   Source File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises {@code absUrl} on elements created with an empty base URI, a valid base URI
 * and a base URI using an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    final Attributes attrs = new Attributes();
    attrs.put("relHref", "/foo");
    attrs.put("absHref", "http://bar/qux");
    final Tag anchorTag = Tag.valueOf("a");

    // Empty base: a relative value resolves to "", an absolute value is returned as is.
    final Element withoutBase = new Element(anchorTag, "", attrs);
    assertEquals("", withoutBase.absUrl("relHref"));
    assertEquals("http://bar/qux", withoutBase.absUrl("absHref"));

    // Valid base: relative values are resolved against it; a missing attribute yields "".
    final Element based = new Element(anchorTag, "http://foo/", attrs);
    assertEquals("http://foo/foo", based.absUrl("relHref"));
    assertEquals("http://bar/qux", based.absUrl("absHref"));
    assertEquals("", based.absUrl("noval"));

    // Unusable base: the absolute value still comes back, the relative one yields "".
    final Element badBase = new Element(anchorTag, "wtf://no-such-protocol/", attrs);
    assertEquals("http://bar/qux", badBase.absUrl("absHref"));
    assertEquals("", badBase.absUrl("relHref"));
}
 
Example 7
Source Project: baleen   Source File: StructuralAnnotationsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps two anchors (one absolute href, one relative href) through StructuralAnnotations
 * and checks that each yields a Link whose target equals the href value.
 */
@Test
public void testLink() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
  absoluteAnchor.attr("href", "http://example.com");
  final Element relativeAnchor = new Element(Tag.valueOf("a"), "");
  relativeAnchor.attr("href", "/example.com");

  // Both anchors feed the same collector, so annotations arrive in mapping order.
  final AnnotationCollector collector = new AnnotationCollector();
  mapper.map(jCas, absoluteAnchor, collector);
  mapper.map(jCas, relativeAnchor, collector);

  final Annotation first = collector.getAnnotations().get(0);
  assertTrue(first instanceof Link);
  assertEquals("http://example.com", ((Link) first).getTarget());

  final Annotation second = collector.getAnnotations().get(1);
  assertTrue(second instanceof Link);
  assertEquals("/example.com", ((Link) second).getTarget());
}
 
Example 8
Source Project: baleen   Source File: StructuralAnnotationsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps an {@code article} element for each listed class attribute value through
 * StructuralAnnotations and checks the type of the first collected annotation.
 */
@Test
public void testArticle() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final StructuralAnnotations mapper = new StructuralAnnotations();

  // Class attribute value -> class the first collected annotation must be an instance of.
  final Map<String, Class<?>> expectedArticle = new HashMap<>();
  expectedArticle.put("Sheet", Sheet.class);
  expectedArticle.put("Slide", Slide.class);
  expectedArticle.put("Page", Page.class);
  expectedArticle.put("Another", Page.class);

  for (final Map.Entry<String, Class<?>> entry : expectedArticle.entrySet()) {
    final Class<?> expectedType = entry.getValue();

    final Element article = new Element(Tag.valueOf("article"), "");
    article.attr("class", entry.getKey());

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, article, collector);

    if (expectedType == null) {
      // A null mapping means no annotations are expected at all.
      assertNull(collector.getAnnotations());
    } else {
      assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
    }
  }
}
 
Example 9
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a copy of the given element's tag that carries only the attributes accepted by
 * the whitelist, plus the whitelist's enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of attributes that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
        } else {
            keptAttrs.put(candidate);
        }
    }

    // Enforced attributes are added after the filtered ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discarded);
}
 
Example 10
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Creates a copy of the given element's tag that carries only the attributes accepted by
 * the whitelist, plus the whitelist's enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of attributes that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
        } else {
            keptAttrs.put(candidate);
        }
    }

    // Enforced attributes are added after the filtered ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discarded);
}
 
Example 11
Source Project: astor   Source File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises {@code absUrl} on elements created with an empty base URI, a valid base URI
 * and a base URI using an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    final Attributes attrs = new Attributes();
    attrs.put("relHref", "/foo");
    attrs.put("absHref", "http://bar/qux");
    final Tag anchorTag = Tag.valueOf("a");

    // Empty base: a relative value resolves to "", an absolute value is returned as is.
    final Element withoutBase = new Element(anchorTag, "", attrs);
    assertEquals("", withoutBase.absUrl("relHref"));
    assertEquals("http://bar/qux", withoutBase.absUrl("absHref"));

    // Valid base: relative values are resolved against it; a missing attribute yields "".
    final Element based = new Element(anchorTag, "http://foo/", attrs);
    assertEquals("http://foo/foo", based.absUrl("relHref"));
    assertEquals("http://bar/qux", based.absUrl("absHref"));
    assertEquals("", based.absUrl("noval"));

    // Unusable base: the absolute value still comes back, the relative one yields "".
    final Element badBase = new Element(anchorTag, "wtf://no-such-protocol/", attrs);
    assertEquals("http://bar/qux", badBase.absUrl("absHref"));
    assertEquals("", badBase.absUrl("relHref"));
}
 
Example 12
Source Project: astor   Source File: NodeTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Exercises {@code absUrl} on elements created with an empty base URI, a valid base URI
 * and a base URI using an unknown protocol.
 */
@Test
public void handlesBaseUri() {
    final Attributes attrs = new Attributes();
    attrs.put("relHref", "/foo");
    attrs.put("absHref", "http://bar/qux");
    final Tag anchorTag = Tag.valueOf("a");

    // Empty base: a relative value resolves to "", an absolute value is returned as is.
    final Element withoutBase = new Element(anchorTag, "", attrs);
    assertEquals("", withoutBase.absUrl("relHref"));
    assertEquals("http://bar/qux", withoutBase.absUrl("absHref"));

    // Valid base: relative values are resolved against it; a missing attribute yields "".
    final Element based = new Element(anchorTag, "http://foo/", attrs);
    assertEquals("http://foo/foo", based.absUrl("relHref"));
    assertEquals("http://bar/qux", based.absUrl("absHref"));
    assertEquals("", based.absUrl("noval"));

    // Unusable base: the absolute value still comes back, the relative one yields "".
    final Element badBase = new Element(anchorTag, "wtf://no-such-protocol/", attrs);
    assertEquals("http://bar/qux", badBase.absUrl("absHref"));
    assertEquals("", badBase.absUrl("relHref"));
}
 
Example 13
Source Project: astor   Source File: NodeTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that {@code before} inserts content immediately ahead of the target element,
 * both when given a node and when given an HTML string.
 */
@Test
public void before() {
    final Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Node variant: a pre-built <em> element.
    final Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    final Element bold = doc.select("b").first();
    bold.before(emphasis);
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", doc.body().html());

    // HTML-string variant: inserted between the earlier <em> and the <b>.
    doc.select("b").first().before("<i>five</i>");
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", doc.body().html());
}
 
Example 14
Source Project: spring-boot   Source File: MyHtmlUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a new Element from the original Element: same tag name and base URI, with
 * every attribute of the source put into the new element's attribute set. Child nodes
 * are not touched here.
 *
 * @param sourceEl the element to rebuild from
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    final Attributes copiedAttrs = new Attributes();
    final Element rebuilt =
            new Element(Tag.valueOf(sourceEl.tagName()), sourceEl.baseUri(), copiedAttrs);
    for (Attribute attr : sourceEl.attributes()) {
        copiedAttrs.put(attr);
    }
    return rebuilt;
}
 
Example 15
/**
 * Prepares the fixture: an empty Elements list, a bare {@code div} element, and the
 * collaborator mocks, including the expected nomenclature lookup.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
    mockNomenclature = createMock(Nomenclature.class);
    mockNomenclatureLoaderService = createMock(NomenclatureLoaderService.class);

    // Record one expected loadByCode(BLACKLIST_NOM_NAME) call returning the nomenclature mock.
    expect(mockNomenclatureLoaderService.loadByCode(BLACKLIST_NOM_NAME))
            .andReturn(mockNomenclature)
            .once();
}
 
Example 16
/**
 * Prepares the fixture: an empty Elements list, a bare {@code div} element, and the
 * collaborator mocks.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
 
Example 17
/**
 * Prepares the fixture: an empty Elements list, a {@code div} element whose ALT_ATTR
 * attribute is set to "test", and the collaborator mocks.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");
    element.attr(AttributeStore.ALT_ATTR, "test");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
 
Example 18
/**
 * Prepares the fixture: an empty Elements list, a bare {@code div} element, and the
 * collaborator mocks.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
 
Example 19
Source Project: astor   Source File: ElementTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an element already stored in a HashSet is still found by
 * {@code contains} after a child has been appended to it.
 */
@Test
public void testHashcodeIsStableWithContentChanges() {
    final Element root = new Element(Tag.valueOf("root"), "");
    final HashSet<Element> seen = new HashSet<Element>();
    seen.add(root);

    // Change the root's content after it has been hashed into the set.
    final Element child = new Element(Tag.valueOf("a"), "");
    root.appendChild(child);

    assertTrue(seen.contains(root));
}
 
Example 20
/**
 * Checks buildTextFromElement of SimpleTextElementBuilder on an element whose only
 * content is the text "test".
 */
public void testBuildTextFromElement() {
    LOGGER.debug("buildTextFromElement");

    Element div = new Element(Tag.valueOf("div"), "");
    div.text("test");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    assertEquals("test", builder.buildTextFromElement(div));
}
 
Example 21
Source Project: astor   Source File: ElementTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an element already stored in a HashSet is still found by
 * {@code contains} after a child has been appended to it.
 */
@Test
public void testHashcodeIsStableWithContentChanges() {
    final Element root = new Element(Tag.valueOf("root"), "");
    final HashSet<Element> seen = new HashSet<Element>();
    seen.add(root);

    // Change the root's content after it has been hashed into the set.
    final Element child = new Element(Tag.valueOf("a"), "");
    root.appendChild(child);

    assertTrue(seen.contains(root));
}
 
Example 22
/**
     * Checks buildTextFromElement of TextAttributeOfElementBuilder when the builder is
     * created without a target attribute: the result must be null even though the
     * element carries an ALT_ATTR value.
     */
    public void testBuildTextFromElementWithTargettedAttributeNotSet() {
        LOGGER.debug("buildTextFromElementWithTargettedAttributeNotSet");

        Element div = new Element(Tag.valueOf("div"), "");
        div.attr(AttributeStore.ALT_ATTR, "test");

        TextAttributeOfElementBuilder builder = new TextAttributeOfElementBuilder();
        assertNull(builder.buildTextFromElement(div));
        // Assertion left disabled, as in the original test:
//        assertNull(instance.getAttributeName());
    }
 
Example 23
/**
     * Checks buildTextFromElement of TextAttributeOfElementBuilder when the builder
     * targets ALT_ATTR and the element carries that attribute: the attribute value
     * "test" must be returned.
     */
    public void testBuildTextFromElementWithAttribute() {
        LOGGER.debug("buildTextFromElementWithAttribute");

        Element div = new Element(Tag.valueOf("div"), "");
        div.attr(AttributeStore.ALT_ATTR, "test");

        TextAttributeOfElementBuilder builder =
                new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR);
        assertEquals("test", builder.buildTextFromElement(div));
        // Assertion left disabled, as in the original test:
//        assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName());
    }
 
Example 24
/**
     * Checks buildTextFromElement of TextAttributeOfElementBuilder when the builder
     * targets ALT_ATTR but the element has no attributes: the result must be null.
     */
    public void testBuildTextFromElementWithAttributeMissing() {
        LOGGER.debug("buildTextFromElementWithAttributeMissing");

        Element div = new Element(Tag.valueOf("div"), "");

        TextAttributeOfElementBuilder builder =
                new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR);
        assertNull(builder.buildTextFromElement(div));
        // Assertion left disabled, as in the original test:
//        assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName());
    }
 
Example 25
Source Project: baleen   Source File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that additionallyAnnotateAsType records the type on the element so that it is
 * visible both through the "types" attribute and through getTypes.
 */
@Test
public void testAdditionallyAnnotateAsType() {
  Element e = new Element(Tag.valueOf("p"), "");
  MarkupUtils.additionallyAnnotateAsType(e, "testtype");

  // Expected value goes first so a failure message reports expected/actual correctly.
  assertEquals("testtype", MarkupUtils.getAttribute(e, "types"));

  assertTrue(MarkupUtils.getTypes(e).contains("testtype"));
}
 
Example 26
Source Project: baleen   Source File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that setAttribute stores a value and that a second call with the same key
 * replaces it.
 *
 * <p>Note: the checks use Java {@code assert} statements, which are only evaluated when
 * assertions are enabled for the JVM.
 */
@Test
public void testSetAttribute() {
  final Element paragraph = new Element(Tag.valueOf("p"), "");

  // First write.
  MarkupUtils.setAttribute(paragraph, "key", "value");
  assert MarkupUtils.getAttribute(paragraph, "key").equals("value");

  // Overwrite with a new value under the same key.
  MarkupUtils.setAttribute(paragraph, "key", "value2");
  assert MarkupUtils.getAttribute(paragraph, "key").equals("value2");
}
 
Example 27
Source Project: baleen   Source File: MarkupUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that addAttribute accumulates values under one key: after two additions both
 * values are visible through getAttribute and through getAttributes.
 *
 * <p>Note: the checks use Java {@code assert} statements, which are only evaluated when
 * assertions are enabled for the JVM.
 */
@Test
public void testAddAttribute() {
  final Element paragraph = new Element(Tag.valueOf("p"), "");

  // After the first addition only value1 is checked.
  MarkupUtils.addAttribute(paragraph, "key", "value1");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value1");

  // After the second addition both values must be present.
  MarkupUtils.addAttribute(paragraph, "key", "value2");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value1");
  assert MarkupUtils.getAttribute(paragraph, "key").contains("value2");

  assert MarkupUtils.getAttributes(paragraph, "key").contains("value1");
  assert MarkupUtils.getAttributes(paragraph, "key").contains("value2");
}
 
Example 28
Source Project: baleen   Source File: DataAttributeMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that mapping a bare {@code p} element, which has no type information attached,
 * leaves the collector's annotations null.
 */
@Test
public void testElementWithoutTypes() {
  final Element paragraph = new Element(Tag.valueOf("p"), "");

  mapper.map(jCas, paragraph, collector);

  assertNull(collector.getAnnotations());
}
 
Example 29
Source Project: baleen   Source File: DataAttributeMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an element annotated with the ProtectiveMarking type, but carrying no
 * further attributes, maps to exactly one ProtectiveMarking whose classification is null.
 */
@Test
public void testElementWithTypeNoAttributes() {
  Element e = new Element(Tag.valueOf("p"), "");
  MarkupUtils.additionallyAnnotateAsType(
      e, "uk.gov.dstl.baleen.types.metadata.ProtectiveMarking");
  mapper.map(jCas, e, collector);

  assertEquals(1, collector.getAnnotations().size());
  ProtectiveMarking annotation = (ProtectiveMarking) collector.getAnnotations().get(0);
  // Expected value goes first so a failure message reports expected/actual correctly.
  assertEquals(null, annotation.getClassification());
}
 
Example 30
Source Project: baleen   Source File: StructuralAnnotationsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For every tag in the {@code expected} table, maps a bare element of that tag through
 * StructuralAnnotations and checks the collected annotations against the listed classes.
 *
 * <p>A null or empty class array means no annotations are expected (a null list or an
 * empty list are both accepted). Otherwise the number of annotations must equal the
 * number of classes and each annotation must be an instance of the class at its index.
 */
@Test
public void testMap() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();

  final StructuralAnnotations sa = new StructuralAnnotations();

  for (final Map.Entry<Tag, Class<?>[]> e : expected.entrySet()) {
    final Element element = new Element(e.getKey(), "");
    final AnnotationCollector collector = new AnnotationCollector();
    sa.map(jCas, element, collector);

    final List<Annotation> annotations = collector.getAnnotations();

    final Class<?>[] classes = e.getValue();
    if (classes == null || classes.length == 0) {
      if (annotations != null) {
        assertTrue(annotations.isEmpty());
      }
    } else {
      // Expected count goes first so a failure message reports expected/actual correctly.
      assertEquals(classes.length, annotations.size());
      for (int i = 0; i < classes.length; i++) {
        final Class<?> c = classes[i];
        assertTrue(c.isInstance(annotations.get(i)));
      }
    }
  }
}