org.jsoup.parser.Tag Java Examples
The following examples show how to use
org.jsoup.parser.Tag.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: cetty Author: heyingcai File: CifnewsPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>The text of {@code div.fetch-read>div.summary} is handed to {@code buildBlockquote}
 * together with the new body. Each direct child of {@code div.article-inner} is then
 * processed: children containing {@code div.fetch-present} are skipped, children holding
 * a {@code p>img} are replaced by a figure (with a caption when the image is directly
 * followed by a {@code p} sibling), and every other child is appended unchanged.
 *
 * @param tempBody the elements to extract the article content from
 * @return the newly built {@code div} holding the article content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    final String summary = tempBody.select("div.fetch-read>div.summary").text();
    buildBlockquote(summary, articleBody);

    for (Element child : tempBody.select("div.article-inner>*")) {
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        final Element image = child.select("p>img").first();
        if (image == null) {
            articleBody.appendChild(child);
            continue;
        }

        // Build the figure before looking at the sibling, as the original does.
        final Element figure = buildFigure(image);
        final Element following = image.nextElementSibling();
        if (following != null && following.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(following.text()));
        }
        articleBody.appendChild(figure);
    }
    return articleBody;
}
Example #2
Source Project: cetty Author: heyingcai File: Waimaob2cPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>Paragraphs whose text contains the registration promo sentence, or that carry the
 * {@code post-copyright} class, are dropped. Any remaining element that contains an
 * {@code img} is replaced by the result of {@code buildFigure}; everything else is
 * appended unchanged.
 *
 * @param tempBody the candidate elements for the article body
 * @return the newly built {@code div} holding the article content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    for (Element node : tempBody) {
        if (node.tagName().equals("p")) {
            final boolean isRegisterPromo = node.select("p").text()
                    .contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!");
            final boolean isCopyright = node.classNames().contains("post-copyright");
            if (isRegisterPromo || isCopyright) {
                continue;
            }
        }

        final Element image = node.select("img").first();
        if (image == null) {
            articleBody.appendChild(node);
        } else {
            articleBody.appendChild(buildFigure(image));
        }
    }
    return articleBody;
}
Example #3
Source Project: cetty Author: heyingcai File: GuxiaobeiPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>Elements containing {@code div.open-message}, {@code div.jp-relatedposts} or
 * {@code div.article-social} are skipped. For a {@code p} that contains an image, the
 * image's {@code src} is rewritten (taken from {@code data-src} when it contains
 * {@code data:image}, otherwise prefixed with the site host when the host is absent) and
 * the image is wrapped via {@code buildFigure}. All other elements are appended unchanged.
 *
 * @param tempBody the candidate elements for the article body
 * @return the newly built {@code div} holding the article content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    for (final Element node : tempBody) {
        if (!node.select("div.open-message,div.jp-relatedposts,div.article-social").isEmpty()) {
            continue;
        }

        // Only paragraphs are inspected for images.
        Element image = null;
        if (node.tagName().equals("p")) {
            image = node.select("img").first();
        }
        if (image == null) {
            articleBody.appendChild(node);
            continue;
        }

        final String rawSrc = image.attr("src");
        final String resolvedSrc;
        if (rawSrc.contains("data:image")) {
            resolvedSrc = image.attr("data-src");
        } else if (!rawSrc.contains("www.guxiaobei.com")) {
            resolvedSrc = "http://www.guxiaobei.com" + rawSrc;
        } else {
            resolvedSrc = rawSrc;
        }
        image.attr("src", resolvedSrc);
        articleBody.appendChild(buildFigure(image));
    }
    return articleBody;
}
Example #4
Source Project: Asqatasun Author: Asqatasun File: SimpleTextElementBuilderTest.java License: GNU Affero General Public License v3.0 | 6 votes |
/**
 * Test of buildTextFromElement method, of class SimpleTextElementBuilder.
 *
 * <p>Builds a {@code div} with two text nodes around a child {@code div} that itself has
 * a nested {@code div}, and expects the text of all levels joined with single spaces.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    // Inner subtree: child -> grandchild.
    Element grandChild = new Element(Tag.valueOf("div"), "");
    grandChild.text(" child element text second level ");
    Element child = new Element(Tag.valueOf("div"), "");
    child.text(" child element text ");
    child.appendChild(grandChild);

    // Root: text, child subtree, text.
    Element root = new Element(Tag.valueOf("div"), "");
    root.appendText(" text1 ");
    root.appendChild(child);
    root.appendText(" text2 ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(root);

    assertEquals(
            "text1 child element text child element text second level text2",
            actual);
}
Example #5
Source Project: baleen Author: dstl File: SemanticHtmlTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps an empty element for each listed tag name through {@code SemanticHtml} and checks
 * that the first collected annotation is an instance of the expected type. When no type
 * is expected, the collector must hold no annotations.
 */
@Test
public void testMain() throws UIMAException {
    JCas jCas = JCasSingleton.getJCasInstance();
    SemanticHtml mapper = new SemanticHtml();

    Map<String, Class<?>> expectedByTag = new HashMap<>();
    expectedByTag.put("time", Temporal.class);
    expectedByTag.put("meter", Quantity.class);
    expectedByTag.put("dfn", Buzzword.class);
    expectedByTag.put("address", Location.class);
    expectedByTag.put("abbr", Buzzword.class);
    expectedByTag.put("cite", DocumentReference.class);

    for (Map.Entry<String, Class<?>> entry : expectedByTag.entrySet()) {
        Element element = new Element(Tag.valueOf(entry.getKey()), "");
        AnnotationCollector collector = new AnnotationCollector();
        mapper.map(jCas, element, collector);

        if (entry.getValue() == null) {
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(entry.getValue().isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example #6
Source Project: astor Author: SpoonLabs File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #7
Source Project: baleen Author: dstl File: StructuralAnnotationsTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps two anchor elements (one absolute, one relative {@code href}) through
 * {@code StructuralAnnotations} and checks that each yields a {@code Link} annotation
 * whose target equals the original {@code href} value, in mapping order.
 */
@Test
public void testLink() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
    absoluteAnchor.attr("href", "http://example.com");

    final Element relativeAnchor = new Element(Tag.valueOf("a"), "");
    relativeAnchor.attr("href", "/example.com");

    final AnnotationCollector collector = new AnnotationCollector();
    mapper.map(jCas, absoluteAnchor, collector);
    mapper.map(jCas, relativeAnchor, collector);

    Annotation first = collector.getAnnotations().get(0);
    assertTrue(first instanceof Link);
    assertEquals("http://example.com", ((Link) first).getTarget());

    Annotation second = collector.getAnnotations().get(1);
    assertTrue(second instanceof Link);
    assertEquals("/example.com", ((Link) second).getTarget());
}
Example #8
Source Project: baleen Author: dstl File: StructuralAnnotationsTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps an {@code article} element for each listed {@code class} value through
 * {@code StructuralAnnotations} and checks that the first collected annotation is an
 * instance of the expected type. When no type is expected, the collector must hold no
 * annotations.
 */
@Test
public void testArticle() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations mapper = new StructuralAnnotations();

    final Map<String, Class<?>> expectedByCssClass = new HashMap<>();
    expectedByCssClass.put("Sheet", Sheet.class);
    expectedByCssClass.put("Slide", Slide.class);
    expectedByCssClass.put("Page", Page.class);
    expectedByCssClass.put("Another", Page.class);

    for (final Map.Entry<String, Class<?>> entry : expectedByCssClass.entrySet()) {
        final Element article = new Element(Tag.valueOf("article"), "");
        article.attr("class", entry.getKey());

        final AnnotationCollector collector = new AnnotationCollector();
        mapper.map(jCas, article, collector);

        if (entry.getValue() == null) {
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(entry.getValue().isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example #9
Source Project: astor Author: SpoonLabs File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Creates a new element with the same tag name and base URI as {@code sourceEl}, copying
 * only the attributes the whitelist reports as safe and then adding the whitelist's
 * enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element paired with the number of source attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes safeAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        safeAttrs.put(candidate);
    }

    safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discarded);
}
Example #10
Source Project: astor Author: SpoonLabs File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Creates a new element with the same tag name and base URI as {@code sourceEl}, copying
 * only the attributes the whitelist reports as safe and then adding the whitelist's
 * enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element paired with the number of source attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes safeAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), safeAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        safeAttrs.put(candidate);
    }

    safeAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discarded);
}
Example #11
Source Project: astor Author: SpoonLabs File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #12
Source Project: astor Author: SpoonLabs File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example #13
Source Project: astor Author: SpoonLabs File: NodeTest.java License: GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code before} inserts content immediately ahead of the target element,
 * both when given an {@code Element} and when given an HTML string.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert a pre-built element node.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    doc.select("b").first().before(emphasis);
    assertEquals(
            "<p>One <em>four</em><b>two</b> three</p>",
            doc.body().html());

    // Insert markup given as a string.
    doc.select("b").first().before("<i>five</i>");
    assertEquals(
            "<p>One <em>four</em><i>five</i><b>two</b> three</p>",
            doc.body().html());
}
Example #14
Source Project: spring-boot Author: h819 File: MyHtmlUtils.java License: Apache License 2.0 | 5 votes |
/**
 * Rebuilds a new Element from the original Element: same tag name, same base URI, and a
 * copy of every attribute of the source.
 *
 * @param sourceEl the element to copy from
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes copiedAttrs = new Attributes();
    final Element copy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), copiedAttrs);

    for (Attribute attr : sourceEl.attributes()) {
        copiedAttrs.put(attr);
    }
    return copy;
}
Example #15
Source Project: Asqatasun Author: Asqatasun File: TextBelongsToBlackListCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: an empty {@code Elements} list, a bare {@code div} element, and
 * mocks for the collaborators. The nomenclature loader mock is primed to return the
 * nomenclature mock exactly once for {@code BLACKLIST_NOM_NAME}.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain jsoup fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockNomenclature = createMock(Nomenclature.class);
    mockNomenclatureLoaderService = createMock(NomenclatureLoaderService.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);

    // Recorded expectation on the loader service.
    expect(mockNomenclatureLoaderService.loadByCode(BLACKLIST_NOM_NAME))
            .andReturn(mockNomenclature)
            .once();
}
Example #16
Source Project: Asqatasun Author: Asqatasun File: TextEmptinessCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: an empty {@code Elements} list, a bare {@code div} element, and
 * mocks for the collaborators.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain jsoup fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example #17
Source Project: Asqatasun Author: Asqatasun File: TextNotIdenticalToAttributeCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: an empty {@code Elements} list, a {@code div} element whose
 * {@code AttributeStore.ALT_ATTR} attribute is set to {@code "test"}, and mocks for the
 * collaborators.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain jsoup fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");
    element.attr(AttributeStore.ALT_ATTR, "test");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example #18
Source Project: Asqatasun Author: Asqatasun File: TextOnlyContainsNonAlphanumericalCharactersCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: an empty {@code Elements} list, a bare {@code div} element, and
 * mocks for the collaborators.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // Plain jsoup fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    // Collaborator mocks.
    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);
    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example #19
Source Project: astor Author: SpoonLabs File: ElementTest.java License: GNU General Public License v2.0 | 5 votes |
@Test public void testHashcodeIsStableWithContentChanges() { Element root = new Element(Tag.valueOf("root"), ""); HashSet<Element> set = new HashSet<Element>(); // Add root node: set.add(root); root.appendChild(new Element(Tag.valueOf("a"), "")); assertTrue(set.contains(root)); }
Example #20
Source Project: Asqatasun Author: Asqatasun File: SimpleTextElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test of buildTextFromElement method, of class SimpleTextElementBuilder.
 *
 * <p>A {@code div} whose text is {@code "test"} must yield exactly {@code "test"}.
 */
public void testBuildTextFromElement() {
    LOGGER.debug("buildTextFromElement");

    Element div = new Element(Tag.valueOf("div"), "");
    div.text("test");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(div);

    assertEquals("test", actual);
}
Example #21
Source Project: astor Author: SpoonLabs File: ElementTest.java License: GNU General Public License v2.0 | 5 votes |
@Test public void testHashcodeIsStableWithContentChanges() { Element root = new Element(Tag.valueOf("root"), ""); HashSet<Element> set = new HashSet<Element>(); // Add root node: set.add(root); root.appendChild(new Element(Tag.valueOf("a"), "")); assertTrue(set.contains(root)); }
Example #22
Source Project: Asqatasun Author: Asqatasun File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithTargettedAttributeNotSet() { LOGGER.debug("buildTextFromElementWithTargettedAttributeNotSet"); Element element = new Element(Tag.valueOf("div"), ""); element.attr(AttributeStore.ALT_ATTR, "test"); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(); String result = instance.buildTextFromElement(element); assertNull(result); // assertNull(instance.getAttributeName()); }
Example #23
Source Project: Asqatasun Author: Asqatasun File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithAttribute() { LOGGER.debug("buildTextFromElementWithAttribute"); Element element = new Element(Tag.valueOf("div"), ""); element.attr(AttributeStore.ALT_ATTR, "test"); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR); String result = instance.buildTextFromElement(element); assertEquals("test", result); // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName()); }
Example #24
Source Project: Asqatasun Author: Asqatasun File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithAttributeMissing() { LOGGER.debug("buildTextFromElementWithAttributeMissing"); Element element = new Element(Tag.valueOf("div"), ""); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR); String result = instance.buildTextFromElement(element); assertNull(result); // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName()); }
Example #25
Source Project: baleen Author: dstl File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.additionallyAnnotateAsType} records the type so that it
 * is readable both through the {@code "types"} attribute and through
 * {@code MarkupUtils.getTypes}.
 */
@Test
public void testAdditionallyAnnotateAsType() {
    Element e = new Element(Tag.valueOf("p"), "");
    MarkupUtils.additionallyAnnotateAsType(e, "testtype");

    // Expected value goes first so a failure message reports the right way round.
    assertEquals("testtype", MarkupUtils.getAttribute(e, "types"));
    assertTrue(MarkupUtils.getTypes(e).contains("testtype"));
}
Example #26
Source Project: baleen Author: dstl File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.setAttribute} stores a value under a key and that a
 * second call with the same key replaces the stored value.
 *
 * <p>NOTE(review): the checks use the Java {@code assert} statement, which is only
 * evaluated when assertions are enabled (e.g. {@code -ea}); confirm the test runner
 * enables them.
 */
@Test
public void testSetAttribute() {
    Element paragraph = new Element(Tag.valueOf("p"), "");

    MarkupUtils.setAttribute(paragraph, "key", "value");
    assert (MarkupUtils.getAttribute(paragraph, "key").equals("value"));

    // Setting the same key again overwrites the earlier value.
    MarkupUtils.setAttribute(paragraph, "key", "value2");
    assert (MarkupUtils.getAttribute(paragraph, "key").equals("value2"));
}
Example #27
Source Project: baleen Author: dstl File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.addAttribute} accumulates values under one key: after
 * adding two values, both are visible through {@code getAttribute} and
 * {@code getAttributes}.
 *
 * <p>NOTE(review): the checks use the Java {@code assert} statement, which is only
 * evaluated when assertions are enabled (e.g. {@code -ea}); confirm the test runner
 * enables them.
 */
@Test
public void testAddAttribute() {
    Element paragraph = new Element(Tag.valueOf("p"), "");

    MarkupUtils.addAttribute(paragraph, "key", "value1");
    assert (MarkupUtils.getAttribute(paragraph, "key").contains("value1"));

    MarkupUtils.addAttribute(paragraph, "key", "value2");

    // Single-string view of the attribute.
    assert (MarkupUtils.getAttribute(paragraph, "key").contains("value1"));
    assert (MarkupUtils.getAttribute(paragraph, "key").contains("value2"));

    // Multi-value view of the attribute.
    assert (MarkupUtils.getAttributes(paragraph, "key").contains("value1"));
    assert (MarkupUtils.getAttributes(paragraph, "key").contains("value2"));
}
Example #28
Source Project: baleen Author: dstl File: DataAttributeMapperTest.java License: Apache License 2.0 | 5 votes |
/**
 * Maps a bare {@code p} element (no type annotations added) and expects the collector to
 * hold no annotations.
 */
@Test
public void testElementWithoutTypes() {
    Element paragraph = new Element(Tag.valueOf("p"), "");

    mapper.map(jCas, paragraph, collector);

    assertNull(collector.getAnnotations());
}
Example #29
Source Project: baleen Author: dstl File: DataAttributeMapperTest.java License: Apache License 2.0 | 5 votes |
/**
 * Maps a {@code p} element annotated as {@code ProtectiveMarking} but carrying no further
 * attributes, and expects exactly one annotation whose classification is {@code null}.
 */
@Test
public void testElementWithTypeNoAttributes() {
    Element e = new Element(Tag.valueOf("p"), "");
    MarkupUtils.additionallyAnnotateAsType(
            e, "uk.gov.dstl.baleen.types.metadata.ProtectiveMarking");

    mapper.map(jCas, e, collector);

    assertEquals(1, collector.getAnnotations().size());
    ProtectiveMarking annotation = (ProtectiveMarking) collector.getAnnotations().get(0);
    // Expected value goes first so a failure message reports the right way round.
    assertEquals(null, annotation.getClassification());
}
Example #30
Source Project: baleen Author: dstl File: StructuralAnnotationsTest.java License: Apache License 2.0 | 5 votes |
/**
 * For every tag in {@code expected}, maps an empty element of that tag through
 * {@code StructuralAnnotations} and checks the collected annotations against the
 * expected classes, position by position. Tags with no expected classes must produce
 * either no annotation list or an empty one.
 */
@Test
public void testMap() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations sa = new StructuralAnnotations();

    for (final Map.Entry<Tag, Class<?>[]> e : expected.entrySet()) {
        final Element element = new Element(e.getKey(), "");
        final AnnotationCollector collector = new AnnotationCollector();
        sa.map(jCas, element, collector);

        final List<Annotation> annotations = collector.getAnnotations();
        final Class<?>[] classes = e.getValue();

        if (classes == null || classes.length == 0) {
            if (annotations != null) {
                assertTrue(annotations.isEmpty());
            }
        } else {
            // Expected value goes first so a failure message reports the right way round.
            assertEquals(classes.length, annotations.size());
            for (int i = 0; i < classes.length; i++) {
                final Class<?> c = classes[i];
                assertTrue(c.isInstance(annotations.get(i)));
            }
        }
    }
}