Java Code Examples for org.jsoup.parser.Tag
The following examples show how to use
org.jsoup.parser.Tag. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: cetty Source File: CifnewsPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>The text selected by {@code div.fetch-read>div.summary} is handed to
 * {@code buildBlockquote}; then every direct child of {@code div.article-inner}
 * is either skipped, replaced by a figure, or appended unchanged.
 *
 * @param tempBody the elements the article content is selected from
 * @return the newly created {@code div} holding the assembled content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    String summaryText = tempBody.select("div.fetch-read>div.summary").text();
    buildBlockquote(summaryText, articleBody);

    for (Element child : tempBody.select("div.article-inner>*")) {
        // Children that contain a "fetch-present" div are dropped entirely.
        if (!child.select("div.fetch-present").isEmpty()) {
            continue;
        }

        Element image = child.select("p>img").first();
        if (image == null) {
            articleBody.appendChild(child);
            continue;
        }

        Element figure = buildFigure(image);
        // The sibling is looked up only after buildFigure has run, as in the original ordering.
        Element sibling = image.nextElementSibling();
        if (sibling != null && sibling.tagName().equals("p")) {
            // A <p> directly following the image supplies the caption text.
            figure.appendChild(buildFigcaption(sibling.text()));
        }
        articleBody.appendChild(figure);
    }
    return articleBody;
}
Example 2
Source Project: cetty Source File: Waimaob2cPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>Paragraphs that contain the registration promo text or carry the
 * {@code post-copyright} class are skipped. Any remaining element that
 * contains an {@code img} is replaced by the result of {@code buildFigure};
 * everything else is appended as is.
 *
 * @param tempBody the candidate elements, visited in order
 * @return the newly created {@code div} holding the kept content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    for (Element node : tempBody) {
        if (node.tagName().equals("p")) {
            boolean isRegisterPromo = node.select("p").text().contains("即刻注册SHOPIFY账户, 跟着我们精心准备的SHOPIFY教程开始外贸独立站之旅!");
            boolean isCopyright = node.classNames().contains("post-copyright");
            if (isRegisterPromo || isCopyright) {
                continue;
            }
        }

        Element image = node.select("img").first();
        if (image == null) {
            articleBody.appendChild(node);
        } else {
            articleBody.appendChild(buildFigure(image));
        }
    }
    return articleBody;
}
Example 3
Source Project: cetty Source File: GuxiaobeiPageHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Assembles the article body into a fresh {@code div} element.
 *
 * <p>Elements containing an {@code open-message}, {@code jp-relatedposts} or
 * {@code article-social} div are skipped. For a {@code p} that contains an
 * image, the image's {@code src} is rewritten and the image is passed to
 * {@code buildFigure}; every other element is appended unchanged.
 *
 * @param tempBody the candidate elements, visited in order
 * @return the newly created {@code div} holding the kept content
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");

    for (final Element block : tempBody) {
        if (!block.select("div.open-message,div.jp-relatedposts,div.article-social").isEmpty()) {
            continue;
        }

        // Only paragraphs are searched for an image.
        Element image = block.tagName().equals("p") ? block.select("img").first() : null;
        if (image == null) {
            articleBody.appendChild(block);
            continue;
        }

        String src = image.attr("src");
        if (src.contains("data:image")) {
            // Inline data URI: take the value of data-src instead.
            src = image.attr("data-src");
        } else if (!src.contains("www.guxiaobei.com")) {
            // Host missing from the value: prefix the site origin.
            src = "http://www.guxiaobei.com" + src;
        }
        image.attr("src", src);
        articleBody.appendChild(buildFigure(image));
    }
    return articleBody;
}
Example 4
Source Project: Asqatasun Source File: SimpleTextElementBuilderTest.java License: GNU Affero General Public License v3.0 | 6 votes |
/**
 * Verifies {@code SimpleTextElementBuilder.buildTextFromElement} on an element
 * that has own text plus two levels of nested child elements.
 */
public void testBuildTextFromElementWithChildren() {
    LOGGER.debug("buildTextFromElementWithChildren");

    // Innermost level.
    Element grandChild = new Element(Tag.valueOf("div"), "");
    grandChild.text(" child element text second level ");

    // Middle level: its own text first, then the nested element.
    Element child = new Element(Tag.valueOf("div"), "");
    child.text(" child element text ");
    child.appendChild(grandChild);

    // Root: text, child, text.
    Element root = new Element(Tag.valueOf("div"), "");
    root.appendText(" text1 ");
    root.appendChild(child);
    root.appendText(" text2 ");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(root);

    assertEquals("text1 child element text child element text second level text2", actual);
}
Example 5
Source Project: baleen Source File: SemanticHtmlTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps one element per tag name through {@code SemanticHtml} and checks that
 * the first collected annotation is an instance of the type registered for
 * that tag.
 */
@Test
public void testMain() throws UIMAException {
    JCas jCas = JCasSingleton.getJCasInstance();
    SemanticHtml sa = new SemanticHtml();

    Map<String, Class<?>> expectedMain = new HashMap<>();
    expectedMain.put("time", Temporal.class);
    expectedMain.put("meter", Quantity.class);
    expectedMain.put("dfn", Buzzword.class);
    expectedMain.put("address", Location.class);
    expectedMain.put("abbr", Buzzword.class);
    expectedMain.put("cite", DocumentReference.class);

    for (Map.Entry<String, Class<?>> entry : expectedMain.entrySet()) {
        String tagName = entry.getKey();
        Class<?> expectedType = entry.getValue();

        Element element = new Element(Tag.valueOf(tagName), "");
        AnnotationCollector collector = new AnnotationCollector();
        sa.map(jCas, element, collector);

        if (expectedType == null) {
            // No type registered: nothing should have been collected.
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example 6
Source Project: astor Source File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example 7
Source Project: baleen Source File: StructuralAnnotationsTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps two anchors (one absolute href, one root-relative href) through
 * {@code StructuralAnnotations} and checks that each yields a {@code Link}
 * whose target equals the href that was set.
 */
@Test
public void testLink() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations sa = new StructuralAnnotations();

    final Element absoluteAnchor = new Element(Tag.valueOf("a"), "");
    absoluteAnchor.attr("href", "http://example.com");
    final Element relativeAnchor = new Element(Tag.valueOf("a"), "");
    relativeAnchor.attr("href", "/example.com");

    // Both anchors feed the same collector, so results are read back by index.
    final AnnotationCollector collector = new AnnotationCollector();
    sa.map(jCas, absoluteAnchor, collector);
    sa.map(jCas, relativeAnchor, collector);

    Annotation first = collector.getAnnotations().get(0);
    assertTrue(first instanceof Link);
    assertEquals("http://example.com", ((Link) first).getTarget());

    Annotation second = collector.getAnnotations().get(1);
    assertTrue(second instanceof Link);
    assertEquals("/example.com", ((Link) second).getTarget());
}
Example 8
Source Project: baleen Source File: StructuralAnnotationsTest.java License: Apache License 2.0 | 6 votes |
/**
 * Maps {@code article} elements with different {@code class} values through
 * {@code StructuralAnnotations} and checks the type of the first collected
 * annotation against the type registered for that class value.
 */
@Test
public void testArticle() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations sa = new StructuralAnnotations();

    final Map<String, Class<?>> expectedArticle = new HashMap<>();
    expectedArticle.put("Sheet", Sheet.class);
    expectedArticle.put("Slide", Slide.class);
    expectedArticle.put("Page", Page.class);
    expectedArticle.put("Another", Page.class);

    for (final Map.Entry<String, Class<?>> entry : expectedArticle.entrySet()) {
        final String cssClass = entry.getKey();
        final Class<?> expectedType = entry.getValue();

        final Element article = new Element(Tag.valueOf("article"), "");
        article.attr("class", cssClass);

        final AnnotationCollector collector = new AnnotationCollector();
        sa.map(jCas, article, collector);

        if (expectedType == null) {
            // No type registered: nothing should have been collected.
            assertNull(collector.getAnnotations());
        } else {
            assertTrue(expectedType.isInstance(collector.getAnnotations().get(0)));
        }
    }
}
Example 9
Source Project: astor Source File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Builds a new element with the same tag name and base URI as {@code sourceEl},
 * copying only the attributes the whitelist accepts and then adding the
 * whitelist's enforced attributes for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the new element together with the number of attributes not copied
 */
private ElementMeta createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes keptAttrs = new Attributes();
    Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copied ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discarded);
}
Example 10
Source Project: astor Source File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Creates a copy of {@code sourceEl} (same tag name and base URI) that carries
 * only whitelist-approved attributes plus the attributes the whitelist enforces
 * for that tag.
 *
 * @param sourceEl the element to copy from
 * @return the copy, paired with the count of attributes that were left out
 */
private ElementMeta createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes keptAttrs = new Attributes();
    Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        boolean allowed = whitelist.isSafeAttribute(tagName, sourceEl, candidate);
        if (allowed) {
            keptAttrs.put(candidate);
        } else {
            discarded++;
        }
    }

    // Enforced attributes are added after the copied ones.
    Attributes enforced = whitelist.getEnforcedAttributes(tagName);
    keptAttrs.addAll(enforced);

    return new ElementMeta(safeEl, discarded);
}
Example 11
Source Project: astor Source File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example 12
Source Project: astor Source File: NodeTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void handlesBaseUri() { Tag tag = Tag.valueOf("a"); Attributes attribs = new Attributes(); attribs.put("relHref", "/foo"); attribs.put("absHref", "http://bar/qux"); Element noBase = new Element(tag, "", attribs); assertEquals("", noBase.absUrl("relHref")); // with no base, should NOT fallback to href attrib, whatever it is assertEquals("http://bar/qux", noBase.absUrl("absHref")); // no base but valid attrib, return attrib Element withBase = new Element(tag, "http://foo/", attribs); assertEquals("http://foo/foo", withBase.absUrl("relHref")); // construct abs from base + rel assertEquals("http://bar/qux", withBase.absUrl("absHref")); // href is abs, so returns that assertEquals("", withBase.absUrl("noval")); Element dodgyBase = new Element(tag, "wtf://no-such-protocol/", attribs); assertEquals("http://bar/qux", dodgyBase.absUrl("absHref")); // base fails, but href good, so get that assertEquals("", dodgyBase.absUrl("relHref")); // base fails, only rel href, so return nothing }
Example 13
Source Project: astor Source File: NodeTest.java License: GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code before} inserts a sibling ahead of the target element,
 * both when given an {@code Element} and when given an HTML string.
 */
@Test
public void before() {
    Document doc = Jsoup.parse("<p>One <b>two</b> three</p>");

    // Insert an element node.
    Element emphasis = new Element(Tag.valueOf("em"), "");
    emphasis.appendText("four");
    Element bold = doc.select("b").first();
    bold.before(emphasis);
    assertEquals("<p>One <em>four</em><b>two</b> three</p>", doc.body().html());

    // Insert from an HTML string; the <b> is selected again, as in the original.
    Element boldAgain = doc.select("b").first();
    boldAgain.before("<i>five</i>");
    assertEquals("<p>One <em>four</em><i>five</i><b>two</b> three</p>", doc.body().html());
}
Example 14
Source Project: spring-boot Source File: MyHtmlUtils.java License: Apache License 2.0 | 5 votes |
/**
 * Rebuilds a new element from the given one: same tag name, same base URI,
 * and every attribute of the source put into the new element's attribute set.
 *
 * @param sourceEl the element to copy from
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    String tagName = sourceEl.tagName();
    Attributes copiedAttrs = new Attributes();
    Element copy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), copiedAttrs);

    // copiedAttrs is the instance handed to the constructor above.
    for (Attribute attr : sourceEl.attributes()) {
        copiedAttrs.put(attr);
    }
    return copy;
}
Example 15
Source Project: Asqatasun Source File: TextBelongsToBlackListCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: creates the collaborator mocks, an empty element
 * collection, a bare {@code div} element, and records the expectation that
 * the nomenclature loader is asked once for {@code BLACKLIST_NOM_NAME}.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockNomenclature = createMock(Nomenclature.class);
    mockNomenclatureLoaderService = createMock(NomenclatureLoaderService.class);

    // The loader must return the mock nomenclature exactly once.
    expect(mockNomenclatureLoaderService.loadByCode(BLACKLIST_NOM_NAME))
            .andReturn(mockNomenclature)
            .once();

    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example 16
Source Project: Asqatasun Source File: TextEmptinessCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: creates the collaborator mocks, an empty element
 * collection and a bare {@code div} element.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example 17
Source Project: Asqatasun Source File: TextNotIdenticalToAttributeCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: creates the collaborator mocks, an empty element
 * collection and a {@code div} element whose {@code AttributeStore.ALT_ATTR}
 * attribute is set to {@code "test"}.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");
    element.attr(AttributeStore.ALT_ATTR, "test");

    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example 18
Source Project: Asqatasun Source File: TextOnlyContainsNonAlphanumericalCharactersCheckerTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the fixture: creates the collaborator mocks, an empty element
 * collection and a bare {@code div} element.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    mockTextElementBuilder = createMock(TextElementBuilder.class);
    mockSSPHandler = createMock(SSPHandler.class);

    // DOM fixtures.
    elements = new Elements();
    element = new Element(Tag.valueOf("div"), "");

    mockTestSolutionHandler = createMock(TestSolutionHandler.class);
    mockProcessRemarkService = createMock(ProcessRemarkService.class);
}
Example 19
Source Project: astor Source File: ElementTest.java License: GNU General Public License v2.0 | 5 votes |
@Test public void testHashcodeIsStableWithContentChanges() { Element root = new Element(Tag.valueOf("root"), ""); HashSet<Element> set = new HashSet<Element>(); // Add root node: set.add(root); root.appendChild(new Element(Tag.valueOf("a"), "")); assertTrue(set.contains(root)); }
Example 20
Source Project: Asqatasun Source File: SimpleTextElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Verifies {@code SimpleTextElementBuilder.buildTextFromElement} on an element
 * whose only content is the text {@code "test"}.
 */
public void testBuildTextFromElement() {
    LOGGER.debug("buildTextFromElement");

    Element div = new Element(Tag.valueOf("div"), "");
    div.text("test");

    SimpleTextElementBuilder builder = new SimpleTextElementBuilder();
    String actual = builder.buildTextFromElement(div);

    assertEquals("test", actual);
}
Example 21
Source Project: astor Source File: ElementTest.java License: GNU General Public License v2.0 | 5 votes |
@Test public void testHashcodeIsStableWithContentChanges() { Element root = new Element(Tag.valueOf("root"), ""); HashSet<Element> set = new HashSet<Element>(); // Add root node: set.add(root); root.appendChild(new Element(Tag.valueOf("a"), "")); assertTrue(set.contains(root)); }
Example 22
Source Project: Asqatasun Source File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithTargettedAttributeNotSet() { LOGGER.debug("buildTextFromElementWithTargettedAttributeNotSet"); Element element = new Element(Tag.valueOf("div"), ""); element.attr(AttributeStore.ALT_ATTR, "test"); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(); String result = instance.buildTextFromElement(element); assertNull(result); // assertNull(instance.getAttributeName()); }
Example 23
Source Project: Asqatasun Source File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithAttribute() { LOGGER.debug("buildTextFromElementWithAttribute"); Element element = new Element(Tag.valueOf("div"), ""); element.attr(AttributeStore.ALT_ATTR, "test"); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR); String result = instance.buildTextFromElement(element); assertEquals("test", result); // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName()); }
Example 24
Source Project: Asqatasun Source File: TextAttributeOfElementBuilderTest.java License: GNU Affero General Public License v3.0 | 5 votes |
/** * Test of buildTextFromElement method, of class TextAttributeOfElementBuilder. */ public void testBuildTextFromElementWithAttributeMissing() { LOGGER.debug("buildTextFromElementWithAttributeMissing"); Element element = new Element(Tag.valueOf("div"), ""); TextAttributeOfElementBuilder instance = new TextAttributeOfElementBuilder(AttributeStore.ALT_ATTR); String result = instance.buildTextFromElement(element); assertNull(result); // assertEquals(AttributeStore.ALT_ATTR, instance.getAttributeName()); }
Example 25
Source Project: baleen Source File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.additionallyAnnotateAsType} makes the type
 * readable both through the {@code "types"} attribute and through
 * {@code MarkupUtils.getTypes}.
 */
@Test
public void testAdditionallyAnnotateAsType() {
    Element e = new Element(Tag.valueOf("p"), "");

    MarkupUtils.additionallyAnnotateAsType(e, "testtype");

    // Expected value first, so a failure message labels the two values correctly.
    assertEquals("testtype", MarkupUtils.getAttribute(e, "types"));
    assertTrue(MarkupUtils.getTypes(e).contains("testtype"));
}
Example 26
Source Project: baleen Source File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.setAttribute} stores a value and that a
 * second call with the same key replaces it.
 *
 * <p>Uses {@code assertEquals} rather than the {@code assert} keyword, which
 * is only evaluated when the JVM runs with assertions enabled.
 */
@Test
public void testSetAttribute() {
    Element e = new Element(Tag.valueOf("p"), "");

    MarkupUtils.setAttribute(e, "key", "value");
    assertEquals("value", MarkupUtils.getAttribute(e, "key"));

    MarkupUtils.setAttribute(e, "key", "value2");
    assertEquals("value2", MarkupUtils.getAttribute(e, "key"));
}
Example 27
Source Project: baleen Source File: MarkupUtilsTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@code MarkupUtils.addAttribute} keeps earlier values when a
 * second value is added under the same key, as seen through both
 * {@code getAttribute} and {@code getAttributes}.
 *
 * <p>Uses {@code assertTrue} rather than the {@code assert} keyword, which
 * is only evaluated when the JVM runs with assertions enabled.
 */
@Test
public void testAddAttribute() {
    Element e = new Element(Tag.valueOf("p"), "");

    MarkupUtils.addAttribute(e, "key", "value1");
    assertTrue(MarkupUtils.getAttribute(e, "key").contains("value1"));

    MarkupUtils.addAttribute(e, "key", "value2");
    assertTrue(MarkupUtils.getAttribute(e, "key").contains("value1"));
    assertTrue(MarkupUtils.getAttribute(e, "key").contains("value2"));
    assertTrue(MarkupUtils.getAttributes(e, "key").contains("value1"));
    assertTrue(MarkupUtils.getAttributes(e, "key").contains("value2"));
}
Example 28
Source Project: baleen Source File: DataAttributeMapperTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that mapping a bare {@code p} element leaves the collector's
 * annotations {@code null}.
 */
@Test
public void testElementWithoutTypes() {
    Element paragraph = new Element(Tag.valueOf("p"), "");

    mapper.map(jCas, paragraph, collector);

    assertNull(collector.getAnnotations());
}
Example 29
Source Project: baleen Source File: DataAttributeMapperTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that an element annotated with the {@code ProtectiveMarking} type,
 * but with no further attributes, maps to exactly one
 * {@code ProtectiveMarking} annotation whose classification is {@code null}.
 */
@Test
public void testElementWithTypeNoAttributes() {
    Element e = new Element(Tag.valueOf("p"), "");
    MarkupUtils.additionallyAnnotateAsType(
            e, "uk.gov.dstl.baleen.types.metadata.ProtectiveMarking");

    mapper.map(jCas, e, collector);

    assertEquals(1, collector.getAnnotations().size());
    ProtectiveMarking annotation = (ProtectiveMarking) collector.getAnnotations().get(0);
    // assertNull states the intent directly instead of comparing against a null "actual".
    assertNull(annotation.getClassification());
}
Example 30
Source Project: baleen Source File: StructuralAnnotationsTest.java License: Apache License 2.0 | 5 votes |
/**
 * For every tag in the {@code expected} table, maps an empty element of that
 * tag through {@code StructuralAnnotations} and checks the collected
 * annotations against the listed classes, position by position.
 *
 * <p>When no classes are listed for a tag, the collected list must be either
 * {@code null} or empty.
 */
@Test
public void testMap() throws UIMAException {
    final JCas jCas = JCasSingleton.getJCasInstance();
    final StructuralAnnotations sa = new StructuralAnnotations();

    for (final Map.Entry<Tag, Class<?>[]> e : expected.entrySet()) {
        final Element element = new Element(e.getKey(), "");
        final AnnotationCollector collector = new AnnotationCollector();
        sa.map(jCas, element, collector);

        final List<Annotation> annotations = collector.getAnnotations();
        final Class<?>[] classes = e.getValue();

        if (classes == null || classes.length == 0) {
            if (annotations != null) {
                assertTrue(annotations.isEmpty());
            }
        } else {
            // Expected count first, so a failure message labels the two values correctly.
            assertEquals(classes.length, annotations.size());
            for (int i = 0; i < classes.length; i++) {
                final Class<?> c = classes[i];
                assertTrue(c.isInstance(annotations.get(i)));
            }
        }
    }
}