Java Code Examples for org.jsoup.nodes.Attribute
The following examples show how to use
org.jsoup.nodes.Attribute. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: astor Source File: AttributeParseTest.java License: GNU General Public License v2.0 | 7 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example 2
Source Project: jpress Source File: JsoupUtils.java License: GNU Lesser General Public License v3.0 | 6 votes |
@Override protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { //不允许 javascript 开头的 src 和 href if ("src".equalsIgnoreCase(attr.getKey()) || "href".equalsIgnoreCase(attr.getKey())) { String value = attr.getValue(); if (StrUtil.isNotBlank(value) && value.toLowerCase().startsWith("javascript")) { return false; } } //允许 base64 的图片内容 if ("img".equals(tagName) && "src".equals(attr.getKey()) && attr.getValue().startsWith("data:;base64")){ return true; } return super.isSafeAttribute(tagName, el, attr); }
Example 3
Source Project: Android-WYSIWYG-Editor Source File: MacroExtensions.java License: Apache License 2.0 | 6 votes |
/**
 * Turns an HTML element into a macro node: the lower-cased tag name becomes the node content,
 * the element's attributes (if any) become the macro settings, and the view obtained from the
 * editor listener (or an empty macro view when the listener returns {@code null}) is inserted
 * at the child count that was current before rendering.
 *
 * @param element the HTML element to convert
 * @return always {@code null}
 */
@Override
public Node buildNodeFromHTML(Element element) {
    final String macroTag = element.tagName().toLowerCase();
    final Node macroNode = getNodeInstance(EditorType.macro);
    macroNode.content.add(macroTag);

    final List<Attribute> attributeList = element.attributes().asList();
    if (!attributeList.isEmpty()) {
        // Settings stay unset when the element carries no attributes.
        macroNode.macroSettings = new HashMap<>();
        for (Attribute attribute : attributeList) {
            macroNode.macroSettings.put(attribute.getKey(), attribute.getValue());
        }
    }

    final int insertAt = editorCore.getChildCount();
    View macroView = editorCore.getEditorListener()
            .onRenderMacro(macroTag, macroNode.macroSettings, editorCore.getChildCount());
    if (macroView == null) {
        macroView = getEmptyMacro(macroNode.content.get(0), macroNode.macroSettings);
    }

    insertMacro(macroTag, macroView, macroNode.macroSettings, insertAt);
    return null;
}
Example 4
Source Project: Asqatasun Source File: HTMLJsoupCleanerImpl.java License: GNU Affero General Public License v3.0 | 6 votes |
/** * Remove the comments of the page * * @param node */ private void removeMalformedAttributes(Node node) { // as we are removing child nodes while iterating, we cannot use a normal foreach over children, // or will get a concurrent list modification error. int i = 0; while (i < node.childNodes().size()) { Node child = node.childNode(i); for (Attribute attr : child.attributes()) { if (attr.getKey().startsWith("\"") && attr.getKey().endsWith("\"")) { child.removeAttr(attr.getKey()); } } removeMalformedAttributes(child); i++; } }
Example 5
Source Project: Asqatasun Source File: CaptchaElementSelector.java License: GNU Affero General Public License v3.0 | 6 votes |
/**
 * Tells whether the given element itself mentions the captcha keyword.
 *
 * @param element the element to inspect
 * @return {@code false} when the element's node name equals {@code HTML_ELEMENT} or
 *         {@code BODY_ELEMENT} (ignoring case); otherwise {@code true} when the element's own
 *         text or the value of one of its attributes contains {@code CAPTCHA_KEY} (ignoring
 *         case), and {@code false} if none does. Parent elements are not inspected here.
 */
private boolean parseAttributeToExtractCaptcha(Element element) {
    final boolean isHtml = element.nodeName().equalsIgnoreCase(HTML_ELEMENT);
    if (isHtml || element.nodeName().equalsIgnoreCase(BODY_ELEMENT)) {
        return false;
    }

    if (StringUtils.containsIgnoreCase(element.ownText(), CAPTCHA_KEY)) {
        return true;
    }

    for (Attribute attribute : element.attributes()) {
        if (StringUtils.containsIgnoreCase(attribute.getValue(), CAPTCHA_KEY)) {
            return true;
        }
    }
    return false;
}
Example 6
Source Project: flow Source File: WebComponentBootstrapHandler.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a javascript which copies attributes from the {@code element} to
 * the created DOM element identified by {@code elementRef}. If {@code
 * element} contains a {@code src} attribute, its path is prefixed with
 * {@code basePath}. Attribute names and values are escaped so that quotes,
 * backslashes and line breaks cannot terminate the generated string literals.
 *
 * @param writer
 *            response writer
 * @param elementRef
 *            variable name of the element in javascript
 * @param element
 *            jsoup element from which to copy the attributes
 * @param basePath
 *            base path of {@code src} attributes (service url's path)
 * @throws IOException
 *             if {@code writer} is unable to write
 */
private void transferAttribute(Writer writer, String elementRef,
        Element element, String basePath) throws IOException {
    for (Attribute attribute : element.attributes()) {
        String key = attribute.getKey();
        String value = attribute.getValue();
        if (value == null) {
            // A missing value is emitted as an empty string literal.
            value = "";
        } else if ("src".equals(key)) {
            value = modifyPath(basePath, value);
        }
        writer.append(elementRef).append(".setAttribute('")
                .append(escapeForSingleQuotedJs(key)).append("','")
                .append(escapeForSingleQuotedJs(value)).append("');");
    }
}

/**
 * Escapes text so it can be placed between single quotes in a JavaScript string literal.
 *
 * @param text
 *            raw text; {@code null} is rendered as {@code "null"}, matching what
 *            {@code Writer.append(null)} would have written
 * @return the text with backslashes, single quotes, CR and LF escaped
 */
private static String escapeForSingleQuotedJs(String text) {
    if (text == null) {
        return "null";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '\\':
            escaped.append("\\\\");
            break;
        case '\'':
            escaped.append("\\'");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}
Example 7
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 6 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example 8
Source Project: astor Source File: W3CDom.java License: GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example 9
Source Project: astor Source File: AttributeParseTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example 10
Source Project: astor Source File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Builds a fresh element with the same tag and base URI as the source, copying only the
 * attributes the whitelist accepts and then adding the whitelist's enforced attributes.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of source attributes that were rejected
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discarded);
}
Example 11
Source Project: astor Source File: W3CDom.java License: GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example 12
Source Project: astor Source File: AttributeParseTest.java License: GNU General Public License v2.0 | 6 votes |
@Test public void parsesBooleanAttributes() { String html = "<a normal=\"123\" boolean empty=\"\"></a>"; Element el = Jsoup.parse(html).select("a").first(); assertEquals("123", el.attr("normal")); assertEquals("", el.attr("boolean")); assertEquals("", el.attr("empty")); List<Attribute> attributes = el.attributes().asList(); assertEquals("There should be 3 attribute present", 3, attributes.size()); // Assuming the list order always follows the parsed html assertFalse("'normal' attribute should not be boolean", attributes.get(0) instanceof BooleanAttribute); assertTrue("'boolean' attribute should be boolean", attributes.get(1) instanceof BooleanAttribute); assertFalse("'empty' attribute should not be boolean", attributes.get(2) instanceof BooleanAttribute); assertEquals(html, el.outerHtml()); }
Example 13
Source Project: astor Source File: Cleaner.java License: GNU General Public License v2.0 | 6 votes |
/**
 * Builds a fresh element with the same tag and base URI as the source, copying only the
 * attributes the whitelist accepts and then adding the whitelist's enforced attributes.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of source attributes that were rejected
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            discarded++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));
    return new ElementMeta(safeEl, discarded);
}
Example 14
Source Project: astor Source File: W3CDom.java License: GNU General Public License v2.0 | 6 votes |
/** * Finds any namespaces defined in this element. Returns any tag prefix. */ private String updateNamespaces(org.jsoup.nodes.Element el) { // scan the element for namespace declarations // like: xmlns="blah" or xmlns:prefix="blah" Attributes attributes = el.attributes(); for (Attribute attr : attributes) { String key = attr.getKey(); String prefix; if (key.equals(xmlnsKey)) { prefix = ""; } else if (key.startsWith(xmlnsPrefix)) { prefix = key.substring(xmlnsPrefix.length()); } else { continue; } namespaces.put(prefix, attr.getValue()); } // get the element prefix if any int pos = el.tagName().indexOf(":"); return pos > 0 ? el.tagName().substring(0, pos) : ""; }
Example 15
Source Project: jsoup-learning Source File: Whitelist.java License: MIT License | 6 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); if (attributes.containsKey(tag)) { if (attributes.get(tag).contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example 16
Source Project: jsoup-learning Source File: Whitelist.java License: MIT License | 6 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString() + ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example 17
Source Project: metafacture-core Source File: HtmlDecoder.java License: Apache License 2.0 | 6 votes |
/**
 * Walks the children of {@code parent} depth-first and reports each one to the receiver as an
 * entity named after the node, with one literal per attribute. A child without child elements
 * additionally gets a {@code "value"} literal holding its trimmed text, or its data when the
 * text is empty, provided that value is not empty.
 *
 * @param parent element whose children are emitted
 * @param receiver target of the start/literal/end events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attribute : child.attributes()) {
            receiver.literal(attribute.getKey(), attribute.getValue());
        }

        if (child.children().isEmpty()) {
            final String trimmedText = child.text().trim();
            final String value;
            if (trimmedText.isEmpty()) {
                value = child.data();
            } else {
                value = trimmedText;
            }
            if (!value.isEmpty()) {
                receiver.literal("value", value);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
Example 18
Source Project: jstarcraft-core Source File: HtmlNavigator.java License: Apache License 2.0 | 5 votes |
/**
 * Tells whether the given object is an {@code Attribute}.
 *
 * @param object candidate object; {@code null} yields {@code false}
 * @return {@code true} if {@code object} is an instance of {@code Attribute}
 */
@Override
public boolean isAttribute(Object object) {
    return object instanceof Attribute;
}
Example 19
Source Project: jstarcraft-core Source File: HtmlNavigator.java License: Apache License 2.0 | 5 votes |
/**
 * Returns an iterator over the attributes of the context node whose key equals
 * {@code localName}. The namespace arguments are not used.
 *
 * @param contextNode node to read attributes from; cast to {@code Element}
 * @param localName attribute key to match
 * @param namespacePrefix ignored
 * @param namespaceURI ignored
 * @return iterator over the matching attributes, in iteration order of the element's attributes
 */
@Override
public Iterator<Attribute> getAttributeAxisIterator(Object contextNode, String localName, String namespacePrefix, String namespaceURI) {
    final Element element = (Element) contextNode;
    final LinkedList<Attribute> matches = new LinkedList<>();

    final Iterator<Attribute> all = element.attributes().iterator();
    while (all.hasNext()) {
        final Attribute candidate = all.next();
        if (localName.equals(candidate.getKey())) {
            matches.add(candidate);
        }
    }
    return matches.iterator();
}
Example 20
Source Project: zongtui-webcrawler Source File: ElementAdaptor.java License: GNU General Public License v2.0 | 5 votes |
/**
 * Returns the attribute with the given name wrapped as a DOM {@code Attr}.
 *
 * @param name name of the attribute to look up
 * @return the adapted attribute, or {@code null} when the element has no such attribute
 */
@Override
public Attr getAttributeNode(String name) {
    // NOTE(review): assumes `element` is a jsoup Element, whose attr(name) returns "" rather
    // than null for a missing attribute — so the previous `attr(name) == null` test could
    // never report absence. hasAttr is used to detect a missing attribute instead; confirm
    // against the field's declared type.
    if (!element.hasAttr(name)) {
        return null;
    }
    return NodeAdaptors.getAttr(new Attribute(name, element.attr(name)), element);
}
Example 21
Source Project: zongtui-webcrawler Source File: AttributesAdaptor.java License: GNU General Public License v2.0 | 5 votes |
public AttributesAdaptor(Attributes attributes, Element element) { this.attributes = attributes; this.element = element; attrList = new ArrayList<Attr>(); for (Attribute attribute : attributes) { attrList.add(new AttributeAdaptor(attribute,element)); } }
Example 22
Source Project: flow Source File: Html.java License: Apache License 2.0 | 5 votes |
/**
 * Copies the given attribute onto this component's element, substituting an empty string for
 * a {@code null} value.
 *
 * @param attribute attribute to copy
 */
private void setAttribute(Attribute attribute) {
    final String rawValue = attribute.getValue();
    final String safeValue = (rawValue != null) ? rawValue : "";
    getElement().setAttribute(attribute.getKey(), safeValue);
}
Example 23
Source Project: compiler Source File: HtmlVisitor.java License: Apache License 2.0 | 5 votes |
/**
 * Builds a {@code DOC_TYPE} element tagged {@code "!DOCTYPE"} for the given node. Each of the
 * node's attributes is visited and the entry then popped from {@code attributes} is added to
 * the element, which is finally added to the collection on top of {@code elements}.
 *
 * @param node the document type node to convert
 */
public void visit(DocumentType node) {
    final Ast.Element.Builder builder = Ast.Element.newBuilder();
    builder.setKind(ElementKind.DOC_TYPE);
    builder.setTag("!DOCTYPE");

    for (Attribute attribute : node.attributes()) {
        // Visit the attribute, then collect what is popped from the attributes stack.
        visit(attribute);
        builder.addAttributes(attributes.pop());
    }

    elements.peek().add(builder.build());
}
Example 24
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (value.toLowerCase().startsWith(prot)) { return true; } } return false; }
Example 25
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 5 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); Set<AttributeKey> okSet = attributes.get(tag); if (okSet != null && okSet.contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } // might be an enforced attribute? Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag); if (enforcedSet != null) { Attributes expect = getEnforcedAttributes(tagName); String attrKey = attr.getKey(); if (expect.hasKeyIgnoreCase(attrKey)) { return expect.getIgnoreCase(attrKey).equals(attr.getValue()); } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example 26
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example 27
Source Project: astor Source File: W3CDom.java License: GNU General Public License v2.0 | 5 votes |
private void copyAttributes(org.jsoup.nodes.Node source, Element el) { for (Attribute attribute : source.attributes()) { // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.] String key = attribute.getKey().replaceAll("[^-a-zA-Z0-9_:.]", ""); if (key.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*")) el.setAttribute(key, attribute.getValue()); } }
Example 28
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 5 votes |
/** * Test if the supplied attribute is allowed by this whitelist for this tag * @param tagName tag to consider allowing the attribute in * @param el element under test, to confirm protocol * @param attr attribute under test * @return true if allowed */ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { TagName tag = TagName.valueOf(tagName); AttributeKey key = AttributeKey.valueOf(attr.getKey()); Set<AttributeKey> okSet = attributes.get(tag); if (okSet != null && okSet.contains(key)) { if (protocols.containsKey(tag)) { Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); // ok if not defined protocol; otherwise test return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key)); } else { // attribute found, no protocols defined, so OK return true; } } // might be an enforced attribute? Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag); if (enforcedSet != null) { Attributes expect = getEnforcedAttributes(tagName); String attrKey = attr.getKey(); if (expect.hasKeyIgnoreCase(attrKey)) { return expect.getIgnoreCase(attrKey).equals(attr.getValue()); } } // no attributes defined for tag, try :all tag return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); }
Example 29
Source Project: astor Source File: Whitelist.java License: GNU General Public License v2.0 | 5 votes |
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) { // try to resolve relative urls to abs, and optionally update the attribute so output html has abs. // rels without a baseuri get removed String value = el.absUrl(attr.getKey()); if (value.length() == 0) value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols if (!preserveRelativeLinks) attr.setValue(value); for (Protocol protocol : protocols) { String prot = protocol.toString(); if (prot.equals("#")) { // allows anchor links if (isValidAnchor(value)) { return true; } else { continue; } } prot += ":"; if (lowerCase(value).startsWith(prot)) { return true; } } return false; }
Example 30
Source Project: astor Source File: W3CDom.java License: GNU General Public License v2.0 | 5 votes |
private void copyAttributes(org.jsoup.nodes.Node source, Element el) { for (Attribute attribute : source.attributes()) { // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.] String key = attribute.getKey().replaceAll("[^-a-zA-Z0-9_:.]", ""); if (key.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*")) el.setAttribute(key, attribute.getValue()); } }