Java Code Examples for org.jsoup.nodes.Attribute

The following examples show how to use org.jsoup.nodes.Attribute. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: jpress   Source File: JsoupUtils.java    License: GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Decides whether an attribute may be kept on an element.
 *
 * <p>{@code src} and {@code href} values that begin with {@code javascript}
 * are rejected, {@code img} sources that begin with {@code data:;base64} are
 * accepted, and every other attribute is delegated to the parent implementation.
 *
 * @param tagName name of the tag carrying the attribute
 * @param el element under test
 * @param attr attribute under test
 * @return {@code true} if the attribute is considered safe
 */
@Override
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {

    // Reject src and href values that start with "javascript".
    if ("src".equalsIgnoreCase(attr.getKey()) || "href".equalsIgnoreCase(attr.getKey())) {
        String value = attr.getValue();
        if (StrUtil.isNotBlank(value)) {
            // trim(): leading whitespace/control characters must not hide the scheme.
            // Locale.ROOT: with a Turkish default locale "JAVASCRIPT" lower-cases to
            // "javascrıpt" (dotless i) and would slip past the prefix check.
            String normalized = value.trim().toLowerCase(java.util.Locale.ROOT);
            if (normalized.startsWith("javascript")) {
                return false;
            }
        }
    }

    // Allow base64-encoded image content.
    if ("img".equals(tagName) && "src".equals(attr.getKey()) && attr.getValue().startsWith("data:;base64")) {
        return true;
    }

    return super.isSafeAttribute(tagName, el, attr);
}
 
Example 2
Source Project: Android-WYSIWYG-Editor   Source File: MacroExtensions.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a macro node from an HTML element and inserts its rendered view.
 *
 * <p>The lower-cased tag name becomes the node's first content entry and the
 * element's attributes, if there are any, become the node's macro settings.
 * The editor listener is asked to render the macro; when it returns no view
 * an empty macro view is used instead.
 *
 * @param element HTML element describing the macro
 * @return always {@code null}
 */
@Override
public Node buildNodeFromHTML(Element element) {
    final String macroName = element.tagName().toLowerCase();
    final Node macroNode = getNodeInstance(EditorType.macro);
    macroNode.content.add(macroName);

    final List<Attribute> attributes = element.attributes().asList();
    if (!attributes.isEmpty()) {
        // The settings map is only created when there is something to store in it.
        macroNode.macroSettings = new HashMap<>();
        for (Attribute attribute : attributes) {
            macroNode.macroSettings.put(attribute.getKey(), attribute.getValue());
        }
    }

    // Remember the insertion position before the listener gets a chance to run.
    final int insertAt = editorCore.getChildCount();
    View rendered = editorCore.getEditorListener()
            .onRenderMacro(macroName, macroNode.macroSettings, editorCore.getChildCount());
    if (rendered == null) {
        rendered = getEmptyMacro(macroNode.content.get(0), macroNode.macroSettings);
    }

    insertMacro(macroName, rendered, macroNode.macroSettings, insertAt);
    return null;
}
 
Example 3
/**
 * Recursively removes malformed attributes from every descendant of a node.
 * An attribute is treated as malformed when its key both starts and ends
 * with a double quote.
 *
 * @param node root of the subtree to clean; the attributes of {@code node}
 *             itself are not inspected, only those of its descendants
 */
private void removeMalformedAttributes(Node node) {
    for (int i = 0; i < node.childNodes().size(); i++) {
        Node child = node.childNode(i);
        // Iterate over the asList() snapshot rather than the live attribute
        // collection, so that removeAttr() never modifies what is being iterated.
        for (Attribute attr : child.attributes().asList()) {
            String key = attr.getKey();
            if (key.startsWith("\"") && key.endsWith("\"")) {
                child.removeAttr(key);
            }
        }
        removeMalformedAttributes(child);
    }
}
 
Example 4
/**
 * Tells whether an element mentions the captcha keyword.
 *
 * @param element the element to inspect
 * @return {@code false} when the element's node name matches
 * {@code HTML_ELEMENT} or {@code BODY_ELEMENT}; otherwise {@code true} when
 * the element's own text or the value of one of its attributes contains
 * {@code CAPTCHA_KEY} (ignoring case), and {@code false} when none does
 */
private boolean parseAttributeToExtractCaptcha(Element element) {
    final boolean isRootLevelElement = element.nodeName().equalsIgnoreCase(HTML_ELEMENT)
            || element.nodeName().equalsIgnoreCase(BODY_ELEMENT);
    if (isRootLevelElement) {
        return false;
    }

    // The element's own text is checked first, then each attribute value.
    if (StringUtils.containsIgnoreCase(element.ownText(), CAPTCHA_KEY)) {
        return true;
    }
    for (Attribute candidate : element.attributes()) {
        if (StringUtils.containsIgnoreCase(candidate.getValue(), CAPTCHA_KEY)) {
            return true;
        }
    }
    return false;
}
 
Example 5
Source Project: flow   Source File: WebComponentBootstrapHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a javascript which copies attributes from the {@code element} to
 * the created DOM element identified by {@code elementRef}. If {@code
 * element} contains a {@code src} attribute, its path is prefixed with
 * {@code basePath}.
 * <p>
 * Attribute names and values are emitted as single-quoted JavaScript string
 * literals; backslashes, single quotes and line breaks in them are escaped
 * so that they cannot terminate or corrupt the literal.
 *
 * @param writer
 *            response writer
 * @param elementRef
 *            variable name of the element in javascript
 * @param element
 *            jsoup element from which to copy the attributes
 * @param basePath
 *            base path of {@code src} attributes (service url's path)
 * @throws IOException
 *             if {@code writer} is unable to write
 */
private void transferAttribute(Writer writer, String elementRef,
        Element element, String basePath) throws IOException {
    for (Attribute attribute : element.attributes()) {
        writer.append(elementRef).append(".setAttribute('")
                .append(escapeJsSingleQuoted(attribute.getKey()))
                .append("',");
        if (attribute.getValue() == null) {
            writer.append("''");
        } else {
            String path = attribute.getValue();
            if ("src".equals(attribute.getKey())) {
                path = modifyPath(basePath, path);
            }
            writer.append("'").append(escapeJsSingleQuoted(path)).append("'");
        }
        writer.append(");");
    }
}

/**
 * Escapes text for use inside a single-quoted JavaScript string literal.
 *
 * @param text
 *            raw text, may be {@code null}
 * @return the text with backslashes, single quotes, line feeds and
 *         carriage returns escaped; {@code null} is passed through
 *         unchanged so the writer renders it as before
 */
private String escapeJsSingleQuoted(String text) {
    if (text == null) {
        return null;
    }
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '\\':
            escaped.append("\\\\");
            break;
        case '\'':
            escaped.append("\\'");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}
 
Example 6
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks whether this whitelist permits the given attribute on the given tag.
 * When the tag itself does not permit the attribute, the check is repeated
 * once against the {@code :all} pseudo tag.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final boolean allowedForTag = attributes.containsKey(tag) && attributes.get(tag).contains(key);
    if (allowedForTag) {
        if (!protocols.containsKey(tag)) {
            // attribute is allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> restricted = protocols.get(tag);
        if (!restricted.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, restricted.get(key));
    }

    // not allowed for this tag: fall back to :all, unless that is what was just tested
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 7
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records the namespace declarations found on an element and reports the
 * element's tag prefix.
 *
 * <p>Declarations look like {@code xmlns="blah"} or {@code xmlns:prefix="blah"};
 * each one is stored in {@code namespaces} under its prefix (the empty string
 * for the un-prefixed form).
 *
 * @param el element to scan
 * @return the part of the tag name before the first colon, or an empty
 * string when there is no colon or the tag name starts with one
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        if (name.equals(xmlnsKey)) {
            namespaces.put("", declaration.getValue());
        } else if (name.startsWith(xmlnsPrefix)) {
            namespaces.put(name.substring(xmlnsPrefix.length()), declaration.getValue());
        }
        // any other attribute is not a namespace declaration and is skipped
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example 8
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies that an attribute written without a value is parsed as a
 * {@code BooleanAttribute}, that attributes written with a value (including
 * an empty one) are not, and that the element serialises back to the
 * original markup.
 */
@Test
public void parsesBooleanAttributes() {
    final String markup = "<a normal=\"123\" boolean empty=\"\"></a>";
    final Element anchor = Jsoup.parse(markup).select("a").first();

    assertEquals("123", anchor.attr("normal"));
    assertEquals("", anchor.attr("boolean"));
    assertEquals("", anchor.attr("empty"));

    final List<Attribute> parsed = anchor.attributes().asList();
    assertEquals("There should be 3 attribute present", 3, parsed.size());

    // Relies on the list keeping the order in which the attributes appear in the markup.
    assertFalse("'normal' attribute should not be boolean", parsed.get(0) instanceof BooleanAttribute);
    assertTrue("'boolean' attribute should be boolean", parsed.get(1) instanceof BooleanAttribute);
    assertFalse("'empty' attribute should not be boolean", parsed.get(2) instanceof BooleanAttribute);

    assertEquals(markup, anchor.outerHtml());
}
 
Example 9
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a fresh element with the same tag and base URI as the source,
 * carrying only the source attributes that the whitelist accepts plus the
 * attributes the whitelist enforces for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of source attributes
 * that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element safeCopy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        kept.put(candidate);
    }

    // enforced attributes are added after the accepted ones
    kept.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeCopy, dropped);
}
 
Example 10
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records the namespace declarations found on an element and reports the
 * element's tag prefix.
 *
 * <p>Declarations look like {@code xmlns="blah"} or {@code xmlns:prefix="blah"};
 * each one is stored in {@code namespaces} under its prefix (the empty string
 * for the un-prefixed form).
 *
 * @param el element to scan
 * @return the part of the tag name before the first colon, or an empty
 * string when there is no colon or the tag name starts with one
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        if (name.equals(xmlnsKey)) {
            namespaces.put("", declaration.getValue());
        } else if (name.startsWith(xmlnsPrefix)) {
            namespaces.put(name.substring(xmlnsPrefix.length()), declaration.getValue());
        }
        // any other attribute is not a namespace declaration and is skipped
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example 11
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies that an attribute written without a value is parsed as a
 * {@code BooleanAttribute}, that attributes written with a value (including
 * an empty one) are not, and that the element serialises back to the
 * original markup.
 */
@Test
public void parsesBooleanAttributes() {
    final String markup = "<a normal=\"123\" boolean empty=\"\"></a>";
    final Element anchor = Jsoup.parse(markup).select("a").first();

    assertEquals("123", anchor.attr("normal"));
    assertEquals("", anchor.attr("boolean"));
    assertEquals("", anchor.attr("empty"));

    final List<Attribute> parsed = anchor.attributes().asList();
    assertEquals("There should be 3 attribute present", 3, parsed.size());

    // Relies on the list keeping the order in which the attributes appear in the markup.
    assertFalse("'normal' attribute should not be boolean", parsed.get(0) instanceof BooleanAttribute);
    assertTrue("'boolean' attribute should be boolean", parsed.get(1) instanceof BooleanAttribute);
    assertFalse("'empty' attribute should not be boolean", parsed.get(2) instanceof BooleanAttribute);

    assertEquals(markup, anchor.outerHtml());
}
 
Example 12
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a fresh element with the same tag and base URI as the source,
 * carrying only the source attributes that the whitelist accepts plus the
 * attributes the whitelist enforces for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of source attributes
 * that were discarded
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element safeCopy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        kept.put(candidate);
    }

    // enforced attributes are added after the accepted ones
    kept.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeCopy, dropped);
}
 
Example 13
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records the namespace declarations found on an element and reports the
 * element's tag prefix.
 *
 * <p>Declarations look like {@code xmlns="blah"} or {@code xmlns:prefix="blah"};
 * each one is stored in {@code namespaces} under its prefix (the empty string
 * for the un-prefixed form).
 *
 * @param el element to scan
 * @return the part of the tag name before the first colon, or an empty
 * string when there is no colon or the tag name starts with one
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        if (name.equals(xmlnsKey)) {
            namespaces.put("", declaration.getValue());
        } else if (name.startsWith(xmlnsPrefix)) {
            namespaces.put(name.substring(xmlnsPrefix.length()), declaration.getValue());
        }
        // any other attribute is not a namespace declaration and is skipped
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example 14
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies that an attribute written without a value is parsed as a
 * {@code BooleanAttribute}, that attributes written with a value (including
 * an empty one) are not, and that the element serialises back to the
 * original markup.
 */
@Test
public void parsesBooleanAttributes() {
    final String markup = "<a normal=\"123\" boolean empty=\"\"></a>";
    final Element anchor = Jsoup.parse(markup).select("a").first();

    assertEquals("123", anchor.attr("normal"));
    assertEquals("", anchor.attr("boolean"));
    assertEquals("", anchor.attr("empty"));

    final List<Attribute> parsed = anchor.attributes().asList();
    assertEquals("There should be 3 attribute present", 3, parsed.size());

    // Relies on the list keeping the order in which the attributes appear in the markup.
    assertFalse("'normal' attribute should not be boolean", parsed.get(0) instanceof BooleanAttribute);
    assertTrue("'boolean' attribute should be boolean", parsed.get(1) instanceof BooleanAttribute);
    assertFalse("'empty' attribute should not be boolean", parsed.get(2) instanceof BooleanAttribute);

    assertEquals(markup, anchor.outerHtml());
}
 
Example 15
Source Project: jsoup-learning   Source File: Whitelist.java    License: MIT License 6 votes vote down vote up
/**
 * Checks whether this whitelist permits the given attribute on the given tag.
 * When the tag itself does not permit the attribute, the check is repeated
 * once against the {@code :all} pseudo tag.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final boolean allowedForTag = attributes.containsKey(tag) && attributes.get(tag).contains(key);
    if (allowedForTag) {
        if (!protocols.containsKey(tag)) {
            // attribute is allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> restricted = protocols.get(tag);
        if (!restricted.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, restricted.get(key));
    }

    // not allowed for this tag: fall back to :all, unless that is what was just tested
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 16
Source Project: jsoup-learning   Source File: Whitelist.java    License: MIT License 6 votes vote down vote up
/**
 * Checks whether the attribute's URL starts with one of the allowed protocols.
 *
 * <p>The value tested is the absolute form of the attribute's URL when one
 * can be produced, otherwise the raw attribute value. Unless
 * {@code preserveRelativeLinks} is set, the attribute is overwritten with the
 * value that was tested.
 *
 * @param el element owning the attribute, used to resolve the URL
 * @param attr attribute under test; may be modified as described above
 * @param protocols protocols that are acceptable for this attribute
 * @return {@code true} if the value starts with {@code <protocol>:} for at
 * least one of the given protocols, compared case-insensitively
 */
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
    // try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
    // rels without a baseuri get removed
    String value = el.absUrl(attr.getKey());
    if (value.length() == 0) {
        value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols
    }
    if (!preserveRelativeLinks) {
        attr.setValue(value);
    }

    // Lower-case once, outside the loop, and independently of the default locale:
    // under e.g. a Turkish locale "MAILTO:" would become "maılto:" and never match.
    final String lowered = value.toLowerCase(java.util.Locale.ROOT);
    for (Protocol protocol : protocols) {
        String prot = protocol.toString() + ":";
        if (lowered.startsWith(prot)) {
            return true;
        }
    }
    return false;
}
 
Example 17
Source Project: metafacture-core   Source File: HtmlDecoder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Walks the child elements of {@code parent} depth-first and reports them to
 * the receiver.
 *
 * <p>Each child opens an entity named after its node name, emits one literal
 * per attribute, emits a {@code value} literal for childless elements that
 * have non-empty content (trimmed text, or the element's data when the text
 * is empty), recurses into its own children and finally closes the entity.
 *
 * @param parent element whose children are processed
 * @param receiver target of the generated events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attr : child.attributes()) {
            receiver.literal(attr.getKey(), attr.getValue());
        }

        final boolean isLeaf = child.children().isEmpty();
        if (isLeaf) {
            final String trimmedText = child.text().trim();
            final String content;
            if (trimmedText.isEmpty()) {
                // no visible text: fall back to the element's data content
                content = child.data();
            } else {
                content = trimmedText;
            }
            if (!content.isEmpty()) {
                receiver.literal("value", content);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
 
Example 18
Source Project: jstarcraft-core   Source File: HtmlNavigator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the given object is an attribute.
 *
 * @param object object to classify, may be {@code null}
 * @return {@code true} if {@code object} is an {@code Attribute} instance
 */
@Override
public boolean isAttribute(Object object) {
    return object instanceof Attribute;
}
 
Example 19
Source Project: jstarcraft-core   Source File: HtmlNavigator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Iterates over the attributes of the context element whose key equals the
 * requested local name. The namespace arguments are not consulted.
 *
 * @param contextNode node whose attributes are searched; must be an {@code Element}
 * @param localName attribute key to match
 * @param namespacePrefix unused
 * @param namespaceURI unused
 * @return iterator over the matching attributes, in attribute order
 */
@Override
public Iterator<Attribute> getAttributeAxisIterator(Object contextNode, String localName, String namespacePrefix, String namespaceURI) {
    final Element contextElement = (Element) contextNode;
    final LinkedList<Attribute> matches = new LinkedList<>();
    for (Attribute candidate : contextElement.attributes()) {
        if (!localName.equals(candidate.getKey())) {
            continue;
        }
        matches.add(candidate);
    }
    return matches.iterator();
}
 
Example 20
/**
 * Returns the attribute node with the given name.
 *
 * @param name name of the attribute to look up
 * @return the adapted attribute, or {@code null} if the element has no
 * attribute with that name
 */
@Override
public Attr getAttributeNode(String name) {
    // NOTE(review): assumes element is a jsoup Element, whose attr() yields an
    // empty string rather than null for a missing attribute; presence therefore
    // has to be tested with hasAttr() for the null return to ever happen.
    if (!element.hasAttr(name)) {
        return null;
    }
    return NodeAdaptors.getAttr(new Attribute(name, element.attr(name)), element);
}
 
Example 21
public AttributesAdaptor(Attributes attributes, Element element) {
    this.attributes = attributes;
    this.element = element;
    attrList = new ArrayList<Attr>();
    for (Attribute attribute : attributes) {
        attrList.add(new AttributeAdaptor(attribute,element));
    }
}
 
Example 22
Source Project: flow   Source File: Html.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies an attribute onto this component's element, substituting an empty
 * string for a {@code null} value.
 *
 * @param attribute attribute to copy
 */
private void setAttribute(Attribute attribute) {
    final String key = attribute.getKey();
    final String rawValue = attribute.getValue();
    getElement().setAttribute(key, rawValue != null ? rawValue : "");
}
 
Example 23
Source Project: compiler   Source File: HtmlVisitor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a document type node into a {@code DOC_TYPE} element tagged
 * {@code !DOCTYPE} and adds it to the collection on top of {@code elements}.
 *
 * @param node document type node to convert
 */
public void visit(DocumentType node) {
	final Ast.Element.Builder docType = Ast.Element.newBuilder();
	docType.setKind(ElementKind.DOC_TYPE);
	docType.setTag("!DOCTYPE");

	for (Attribute attribute : node.attributes()) {
		// presumably visit(Attribute) pushes its result onto the attributes stack,
		// which is popped straight away here - verify against that overload
		visit(attribute);
		docType.addAttributes(attributes.pop());
	}

	elements.peek().add(docType.build());
}
 
Example 24
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks whether the attribute's URL is acceptable for one of the allowed protocols.
 *
 * <p>The value tested is the absolute form of the attribute's URL when one
 * can be produced, otherwise the raw attribute value. Unless
 * {@code preserveRelativeLinks} is set, the attribute is overwritten with the
 * value that was tested.
 *
 * @param el element owning the attribute, used to resolve the URL
 * @param attr attribute under test; may be modified as described above
 * @param protocols protocols that are acceptable for this attribute
 * @return {@code true} if the value is a valid anchor and {@code #} is among
 * the protocols, or if the value starts with {@code <protocol>:} for at least
 * one of the other protocols, compared case-insensitively
 */
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
    // try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
    // rels without a baseuri get removed
    String value = el.absUrl(attr.getKey());
    if (value.length() == 0) {
        value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols
    }
    if (!preserveRelativeLinks) {
        attr.setValue(value);
    }

    // Lower-case once, outside the loop, and independently of the default locale:
    // under e.g. a Turkish locale "MAILTO:" would become "maılto:" and never match.
    final String lowered = value.toLowerCase(java.util.Locale.ROOT);
    for (Protocol protocol : protocols) {
        String prot = protocol.toString();

        if (prot.equals("#")) { // allows anchor links
            if (isValidAnchor(value)) {
                return true;
            }
            continue;
        }

        if (lowered.startsWith(prot + ":")) {
            return true;
        }
    }
    return false;
}
 
Example 25
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks whether this whitelist permits the given attribute on the given tag.
 *
 * <p>An attribute is accepted when it is listed for the tag and passes any
 * protocol restriction, or when it matches an attribute enforced for the
 * tag. When neither rule decides, the check is repeated once against the
 * {@code :all} pseudo tag.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final Set<AttributeKey> permitted = attributes.get(tag);
    final boolean explicitlyAllowed = permitted != null && permitted.contains(key);
    if (explicitlyAllowed) {
        if (!protocols.containsKey(tag)) {
            // attribute is allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> restricted = protocols.get(tag);
        if (!restricted.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, restricted.get(key));
    }

    // an enforced attribute is acceptable only when it carries the enforced value
    final Map<AttributeKey, AttributeValue> enforcedForTag = enforcedAttributes.get(tag);
    if (enforcedForTag != null) {
        final Attributes enforced = getEnforcedAttributes(tagName);
        final String rawKey = attr.getKey();
        if (enforced.hasKeyIgnoreCase(rawKey)) {
            return enforced.getIgnoreCase(rawKey).equals(attr.getValue());
        }
    }

    // nothing decided for this tag: fall back to :all, unless that is what was just tested
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 26
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks whether the attribute's URL is acceptable for one of the allowed protocols.
 *
 * <p>The value tested is the absolute form of the attribute's URL when one
 * can be produced, otherwise the raw attribute value. Unless
 * {@code preserveRelativeLinks} is set, the attribute is overwritten with the
 * value that was tested.
 *
 * @param el element owning the attribute, used to resolve the URL
 * @param attr attribute under test; may be modified as described above
 * @param protocols protocols that are acceptable for this attribute
 * @return {@code true} if the value is a valid anchor and {@code #} is among
 * the protocols, or if the lower-cased value starts with {@code <protocol>:}
 * for at least one of the other protocols
 */
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
    // Prefer the absolute URL; when none can be built, test the raw value so
    // that custom, unknown protocols still get a chance.
    final String absolute = el.absUrl(attr.getKey());
    final String value = absolute.length() == 0 ? attr.getValue() : absolute;

    // Optionally write the tested value back so the output html has it.
    if (!preserveRelativeLinks) {
        attr.setValue(value);
    }

    for (Protocol allowed : protocols) {
        final String scheme = allowed.toString();

        if (scheme.equals("#")) {
            // "#" stands for anchor links, which are validated separately
            if (isValidAnchor(value)) {
                return true;
            }
            continue;
        }

        if (lowerCase(value).startsWith(scheme + ":")) {
            return true;
        }
    }
    return false;
}
 
Example 27
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Copies the attributes of a jsoup node onto the target element, keeping
 * only those whose name is usable as an XML attribute name.
 *
 * <p>Characters outside {@code [-a-zA-Z0-9_:.]} are stripped from each key;
 * the attribute is copied only if what remains starts with a letter,
 * underscore or colon.
 *
 * @param source node to read the attributes from
 * @param el element that receives the attributes
 */
private void copyAttributes(org.jsoup.nodes.Node source, Element el) {
    for (Attribute sourceAttr : source.attributes()) {
        // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.]
        final String sanitizedName = sourceAttr.getKey().replaceAll("[^-a-zA-Z0-9_:.]", "");
        final boolean isValidXmlName = sanitizedName.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*");
        if (!isValidXmlName) {
            continue;
        }
        el.setAttribute(sanitizedName, sourceAttr.getValue());
    }
}
 
Example 28
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks whether this whitelist permits the given attribute on the given tag.
 *
 * <p>An attribute is accepted when it is listed for the tag and passes any
 * protocol restriction, or when it matches an attribute enforced for the
 * tag. When neither rule decides, the check is repeated once against the
 * {@code :all} pseudo tag.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final Set<AttributeKey> permitted = attributes.get(tag);
    final boolean explicitlyAllowed = permitted != null && permitted.contains(key);
    if (explicitlyAllowed) {
        if (!protocols.containsKey(tag)) {
            // attribute is allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> restricted = protocols.get(tag);
        if (!restricted.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, restricted.get(key));
    }

    // an enforced attribute is acceptable only when it carries the enforced value
    final Map<AttributeKey, AttributeValue> enforcedForTag = enforcedAttributes.get(tag);
    if (enforcedForTag != null) {
        final Attributes enforced = getEnforcedAttributes(tagName);
        final String rawKey = attr.getKey();
        if (enforced.hasKeyIgnoreCase(rawKey)) {
            return enforced.getIgnoreCase(rawKey).equals(attr.getValue());
        }
    }

    // nothing decided for this tag: fall back to :all, unless that is what was just tested
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 29
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks whether the attribute's URL is acceptable for one of the allowed protocols.
 *
 * <p>The value tested is the absolute form of the attribute's URL when one
 * can be produced, otherwise the raw attribute value. Unless
 * {@code preserveRelativeLinks} is set, the attribute is overwritten with the
 * value that was tested.
 *
 * @param el element owning the attribute, used to resolve the URL
 * @param attr attribute under test; may be modified as described above
 * @param protocols protocols that are acceptable for this attribute
 * @return {@code true} if the value is a valid anchor and {@code #} is among
 * the protocols, or if the lower-cased value starts with {@code <protocol>:}
 * for at least one of the other protocols
 */
private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
    // Prefer the absolute URL; when none can be built, test the raw value so
    // that custom, unknown protocols still get a chance.
    final String absolute = el.absUrl(attr.getKey());
    final String value = absolute.length() == 0 ? attr.getValue() : absolute;

    // Optionally write the tested value back so the output html has it.
    if (!preserveRelativeLinks) {
        attr.setValue(value);
    }

    for (Protocol allowed : protocols) {
        final String scheme = allowed.toString();

        if (scheme.equals("#")) {
            // "#" stands for anchor links, which are validated separately
            if (isValidAnchor(value)) {
                return true;
            }
            continue;
        }

        if (lowerCase(value).startsWith(scheme + ":")) {
            return true;
        }
    }
    return false;
}
 
Example 30
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Copies the attributes of a jsoup node onto the target element, keeping
 * only those whose name is usable as an XML attribute name.
 *
 * <p>Characters outside {@code [-a-zA-Z0-9_:.]} are stripped from each key;
 * the attribute is copied only if what remains starts with a letter,
 * underscore or colon.
 *
 * @param source node to read the attributes from
 * @param el element that receives the attributes
 */
private void copyAttributes(org.jsoup.nodes.Node source, Element el) {
    for (Attribute sourceAttr : source.attributes()) {
        // valid xml attribute names are: ^[a-zA-Z_:][-a-zA-Z0-9_:.]
        final String sanitizedName = sourceAttr.getKey().replaceAll("[^-a-zA-Z0-9_:.]", "");
        final boolean isValidXmlName = sanitizedName.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*");
        if (!isValidXmlName) {
            continue;
        }
        el.setAttribute(sanitizedName, sourceAttr.getValue());
    }
}