Java Code Examples for org.jsoup.nodes.Attributes

The following examples show how to use org.jsoup.nodes.Attributes. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: LicenseScout   Source File: HtmlExporterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the exported HTML document.
 *
 * <p>Every {@code <meta>} element that actually carries an {@code http-equiv}
 * attribute must have a {@code content} attribute ending with
 * {@code charset=<output charset name>}. In addition, the four result tables
 * must be present (looked up by element id).
 *
 * @param testVariant   variant under test; supplies the expected output charset
 * @param resultContent HTML text to check
 */
@Override
protected void assertResultContent(TestVariant testVariant, final String resultContent) {
    final Document doc = Jsoup.parse(resultContent);
    for (final Element element : doc.getElementsByTag("meta")) {
        final Attributes attributes = element.attributes();
        // Attributes.get() yields "" (never null) for a missing key, so a null
        // check would match every <meta> element; hasKey() tests real presence.
        if (attributes.hasKey("http-equiv")) {
            final String contentAttribute = attributes.get("content");
            final String expected = "charset=" + testVariant.getOutputCharset().name();
            Assert.assertTrue("Encoding", contentAttribute.endsWith(expected));
        }
    }
    Assert.assertNotNull("Detection statistics table present", doc.getElementById("detection_statistics_table"));
    Assert.assertNotNull("Legal statistics table present", doc.getElementById("legal_statistics_table"));
    Assert.assertNotNull("Genral statistics table present", doc.getElementById("general_statistics_table"));
    Assert.assertNotNull("Main table present", doc.getElementById("license_table"));
}
 
Example 2
Source Project: flow   Source File: Html.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the given markup as a body fragment and applies its single top
 * level element to this component: the element's tag name becomes the new
 * root element, each of its attributes is passed to {@code setAttribute},
 * and its inner HTML (not pretty-printed) is passed to {@code setInnerHtml}.
 *
 * @param outerHtml markup expected to contain exactly one top level element
 * @throws IllegalArgumentException if the fragment does not contain exactly
 *         one top level element
 */
private void setOuterHtml(String outerHtml) {
    Document parsed = Jsoup.parseBodyFragment(outerHtml);
    int topLevelCount = parsed.body().children().size();

    if (topLevelCount != 1) {
        StringBuilder problem = new StringBuilder(
                "HTML must contain exactly one top level element (ignoring text nodes). Found ");
        problem.append(topLevelCount);
        if (topLevelCount > 1) {
            // list the offending tags so the caller can see what was found
            String joinedTags = parsed.body().children().stream()
                    .map(org.jsoup.nodes.Element::tagName)
                    .collect(Collectors.joining(", "));
            problem.append(" elements with the tag names ").append(joinedTags);
        }
        throw new IllegalArgumentException(problem.toString());
    }

    org.jsoup.nodes.Element single = parsed.body().child(0);

    Component.setElement(this, new Element(single.tagName()));
    single.attributes().forEach(this::setAttribute);

    // pretty printing is switched off before the inner HTML is read
    parsed.outputSettings().prettyPrint(false);
    setInnerHtml(single.html());
}
 
Example 3
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element and returns the
 * element's tag prefix.
 *
 * @param el element whose attributes are scanned
 * @return the part of the tag name before the first ':' or an empty string
 *         when there is no ':' after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    // declarations look like xmlns="blah" (default) or xmlns:prefix="blah"
    for (Attribute attribute : el.attributes()) {
        final String name = attribute.getKey();
        final boolean isDefaultDeclaration = name.equals(xmlnsKey);
        if (!isDefaultDeclaration && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String declaredPrefix = isDefaultDeclaration
                ? ""
                : name.substring(xmlnsPrefix.length());
        namespaces.put(declaredPrefix, attribute.getValue());
    }

    // the element's own prefix, if it has one
    final String tag = el.tagName();
    final int colon = tag.indexOf(":");
    if (colon > 0) {
        return tag.substring(0, colon);
    }
    return "";
}
 
Example 4
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a new element with the same tag and base URI as the source, copying
 * only the attributes the whitelist accepts and then adding the whitelist's
 * enforced attributes for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the count of rejected attributes
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        kept.put(candidate);
    }

    // enforced attributes are added after the filtered copy
    kept.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
 
Example 5
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element and returns the
 * element's tag prefix.
 *
 * @param el element whose attributes are scanned
 * @return the part of the tag name before the first ':' or an empty string
 *         when there is no ':' after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    // declarations look like xmlns="blah" (default) or xmlns:prefix="blah"
    for (Attribute attribute : el.attributes()) {
        final String name = attribute.getKey();
        final boolean isDefaultDeclaration = name.equals(xmlnsKey);
        if (!isDefaultDeclaration && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String declaredPrefix = isDefaultDeclaration
                ? ""
                : name.substring(xmlnsPrefix.length());
        namespaces.put(declaredPrefix, attribute.getValue());
    }

    // the element's own prefix, if it has one
    final String tag = el.tagName();
    final int colon = tag.indexOf(":");
    if (colon > 0) {
        return tag.substring(0, colon);
    }
    return "";
}
 
Example 6
Source Project: astor   Source File: Cleaner.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a new element with the same tag and base URI as the source, copying
 * only the attributes the whitelist accepts and then adding the whitelist's
 * enforced attributes for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the count of rejected attributes
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        kept.put(candidate);
    }

    // enforced attributes are added after the filtered copy
    kept.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
 
Example 7
Source Project: astor   Source File: W3CDom.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element and returns the
 * element's tag prefix.
 *
 * @param el element whose attributes are scanned
 * @return the part of the tag name before the first ':' or an empty string
 *         when there is no ':' after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    // declarations look like xmlns="blah" (default) or xmlns:prefix="blah"
    for (Attribute attribute : el.attributes()) {
        final String name = attribute.getKey();
        final boolean isDefaultDeclaration = name.equals(xmlnsKey);
        if (!isDefaultDeclaration && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String declaredPrefix = isDefaultDeclaration
                ? ""
                : name.substring(xmlnsPrefix.length());
        namespaces.put(declaredPrefix, attribute.getValue());
    }

    // the element's own prefix, if it has one
    final String tag = el.tagName();
    final int colon = tag.indexOf(":");
    if (colon > 0) {
        return tag.substring(0, colon);
    }
    return "";
}
 
Example 8
Source Project: metafacture-core   Source File: HtmlDecoder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Recursively emits the children of {@code parent} to the receiver.
 *
 * <p>For each child element an entity named after the node is started, each
 * attribute is emitted as a literal, and - when the child has no child
 * elements - its trimmed text (or, if that is empty, its data) is emitted as
 * a literal named {@code "value"} unless it is empty. The child is then
 * processed recursively before the entity is closed.
 *
 * @param parent   element whose children are emitted
 * @param receiver target of the entity and literal events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attr : child.attributes()) {
            receiver.literal(attr.getKey(), attr.getValue());
        }

        if (child.children().isEmpty()) {
            final String trimmedText = child.text().trim();
            final String value;
            if (trimmedText.isEmpty()) {
                // no visible text: fall back to the element's data
                value = child.data();
            } else {
                value = trimmedText;
            }
            if (!value.isEmpty()) {
                receiver.literal("value", value);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
 
Example 9
/**
 * Walks the element tree depth-first and expands color codes: in the element
 * itself when it is a style tag, in each of its style attributes, and then
 * in all of its children.
 *
 * @param element root element of (sub-)tree to traverse
 */
private static void traverse(Element element) {
	if (isStyleTag(element)) {
		expandColorCodeInStyleTag(element);
	}

	// handle this element's own attributes before descending
	element.attributes().asList().forEach(attr -> {
		if (isStyleAttribute(attr)) {
			expandColorCodeInAttribute(attr);
		}
	});

	for (Element child : element.children()) {
		traverse(child);
	}
}
 
Example 10
Source Project: calendar-component   Source File: Calendar.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the calendar configuration from the design element after the
 * superclass has read its part.
 *
 * <p>Supported attributes, each applied only when present:
 * {@code time-zone}, {@code time-format}, {@code start-date} and
 * {@code end-date}. The time zone is applied first because the start and end
 * dates are converted using {@code getZoneId()}.
 *
 * @param design        element holding the design attributes
 * @param designContext context passed through to the superclass
 */
@Override
public void readDesign(Element design, DesignContext designContext) {
    super.readDesign(design, designContext);

    Attributes attr = design.attributes();

    if (design.hasAttr("time-zone")) {
        // read the zone from "time-zone" itself; previously this read "end-date"
        setZoneId(ZoneId.of(DesignAttributeHandler.readAttribute("time-zone", attr, String.class)));
    }

    if (design.hasAttr("time-format")) {
        // enum constants are looked up as "Format" + upper-cased attribute value
        setTimeFormat(TimeFormat.valueOf(
                "Format" + design.attr("time-format").toUpperCase()));
    }

    if (design.hasAttr("start-date")) {
        setStartDate(
                ZonedDateTime.ofInstant(DesignAttributeHandler.readAttribute("start-date", attr, Date.class)
                        .toInstant(), getZoneId()));
    }

    if (design.hasAttr("end-date")) {
        setEndDate(
                ZonedDateTime.ofInstant(DesignAttributeHandler.readAttribute("end-date", attr, Date.class)
                        .toInstant(), getZoneId()));
    }
}
 
Example 11
/**
 * Stores the given attributes and owning element, and builds one
 * {@code AttributeAdaptor} per attribute, in iteration order.
 *
 * @param attributes attributes to adapt
 * @param element    element passed to each created adaptor
 */
public AttributesAdaptor(Attributes attributes, Element element) {
    this.element = element;
    this.attributes = attributes;

    attrList = new ArrayList<Attr>();
    for (Attribute source : attributes) {
        AttributeAdaptor adapted = new AttributeAdaptor(source, element);
        attrList.add(adapted);
    }
}
 
Example 12
Source Project: baleen   Source File: TemplateRecordDefinitionAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies the attributes of the begin tag to the record definition.
 *
 * <p>The tag is parsed with Jsoup as an HTML body fragment and its first
 * element is inspected. When the repeat attribute is present, repeat is set
 * to {@code true} for a null or empty value, otherwise to the boolean parsed
 * from the value.
 *
 * @param recordDefinition the record definition to update
 * @param beginText the begin tag of the record definition
 */
private void addAttributes(TemplateRecordDefinition recordDefinition, String beginText) {
  Attributes tagAttributes = Jsoup.parseBodyFragment(beginText).body().child(0).attributes();
  if (!tagAttributes.hasKey(REPEAT_ATTRIBUTE)) {
    return;
  }

  String repeatValue = tagAttributes.get(REPEAT_ATTRIBUTE);
  // a bare attribute (no value) counts as "repeat"
  boolean repeat = Strings.isNullOrEmpty(repeatValue) || Boolean.parseBoolean(repeatValue);
  recordDefinition.setRepeat(repeat);
}
 
Example 13
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects the enforced attributes configured for a tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding the enforced key/value pairs for
 *         the tag; empty when none are registered
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    for (Map.Entry<AttributeKey, AttributeValue> pair : enforcedAttributes.get(tag).entrySet()) {
        enforced.put(pair.getKey().toString(), pair.getValue().toString());
    }
    return enforced;
}
 
Example 14
Source Project: astor   Source File: TreeBuilder.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed; in that case a new start tag token is
 * created so the in-use token is not recycled.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // shared token is free: recycle it
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // shared token is in use, so build a separate one
    return process(new Token.StartTag().nameAttr(name, attrs));
}
 
Example 15
Source Project: astor   Source File: Token.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs the superclass reset and then installs a new {@code Attributes}
 * instance on this tag.
 *
 * @return this tag
 */
@Override
Tag reset() {
    super.reset();
    // todo - would prefer attributes to be null here, but need to check Element assertions
    this.attributes = new Attributes();
    return this;
}
 
Example 16
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses a sloppily written attribute string and checks that all seven
 * attributes are recovered with the expected values.
 */
@Test public void parsesRoughAttributeString() {
    String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
    Attributes attr = Jsoup.parse(html).getElementsByTag("a").get(0).attributes();

    // expected {key, value} pairs; qux and zim are valueless attributes
    String[][] expectedPairs = {
        {"id", "123"},
        {"class", "baz = 'bar'"},
        {"style", "border: 2px"},
        {"qux", ""},
        {"zim", ""},
        {"foo", "12"},
        {"mux", "18"},
    };

    assertEquals(7, attr.size());
    for (String[] pair : expectedPairs) {
        assertEquals(pair[1], attr.get(pair[0]));
    }
}
 
Example 17
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * An attribute written as {@code =empty} is expected to parse to a single
 * attribute whose key is {@code "=empty"} and whose value is empty.
 */
@Test public void canStartWithEq() {
    Attributes attr = Jsoup.parse("<a =empty />")
            .getElementsByTag("a")
            .get(0)
            .attributes();

    assertEquals(1, attr.size());
    assertTrue(attr.hasKey("=empty"));
    assertEquals("", attr.get("=empty"));
}
 
Example 18
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decides whether the whitelist permits the given attribute on the given tag.
 *
 * <p>Checks run in this order: the tag's allowed attribute set (including a
 * protocol test when protocols are registered for the attribute), then the
 * tag's enforced attributes (the value must match the enforced one), and
 * finally the same checks for the pseudo tag {@code ":all"}.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final Set<AttributeKey> allowed = attributes.get(tag);
    if (allowed != null && allowed.contains(key)) {
        if (!protocols.containsKey(tag)) {
            // attribute allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> protocolsByAttr = protocols.get(tag);
        if (!protocolsByAttr.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, protocolsByAttr.get(key));
    }

    // not explicitly allowed - it may still match an enforced attribute
    if (enforcedAttributes.get(tag) != null) {
        final Attributes enforced = getEnforcedAttributes(tagName);
        final String attrName = attr.getKey();
        if (enforced.hasKeyIgnoreCase(attrName)) {
            return enforced.getIgnoreCase(attrName).equals(attr.getValue());
        }
    }

    // nothing matched for this tag: fall back to the :all tag, once
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 19
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects the enforced attributes configured for a tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding the enforced key/value pairs for
 *         the tag; empty when none are registered
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    for (Map.Entry<AttributeKey, AttributeValue> pair : enforcedAttributes.get(tag).entrySet()) {
        enforced.put(pair.getKey().toString(), pair.getValue().toString());
    }
    return enforced;
}
 
Example 20
Source Project: astor   Source File: TreeBuilder.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed; in that case a new start tag token is
 * created so the in-use token is not recycled.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // shared token is free: recycle it
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // shared token is in use, so build a separate one
    return process(new Token.StartTag().nameAttr(name, attrs));
}
 
Example 21
Source Project: astor   Source File: Token.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs the superclass reset and then installs a new {@code Attributes}
 * instance on this tag.
 *
 * @return this tag
 */
@Override
Tag reset() {
    super.reset();
    // todo - would prefer attributes to be null here, but need to check Element assertions
    this.attributes = new Attributes();
    return this;
}
 
Example 22
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses a sloppily written attribute string and checks that all seven
 * attributes are recovered with the expected values.
 */
@Test public void parsesRoughAttributeString() {
    String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
    Attributes attr = Jsoup.parse(html).getElementsByTag("a").get(0).attributes();

    // expected {key, value} pairs; qux and zim are valueless attributes
    String[][] expectedPairs = {
        {"id", "123"},
        {"class", "baz = 'bar'"},
        {"style", "border: 2px"},
        {"qux", ""},
        {"zim", ""},
        {"foo", "12"},
        {"mux", "18"},
    };

    assertEquals(7, attr.size());
    for (String[] pair : expectedPairs) {
        assertEquals(pair[1], attr.get(pair[0]));
    }
}
 
Example 23
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * An attribute written as {@code =empty} is expected to parse to a single
 * attribute whose key is {@code "=empty"} and whose value is empty.
 */
@Test public void canStartWithEq() {
    Attributes attr = Jsoup.parse("<a =empty />")
            .getElementsByTag("a")
            .get(0)
            .attributes();

    assertEquals(1, attr.size());
    assertTrue(attr.hasKey("=empty"));
    assertEquals("", attr.get("=empty"));
}
 
Example 24
Source Project: astor   Source File: ParserSettingsTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * With both {@code ParseSettings} flags set to false, normalizing an
 * attribute keyed {@code "ITEM"} is expected to yield the key {@code "item"}.
 */
@Test @MultiLocaleTest public void attributesCaseNormalization() throws Exception {
    Attributes upperCased = new Attributes();
    upperCased.put("ITEM", "1");

    Attributes normalized = new ParseSettings(false, false).normalizeAttributes(upperCased);
    String firstKey = normalized.asList().get(0).getKey();

    assertEquals("item", firstKey);
}
 
Example 25
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decides whether the whitelist permits the given attribute on the given tag.
 *
 * <p>Checks run in this order: the tag's allowed attribute set (including a
 * protocol test when protocols are registered for the attribute), then the
 * tag's enforced attributes (the value must match the enforced one), and
 * finally the same checks for the pseudo tag {@code ":all"}.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final Set<AttributeKey> allowed = attributes.get(tag);
    if (allowed != null && allowed.contains(key)) {
        if (!protocols.containsKey(tag)) {
            // attribute allowed and the tag has no protocol restrictions
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> protocolsByAttr = protocols.get(tag);
        if (!protocolsByAttr.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, protocolsByAttr.get(key));
    }

    // not explicitly allowed - it may still match an enforced attribute
    if (enforcedAttributes.get(tag) != null) {
        final Attributes enforced = getEnforcedAttributes(tagName);
        final String attrName = attr.getKey();
        if (enforced.hasKeyIgnoreCase(attrName)) {
            return enforced.getIgnoreCase(attrName).equals(attr.getValue());
        }
    }

    // nothing matched for this tag: fall back to the :all tag, once
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example 26
Source Project: astor   Source File: Whitelist.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects the enforced attributes configured for a tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding the enforced key/value pairs for
 *         the tag; empty when none are registered
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    for (Map.Entry<AttributeKey, AttributeValue> pair : enforcedAttributes.get(tag).entrySet()) {
        enforced.put(pair.getKey().toString(), pair.getValue().toString());
    }
    return enforced;
}
 
Example 27
Source Project: astor   Source File: TreeBuilder.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed; in that case a new start tag token is
 * created so the in-use token is not recycled.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // shared token is free: recycle it
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // shared token is in use, so build a separate one
    return process(new Token.StartTag().nameAttr(name, attrs));
}
 
Example 28
Source Project: astor   Source File: Token.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs the superclass reset and then installs a new {@code Attributes}
 * instance on this tag.
 *
 * @return this tag
 */
@Override
Tag reset() {
    super.reset();
    // todo - would prefer attributes to be null here, but need to check Element assertions
    this.attributes = new Attributes();
    return this;
}
 
Example 29
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses a sloppily written attribute string and checks that all seven
 * attributes are recovered with the expected values.
 */
@Test public void parsesRoughAttributeString() {
    String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
    Attributes attr = Jsoup.parse(html).getElementsByTag("a").get(0).attributes();

    // expected {key, value} pairs; qux and zim are valueless attributes
    String[][] expectedPairs = {
        {"id", "123"},
        {"class", "baz = 'bar'"},
        {"style", "border: 2px"},
        {"qux", ""},
        {"zim", ""},
        {"foo", "12"},
        {"mux", "18"},
    };

    assertEquals(7, attr.size());
    for (String[] pair : expectedPairs) {
        assertEquals(pair[1], attr.get(pair[0]));
    }
}
 
Example 30
Source Project: astor   Source File: AttributeParseTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * An attribute written as {@code =empty} is expected to parse to a single
 * attribute whose key is {@code "=empty"} and whose value is empty.
 */
@Test public void canStartWithEq() {
    Attributes attr = Jsoup.parse("<a =empty />")
            .getElementsByTag("a")
            .get(0)
            .attributes();

    assertEquals(1, attr.size());
    assertTrue(attr.hasKey("=empty"));
    assertEquals("", attr.get("=empty"));
}