org.jsoup.nodes.Attributes Java Examples

The following examples show how to use org.jsoup.nodes.Attributes. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HtmlDecoder.java    From metafacture-core with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively walks the child elements of {@code parent} and reports each one
 * to {@code receiver} as an entity.
 *
 * <p>For every child: an entity named after the node is opened, each attribute
 * is emitted as a literal, a leaf element (one without child elements)
 * additionally emits a {@code "value"} literal when it has non-empty content,
 * the child's own children are processed, and finally the entity is closed.
 *
 * @param parent   element whose children are reported
 * @param receiver sink for the entity and literal events
 */
private void process(Element parent, StreamReceiver receiver) {
    for (Element child : parent.children()) {
        receiver.startEntity(child.nodeName());

        for (Attribute attr : child.attributes()) {
            receiver.literal(attr.getKey(), attr.getValue());
        }

        final boolean isLeaf = child.children().isEmpty();
        if (isLeaf) {
            // Prefer the trimmed text; fall back to the element's data when the text is blank.
            final String trimmedText = child.text().trim();
            final String leafValue;
            if (trimmedText.isEmpty()) {
                leafValue = child.data();
            } else {
                leafValue = trimmedText;
            }
            if (!leafValue.isEmpty()) {
                receiver.literal("value", leafValue);
            }
        }

        process(child, receiver);
        receiver.endEntity();
    }
}
 
Example #2
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a new element with the same tag and base URI as {@code sourceEl},
 * carrying only the attributes the whitelist accepts plus the attributes the
 * whitelist enforces for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copied ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
 
Example #3
Source File: W3CDom.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element's attributes in
 * {@code namespaces} and reports the prefix of the element's own tag name.
 *
 * <p>An attribute whose key equals {@code xmlnsKey} is stored under the empty
 * prefix; an attribute whose key starts with {@code xmlnsPrefix} is stored
 * under the remainder of the key. All other attributes are ignored.
 *
 * @param el element to inspect
 * @return the part of the tag name before the first {@code ":"}, or an empty
 *         string when there is no colon after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        final boolean isDefaultNamespace = name.equals(xmlnsKey);
        if (!isDefaultNamespace && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String prefix = isDefaultNamespace ? "" : name.substring(xmlnsPrefix.length());
        namespaces.put(prefix, declaration.getValue());
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example #4
Source File: W3CDom.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element's attributes in
 * {@code namespaces} and reports the prefix of the element's own tag name.
 *
 * <p>An attribute whose key equals {@code xmlnsKey} is stored under the empty
 * prefix; an attribute whose key starts with {@code xmlnsPrefix} is stored
 * under the remainder of the key. All other attributes are ignored.
 *
 * @param el element to inspect
 * @return the part of the tag name before the first {@code ":"}, or an empty
 *         string when there is no colon after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        final boolean isDefaultNamespace = name.equals(xmlnsKey);
        if (!isDefaultNamespace && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String prefix = isDefaultNamespace ? "" : name.substring(xmlnsPrefix.length());
        namespaces.put(prefix, declaration.getValue());
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example #5
Source File: Cleaner.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a new element with the same tag and base URI as {@code sourceEl},
 * carrying only the attributes the whitelist accepts plus the attributes the
 * whitelist enforces for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element together with the number of attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copied ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
 
Example #6
Source File: Html.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Parses {@code outerHtml} as a body fragment and initialises this component
 * from its single top-level element: the component element is created with
 * the root's tag name, the root's attributes are applied through
 * {@code setAttribute}, and the root's inner HTML is passed to
 * {@code setInnerHtml} with pretty printing disabled.
 *
 * @param outerHtml markup that must contain exactly one top-level element
 * @throws IllegalArgumentException if the fragment has zero or more than one
 *         top-level element (text nodes are not counted)
 */
private void setOuterHtml(String outerHtml) {
    Document doc = Jsoup.parseBodyFragment(outerHtml);
    org.jsoup.nodes.Element body = doc.body();

    int nrChildren = body.children().size();
    if (nrChildren != 1) {
        StringBuilder message = new StringBuilder(
                "HTML must contain exactly one top level element (ignoring text nodes). Found ");
        message.append(nrChildren);
        if (nrChildren > 1) {
            // List the offending tags so the caller can see what was found.
            message.append(" elements with the tag names ");
            message.append(body.children().stream()
                    .map(org.jsoup.nodes.Element::tagName)
                    .collect(Collectors.joining(", ")));
        }
        throw new IllegalArgumentException(message.toString());
    }

    org.jsoup.nodes.Element root = body.child(0);

    Component.setElement(this, new Element(root.tagName()));
    root.attributes().forEach(this::setAttribute);

    // Pretty printing must be switched off before the inner HTML is serialised.
    doc.outputSettings().prettyPrint(false);
    setInnerHtml(root.html());
}
 
Example #7
Source File: HtmlExporterTest.java    From LicenseScout with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the exported HTML: every {@code <meta>} element that carries an
 * {@code http-equiv} attribute must have a {@code content} attribute ending in
 * {@code charset=<output charset name>}, and the four expected tables must be
 * present by id.
 *
 * @param testVariant   variant under test; supplies the expected output charset
 * @param resultContent the generated HTML document as a string
 */
@Override
protected void assertResultContent(TestVariant testVariant, final String resultContent) {
    final Document doc = Jsoup.parse(resultContent);
    for (final Element metaElement : doc.getElementsByTag("meta")) {
        final Attributes attributes = metaElement.attributes();
        // Attributes.get() yields "" (never null) for a missing key, so presence
        // has to be tested with hasKey(); otherwise every <meta> would be checked.
        if (attributes.hasKey("http-equiv")) {
            final String contentAttribute = attributes.get("content");
            final String expected = "charset=" + testVariant.getOutputCharset().name();
            Assert.assertTrue("Encoding", contentAttribute.endsWith(expected));
        }
    }
    Assert.assertNotNull("Detection statistics table present", doc.getElementById("detection_statistics_table"));
    Assert.assertNotNull("Legal statistics table present", doc.getElementById("legal_statistics_table"));
    Assert.assertNotNull("Genral statistics table present", doc.getElementById("general_statistics_table"));
    Assert.assertNotNull("Main table present", doc.getElementById("license_table"));
}
 
Example #8
Source File: W3CDom.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Records every namespace declaration found on the element's attributes in
 * {@code namespaces} and reports the prefix of the element's own tag name.
 *
 * <p>An attribute whose key equals {@code xmlnsKey} is stored under the empty
 * prefix; an attribute whose key starts with {@code xmlnsPrefix} is stored
 * under the remainder of the key. All other attributes are ignored.
 *
 * @param el element to inspect
 * @return the part of the tag name before the first {@code ":"}, or an empty
 *         string when there is no colon after the first character
 */
private String updateNamespaces(org.jsoup.nodes.Element el) {
    for (Attribute declaration : el.attributes()) {
        final String name = declaration.getKey();
        final boolean isDefaultNamespace = name.equals(xmlnsKey);
        if (!isDefaultNamespace && !name.startsWith(xmlnsPrefix)) {
            continue; // not a namespace declaration
        }
        final String prefix = isDefaultNamespace ? "" : name.substring(xmlnsPrefix.length());
        namespaces.put(prefix, declaration.getValue());
    }

    final String tagName = el.tagName();
    final int colon = tagName.indexOf(":");
    if (colon > 0) {
        return tagName.substring(0, colon);
    }
    return "";
}
 
Example #9
Source File: ParserSettingsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that normalising an attribute set containing the key {@code "ITEM"}
 * yields the key {@code "item"}. The settings are built with both constructor
 * flags {@code false} (presumably "do not preserve case" - confirm against
 * ParseSettings).
 */
@Test @MultiLocaleTest public void attributesCaseNormalization() throws Exception {
    Attributes upperCased = new Attributes();
    upperCased.put("ITEM", "1");

    ParseSettings settings = new ParseSettings(false, false);
    Attributes result = settings.normalizeAttributes(upperCased);

    String firstKey = result.asList().get(0).getKey();
    assertEquals("item", firstKey);
}
 
Example #10
Source File: TagServlet.java    From firing-range with Apache License 2.0 5 votes vote down vote up
/**
 * Echoes the given elements back to the client, skipping those whose tag is
 * not allowed and rejecting the request when a disallowed attribute is found.
 *
 * <p>An empty {@code allowedTag} means every tag is accepted; an empty
 * {@code allowedAttr} means attributes are not checked. When
 * {@code allowedTag} is "script" (case-insensitive), {@code elements.empty()}
 * is called before anything is echoed.
 *
 * @param elements    parsed elements taken from the request
 * @param response    response to write the result or the error to
 * @param allowedTag  the only tag name to echo, or empty for all tags
 * @param allowedAttr the only attribute name permitted, or empty for no check
 * @throws IOException declared for failures while writing the response
 */
private void handleRequest(
    Elements elements, HttpServletResponse response, String allowedTag, String allowedAttr)
        throws IOException {
  if (allowedTag.equalsIgnoreCase("script")) {
    elements.empty();
  }

  final boolean anyTagAllowed = allowedTag.isEmpty();
  final boolean checkAttributes = !allowedAttr.isEmpty();

  StringBuilder echoed = new StringBuilder();
  for (Element candidate : elements) {
    if (!anyTagAllowed && !allowedTag.equalsIgnoreCase(candidate.tagName())) {
      continue;
    }

    if (checkAttributes) {
      for (Attribute attr : candidate.attributes()) {
        boolean permitted = attr.getKey().equalsIgnoreCase(allowedAttr);
        if (!permitted) {
          // A single disallowed attribute aborts the whole request.
          Responses.sendError(response, "Invalid input attribute", 400);
          return;
        }
      }
    }
    echoed.append(candidate.toString());
  }
  Responses.sendXssed(response, echoed.toString());
}
 
Example #11
Source File: Token.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Resets this tag token: runs the superclass reset, then installs a new,
 * empty {@code Attributes} instance.
 *
 * @return this token
 */
@Override
Tag reset() {
    super.reset();
    // Assigned after super.reset(), so the token always ends up with a fresh Attributes object.
    attributes = new Attributes();
    // todo - would prefer these to be null, but need to check Element assertions
    return this;
}
 
Example #12
Source File: TreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed, in which case a new start tag is created.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // The shared token is free, so recycle it.
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // don't recycle an in-use token
    return process(new Token.StartTag().nameAttr(name, attrs));
}
 
Example #13
Source File: AttributeParseTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an attribute name beginning with {@code '='} is parsed as a
 * single attribute keyed {@code "=empty"} with an empty value.
 */
@Test public void canStartWithEq() {
    Attributes parsed = Jsoup.parse("<a =empty />")
            .getElementsByTag("a")
            .get(0)
            .attributes();

    assertEquals(1, parsed.size());
    assertTrue(parsed.hasKey("=empty"));
    assertEquals("", parsed.get("=empty"));
}
 
Example #14
Source File: Whitelist.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds the set of attributes that are enforced for the given tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding one entry per enforced key/value
 *         pair; empty when nothing is registered for the tag
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    final Map<AttributeKey, AttributeValue> registered = enforcedAttributes.get(tag);
    for (Map.Entry<AttributeKey, AttributeValue> pair : registered.entrySet()) {
        final String key = pair.getKey().toString();
        final String value = pair.getValue().toString();
        enforced.put(key, value);
    }
    return enforced;
}
 
Example #15
Source File: Whitelist.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decides whether the supplied attribute may be kept on an element of the given tag.
 *
 * <p>Checks, in order: the attributes allowed for the tag (subject to a
 * protocol test when protocols are registered for that tag and key), then the
 * attributes enforced for the tag (the value must match the enforced one), and
 * finally falls back to the rules registered under the {@code ":all"} tag.
 *
 * @param tagName tag to consider allowing the attribute in
 * @param el element under test, to confirm protocol
 * @param attr attribute under test
 * @return true if allowed
 */
protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
    final TagName tag = TagName.valueOf(tagName);
    final AttributeKey key = AttributeKey.valueOf(attr.getKey());

    final Set<AttributeKey> allowedKeys = attributes.get(tag);
    if (allowedKeys != null && allowedKeys.contains(key)) {
        if (!protocols.containsKey(tag)) {
            // attribute found, no protocols defined for the tag, so OK
            return true;
        }
        final Map<AttributeKey, Set<Protocol>> protocolsByKey = protocols.get(tag);
        if (!protocolsByKey.containsKey(key)) {
            // no protocol restriction for this particular attribute
            return true;
        }
        return testValidProtocol(el, attr, protocolsByKey.get(key));
    }

    // Not explicitly allowed: it may still match an enforced attribute.
    if (enforcedAttributes.get(tag) != null) {
        final Attributes enforced = getEnforcedAttributes(tagName);
        final String rawKey = attr.getKey();
        if (enforced.hasKeyIgnoreCase(rawKey)) {
            return enforced.getIgnoreCase(rawKey).equals(attr.getValue());
        }
    }

    // Nothing matched for this tag; retry once against the ":all" pseudo tag.
    if (tagName.equals(":all")) {
        return false;
    }
    return isSafeAttribute(":all", el, attr);
}
 
Example #16
Source File: ParserSettingsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that normalising an attribute set containing the key {@code "ITEM"}
 * yields the key {@code "item"}. The settings are built with both constructor
 * flags {@code false} (presumably "do not preserve case" - confirm against
 * ParseSettings).
 */
@Test @MultiLocaleTest public void attributesCaseNormalization() throws Exception {
    Attributes upperCased = new Attributes();
    upperCased.put("ITEM", "1");

    ParseSettings settings = new ParseSettings(false, false);
    Attributes result = settings.normalizeAttributes(upperCased);

    String firstKey = result.asList().get(0).getKey();
    assertEquals("item", firstKey);
}
 
Example #17
Source File: AttributeParseTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an attribute name beginning with {@code '='} is parsed as a
 * single attribute keyed {@code "=empty"} with an empty value.
 */
@Test public void canStartWithEq() {
    Attributes parsed = Jsoup.parse("<a =empty />")
            .getElementsByTag("a")
            .get(0)
            .attributes();

    assertEquals(1, parsed.size());
    assertTrue(parsed.hasKey("=empty"));
    assertEquals("", parsed.get("=empty"));
}
 
Example #18
Source File: Whitelist.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Builds the set of attributes that are enforced for the given tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding one entry per enforced key/value
 *         pair; empty when nothing is registered for the tag
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    final Map<AttributeKey, AttributeValue> registered = enforcedAttributes.get(tag);
    for (Map.Entry<AttributeKey, AttributeValue> pair : registered.entrySet()) {
        final String key = pair.getKey().toString();
        final String value = pair.getValue().toString();
        enforced.put(key, value);
    }
    return enforced;
}
 
Example #19
Source File: AttributeParseTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that a sloppily written attribute string (mixed quoting, stray
 * spaces around {@code '='}, valueless names) is parsed into the seven
 * expected attributes.
 */
@Test public void parsesRoughAttributeString() {
    String markup = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
    // expected: id=123, class=baz = 'bar', style=border: 2px, qux (empty), zim (empty), foo=12, mux=18

    Attributes parsed = Jsoup.parse(markup).getElementsByTag("a").get(0).attributes();

    assertEquals(7, parsed.size());
    assertEquals("123", parsed.get("id"));
    assertEquals("baz = 'bar'", parsed.get("class"));
    assertEquals("border: 2px", parsed.get("style"));
    assertEquals("", parsed.get("qux"));
    assertEquals("", parsed.get("zim"));
    assertEquals("12", parsed.get("foo"));
    assertEquals("18", parsed.get("mux"));
}
 
Example #20
Source File: Token.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Resets this tag token: runs the superclass reset, then installs a new,
 * empty {@code Attributes} instance.
 *
 * @return this token
 */
@Override
Tag reset() {
    super.reset();
    // Assigned after super.reset(), so the token always ends up with a fresh Attributes object.
    attributes = new Attributes();
    // todo - would prefer these to be null, but need to check Element assertions
    return this;
}
 
Example #21
Source File: TreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed, in which case a new start tag is created.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // The shared token is free, so recycle it.
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // don't recycle an in-use token
    return process(new Token.StartTag().nameAttr(name, attrs));
}
 
Example #22
Source File: AttributeParseTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that a sloppily written attribute string (mixed quoting, stray
 * spaces around {@code '='}, valueless names) is parsed into the seven
 * expected attributes.
 */
@Test public void parsesRoughAttributeString() {
    String markup = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
    // expected: id=123, class=baz = 'bar', style=border: 2px, qux (empty), zim (empty), foo=12, mux=18

    Attributes parsed = Jsoup.parse(markup).getElementsByTag("a").get(0).attributes();

    assertEquals(7, parsed.size());
    assertEquals("123", parsed.get("id"));
    assertEquals("baz = 'bar'", parsed.get("class"));
    assertEquals("border: 2px", parsed.get("style"));
    assertEquals("", parsed.get("qux"));
    assertEquals("", parsed.get("zim"));
    assertEquals("12", parsed.get("foo"));
    assertEquals("18", parsed.get("mux"));
}
 
Example #23
Source File: Whitelist.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds the set of attributes that are enforced for the given tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding one entry per enforced key/value
 *         pair; empty when nothing is registered for the tag
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    final Map<AttributeKey, AttributeValue> registered = enforcedAttributes.get(tag);
    for (Map.Entry<AttributeKey, AttributeValue> pair : registered.entrySet()) {
        final String key = pair.getKey().toString();
        final String value = pair.getValue().toString();
        enforced.put(key, value);
    }
    return enforced;
}
 
Example #24
Source File: ColorCodeCorrector.java    From openemm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Walks the element tree depth-first and expands color codes on the way.
 * For each element, a style tag is expanded first, then every style
 * attribute of the element, and finally the element's children are visited.
 * 
 * @param element root element of (sub-)tree to traverse
 */
private static void traverse(Element element) {
	if(isStyleTag(element)) {
		expandColorCodeInStyleTag(element);
	}
	
	// Works on the list view returned by asList(), one attribute at a time.
	element.attributes().asList().forEach(candidate -> {
		if(isStyleAttribute(candidate)) {
			expandColorCodeInAttribute(candidate);
		}
	});

	for(Element child : element.children()) {
		traverse(child);
	}
}
 
Example #25
Source File: Calendar.java    From calendar-component with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the calendar configuration from a declarative design element.
 *
 * <p>After delegating to the superclass, the optional attributes
 * {@code time-zone}, {@code time-format}, {@code start-date} and
 * {@code end-date} are applied when present. The time zone is handled first
 * because the start and end dates are converted using {@link #getZoneId()}.
 *
 * @param design        the design element to read from
 * @param designContext the design context passed on to the superclass
 */
@Override
public void readDesign(Element design, DesignContext designContext) {
    super.readDesign(design, designContext);

    Attributes attr = design.attributes();

    if (design.hasAttr("time-zone")) {
        // Read the zone id from "time-zone" itself; reading "end-date" here was a copy-paste error.
        setZoneId(ZoneId.of(DesignAttributeHandler.readAttribute("time-zone", attr, String.class)));
    }

    if (design.hasAttr("time-format")) {
        // Attribute value is upper-cased and prefixed to form the TimeFormat constant name.
        setTimeFormat(TimeFormat.valueOf(
                "Format" + design.attr("time-format").toUpperCase()));
    }

    if (design.hasAttr("start-date")) {
        setStartDate(
                ZonedDateTime.ofInstant(DesignAttributeHandler.readAttribute("start-date", attr, Date.class)
                        .toInstant(), getZoneId()));
    }

    if (design.hasAttr("end-date")) {
        setEndDate(
                ZonedDateTime.ofInstant(DesignAttributeHandler.readAttribute("end-date", attr, Date.class)
                        .toInstant(), getZoneId()));
    }
}
 
Example #26
Source File: AttributesAdaptor.java    From zongtui-webcrawler with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Wraps a jsoup attribute collection, creating one {@code AttributeAdaptor}
 * per attribute in iteration order.
 *
 * @param attributes the attributes to adapt
 * @param element    the element passed to every created adaptor
 */
public AttributesAdaptor(Attributes attributes, Element element) {
    this.attributes = attributes;
    this.element = element;
    this.attrList = new ArrayList<Attr>();
    for (Attribute source : attributes) {
        Attr adapted = new AttributeAdaptor(source, element);
        this.attrList.add(adapted);
    }
}
 
Example #27
Source File: SishuokWhitelist.java    From es with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the set of attributes that are enforced for the given tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding one entry per enforced key/value
 *         pair; empty when nothing is registered for the tag
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    final Map<AttributeKey, AttributeValue> registered = enforcedAttributes.get(tag);
    for (Map.Entry<AttributeKey, AttributeValue> pair : registered.entrySet()) {
        final String key = pair.getKey().toString();
        final String value = pair.getValue().toString();
        enforced.put(key, value);
    }
    return enforced;
}
 
Example #28
Source File: TemplateRecordDefinitionAnnotator.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the attributes of a record definition's begin tag to the definition.
 *
 * <p>The tag is parsed with Jsoup as an HTML body fragment and the first
 * child element of the body is inspected. Only the repeat attribute is
 * handled: when present, repeat is set to {@code true} for a missing or empty
 * value, otherwise to the boolean parsed from the value.
 *
 * @param recordDefinition the record definition
 * @param beginText the begin tag of the record definition
 */
private void addAttributes(TemplateRecordDefinition recordDefinition, String beginText) {

  Attributes tagAttributes = Jsoup.parseBodyFragment(beginText).body().child(0).attributes();
  if (!tagAttributes.hasKey(REPEAT_ATTRIBUTE)) {
    return;
  }

  String repeatValue = tagAttributes.get(REPEAT_ATTRIBUTE);
  // A bare attribute (no value) counts as "repeat".
  boolean repeat = Strings.isNullOrEmpty(repeatValue) || Boolean.parseBoolean(repeatValue);
  recordDefinition.setRepeat(repeat);
}
 
Example #29
Source File: Whitelist.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds the set of attributes that are enforced for the given tag.
 *
 * @param tagName name of the tag to look up
 * @return a new {@code Attributes} holding one entry per enforced key/value
 *         pair; empty when nothing is registered for the tag
 */
Attributes getEnforcedAttributes(String tagName) {
    final Attributes enforced = new Attributes();
    final TagName tag = TagName.valueOf(tagName);
    if (!enforcedAttributes.containsKey(tag)) {
        return enforced;
    }

    final Map<AttributeKey, AttributeValue> registered = enforcedAttributes.get(tag);
    for (Map.Entry<AttributeKey, AttributeValue> pair : registered.entrySet()) {
        final String key = pair.getKey().toString();
        final String value = pair.getValue().toString();
        enforced.put(key, value);
    }
    return enforced;
}
 
Example #30
Source File: TreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Processes a start tag with the given name and attributes.
 *
 * <p>The shared {@code start} token is reset and reused unless it is the
 * token currently being processed, in which case a new start tag is created.
 *
 * @param name  tag name
 * @param attrs attributes for the tag
 * @return the result of {@code process} for the start tag token
 */
public boolean processStartTag(String name, Attributes attrs) {
    if (currentToken != start) {
        // The shared token is free, so recycle it.
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }
    // don't recycle an in-use token
    return process(new Token.StartTag().nameAttr(name, attrs));
}