package org.jsoup.parser; import org.jsoup.Jsoup; import org.jsoup.TextUtil; import org.jsoup.helper.StringUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.junit.Ignore; import org.junit.Test; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; import java.util.List; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotSame; /** * Tests XmlTreeBuilder. * * @author Jonathan Hedley */ public class XmlTreeBuilderTest { @Test public void testSimpleXmlParse() { String xml = "<doc id=2 href='/bar'>Foo <br /><link>One</link><link>Two</link></doc>"; XmlTreeBuilder tb = new XmlTreeBuilder(); Document doc = tb.parse(xml, "http://foo.com/"); assertEquals("<doc id=\"2\" href=\"/bar\">Foo <br /><link>One</link><link>Two</link></doc>", TextUtil.stripNewlines(doc.html())); assertEquals(doc.getElementById("2").absUrl("href"), "http://foo.com/bar"); } @Test public void testPopToClose() { // test: </val> closes Two, </bar> ignored String xml = "<doc><val>One<val>Two</val></bar>Three</doc>"; XmlTreeBuilder tb = new XmlTreeBuilder(); Document doc = tb.parse(xml, "http://foo.com/"); assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(doc.html())); } @Test public void testCommentAndDocType() { String xml = "<!DOCTYPE html><!-- a comment -->One <qux />Two"; XmlTreeBuilder tb = new XmlTreeBuilder(); Document doc = tb.parse(xml, "http://foo.com/"); assertEquals("<!DOCTYPE html><!-- a comment -->One <qux />Two", TextUtil.stripNewlines(doc.html())); } @Test public void testSupplyParserToJsoupClass() { String xml = "<doc><val>One<val>Two</val></bar>Three</doc>"; Document doc = Jsoup.parse(xml, "http://foo.com/", Parser.xmlParser()); assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(doc.html())); } @Ignore @Test public void testSupplyParserToConnection() throws IOException { String xmlUrl = "http://direct.infohound.net/tools/jsoup-xml-test.xml"; // parse with both xml and html parser, ensure different Document xmlDoc = Jsoup.connect(xmlUrl).parser(Parser.xmlParser()).get(); Document htmlDoc = Jsoup.connect(xmlUrl).get(); assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(xmlDoc.html())); assertNotSame(htmlDoc, xmlDoc); assertEquals(1, htmlDoc.select("head").size()); // html parser normalises assertEquals(0, xmlDoc.select("head").size()); // xml parser does not } @Test public void testSupplyParserToDataStream() throws IOException, URISyntaxException { File xmlFile = new File(XmlTreeBuilder.class.getResource("/htmltests/xml-test.xml").toURI()); InputStream inStream = new FileInputStream(xmlFile); Document doc = Jsoup.parse(inStream, null, "http://foo.com", Parser.xmlParser()); assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(doc.html())); } @Test public void testDoesNotForceSelfClosingKnownTags() { // html will force "<br>one</br>" to "<br />One<br />". XML should be stay "<br>one</br> -- don't recognise tag. Document htmlDoc = Jsoup.parse("<br>one</br>"); assertEquals("<br />one\n<br />", htmlDoc.body().html()); Document xmlDoc = Jsoup.parse("<br>one</br>", "", Parser.xmlParser()); assertEquals("<br>one</br>", xmlDoc.html()); } @Test public void handlesXmlDeclarationAsDeclaration() { String html = "<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->"; Document doc = Jsoup.parse(html, "", Parser.xmlParser()); assertEquals("<?xml encoding='UTF-8' ?> <body> One </body> <!-- comment -->", StringUtil.normaliseWhitespace(doc.outerHtml())); assertEquals("#declaration", doc.childNode(0).nodeName()); assertEquals("#comment", doc.childNode(2).nodeName()); } @Test public void xmlFragment() { String xml = "<one src='/foo/' />Two<three><four /></three>"; List<Node> nodes = Parser.parseXmlFragment(xml, "http://example.com/"); assertEquals(3, nodes.size()); assertEquals("http://example.com/foo/", nodes.get(0).absUrl("src")); assertEquals("one", nodes.get(0).nodeName()); assertEquals("Two", ((TextNode)nodes.get(1)).text()); } }