org.jsoup.helper.StringUtil Java Examples

The following examples show how to use org.jsoup.helper.StringUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RuleDatabaseService.java    From WeEvent with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the rule databases matching the given probe entity and rewrites each result's
 * database URL so that it carries the optional parameter string, when one is set.
 *
 * @param request the current HTTP request (not read by this method)
 * @param ruleDatabaseEntity probe entity; its system tag is forced to {@code false} before querying
 * @return the matching entities, with their database URLs adjusted in place
 * @throws GovernanceException if anything fails during the lookup or the URL rewrite
 */
public List<RuleDatabaseEntity> getRuleDataBaseList(HttpServletRequest request, RuleDatabaseEntity ruleDatabaseEntity) throws GovernanceException {
    try {
        ruleDatabaseEntity.setSystemTag(false);
        List<RuleDatabaseEntity> matches = ruleDatabaseRepository.findAll(Example.of(ruleDatabaseEntity));
        for (RuleDatabaseEntity entry : matches) {
            String baseUrl = entry.getDatabaseUrl();
            String extraParams = entry.getOptionalParameter();
            // Only append the "?params" suffix when there is something to append.
            entry.setDatabaseUrl(StringUtil.isBlank(extraParams) ? baseUrl : baseUrl + "?" + extraParams);
        }
        return matches;
    } catch (Exception e) {
        log.error("get ruleDatabaseList fail", e);
        throw new GovernanceException("get ruleDatabaseList fail", e);
    }
}
 
Example #2
Source File: RuleEngineService.java    From WeEvent with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the rule has a non-blank rule name, a user id, a broker id and a non-blank
 * broker URL, then hands the rule to {@code checkField}.
 *
 * @param rule the rule to validate
 * @throws GovernanceException naming the first missing value, in the order listed above
 */
private void checkStartRuleRequired(RuleEngineEntity rule) throws GovernanceException {
    // Work out the first failing check; the checks run in the same order as they are reported.
    String failure = null;
    if (StringUtil.isBlank(rule.getRuleName())) {
        failure = "the ruleName is empty";
    } else if (rule.getUserId() == null) {
        failure = "the userId is empty";
    } else if (rule.getBrokerId() == null) {
        failure = "the brokerId is empty";
    } else if (StringUtil.isBlank(rule.getBrokerUrl())) {
        failure = "the brokerUrl is empty";
    }

    if (failure != null) {
        log.error(failure);
        throw new GovernanceException(failure);
    }
    this.checkField(rule);
}
 
Example #3
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void handlesInvalidDoctypes() {
    // An empty doctype used to fail with an invalid name exception.
    String emptyDoctype = StringUtil.normaliseWhitespace(Jsoup.parse("<!DOCTYPE>").outerHtml());
    assertEquals("<!doctype> <html> <head></head> <body></body> </html>", emptyDoctype);

    // Same empty doctype, this time followed by real content.
    String withContent = StringUtil.normaliseWhitespace(Jsoup.parse("<!DOCTYPE><html><p>Foo</p></html>").outerHtml());
    assertEquals("<!doctype> <html> <head></head> <body> <p>Foo</p> </body> </html>", withContent);

    // A NUL character as the doctype name.
    String nulName = StringUtil.normaliseWhitespace(Jsoup.parse("<!DOCTYPE \u0000>").outerHtml());
    assertEquals("<!doctype �> <html> <head></head> <body></body> </html>", nulName);
}
 
Example #4
Source File: Element.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Get a CSS selector that will uniquely select this element.
 * <p>
 * If the element has an ID, returns #id;
 * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
 * followed by a unique selector for the element (tag.class.class:nth-child(n)).
 * </p>
 *
 * @return the CSS Path that can be used to retrieve the element in a selector.
 */
public String cssSelector() {
    if (id().length() > 0)
        return "#" + id();

    // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag
    String tagName = tagName().replace(':', '|');
    StringBuilder selector = new StringBuilder(tagName);
    String classes = StringUtil.join(classNames(), ".");
    if (classes.length() > 0)
        selector.append('.').append(classes);

    if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
        return selector.toString();

    selector.insert(0, " > ");
    if (parent().select(selector.toString()).size() > 1) {
        // Append the index directly instead of going through String.format("%d"): the formatter
        // localises digits for the default locale, which can yield non-ASCII digits and therefore
        // a selector that no longer parses.
        selector.append(":nth-child(").append(elementSiblingIndex() + 1).append(')');
    }

    return parent().cssSelector() + selector.toString();
}
 
Example #5
Source File: AbstractHACCommunicationManager.java    From hybris-commerce-eclipse-plugin with Apache License 2.0 6 votes vote down vote up
/**
 * Sends an HTTP GET request to {@link #endpointUrl} and stores the CSRF token
 * extracted from the response body in {@link #csrfToken}.
 *
 * @throws IOException
 *             if the request fails; an {@link UnknownHostException} whose message
 *             equals the host captured from the endpoint URL is replaced by one
 *             carrying a formatted message, any other one is rethrown unchanged
 * @throws ClientProtocolException
 *             propagated from the HTTP client
 * @throws AuthenticationException
 *             if the extracted token is blank
 */
protected void fetchCsrfTokenFromHac() throws ClientProtocolException, IOException, AuthenticationException {
	final HttpGet request = new HttpGet(getEndpointUrl());

	try {
		final HttpResponse httpResponse = httpClient.execute(request, getContext());
		final String body = new BasicResponseHandler().handleResponse(httpResponse);
		csrfToken = getCsrfToken(body);

		if (StringUtil.isBlank(csrfToken)) {
			throw new AuthenticationException(ErrorMessage.CSRF_TOKEN_CANNOT_BE_OBTAINED);
		}
	} catch (UnknownHostException unknownHost) {
		final String originalMessage = unknownHost.getMessage();
		final Matcher hostMatcher = HACPreferenceConstants.HOST_REGEXP_PATTERN.matcher(getEndpointUrl());

		// Anything other than "the configured host itself could not be resolved" is passed on untouched.
		if (!hostMatcher.find() || !hostMatcher.group(1).equals(originalMessage)) {
			throw unknownHost;
		}
		throw new UnknownHostException(
				String.format(ErrorMessage.UNKNOWN_HOST_EXCEPTION_MESSAGE_FORMAT, hostMatcher.group(1)));
	}
}
 
Example #6
Source File: Tokeniser.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Drains the reader, copying its text and replacing every character reference
 * that can be resolved with the code point(s) it stands for.
 *
 * @param inAttribute passed through to {@code consumeCharacterReference}
 * @return the reader's remaining content with resolvable entities unescaped
 */
String unescapeEntities(boolean inAttribute) {
    StringBuilder out = StringUtil.stringBuilder();
    while (!reader.isEmpty()) {
        // Copy everything up to the next ampersand verbatim.
        out.append(reader.consumeTo('&'));
        if (!reader.matches('&'))
            continue;

        reader.consume();
        int[] codePoints = consumeCharacterReference(null, inAttribute);
        if (codePoints == null || codePoints.length == 0) {
            // Not a character reference: keep the ampersand as literal text.
            out.append('&');
            continue;
        }

        out.appendCodePoint(codePoints[0]);
        if (codePoints.length == 2)
            out.appendCodePoint(codePoints[1]);
    }
    return out.toString();
}
 
Example #7
Source File: RuleEngineService.java    From WeEvent with Apache License 2.0 6 votes vote down vote up
/**
 * Fills in the rule's database URL and database type from the rule database it references.
 * <p>
 * The URL is the stored database URL followed by the user, password and table name as query
 * parameters, plus the database's optional parameter string when it is not blank. Nothing
 * happens when the rule has no database id or the referenced database cannot be found.
 * </p>
 *
 * @param rule the rule to update in place
 */
private void setRuleDataBaseUrl(RuleEngineEntity rule) {
    if (rule.getRuleDataBaseId() == null) {
        return;
    }
    RuleDatabaseEntity ruleDataBase = ruleDatabaseRepository.findById(rule.getRuleDataBaseId());
    if (ruleDataBase == null) {
        return;
    }

    String dbUrl = ruleDataBase.getDatabaseUrl() + "?user=" + ruleDataBase.getUsername() + "&password=" + ruleDataBase.getPassword() +
            "&tableName=" + rule.getTableName();
    if (!StringUtil.isBlank(ruleDataBase.getOptionalParameter())) {
        dbUrl = dbUrl + "&" + ruleDataBase.getOptionalParameter();
    }
    rule.setDatabaseUrl(dbUrl);
    rule.setDatabaseType(ruleDataBase.getDatabaseType());
    // Log only the stored base URL: the assembled URL embeds the database password and
    // must not be written to the log.
    log.info("dataBaseUrl:{}", ruleDataBase.getDatabaseUrl());
}
 
Example #8
Source File: HtmlToPlainText.java    From eclipse.jdt.ls with Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Called when a node is first visited: emits text nodes as-is, tracks {@code ul} nesting,
 * and writes the leading newline, indentation and bullet for list items and block elements.
 *
 * @param node the node being entered
 * @param depth the node's depth (unused)
 */
@Override
public void head(Node node, int depth) {
	String name = node.nodeName();
	if (node instanceof TextNode) {
		// TextNodes carry all user-readable text in the DOM.
		append(((TextNode) node).text());
		return;
	}
	if (name.equals("ul")) {
		listNesting++;
		return;
	}
	if (name.equals("li")) {
		append("\n ");
		// One extra indent step per nesting level below the outermost list.
		for (int remaining = listNesting - 1; remaining > 0; remaining--) {
			append("  ");
		}
		append(listNesting == 1 ? "* " : "- ");
		return;
	}
	if (name.equals("dt")) {
		append("  ");
		return;
	}
	if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
		append("\n");
	}
}
 
Example #9
Source File: RestAPINetworkManager.java    From Shaarlier with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Searches the account's link endpoint for the given link's URL (first result only) and
 * returns a copy of the link, overwritten with the stored url, title, description, privacy
 * flag and tags when a result is found.
 * <p>
 * A malformed JSON response is logged and the copy is returned in whatever state it had
 * reached.
 * </p>
 *
 * @param link the link to look up
 * @return a copy of {@code link}, updated from the first search result if there is one
 * @throws IOException if building the URL or executing the request fails
 */
@Override
public Link prefetchLinkData(Link link) throws IOException {
    // TODO: There might be some bugs here, e.g:
    // - If the scheme used is not the same that on the saved link
    // - If there are tracking tags that don't match
    // We might want to open an Issue on Shaarli to get feedback
    String endpoint = new URL(this.mAccount.getUrlShaarli() + LINK_URL).toExternalForm();
    String responseBody = this.newConnection(endpoint, Connection.Method.GET)
            .data("offset", "0")
            .data("limit", "1")
            .data("searchterm", link.getUrl())
            .execute()
            .body();
    Log.d("RestAPI:prefetch", responseBody);

    Link result = new Link(link);
    try {
        JSONArray hits = new JSONArray(responseBody);
        if (hits.length() >= 1) {
            Log.i("RestAPI:prefetch", "Found 1 link result (not new link)");
            JSONObject stored = hits.getJSONObject(0);
            result.setUrl(stored.getString("url"));
            result.setTitle(stored.getString("title"));
            result.setDescription(stored.getString("description"));
            result.setPrivate(stored.getBoolean("private"));

            JSONArray storedTags = stored.getJSONArray("tags");
            ArrayList<String> tagNames = new ArrayList<>();
            int tagCount = storedTags.length();
            for (int idx = 0; idx < tagCount; idx++) {
                tagNames.add(storedTags.getString(idx));
            }
            result.setTags(StringUtil.join(tagNames, ", "));
        } else {
            Log.i("RestAPI:prefetch", "New link");
        }
    } catch (JSONException e) {
        Log.e("RestAPI:prefetch", e.toString());
    }
    return result;
}
 
Example #10
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // XML would normally be auto-detected and parsed with the XML parser;
    // here the HTML parser is requested explicitly and must be the one used.
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml").parser(Parser.htmlParser());
    Document doc = con.get();

    Connection.Request req = con.request();
    assertTrue(req.parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    String expected = "<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>";
    assertEquals(expected, StringUtil.normaliseWhitespace(doc.outerHtml()));
}
 
Example #11
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void doesNotFindShortestMatchingEntity() {
    // Earlier behaviour (as defined in html5) spotted a possible entity and then shortened the
    // string until something matched. In practice that produced spurious matches the author
    // never intended, so "&clubsuite;" must stay escaped text while "&clubsuit;" resolves.
    Document doc = Jsoup.parse("One &clubsuite; &clubsuit;");
    String expected = StringUtil.normaliseWhitespace("One &amp;clubsuite; ♣");
    assertEquals(expected, doc.body().html());
}
 
Example #12
Source File: XmlTreeBuilderTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void handlesXmlDeclarationAsDeclaration() {
    Document doc = Jsoup.parse("<?xml encoding='UTF-8' ?><body>One</body><!-- comment -->", "", Parser.xmlParser());

    // The declaration and the comment must both survive serialisation...
    String serialised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<?xml encoding='UTF-8' ?> <body> One </body> <!-- comment -->", serialised);

    // ...and appear as the first and third child nodes respectively.
    assertEquals("#declaration", doc.childNode(0).nodeName());
    assertEquals("#comment", doc.childNode(2).nodeName());
}
 
Example #13
Source File: HtmlToPlainText.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds text to the accumulator, wrapping onto a new line whenever the current line would
 * grow past {@code maxWidth}.
 *
 * @param text the text to add
 */
private void append(String text) {
    // A leading newline only comes from the formatting rules, not natural text: restart the column count.
    if (text.startsWith("\n"))
        width = 0;

    // Collapse runs of blanks: skip a lone space at the very start or after a space/newline.
    if (text.equals(" ")) {
        int length = accum.length();
        if (length == 0 || StringUtil.in(accum.substring(length - 1), " ", "\n"))
            return;
    }

    // Fits on the current line: no wrapping needed.
    if (text.length() + width <= maxWidth) {
        accum.append(text);
        width += text.length();
        return;
    }

    // Too long: emit word by word, breaking the line before any word that would overflow.
    String[] words = text.split("\\s+");
    int lastIndex = words.length - 1;
    for (int i = 0; i <= lastIndex; i++) {
        // Every word but the last is followed by a single space.
        String word = (i == lastIndex) ? words[i] : words[i] + " ";
        if (word.length() + width > maxWidth) {
            accum.append("\n").append(word);
            width = word.length();
        } else {
            accum.append(word);
            width += word.length();
        }
    }
}
 
Example #14
Source File: AbstractHACCommunicationManager.java    From hybris-commerce-eclipse-plugin with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the CSRF token from a response body by parsing it as HTML and
 * reading the content attribute of the CSRF meta tag.
 *
 * @param responseBody
 *            response body of GET method
 * @return the value of the CSRF meta tag's content attribute
 * @throws AuthenticationException
 *             if the response body is blank
 */
protected String getCsrfToken(String responseBody) throws AuthenticationException {
	if (!StringUtil.isBlank(responseBody)) {
		final Document parsedBody = Jsoup.parse(responseBody);
		return parsedBody.select(Meta.CSRF_META_TAG).attr(Meta.CSRF_META_TAG_CONTENT);
	}
	throw new AuthenticationException(ErrorMessage.CSRF_RESPONSE_CANNOT_BE_BLANK);
}
 
Example #15
Source File: HtmlToPlainText.java    From intellij-quarkus with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Called when a node is left: closes block-level elements with a newline, separates table
 * cells with a space, appends the absolute link target after anchors and tracks the end of
 * a {@code ul}.
 *
 * @param node the node being left
 * @param depth the node's depth (unused)
 */
@Override
public void tail(Node node, int depth) {
    String name = node.nodeName();
    if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) {
        append("\n");
        return;
    }
    if (StringUtil.in(name, "th", "td")) {
        append(" ");
        return;
    }
    if (name.equals("a")) {
        append(" <" + node.absUrl("href") + ">");
        return;
    }
    if (name.equals("ul")) {
        listNesting--;
    }
}
 
Example #16
Source File: DocumentType.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Writes the doctype declaration: the name, then the public id (prefixed with PUBLIC) and
 * the system id, each quoted and each only when not blank.
 *
 * @param accum builder receiving the output
 * @param depth unused
 * @param out unused
 */
@Override
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
    accum.append("<!DOCTYPE ").append(attr("name"));

    String publicId = attr("publicId");
    if (!StringUtil.isBlank(publicId)) {
        accum.append(" PUBLIC \"").append(publicId).append('"');
    }

    String systemId = attr("systemId");
    if (!StringUtil.isBlank(systemId)) {
        accum.append(" \"").append(systemId).append('"');
    }

    accum.append('>');
}
 
Example #17
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // Auto-detection would pick the XML parser for this resource, but an
    // explicitly requested HTML parser takes precedence.
    final String xmlUrl = "http://direct.infohound.net/tools/parse-xml.xml";
    final Connection connection = Jsoup.connect(xmlUrl).parser(Parser.htmlParser());
    final Document parsed = connection.get();

    final Connection.Request request = connection.request();
    assertTrue(request.parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    final String actual = StringUtil.normaliseWhitespace(parsed.outerHtml());
    assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", actual);
}
 
Example #18
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void caseSensitiveParseTree() {
    // With case-preserving settings, <X> must keep its upper-case tag name.
    Parser caseKeepingParser = Parser.htmlParser();
    caseKeepingParser.settings(ParseSettings.preserveCase);

    Document doc = caseKeepingParser.parseInput("<r><X>A</X><y>B</y></r>", "");
    String bodyHtml = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals("<r> <X> A </X> <y> B </y> </r>", bodyHtml);
}
 
Example #19
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void doesNotFindShortestMatchingEntity() {
    // The html5-defined approach of trimming a candidate entity until some name matches was
    // dropped, because it led to spurious matches against the author's intent.
    final String input = "One &clubsuite; &clubsuit;";
    final Document parsed = Jsoup.parse(input);

    // Unknown "&clubsuite;" is kept as escaped text; known "&clubsuit;" becomes the symbol.
    final String expectedBody = StringUtil.normaliseWhitespace("One &amp;clubsuite; ♣");
    final String actualBody = parsed.body().html();
    assertEquals(expectedBody, actualBody);
}
 
Example #20
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void handleNullContextInParseFragment() {
    List<Node> nodes = Parser.parseFragment("<ol><li>One</li></ol><p>Two</p>", null, "http://example.com/");

    // No context element means a document gets created; what comes back is its <html> node, not the document.
    assertEquals(1, nodes.size());
    assertEquals("html", nodes.get(0).nodeName());

    String expected = "<html> <head></head> <body> <ol> <li>One</li> </ol> <p>Two</p> </body> </html>";
    assertEquals(expected, StringUtil.normaliseWhitespace(nodes.get(0).outerHtml()));
}
 
Example #21
Source File: HtmlTreeBuilder.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Walks the stack using its descending iterator, looking for one of the target names before
 * hitting a scope boundary.
 *
 * @param targetNames element names that count as a match
 * @param baseTypes element names that end the search
 * @param extraTypes further element names that end the search; may be {@code null}
 * @return {@code true} if a target is found first, {@code false} if a boundary is found first
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (Iterator<Element> cursor = stack.descendingIterator(); cursor.hasNext(); ) {
        String elName = cursor.next().nodeName();
        if (StringUtil.in(elName, targetNames)) {
            return true;
        }
        boolean atBoundary = StringUtil.in(elName, baseTypes)
                || (extraTypes != null && StringUtil.in(elName, extraTypes));
        if (atBoundary) {
            return false;
        }
    }
    // Running off the end of the stack without a match or a boundary is treated as a failure.
    Validate.fail("Should not be reachable");
    return false;
}
 
Example #22
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void fetchHandlesXml() throws IOException {
    // No parser is requested here, so the XML content should be auto-detected
    // and parsed with the XML parser.
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml");
    Document doc = con.get();

    Connection.Request req = con.request();
    assertTrue(req.parser().getTreeBuilder() instanceof XmlTreeBuilder);

    String serialised = StringUtil.normaliseWhitespace(doc.outerHtml());
    assertEquals("<xml> <link> one </link> <table> Two </table> </xml>", serialised);
}
 
Example #23
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Removes elements from the end of the stack until one whose name is in {@code elNames} has
 * been removed, or the stack is empty.
 *
 * @param elNames element names that stop the removal (the matching element is removed too)
 */
void popStackToClose(String... elNames) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        Element removed = stack.remove(pos);
        if (StringUtil.in(removed.nodeName(), elNames)) {
            return;
        }
        pos--;
    }
}
 
Example #24
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Removes elements from the end of the stack until the last one is named "html" or has a
 * name in {@code nodeNames}; that element itself stays on the stack.
 *
 * @param nodeNames element names at which removal stops
 */
private void clearStackToContext(String... nodeNames) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        String name = stack.get(pos).nodeName();
        if (StringUtil.in(name, nodeNames) || name.equals("html")) {
            return;
        }
        stack.remove(pos);
        pos--;
    }
}
 
Example #25
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void selfClosingOnNonvoidIsError() {
    final String input = "<p>test</p><div /><div>Two</div>";

    // Parsing with error tracking must report exactly one error, for the self-closed <div />.
    final Parser trackingParser = Parser.htmlParser().setTrackErrors(5);
    trackingParser.parseInput(input, "");
    assertEquals(1, trackingParser.getErrors().size());
    assertEquals("18: Tag cannot be self closing; not a void tag", trackingParser.getErrors().get(0).toString());

    // The input is not valid as-is, and cleaning turns the self-closed div into an empty one.
    assertFalse(Jsoup.isValid(input, Whitelist.relaxed()));
    final String cleaned = StringUtil.normaliseWhitespace(Jsoup.clean(input, Whitelist.relaxed()));
    assertEquals("<p>test</p> <div></div> <div> Two </div>", cleaned);
}
 
Example #26
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Searches the stack from its end for an element named {@code targetName}, giving up at the
 * first element whose name is not listed in {@code TagSearchSelectScope}.
 *
 * @param targetName the element name to look for
 * @return {@code true} if the target is found before the search is cut short
 */
boolean inSelectScope(String targetName) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        String elName = stack.get(pos).nodeName();
        if (elName.equals(targetName)) {
            return true;
        }
        // Only names listed in TagSearchSelectScope may be skipped over; anything else ends the search.
        if (!StringUtil.in(elName, TagSearchSelectScope)) {
            return false;
        }
        pos--;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example #27
Source File: HtmlToPlainText.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Called when a node is first visited: emits the text of text nodes, a bullet for list items,
 * an indent for definition terms and a newline before paragraphs, headings and table rows.
 *
 * @param node the node being entered
 * @param depth the node's depth (unused)
 */
public void head(Node node, int depth) {
    String name = node.nodeName();

    // Decide what (if anything) this node contributes, then append it in one place.
    final String output;
    if (node instanceof TextNode) {
        output = ((TextNode) node).text(); // TextNodes carry all user-readable text in the DOM.
    } else if (name.equals("li")) {
        output = "\n * ";
    } else if (name.equals("dt")) {
        output = "  ";
    } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        output = "\n";
    } else {
        return;
    }
    append(output);
}
 
Example #28
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Appends a text node's whole text to the accumulator: verbatim when whitespace is preserved
 * for the node's parent, otherwise with whitespace normalised.
 *
 * @param accum builder receiving the text
 * @param textNode the text node to append
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    String text = textNode.getWholeText();

    if (!preserveWhitespace(textNode.parentNode)) {
        StringUtil.appendNormalisedWhitespace(accum, text, TextNode.lastCharIsWhitespace(accum));
        return;
    }
    accum.append(text);
}
 
Example #29
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the whole text of {@code textNode} to {@code accum}, normalising its whitespace unless
 * the node's parent preserves whitespace.
 *
 * @param accum builder receiving the text
 * @param textNode the text node whose text is added
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();
    final boolean keepVerbatim = preserveWhitespace(textNode.parentNode);

    if (keepVerbatim) {
        accum.append(wholeText);
    } else {
        StringUtil.appendNormalisedWhitespace(accum, wholeText, TextNode.lastCharIsWhitespace(accum));
    }
}
 
Example #30
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Searches the stack from its end for one of the target names, stopping at the first scope
 * boundary.
 *
 * @param targetNames element names that count as a match
 * @param baseTypes element names that end the search
 * @param extraTypes further element names that end the search; may be {@code null}
 * @return {@code true} if a target is found first, {@code false} if a boundary is found first
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    int pos = stack.size() - 1;
    while (pos >= 0) {
        String elName = stack.get(pos).nodeName();
        if (StringUtil.in(elName, targetNames)) {
            return true;
        }
        boolean atBoundary = StringUtil.in(elName, baseTypes)
                || (extraTypes != null && StringUtil.in(elName, extraTypes));
        if (atBoundary) {
            return false;
        }
        pos--;
    }
    // Reaching the start of the stack without a match or a boundary is treated as a failure.
    Validate.fail("Should not be reachable");
    return false;
}