Java Code Examples for org.jsoup.helper.Validate#fail()

The following examples show how to use org.jsoup.helper.Validate#fail() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PrefixXmlTreeBuilder.java    From CrawlerPack with Apache License 2.0 6 votes vote down vote up
protected boolean process(Token token) {
    switch (token.type) {
        case StartTag:
            insert(token.asStartTag());
            break;
        case EndTag:
            popStackToClose(token.asEndTag());
            break;
        case Comment:
            insert(token.asComment());
            break;
        case Character:
            insert(token.asCharacter());
            break;
        case Doctype:
            insert(token.asDoctype());
            break;
        case EOF: // could put some normalisation here if desired
            break;
        default:
            Validate.fail("Unexpected token type: " + token.type);
    }

    return true;
}
 
Example 2
Source File: XmlTreeBuilder.java    From jsoup-learning with MIT License 6 votes vote down vote up
@Override
protected boolean process(Token token) {
    // start tag, end tag, doctype, comment, character, eof
    switch (token.type) {
        case StartTag:
            insert(token.asStartTag());
            break;
        case EndTag:
            popStackToClose(token.asEndTag());
            break;
        case Comment:
            insert(token.asComment());
            break;
        case Character:
            insert(token.asCharacter());
            break;
        case Doctype:
            insert(token.asDoctype());
            break;
        case EOF: // could put some normalisation here if desired
            break;
        default:
            Validate.fail("Unexpected token type: " + token.type);
    }
    return true;
}
 
Example 3
Source File: XmlTreeBuilder.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Override
protected boolean process(Token token) {
    // start tag, end tag, doctype, comment, character, eof
    switch (token.type) {
        case StartTag:
            insert(token.asStartTag());
            break;
        case EndTag:
            popStackToClose(token.asEndTag());
            break;
        case Comment:
            insert(token.asComment());
            break;
        case Character:
            insert(token.asCharacter());
            break;
        case Doctype:
            insert(token.asDoctype());
            break;
        case EOF: // could put some normalisation here if desired
            break;
        default:
            Validate.fail("Unexpected token type: " + token.type);
    }
    return true;
}
 
Example 4
Source File: XmlTreeBuilder.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Override
protected boolean process(Token token) {
    // start tag, end tag, doctype, comment, character, eof
    switch (token.type) {
        case StartTag:
            insert(token.asStartTag());
            break;
        case EndTag:
            popStackToClose(token.asEndTag());
            break;
        case Comment:
            insert(token.asComment());
            break;
        case Character:
            insert(token.asCharacter());
            break;
        case Doctype:
            insert(token.asDoctype());
            break;
        case EOF: // could put some normalisation here if desired
            break;
        default:
            Validate.fail("Unexpected token type: " + token.type);
    }
    return true;
}
 
Example 5
Source File: XmlTreeBuilder.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Override
protected boolean process(Token token) {
    // start tag, end tag, doctype, comment, character, eof
    switch (token.type) {
        case StartTag:
            insert(token.asStartTag());
            break;
        case EndTag:
            popStackToClose(token.asEndTag());
            break;
        case Comment:
            insert(token.asComment());
            break;
        case Character:
            insert(token.asCharacter());
            break;
        case Doctype:
            insert(token.asDoctype());
            break;
        case EOF: // could put some normalisation here if desired
            break;
        default:
            Validate.fail("Unexpected token type: " + token.type);
    }
    return true;
}
 
Example 6
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (StringUtil.in(elName, targetNames))
            return true;
        if (StringUtil.in(elName, baseTypes))
            return false;
        if (extraTypes != null && StringUtil.in(elName, extraTypes))
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 7
Source File: HtmlTreeBuilder.java    From jsoup-learning with MIT License 5 votes vote down vote up
boolean inSelectScope(String targetName) {
    Iterator<Element> it = stack.descendingIterator();
    while (it.hasNext()) {
        Element el = it.next();
        String elName = el.nodeName();
        if (elName.equals(targetName))
            return true;
        if (!StringUtil.in(elName, "optgroup", "option")) // all elements except
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 8
Source File: HtmlTreeBuilder.java    From jsoup-learning with MIT License 5 votes vote down vote up
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    Iterator<Element> it = stack.descendingIterator();
    while (it.hasNext()) {
        Element el = it.next();
        String elName = el.nodeName();
        if (StringUtil.in(elName, targetNames))
            return true;
        if (StringUtil.in(elName, baseTypes))
            return false;
        if (extraTypes != null && StringUtil.in(elName, extraTypes))
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 9
Source File: TokenQueue.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
 * and leave " four" on the queue. Unbalanced openers and closers can be quoted (with ' or ") or escaped (with \). Those escapes will be left
 * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
 * contains text strings; use unescape for that.
 * @param open opener
 * @param close closer
 * @return data matched from the queue
 */
public String chompBalanced(char open, char close) {
    int start = -1;
    int end = -1;
    int depth = 0;
    char last = 0;
    boolean inQuote = false;

    do {
        if (isEmpty()) break;
        Character c = consume();
        if (last == 0 || last != ESC) {
            if ((c.equals('\'') || c.equals('"')) && c != open)
                inQuote = !inQuote;
            if (inQuote)
                continue;
            if (c.equals(open)) {
                depth++;
                if (start == -1)
                    start = pos;
            }
            else if (c.equals(close))
                depth--;
        }

        if (depth > 0 && last != 0)
            end = pos; // don't include the outer match pair in the return
        last = c;
    } while (depth > 0);
    final String out = (end >= 0) ? queue.substring(start, end) : "";
    if (depth > 0) {// ran out of queue before seeing enough )
        Validate.fail("Did not find balanced marker at '" + out + "'");
    }
    return out;
}
 
Example 10
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
boolean inSelectScope(String targetName) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (elName.equals(targetName))
            return true;
        if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 11
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (StringUtil.in(elName, targetNames))
            return true;
        if (StringUtil.in(elName, baseTypes))
            return false;
        if (extraTypes != null && StringUtil.in(elName, extraTypes))
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 12
Source File: TokenQueue.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
 * and leave " four" on the queue. Unbalanced openers and closers can be quoted (with ' or ") or escaped (with \). Those escapes will be left
 * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
 * contains text strings; use unescape for that.
 * @param open opener
 * @param close closer
 * @return data matched from the queue
 */
public String chompBalanced(char open, char close) {
    int start = -1;
    int end = -1;
    int depth = 0;
    char last = 0;
    boolean inQuote = false;

    do {
        if (isEmpty()) break;
        Character c = consume();
        if (last == 0 || last != ESC) {
            if ((c.equals('\'') || c.equals('"')) && c != open)
                inQuote = !inQuote;
            if (inQuote)
                continue;
            if (c.equals(open)) {
                depth++;
                if (start == -1)
                    start = pos;
            }
            else if (c.equals(close))
                depth--;
        }

        if (depth > 0 && last != 0)
            end = pos; // don't include the outer match pair in the return
        last = c;
    } while (depth > 0);
    final String out = (end >= 0) ? queue.substring(start, end) : "";
    if (depth > 0) {// ran out of queue before seeing enough )
        Validate.fail("Did not find balanced marker at '" + out + "'");
    }
    return out;
}
 
Example 13
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
boolean inSelectScope(String targetName) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (elName.equals(targetName))
            return true;
        if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 14
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (StringUtil.in(elName, targetNames))
            return true;
        if (StringUtil.in(elName, baseTypes))
            return false;
        if (extraTypes != null && StringUtil.in(elName, extraTypes))
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 15
Source File: HtmlTreeBuilder.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
boolean inSelectScope(String targetName) {
    for (int pos = stack.size() -1; pos >= 0; pos--) {
        Element el = stack.get(pos);
        String elName = el.nodeName();
        if (elName.equals(targetName))
            return true;
        if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except
            return false;
    }
    Validate.fail("Should not be reachable");
    return false;
}
 
Example 16
Source File: Tokeniser.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
int[] consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
    if (reader.isEmpty())
        return null;
    if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
        return null;
    if (reader.matchesAnySorted(notCharRefCharsSorted))
        return null;

    final int[] codeRef = codepointHolder;
    reader.mark();
    if (reader.matchConsume("#")) { // numbered
        boolean isHexMode = reader.matchConsumeIgnoreCase("X");
        String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
        if (numRef.length() == 0) { // didn't match anything
            characterReferenceError("numeric reference with no numerals");
            reader.rewindToMark();
            return null;
        }
        if (!reader.matchConsume(";"))
            characterReferenceError("missing semicolon"); // missing semi
        int charval = -1;
        try {
            int base = isHexMode ? 16 : 10;
            charval = Integer.valueOf(numRef, base);
        } catch (NumberFormatException ignored) {
        } // skip
        if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
            characterReferenceError("character outside of valid range");
            codeRef[0] = replacementChar;
            return codeRef;
        } else {
            // todo: implement number replacement table
            // todo: check for extra illegal unicode points as parse errors
            codeRef[0] = charval;
            return codeRef;
        }
    } else { // named
        // get as many letters as possible, and look for matching entities.
        String nameRef = reader.consumeLetterThenDigitSequence();
        boolean looksLegit = reader.matches(';');
        // found if a base named entity without a ;, or an extended entity with the ;.
        boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));

        if (!found) {
            reader.rewindToMark();
            if (looksLegit) // named with semicolon
                characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
            return null;
        }
        if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
            // don't want that to match
            reader.rewindToMark();
            return null;
        }
        if (!reader.matchConsume(";"))
            characterReferenceError("missing semicolon"); // missing semi
        int numChars = Entities.codepointsForName(nameRef, multipointHolder);
        if (numChars == 1) {
            codeRef[0] = multipointHolder[0];
            return codeRef;
        } else if (numChars ==2) {
            return multipointHolder;
        } else {
            Validate.fail("Unexpected characters returned for " + nameRef);
            return multipointHolder;
        }
    }
}
 
Example 17
Source File: Tokeniser.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
int[] consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
    if (reader.isEmpty())
        return null;
    if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
        return null;
    if (reader.matchesAnySorted(notCharRefCharsSorted))
        return null;

    final int[] codeRef = codepointHolder;
    reader.mark();
    if (reader.matchConsume("#")) { // numbered
        boolean isHexMode = reader.matchConsumeIgnoreCase("X");
        String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
        if (numRef.length() == 0) { // didn't match anything
            characterReferenceError("numeric reference with no numerals");
            reader.rewindToMark();
            return null;
        }
        if (!reader.matchConsume(";"))
            characterReferenceError("missing semicolon"); // missing semi
        int charval = -1;
        try {
            int base = isHexMode ? 16 : 10;
            charval = Integer.valueOf(numRef, base);
        } catch (NumberFormatException ignored) {
        } // skip
        if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
            characterReferenceError("character outside of valid range");
            codeRef[0] = replacementChar;
            return codeRef;
        } else {
            // todo: implement number replacement table
            // todo: check for extra illegal unicode points as parse errors
            codeRef[0] = charval;
            return codeRef;
        }
    } else { // named
        // get as many letters as possible, and look for matching entities.
        String nameRef = reader.consumeLetterThenDigitSequence();
        boolean looksLegit = reader.matches(';');
        // found if a base named entity without a ;, or an extended entity with the ;.
        boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));

        if (!found) {
            reader.rewindToMark();
            if (looksLegit) // named with semicolon
                characterReferenceError(String.format("invalid named referenece '%s'", nameRef));
            return null;
        }
        if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
            // don't want that to match
            reader.rewindToMark();
            return null;
        }
        if (!reader.matchConsume(";"))
            characterReferenceError("missing semicolon"); // missing semi
        int numChars = Entities.codepointsForName(nameRef, multipointHolder);
        if (numChars == 1) {
            codeRef[0] = multipointHolder[0];
            return codeRef;
        } else if (numChars ==2) {
            return multipointHolder;
        } else {
            Validate.fail("Unexpected characters returned for " + nameRef);
            return multipointHolder;
        }
    }
}