Java Code Examples for org.w3c.dom.Node#normalize()

The following examples show how to use org.w3c.dom.Node#normalize() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: AIMLProcessor.java From aiml-java-interpreter with GNU General Public License v3.0

6 votes

private String recurseParse(Node node, List<String> stars) {
    node.normalize();
    String nodeName = node.getNodeName();
    switch (nodeName) {
        case text:
            return textParse(node);
        case template:
            return getTemplateValue(node, stars);
        case random:
            return randomParse(node);
        case srai:
            return sraiParse(node, stars);
        case set:
            setParse(node, stars);
            return "";//FIXME?
        case bot:
            return botInfoParse(node);
        case star:
            return starParse(node, stars);
        case think:
            getTemplateValue(node, stars);
            return "";
    }
    return "";
}

Example 2

Source File: Nodes.java From xmlunit with Apache License 2.0

5 votes

/**
 * Creates a new Node (of the same type as the original node) that
 * is similar to the orginal but doesn't contain any empty text or
 * CDATA nodes and where all textual content including attribute
 * values or comments are trimmed.
 */
public static Node stripWhitespace(Node original) {
    Node cloned = original.cloneNode(true);
    cloned.normalize();
    handleWsRec(cloned, false);
    return cloned;
}

Example 3

Source File: AimlLoader.java From aiml-java-interpreter with GNU General Public License v3.0

5 votes

private AimlCategory parseCategory(Node node, String topic) {
    AimlCategory category = new AimlCategory();
    category.setTopic(topic);
    NodeList childNodes = node.getChildNodes();
    for (int i = 0; i < childNodes.getLength(); ++i) {
        Node childNode = childNodes.item(i);
        childNode.normalize();
        String childNodeName = childNode.getNodeName();
        switch (childNodeName) {
            case AimlTag.text:
            case AimlTag.comment:
                break;
            case AimlTag.pattern:
                category.setPattern(node2String(childNode));
                break;
            case AimlTag.template:
                category.setTemplate(childNode);
                break;
            case AimlTag.topic:
                category.setTopic(node2String(childNode));
                break;
            case AimlTag.that:
                category.setThat(node2String(childNode));
                break;
            default:
                log.warn("Wrong structure: <category> tag contain " + childNodeName + " tag");
        }
    }
    return category;
}

Example 4

Source File: ElementImpl.java From hottub with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 5

Source File: NormalizedSource.java From xmlunit with Apache License 2.0

4 votes

private void setNormalizedNode(Node n) {
    if (n != null) {
        n.normalize();
    }
    super.setNode(n);
}

Example 6

Source File: ElementImpl.java From openjdk-8 with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 7

Source File: ElementImpl.java From openjdk-8-source with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 8

Source File: ElementImpl.java From jdk1.8-source-analysis with Apache License 2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 9

Source File: ElementImpl.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of Text
 * -- is considered "markup" and will _not_ be merged either with normal
 * Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if (kid.getNodeType() == Node.TEXT_NODE) {
            // If an adjacent text node, merge it with kid
            if (next != null && next.getNodeType() == Node.TEXT_NODE) {
                ((Text) kid).appendData(next.getNodeValue());
                removeChild(next);
                next = kid; // Don't advance; there might be another.
            } else {
                // If kid is empty, remove it
                if (kid.getNodeValue() == null || kid.getNodeValue().length() == 0) {
                    removeChild(kid);
                }
            }
        } // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if (attributes != null) {
        for (int i = 0; i < attributes.getLength(); ++i) {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.
    isNormalized(true);
}

Example 10

Source File: ElementImpl.java From Bytecoder with Apache License 2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of Text
 * -- is considered "markup" and will _not_ be merged either with normal
 * Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if (kid.getNodeType() == Node.TEXT_NODE) {
            // If an adjacent text node, merge it with kid
            if (next != null && next.getNodeType() == Node.TEXT_NODE) {
                ((Text) kid).appendData(next.getNodeValue());
                removeChild(next);
                next = kid; // Don't advance; there might be another.
            } else {
                // If kid is empty, remove it
                if (kid.getNodeValue() == null || kid.getNodeValue().length() == 0) {
                    removeChild(kid);
                }
            }
        } // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if (attributes != null) {
        for (int i = 0; i < attributes.getLength(); ++i) {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.
    isNormalized(true);
}

Example 11

Source File: ElementImpl.java From openjdk-jdk8u-backup with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 12

Source File: ElementImpl.java From openjdk-jdk8u with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 13

Source File: ElementImpl.java From JDKSourceCode1.8 with MIT License

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 14

Source File: ElementImpl.java From jdk8u60 with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 15

Source File: ElementImpl.java From TencentKona-8 with GNU General Public License v2.0

4 votes

/**
 * In "normal form" (as read from a source file), there will never be two
 * Text children in succession. But DOM users may create successive Text
 * nodes in the course of manipulating the document. Normalize walks the
 * sub-tree and merges adjacent Texts, as if the DOM had been written out
 * and read back in again. This simplifies implementation of higher-level
 * functions that may want to assume that the document is in standard form.
 * <p>
 * To normalize a Document, normalize its top-level Element child.
 * <p>
 * As of PR-DOM-Level-1-19980818, CDATA -- despite being a subclass of
 * Text -- is considered "markup" and will _not_ be merged either with
 * normal Text or with other CDATASections.
 */
public void normalize() {
    // No need to normalize if already normalized.
    if (isNormalized()) {
        return;
    }
    if (needsSyncChildren()) {
        synchronizeChildren();
    }
    ChildNode kid, next;
    for (kid = firstChild; kid != null; kid = next) {
        next = kid.nextSibling;

        // If kid is a text node, we need to check for one of two
        // conditions:
        //   1) There is an adjacent text node
        //   2) There is no adjacent text node, but kid is
        //      an empty text node.
        if ( kid.getNodeType() == Node.TEXT_NODE )
        {
            // If an adjacent text node, merge it with kid
            if ( next!=null && next.getNodeType() == Node.TEXT_NODE )
            {
                ((Text)kid).appendData(next.getNodeValue());
                removeChild( next );
                next = kid; // Don't advance; there might be another.
            }
            else
            {
                // If kid is empty, remove it
                if ( kid.getNodeValue() == null || kid.getNodeValue().length() == 0 ) {
                    removeChild( kid );
                }
            }
        }

        // Otherwise it might be an Element, which is handled recursively
        else if (kid.getNodeType() == Node.ELEMENT_NODE) {
            kid.normalize();
        }
    }

    // We must also normalize all of the attributes
    if ( attributes!=null )
    {
        for( int i=0; i<attributes.getLength(); ++i )
        {
            Node attr = attributes.item(i);
            attr.normalize();
        }
    }

    // changed() will have occurred when the removeChild() was done,
    // so does not have to be reissued.

    isNormalized(true);
}

Example 16

Source File: Nodes.java From xmlunit with Apache License 2.0

3 votes

/**
 * Creates a new Node (of the same type as the original node) that
 * is similar to the orginal but doesn't contain any empty text or
 * CDATA nodes and where all textual content including attribute
 * values or comments are trimmed and normalized.
 *
 * <p>"normalized" in this context means all whitespace characters
 * are replaced by space characters and consecutive whitespace
 * characaters are collapsed.</p>
 */
public static Node normalizeWhitespace(Node original) {
    Node cloned = original.cloneNode(true);
    cloned.normalize();
    handleWsRec(cloned, true);
    return cloned;
}

Example 17

Source File: Nodes.java From xmlunit with Apache License 2.0

3 votes

/**
 * Creates a new Node (of the same type as the original node) that
 * is similar to the orginal but doesn't contain any text or CDATA
 * nodes that only consist of whitespace.
 *
 * <p>This doesn't have any effect if applied to a text or CDATA
 * node itself.</p>
 *
 * @since XMLUnit 2.6.0
 */
public static Node stripElementContentWhitespace(Node original) {
    Node cloned = original.cloneNode(true);
    cloned.normalize();
    stripECW(cloned);
    return cloned;
}