Java Code Examples for org.htmlcleaner.HtmlCleaner#getProperties()

The following examples show how to use org.htmlcleaner.HtmlCleaner#getProperties(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: HtmlSpanner.java From SDHtmlTextView with Apache License 2.0

6 votes

/**
 * Creates a new {@link HtmlCleaner} and configures its {@link CleanerProperties}
 * before handing it back to the caller.
 *
 * @return the configured cleaner instance
 */
private static HtmlCleaner createHtmlCleaner() {
    final HtmlCleaner cleaner = new HtmlCleaner();
    final CleanerProperties props = cleaner.getProperties();

    // XML escaping and declaration handling.
    props.setAdvancedXmlEscape(true);
    props.setOmitXmlDeclaration(true);
    props.setOmitDoctypeDeclaration(false);

    // Entity / character handling.
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setRecognizeUnicodeChars(true);

    // Markup shape.
    props.setIgnoreQuestAndExclam(true);
    props.setUseEmptyElementTags(false);

    // Comma-separated tag names handed to the cleaner's prune-tags setting.
    props.setPruneTags("script,title");

    return cleaner;
}

Example 2

Source File: XMLEscape.java From xframium-java with GNU General Public License v3.0

6 votes

/**
 * Serializes the given markup with a namespace-aware {@link PrettyXmlSerializer},
 * removes lines that contain only spaces or tabs, and passes the result through
 * {@code escapeXML}.
 *
 * @param htmlIn the markup to process
 * @return the processed text, or {@code null} if any exception is raised along the way
 */
public static String toHTML( String htmlIn )
{
    try
    {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        htmlCleaner.getProperties().setNamespacesAware( true );

        // Two-space indent string for the pretty printer.
        XmlSerializer serializer = new PrettyXmlSerializer( htmlCleaner.getProperties(), "  " );
        String serialized = serializer.getAsString( htmlIn );

        // Drop whitespace-only lines before escaping.
        String withoutBlankLines = serialized.replaceAll("(?m)^[ \t]*\r?\n", "");

        return escapeXML( withoutBlankLines );
    }
    catch( Exception e )
    {
        e.printStackTrace();
        return null;
    }
}

Example 3

Source File: ResponseRenderPrintWriter.java From zrlog with Apache License 2.0

6 votes

/**
 * Handles a {@code plugin} tag that carries a {@code name} attribute: the tag is marked
 * as foreign markup, given an extra {@code _tmp} attribute, serialized, and the serialized
 * text is stored in {@code plugin} as the key for the content fetched from the plugin URL.
 * Any other tag is left untouched.
 *
 * @param htmlCleaner cleaner whose properties configure the serializer
 * @param plugin      receives serialized-tag to fetched-content entries
 * @param tag         the tag under inspection
 * @param tagName     name of {@code tag}
 * @throws IOException if serializing the tag fails
 */
private void parseCustomHtmlTag(HtmlCleaner htmlCleaner, Map<String, String> plugin, TagNode tag, String tagName) throws IOException {
    boolean namedPluginTag = "plugin".equals(tagName) && tag.hasAttribute("name");
    if (!namedPluginTag) {
        return;
    }

    tag.setForeignMarkup(true);

    // Copy the attributes and add a "_tmp" entry holding the current time in milliseconds.
    Map<String, String> attributes = new LinkedHashMap<>(tag.getAttributes());
    attributes.put("_tmp", String.valueOf(System.currentTimeMillis()));
    tag.setAttributes(attributes);

    StringWriter serializedTag = new StringWriter();
    tag.serialize(new SimpleHtmlSerializer(htmlCleaner.getProperties()), serializedTag);
    String placeholder = serializedTag.toString();

    try {
        String path = "/" + tag.getAttributeByName("name") + "/" + tag.getAttributeByName("view");
        String url = tag.hasAttribute("param")
                ? path + "?" + tag.getAttributeByName("param")
                : path;

        CloseResponseHandle response = PluginHelper.getContext(url, "GET", request, false, adminTokenVO);
        byte[] payload = IOUtil.getByteByInputStream(response.getT().getEntity().getContent());
        plugin.put(placeholder, new String(payload, StandardCharsets.UTF_8));
    } catch (Exception e) {
        // Failures while fetching plugin content are logged and otherwise ignored.
        LOGGER.error("", e);
    }
}

Example 4

Source File: UtilsStaticAnalyzer.java From apogen with Apache License 2.0

5 votes

/**
 * Cleans {@code dom} with HtmlCleaner, evaluates the (lower-cased) XPath {@code xp}
 * against the cleaned tree, and derives a string from the first matching element via
 * {@code digTheTagTreeForAString}.
 *
 * @param xp  XPath expression; it is lower-cased and any {@code "html[1]/"} occurrence is
 *            replaced by {@code "/"} before evaluation
 * @param dom the HTML document to clean and search
 * @return the string produced for the first matching element, or {@code ""} when the XPath
 *         matches nothing, the first match is not a {@link TagNode}, or evaluation fails
 * @throws UnsupportedEncodingException declared for callers; kept from the original signature
 */
private static String digForAMeaningfulName(String xp, String dom) throws UnsupportedEncodingException {

    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
    String xpath = xp.toLowerCase(java.util.Locale.ROOT);

    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);
    props.setOmitDoctypeDeclaration(true);

    TagNode node = cleaner.clean(dom);

    // workaround: htmlcleaner works with rel xpaths
    xpath = xpath.replace("html[1]/", "/");
    try {
        Object[] result = node.evaluateXPath(xpath);

        // Only element results can be dug into; anything else falls through to "".
        if (result.length > 0 && result[0] instanceof TagNode) {
            return digTheTagTreeForAString((TagNode) result[0]);
        }

    } catch (XPatherException e) {
        e.printStackTrace();
    }

    // couldn't find a representative string :(
    return "";
}

Example 5

Source File: HTTPLinkCheck.java From xframium-java with GNU General Public License v3.0

5 votes

/**
 * Reads the whole stream and, if its text contains {@code "html"}, serializes it with a
 * namespace-aware {@link PrettyXmlSerializer}, removes whitespace-only lines, passes the
 * result through {@code escapeXML}, and strips the HTML 2.0 DOCTYPE declaration.
 *
 * @param htmlIn the stream to read; it is consumed to the end but not closed here
 * @return a stream over the processed text, or {@code null} when the content does not
 *         contain {@code "html"} or any exception is raised
 */
public InputStream toHTML( InputStream htmlIn )
{
    try
    {
        // Collect the raw bytes first and decode once at the end: decoding every
        // 512-byte chunk on its own corrupts multi-byte characters that straddle
        // a chunk boundary.
        java.io.ByteArrayOutputStream rawBytes = new java.io.ByteArrayOutputStream();
        byte[] buffer = new byte[ 512 ];
        int bytesRead;
        while ( ( bytesRead = htmlIn.read( buffer ) ) != -1 )
        {
            rawBytes.write( buffer, 0, bytesRead );
        }

        // Decoded with the platform default charset, matching getBytes() below.
        String content = rawBytes.toString();

        if ( content.indexOf( "html" ) == -1 )
        {
            return null;
        }

        HtmlCleaner cleaner = new HtmlCleaner();
        cleaner.getProperties().setNamespacesAware( true );

        XmlSerializer xmlSerializer = new PrettyXmlSerializer( cleaner.getProperties(), "  " );
        String htmlData = xmlSerializer.getAsString( content );

        // Drop whitespace-only lines before escaping.
        htmlData = escapeXML( htmlData.replaceAll("(?m)^[ \t]*\r?\n", "") );

        htmlData = htmlData.replace( "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">", "" );
        return new ByteArrayInputStream( htmlData.getBytes() );
    }
    catch( Exception e )
    {
        // Best-effort conversion: any failure is reported to the caller as null.
        return null;
    }
}

Example 6

Source File: ResponseRenderPrintWriter.java From zrlog with Apache License 2.0

5 votes

/**
 * Cleans the given body with HtmlCleaner, runs {@code addStaticResourceFlag} and
 * {@code parseCustomHtmlTag} on every element, re-serializes the tree, substitutes the
 * collected plugin placeholders, and appends an HTML comment with the elapsed
 * milliseconds since {@code startTime}.
 *
 * @param inputBody the HTML body to process
 * @return the processed HTML
 * @throws IOException if serialization of the tree or of a plugin tag fails
 */
private String getCompressAndParseHtml(String inputBody) throws IOException {
    // Do not output the "none" tag: a trailing endFlag is cut off before parsing.
    String html = inputBody.endsWith(endFlag)
            ? inputBody.substring(0, inputBody.length() - endFlag.length())
            : inputBody;

    HtmlCleaner htmlCleaner = new HtmlCleaner();
    htmlCleaner.getProperties().setCharset(charset);
    htmlCleaner.getProperties().setUseCdataForScriptAndStyle(false);
    TagNode root = htmlCleaner.clean(html);

    // Serialized plugin tag -> content that replaces it in the final output.
    Map<String, String> plugin = new HashMap<>();
    for (TagNode element : root.getAllElements(true)) {
        if (element == null) {
            continue;
        }
        String elementName = element.getName();
        addStaticResourceFlag(element, elementName);
        parseCustomHtmlTag(htmlCleaner, plugin, element, elementName);
    }

    StringWriter output = new StringWriter();
    root.serialize(new SimpleHtmlSerializer(htmlCleaner.getProperties()), output);
    html = output.toString();

    // The doctype, when the tree has one, is prepended to the serialized output.
    if (root.getDocType() != null) {
        html = root.getDocType() + html;
    }

    for (Map.Entry<String, String> replacement : plugin.entrySet()) {
        html = html.replace(replacement.getKey(), replacement.getValue());
    }

    long elapsedMillis = System.currentTimeMillis() - startTime;
    return html + "<!--" + elapsedMillis + "ms-->";

}