Java Code Examples for org.htmlcleaner.HtmlCleaner#getProperties()

The following examples show how to use org.htmlcleaner.HtmlCleaner#getProperties(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: HtmlSpanner.java    From SDHtmlTextView with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new {@code HtmlCleaner} and applies this class's fixed set of
 * cleaner properties to it.
 *
 * @return the freshly created cleaner, with its properties configured as below
 */
private static HtmlCleaner createHtmlCleaner() {
    final HtmlCleaner cleaner = new HtmlCleaner();
    final CleanerProperties props = cleaner.getProperties();

    // XML escaping.
    props.setAdvancedXmlEscape(true);

    // Declarations: the XML declaration is omitted, the doctype is not.
    props.setOmitXmlDeclaration(true);
    props.setOmitDoctypeDeclaration(false);

    // Entity and character handling.
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setRecognizeUnicodeChars(true);

    // Miscellaneous parsing/serialization switches.
    props.setIgnoreQuestAndExclam(true);
    props.setUseEmptyElementTags(false);

    // Tags handed to the cleaner for pruning.
    props.setPruneTags("script,title");

    return cleaner;
}
 
Example 2
Source File: XMLEscape.java    From xframium-java with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Serializes the given markup with HtmlCleaner's {@code PrettyXmlSerializer}
 * (namespace-aware, two-space indent), removes lines that contain only spaces
 * or tabs, and passes the result through {@code escapeXML}.
 *
 * @param htmlIn the markup to process, as a String
 * @return the processed String, or {@code null} if any exception is thrown
 *         while processing (the stack trace is printed in that case)
 */
public static String toHTML( String htmlIn )
{
    try
    {
        final HtmlCleaner htmlCleaner = new HtmlCleaner();
        htmlCleaner.getProperties().setNamespacesAware( true );

        final XmlSerializer serializer = new PrettyXmlSerializer( htmlCleaner.getProperties(), "  " );
        final String serialized = serializer.getAsString( htmlIn );

        // Drop lines consisting solely of spaces/tabs before escaping.
        final String withoutBlankLines = serialized.replaceAll("(?m)^[ \t]*\r?\n", "");

        return escapeXML( withoutBlankLines );
    }
    catch( Exception e )
    {
        e.printStackTrace();
        return null;
    }
}
 
Example 3
Source File: ResponseRenderPrintWriter.java    From zrlog with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a custom {@code plugin} tag: serializes the tag to a string and maps
 * that string to the content fetched for the plugin view.
 *
 * Tags whose name is not "plugin", or that lack a "name" attribute, are ignored.
 *
 * @param htmlCleaner cleaner whose properties are used to serialize the tag
 * @param plugin      receives one entry: serialized tag text -> fetched plugin content
 * @param tag         the tag to inspect; marked as foreign markup and given an
 *                    extra "_tmp" attribute when it is a plugin tag
 * @param tagName     the name of {@code tag}
 * @throws IOException declared for the tag serialization call
 */
private void parseCustomHtmlTag(HtmlCleaner htmlCleaner, Map<String, String> plugin, TagNode tag, String tagName) throws IOException {
    boolean isNamedPluginTag = "plugin".equals(tagName) && tag.hasAttribute("name");
    if (!isNamedPluginTag) {
        return;
    }

    tag.setForeignMarkup(true);
    // Copy the attributes and add a timestamp attribute
    // (presumably so the serialized placeholder text is distinctive - confirm).
    Map<String, String> attributes = new LinkedHashMap<>(tag.getAttributes());
    attributes.put("_tmp", String.valueOf(System.currentTimeMillis()));
    tag.setAttributes(attributes);

    // The serialized tag is the key that is later looked up in the page body.
    StringWriter serialized = new StringWriter();
    tag.serialize(new SimpleHtmlSerializer(htmlCleaner.getProperties()), serialized);
    String placeholder = serialized.toString();

    try {
        String path = "/" + tag.getAttributeByName("name") + "/" + tag.getAttributeByName("view");
        String url = tag.hasAttribute("param")
                ? path + "?" + tag.getAttributeByName("param")
                : path;
        CloseResponseHandle handle = PluginHelper.getContext(url, "GET", request, false, adminTokenVO);
        byte[] responseBytes = IOUtil.getByteByInputStream(handle.getT().getEntity().getContent());
        plugin.put(placeholder, new String(responseBytes, StandardCharsets.UTF_8));
    } catch (Exception e) {
        // Best effort: a failing plugin request is logged and the tag is left as is.
        LOGGER.error("", e);
    }
}
 
Example 4
Source File: UtilsStaticAnalyzer.java    From apogen with Apache License 2.0 5 votes vote down vote up
/**
 * Cleans {@code dom} with HtmlCleaner, evaluates the (lower-cased) XPath
 * {@code xp} against the cleaned tree and, if the first match is an element,
 * returns whatever {@code digTheTagTreeForAString} extracts from it.
 *
 * @param xp  XPath expression locating the element of interest
 * @param dom HTML source to clean and search
 * @return the string produced for the first matching element, or the empty
 *         string when nothing matches, the first match is not an element,
 *         or the XPath cannot be evaluated
 * @throws UnsupportedEncodingException declared by the original signature;
 *         presumably propagated from {@code digTheTagTreeForAString} - confirm
 */
private static String digForAMeaningfulName(String xp, String dom) throws UnsupportedEncodingException {

    // Locale.ROOT keeps lower-casing independent of the default locale
    // (e.g. under a Turkish locale "I" would otherwise become a dotless "ı").
    String xpath = xp.toLowerCase(java.util.Locale.ROOT);

    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);
    props.setOmitDoctypeDeclaration(true);

    TagNode node = cleaner.clean(dom);

    // workaround: htmlcleaner works with rel xpaths
    xpath = xpath.replace("html[1]/", "/");
    try {
        Object[] result = node.evaluateXPath(xpath);

        // Only cast when the first match really is an element; other result
        // kinds fall through to the empty-string default below.
        if (result.length > 0 && result[0] instanceof TagNode) {
            return digTheTagTreeForAString((TagNode) result[0]);
        }

    } catch (XPatherException e) {
        e.printStackTrace();
    }

    // couldn't find a representative string :(
    return "";
}
 
Example 5
Source File: HTTPLinkCheck.java    From xframium-java with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reads the whole stream, and if its text contains "html", serializes it with
 * HtmlCleaner's {@code PrettyXmlSerializer} (namespace-aware, two-space
 * indent), removes lines that contain only spaces or tabs, passes the result
 * through {@code escapeXML} and strips the HTML 2.0 doctype string.
 *
 * Bytes are decoded and re-encoded with the platform default charset.
 * The input stream is not closed by this method.
 *
 * @param htmlIn stream supplying the markup to process
 * @return a stream over the processed markup, or {@code null} when the content
 *         does not contain "html" or any exception is thrown while processing
 */
public InputStream toHTML( InputStream htmlIn )
{
    try
    {
        // Collect the raw bytes first and decode once at the end: decoding each
        // 512-byte chunk separately would corrupt multi-byte characters that
        // happen to straddle a chunk boundary.
        java.io.ByteArrayOutputStream rawBytes = new java.io.ByteArrayOutputStream();
        byte[] buffer = new byte[ 512 ];
        int bytesRead;
        while ( ( bytesRead = htmlIn.read( buffer ) ) != -1 )
        {
            rawBytes.write( buffer, 0, bytesRead );
        }
        String content = rawBytes.toString();

        if ( !content.contains( "html" ) )
        {
            return null;
        }

        HtmlCleaner cleaner = new HtmlCleaner();
        cleaner.getProperties().setNamespacesAware( true );

        XmlSerializer xmlSerializer = new PrettyXmlSerializer( cleaner.getProperties(), "  " );
        String htmlData = xmlSerializer.getAsString( content );

        // Drop lines consisting solely of spaces/tabs before escaping.
        htmlData = escapeXML( htmlData.replaceAll("(?m)^[ \t]*\r?\n", "") );

        htmlData = htmlData.replace( "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">", "" );
        return new ByteArrayInputStream( htmlData.getBytes() );
    }
    catch( Exception e )
    {
        // Best effort: any failure is reported to the caller as "no result".
        return null;
    }
}
 
Example 6
Source File: ResponseRenderPrintWriter.java    From zrlog with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the response body with HtmlCleaner, lets the per-tag hooks
 * ({@code addStaticResourceFlag}, {@code parseCustomHtmlTag}) process every
 * element, re-serializes the tree and substitutes the collected plugin
 * placeholders with their content.
 *
 * @param inputBody the HTML body to process
 * @return the re-serialized body, prefixed with the doctype when the parsed
 *         tree has one and suffixed with an HTML comment holding the elapsed
 *         milliseconds since {@code startTime}
 * @throws IOException declared for the serialization and tag-hook calls
 */
private String getCompressAndParseHtml(String inputBody) throws IOException {
    // Do not show the "none" tag: strip the trailing endFlag when present.
    String html = inputBody.endsWith(endFlag)
            ? inputBody.substring(0, inputBody.length() - endFlag.length())
            : inputBody;

    HtmlCleaner cleaner = new HtmlCleaner();
    cleaner.getProperties().setCharset(charset);
    cleaner.getProperties().setUseCdataForScriptAndStyle(false);
    TagNode root = cleaner.clean(html);

    // Serialized plugin tag -> content that should replace it in the output.
    Map<String, String> pluginReplacements = new HashMap<>();
    for (TagNode element : root.getAllElements(true)) {
        if (element == null) {
            continue;
        }
        String elementName = element.getName();
        addStaticResourceFlag(element, elementName);
        parseCustomHtmlTag(cleaner, pluginReplacements, element, elementName);
    }

    StringWriter out = new StringWriter();
    root.serialize(new SimpleHtmlSerializer(cleaner.getProperties()), out);
    String rendered = out.toString();
    if (root.getDocType() != null) {
        rendered = root.getDocType() + rendered;
    }

    for (Map.Entry<String, String> replacement : pluginReplacements.entrySet()) {
        rendered = rendered.replace(replacement.getKey(), replacement.getValue());
    }

    return rendered + "<!--" + (System.currentTimeMillis() - startTime) + "ms-->";
}