/* * Copyright 2015 herd contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.finra.herd.core; import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Entities; import org.jsoup.safety.Cleaner; import org.jsoup.safety.Whitelist; /** * HerdStringUtils */ public class HerdStringUtils { // Regex to check for CSV Injection. Any text that starts with "+", "=", "@", or "-" will be vulnerable to CSV Injection attack. private static final String CSV_INJECTION_REGEX = "^[+=@-].*"; // Hidden text content public static final String HIDDEN_TEXT = "hidden"; // Regex to check json password pattern, password wrapped in " or "\", like "jdbc.password":"password1" private static Pattern REGEX_JSON_PASSWORD = Pattern.compile("(\\\\?\".*?password\\\\?\":\\\\?\")[\\w\\p{Punct}&&[^&]]*?(\\\\?\")"); // Regex to check json password pattern, like "name": "password", "value": "password1" private static Pattern REGEX_JSON_PASSWORD2 = Pattern.compile("(\"name\": \".*?password\", \"value\": \")[\\w\\p{Punct}&&[^&]]*?\""); // Regex to check xml password pattern, like "<password>password1</password> private static Pattern REGEX_XML_PASSWORD = Pattern.compile("(<.*?password>)[\\w\\p{Punct}&&[^&]]*?<"); /** * Decodes and return the base64 encoded string. * * @param base64EncodedText the base64 encoded string * * @return the decoded string */ public static String decodeBase64(String base64EncodedText) { return StringUtils.toEncodedString(Base64.getDecoder().decode(base64EncodedText.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8); } /** * Truncates the description field to a configurable value thereby producing a 'short description' * * @param description the specified description * @param shortDescMaxLength the short description maximum length * * @return truncated (short) description */ public static String getShortDescription(String description, Integer shortDescMaxLength) { // Parse out only html tags, truncate and return // Do a partial HTML parse just in case there are some elements that don't have ending tags or the like String toParse = description != null ? description : ""; return StringUtils.left(stripHtml(toParse), shortDescMaxLength); } /** * Strips HTML tags from a given input String, allows some tags to be retained via a whitelist * * @param fragment the specified String * @param whitelistTags the specified whitelist tags * * @return cleaned String with allowed tags */ public static String stripHtml(String fragment, String... whitelistTags) { // Unescape HTML. String unEscapedFragment = StringEscapeUtils.unescapeHtml4(fragment); // Parse out html tags except those from a given list of whitelist tags Document dirty = Jsoup.parseBodyFragment(unEscapedFragment); Whitelist whitelist = new Whitelist(); for (String whitelistTag : whitelistTags) { // Get the actual tag name from the whitelist tag // this is vulnerable in general to complex tags but will suffice for our simple needs whitelistTag = StringUtils.removePattern(whitelistTag, "[^\\{IsAlphabetic}]"); // Add all specified tags to the whitelist while preserving inline css whitelist.addTags(whitelistTag).addAttributes(whitelistTag, "class"); } Cleaner cleaner = new Cleaner(whitelist); Document clean = cleaner.clean(dirty); // Set character encoding to UTF-8 and make sure no line-breaks are added clean.outputSettings().escapeMode(Entities.EscapeMode.base).charset(StandardCharsets.UTF_8).prettyPrint(false); // return 'cleaned' html body return clean.body().html(); } /** * Check if the text is vulnerable to CSV Injection attack. * * @param text the text * @param errorMessage the error message in the exception when CVS Injection check fails */ public static void checkCsvInjection(String text, String errorMessage) { if (StringUtils.isNotEmpty(text) && text.matches(CSV_INJECTION_REGEX)) { throw new IllegalArgumentException(errorMessage); } } /** * Convert a <code>String</code> to an <code>Integer</code>, returning a default value if the string is blank.</p> * * @param stringValue the string to convert, may be null * @param defaultValue the default value * * @return the Integer represented by the string, or the default if the string is blank */ public static Integer convertStringToInteger(final String stringValue, final Integer defaultValue) { if (StringUtils.isNotBlank(stringValue)) { try { return Integer.parseInt(stringValue); } catch (final NumberFormatException e) { throw new IllegalArgumentException(String.format("Failed to convert \"%s\" value to %s.", stringValue, Integer.class.getName()), e); } } else { return defaultValue; } } /** * Sanitize log text by replacing the password * * @param loggingText logging text * @return sanitized text */ public static String sanitizeLogText(String loggingText) { String sanitizedText = (loggingText != null) ? loggingText : ""; sanitizedText = sanitizedText.replaceAll(""", "\""); sanitizedText = REGEX_JSON_PASSWORD.matcher(sanitizedText).replaceAll("$1" + HIDDEN_TEXT + "$2"); sanitizedText = REGEX_JSON_PASSWORD2.matcher(sanitizedText).replaceAll("$1" + HIDDEN_TEXT + "\""); sanitizedText = REGEX_XML_PASSWORD.matcher(sanitizedText).replaceAll("$1" + HIDDEN_TEXT + "<"); return sanitizedText; } }