package com.socrata.datasync;

import com.socrata.datasync.config.controlfile.FileTypeControl;
import com.socrata.datasync.config.userpreferences.UserPreferences;

import java.awt.Desktop;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.entity.ContentType;

/**
 * Static helper methods shared across DataSync: file-name handling, header/BOM detection for
 * delimited files, comma-separated list encoding, and small HTTP/user-agent helpers.
 */
public class Utils {

    // Path appended to "https://<domain>" by regionOfDomain to query the version API.
    private static final String VERSION_API_ENDPOINT = "/api/version.json";
    // Name of the response header that regionOfDomain reads the region from.
    private static final String X_SOCRATA_REGION = "X-Socrata-Region";

    // The Unicode byte order mark character (U+FEFF); encoded per-charset when checking files.
    public static final String BOM = "\uFEFF";

     * Get file extension from the given path to a file
     * @param file filename
     * @return
    public static String getFileExtension(String file) {
        String extension = "";
        int i = file.lastIndexOf('.');
        if (i > 0)
            extension = file.substring(i+1).toLowerCase();
        return extension;

    public static String getFilename(String path) {
        return Paths.get(path).getFileName().toString();

     * Returns a random 32 character request id
    public static String generateRequestId() {
        String uuid = UUID.randomUUID().toString();
        String requestId = uuid.replace("-", "");
        return requestId;

    public static String capitalizeFirstLetter(String s) {
        return s.substring(0, 1).toUpperCase()
                + s.substring(1);

     * @param uid to validate
     * @return true if given uid is a valid Socrata uid (e.g. abcd-1234)
    public static boolean uidIsValid(String uid) {
        Matcher uidMatcher = Pattern.compile("[a-z0-9]{4}-[a-z0-9]{4}").matcher(uid);
        return uidMatcher.matches();

     * Reads first line of the given file after skipping 'skip' lines and returns it's contents as a string array.
    public static String[] pullHeadersFromFile(File fileToPublish, FileTypeControl fileControl, int skip)
            throws IOException {

        CSVReader reader = getReader(fileToPublish, fileControl);

        int linesRead = 0;
        String[] nextRecord;
        Charset charset = getCharset(fileControl);
        while ((nextRecord = reader.readNext()) != null && linesRead++ < skip) {}
        byte[] bom = BOM.getBytes(charset);
        if (nextRecord != null && nextRecord.length > 0) {
            byte[] firstStringBytes = nextRecord[0].getBytes(charset);
            if (startsWith(bom, firstStringBytes))
                nextRecord[0] = nextRecord[0].substring(1);
        return nextRecord;

    public static Charset getCharset(FileTypeControl fileControl) {
        Charset charset;
        try {
            charset = Charset.forName(fileControl.encoding);
        } catch (Exception e) {
            charset = Charset.defaultCharset();
        return charset;

    /** returns whether the source array starts with the prefix array
     * @param prefix the byte array containing the prefix
     * @param source the byte array of the source
    private static boolean startsWith(byte[] prefix, byte[] source) {
        if (prefix.length > source.length) {
            return false;
        } else {
            for (int i = 0; i < prefix.length; i++) {
                if (prefix[i] != source[i]) {
                    return false;
            return true;

     * Reads first line of the given file and determines whether the UTF-8 bom marker is present.
    public static boolean fileStartsWithBom(File fileToPublish, FileTypeControl fileControl) throws IOException {
        Charset charset = getCharset(fileControl);
        if (charset.newEncoder().canEncode('\uffef')) {
            byte bom[] = BOM.getBytes(charset);
            FileInputStream is = null;
            try {
                is = new FileInputStream(fileToPublish);
                byte startingBytes[] = new byte[bom.length];
                int result =;
                if (result != -1) {
                    return startsWith(bom, startingBytes);
                } else {
                    return false;
            } finally {
                if (is != null)
        } else {
            return false;

     * Sets up a csvReader.
    private static CSVReader getReader(File fileToPublish, FileTypeControl fileControl)
            throws IOException {

        String separator = fileControl.separator;
        if (separator == null)
            separator = Utils.getFileExtension(fileToPublish.getName()).equals("csv") ? "," : "\t";

        String quote = fileControl.quote == null ? "\"" : fileControl.quote;
        String escape = fileControl.escape == null ? "\u0000" : fileControl.escape;

        return new CSVReader(new FileReader(fileToPublish), separator.charAt(0), quote.charAt(0), escape.charAt(0), 0);

     * Open given uri in local web browser
     * @param uri to open in browser
    public static void openWebpage(URI uri) {
        Desktop desktop = Desktop.isDesktopSupported() ? Desktop.getDesktop() : null;
        if (desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) {
            try {
            } catch (Exception e) {
                System.out.println("Error: cannot open web page");

     * @param pathToSaveJobFile path to a saved job file
     * @return command with absolute paths to execute job file at given path
    public static String getRunJobCommand(String pathToSaveJobFile) {
        String jarPath = Main.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        try {
            jarPath = URLDecoder.decode(jarPath, "UTF-8");
            // Needed correct issue with windows where path includes a leading slash
            if(jarPath.contains(":") && (jarPath.startsWith("/") || jarPath.startsWith("\\"))) {
                jarPath = jarPath.substring(1, jarPath.length());
            //TODO: This may change based on how we implement running metadata jobs from the command line.
            return "java -jar \"" + jarPath + "\" \"" + pathToSaveJobFile + "\"";
        } catch (UnsupportedEncodingException unsupportedEncoding) {
            return "Error getting path to this executeable: " + unsupportedEncoding.getMessage();

    public static int readChunk(InputStream in, byte[] buffer, int offset, int length) throws IOException {
        // isn't guaranteed to read all the bytes requested in one go.
        int initialOffset = offset;
        while(length > 0) {
            int count =, offset, length);
            if(count == -1) break;
            offset += count;
            length -= count;
        if(offset == initialOffset && length != 0) return -1;
        return offset - initialOffset;

    public static String ordinal(int i) {
        return i % 100 == 11 || i % 100 == 12 || i % 100 == 13 ? i + "th" : i + new String[]{"th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th"}[i % 10];

    public static boolean nullOrEmpty(String s) {
        return (s == null || s.equals(""));

    public static String getUserAgentString(String agentName) {
        try {
            String osName = System.getProperty("");
            String osVersion = System.getProperty("os.version");
            String javaVersion = System.getProperty("java.version");
            String userLocale = System.getProperty("") + "-" + System.getProperty("user.language");
            return "DataSync/" + VersionProvider.getThisVersion() +
                    " (" + agentName + "; " + osName + " " + osVersion + "; Java " + javaVersion + "; " + userLocale + ")";
        } catch (Exception e) {
            return "DataSync/" + VersionProvider.getThisVersion() +
                    " (" + agentName + "; Error obtaining OS/Java/Locale info)";

    public static String regionOfDomain(UserPreferences userPrefs, String domain) throws URISyntaxException, IOException {
        HttpUtility http = new HttpUtility(userPrefs, false);
        URI versionApiUri = new URI("https://" + DatasetUtils.getDomainWithoutScheme(domain) + VERSION_API_ENDPOINT);
        try(CloseableHttpResponse response = http.get(versionApiUri, ContentType.APPLICATION_JSON.getMimeType())) {
            Header[] headers = response.getHeaders(X_SOCRATA_REGION);
            if(headers.length == 0) {
                return "development";
            } else {
                return headers[0].getValue();

    public static String[] commaSplit(String s) {
        List<String> result = new ArrayList<String>();
        s = s.trim();

        StringBuilder sb = new StringBuilder();
        for(int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            if(c == '\\' && i != s.length() - 1) { // if the backslash is the last character, we'll just treat it as a literal
            } else if(c == ',') {
                sb = new StringBuilder();
            } else {
        String finalString = sb.toString().trim();
        if(!result.isEmpty() || finalString.length() != 0) {
        return result.toArray(new String[result.size()]);

    public static String commaJoin(String[] ss) {
        return commaJoin(Arrays.asList(ss));

    public static String commaJoin(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        boolean didOne = false;
        for(String s : ss) {
            if(didOne) sb.append(", ");
            else didOne = true;

            s = s.trim();
            if(s.contains(",") || s.contains("\\")) {
                for(int i = 0; i < s.length(); ++i) {
                    char c = s.charAt(i);
                    if(c == ',' || c == '\\') sb.append('\\');
            } else {
        return sb.toString();