/**
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.

 **/
 
/**
 This file is part of Save For Offline, an Android app which saves / downloads complete webpages for offline reading.
**/
 
/**
 If you modify, redistribute, or write something based on this or parts of it, you MUST,
 I repeat, you MUST comply with the GPLv2+ license. This means that if you use or modify
 my code, you MUST release the source code of your modified version, if / when this is
 required under the terms of the license.
 
 If you cannot / do not want to do this, DO NOT USE MY CODE. Thanks.
 
 (I've added this message to the source because it's been used in several proprietary
 closed source apps, which I don't want, and which is also a violation of the license.)
**/

/**
 Written by Jonas Czech (JonasCz, stackoverflow.com/users/4428462/JonasCz and github.com/JonasCz) originally and partially based on https://github.com/PramodKhare/GetMeThatPage/
 with lots of improvements. (4428462jonascz/eafc4d1afq)
 **/

package jonas.tool.saveForOffline;

import com.squareup.okhttp.Cache;
import com.squareup.okhttp.OkHttpClient;
import com.squareup.okhttp.Request;
import com.squareup.okhttp.Response;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class PageSaver {
    // Receiver for progress, log and error notifications.
    private EventCallback eventCallback;

    // Shared HTTP client; every request is tagged with HTTP_REQUEST_TAG so cancel() can abort them all.
    private OkHttpClient client = new OkHttpClient();
    private final String HTTP_REQUEST_TAG = "TAG";

    // Written by cancel() and read by the download worker threads, hence volatile
    // so that a cancellation becomes visible to them.
    private volatile boolean isCancelled = false;
    private Options options = new Options();

    // filesToGrab - all the links to files (eg images, scripts) which we are going to grab/download
    private List<String> filesToGrab = new ArrayList<String>();
    // framesToGrab - html frame files to download, as we parse these recursively
    private List<String> framesToGrab = new ArrayList<String>();
    // cssToGrab - css files to download and parse, these need to be parsed to extract urls
    private List<String> cssToGrab = new ArrayList<String>();

    // Title and icon URL of the page; both stay empty until the first HTML document has been parsed.
    private String title = "";
    private String pageIconUrl = "";

    // File name under which the main HTML document is stored.
    private String indexFileName = "index.html";

    // Matches every character which is replaced by '_' when building a local file name.
    private final Pattern fileNameReplacementPattern = Pattern.compile("[^a-zA-Z0-9-_\\.]");

    /**
     * @return the options object of this saver; changes made to it affect later saves.
     */
    public Options getOptions() {
        return options;
    }

    /**
     * @return the title taken from the first parsed HTML document, or an empty string
     *         if no document has been parsed since the last reset.
     */
    public String getPageTitle() {
        return title;
    }

    public PageSaver(EventCallback callback) {
        this.eventCallback = callback;
		
		client.setConnectTimeout(20, TimeUnit.SECONDS);
		client.setReadTimeout(20, TimeUnit.SECONDS);
		client.setWriteTimeout(20, TimeUnit.SECONDS);
		
		client.setFollowRedirects(true);
		client.setFollowSslRedirects(true);
    }

    /**
     * Marks the current save as cancelled and cancels the HTTP calls
     * which carry this saver's request tag.
     */
    public void cancel() {
        isCancelled = true;
        client.cancel(HTTP_REQUEST_TAG);
    }
	
	/**
	 * Clears everything collected during a previous save (download lists, title,
	 * icon URL and the cancelled flag) so that this instance can be used again.
	 */
	public void resetState() {
		isCancelled = false;
		title = "";
		pageIconUrl = "";

		cssToGrab.clear();
		framesToGrab.clear();
		filesToGrab.clear();
	}

    /**
     * @return true if cancel() has been called since the last reset.
     */
    public boolean isCancelled() {
        return isCancelled;
    }

    /**
     * Saves the page at the given URL, together with the frames, stylesheets and
     * other files it references, into the output directory.
     *
     * @param url           address of the page to save
     * @param outputDirPath directory to save into; it is created if it does not exist
     * @param indexFilename file name under which the main HTML document is stored
     * @return false if the output directory could not be created, if the main HTML
     *         document could not be saved, or if the save was cancelled while it was
     *         being fetched; true otherwise (including a cancellation at a later stage)
     */
    public boolean getPage(String url, String outputDirPath, String indexFilename) {

        this.indexFileName = indexFilename;

        File outputDir = new File(outputDirPath);

        if (!outputDir.exists() && !outputDir.mkdirs()) {
            eventCallback.onFatalError(new IOException("File " + outputDirPath + " could not be created"), url);
            return false;
        }

        //download main html and parse -- isExtra parameter should be false
        boolean success = downloadHtmlAndParseLinks(url, outputDirPath, false);
        if (isCancelled || !success) {
            return false;
        }

        //download and parse html frames. Frames can contain other frames, so the list may grow
        //while we walk it: an index loop picks up the new entries, whereas an iterator would
        //throw ConcurrentModificationException.
        for (int i = 0; i < framesToGrab.size(); i++) {
            downloadHtmlAndParseLinks(framesToGrab.get(i), outputDirPath, true);
            if (isCancelled) return true;
        }

        //download and parse css files. Stylesheets can @import other stylesheets, so this list
        //may grow during the loop as well.
        for (int i = 0; i < cssToGrab.size(); i++) {
            if (isCancelled) return true;
            downloadCssAndParse(cssToGrab.get(i), outputDirPath);
        }

        int threadCount = Runtime.getRuntime().availableProcessors();
        ThreadPoolExecutor pool = new ThreadPoolExecutor(threadCount, threadCount, 60, TimeUnit.SECONDS, new BlockingDownloadTaskQueue<Runnable>());

        //nothing adds to filesToGrab any more at this point, so the size is fixed
        int fileCount = filesToGrab.size();
        for (int i = 0; i < fileCount; i++) {
            if (isCancelled) {
                eventCallback.onProgressMessage("Cancelling...");
                shutdownExecutor(pool, 10, TimeUnit.SECONDS);
                return success;
            }

            String urlToDownload = filesToGrab.get(i);

            eventCallback.onProgressMessage("Saving file: " + getFileName(urlToDownload));
            eventCallback.onProgressChanged(i, fileCount, false);

            pool.submit(new DownloadTask(urlToDownload, outputDir));
        }

        //the icon url stays empty when the page did not provide one; there is nothing to fetch then
        if (pageIconUrl != null && !pageIconUrl.isEmpty()) {
            pool.submit(new DownloadTask(pageIconUrl, outputDir, "saveForOffline_icon.png"));
        }

        eventCallback.onProgressMessage("Finishing file downloads...");
        shutdownExecutor(pool, 60, TimeUnit.SECONDS);

        return success;
    }

    /**
     * Downloads one HTML document, rewrites its links to point at local files and
     * stores it in the output directory.
     *
     * @param url       address of the HTML document
     * @param outputDir directory to store the document in
     * @param isExtra   true when saving a html frame file, false for the main page. Failures of
     *                  the main page are reported as fatal, failures of frames as plain errors.
     * @return true if the document was downloaded and saved, false otherwise
     */
    private boolean downloadHtmlAndParseLinks(final String url, final String outputDir, final boolean isExtra) {
        String filename;

        if (isExtra) {
            filename = getFileName(url);
        } else {
            filename = indexFileName;
        }

        String baseUrl = url;
        if (url.endsWith("/")) {
            baseUrl = url + filename;
        }

        try {
            eventCallback.onProgressMessage(isExtra ? "Getting HTML frame file: " + filename : "Getting main HTML file");
            String htmlContent = getStringFromUrl(url);
            eventCallback.onProgressMessage(isExtra ? "Processing HTML frame file: " + filename: "Processing main HTML file");
            htmlContent = parseHtmlForLinks(htmlContent, baseUrl);

            eventCallback.onProgressMessage(isExtra ? "Saving HTML frame file: " + filename: "Saving main HTML file");
            File outputFile = new File(outputDir, filename);
            saveStringToFile(htmlContent, outputFile);
            return true;

        } catch (IOException | IllegalStateException | IllegalArgumentException e) {
            //IllegalArgumentException is what the request builder throws for a malformed url;
            //report it like any other download failure instead of letting it escape.
            if (isExtra) {
                eventCallback.onError(e);
            } else {
                eventCallback.onFatalError(e, url);
            }
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Downloads one stylesheet, rewrites the urls inside it to point at local files
     * and stores it in the output directory. Failures are reported through the
     * callback and do not abort the save.
     *
     * @param url       address of the stylesheet
     * @param outputDir directory to store the stylesheet in
     */
    private void downloadCssAndParse(final String url, final String outputDir) {

        String filename = getFileName(url);
        File outputFile = new File(outputDir, filename);

        try {
            eventCallback.onProgressMessage("Getting CSS file: " + filename);
            String cssContent = getStringFromUrl(url);

            eventCallback.onProgressMessage("Processing CSS file: " + filename);
            cssContent = parseCssForLinks(cssContent, url);

            eventCallback.onProgressMessage("Saving CSS file: " + filename);
            saveStringToFile(cssContent, outputFile);
        } catch (IOException | IllegalStateException | IllegalArgumentException e) {
            //same set of failures as for html documents: a malformed url or an unusable
            //response must not take the whole save down.
            eventCallback.onError(e);
            e.printStackTrace();
        }
    }

    /**
     * Downloads a single file into the output directory. Failures are reported
     * through the event callback; they never propagate out of run().
     */
    private class DownloadTask implements Runnable {

        private final String url;
        private final File outputDir;
        //may be null, in which case the name is derived from the url when the task runs
        private String fileName;

        /**
         * @param url    address of the file to download
         * @param toPath directory to save the file in; the file name is derived from the url
         */
        public DownloadTask(String url, File toPath) {
            this(url, toPath, null);
        }

        /**
         * @param url      address of the file to download
         * @param toPath   directory to save the file in
         * @param fileName name to save the file under
         */
        public DownloadTask(String url, File toPath, String fileName) {
            this.url = url;
            this.outputDir = toPath;
            this.fileName = fileName;
        }

        @Override
        public void run() {
            if (url == null || url.isEmpty()) {
                //nothing to download (for example a page without an icon)
                return;
            }

            if (fileName == null) {
                fileName = getFileName(url);
            }

            File outputFile = new File(outputDir, fileName);
            InputStream is = null;
            FileOutputStream fos = null;

            try {
                //built inside the try block: the builder rejects malformed urls with an
                //IllegalArgumentException, which has to end up in the handler below.
                Request request = new Request.Builder()
                        .url(url)
                        .addHeader("User-Agent", getOptions().getUserAgent())
                        .tag(HTTP_REQUEST_TAG)
                        .build();

                Response response = client.newCall(request).execute();
                is = response.body().byteStream();

                fos = new FileOutputStream(outputFile);
                final byte[] buffer = new byte[1024 * 32]; // read in batches of 32K
                int length;
                while ((length = is.read(buffer)) != -1) {
                    fos.write(buffer, 0, length);
                }

                fos.flush();
                response.body().close();

            } catch (IllegalArgumentException | IOException e) {
                IOException ex = new IOException("File download failed, URL: " + url + ", Output file path: " + outputFile.getPath());

                if (isCancelled) {
                    ex.initCause(new IOException("Save was cancelled, isCancelled is true").initCause(e));
                    eventCallback.onError(ex);
                } else {
                    eventCallback.onError(ex.initCause(e));
                }
            } finally {
                //always release the file handle and the connection, also after a failed copy
                closeQuietly(fos);
                closeQuietly(is);
            }
        }

        /** Closes the given resource if there is one; a failure while closing is ignored. */
        private void closeQuietly(java.io.Closeable resource) {
            if (resource == null) {
                return;
            }
            try {
                resource.close();
            } catch (IOException ignored) {
                //the transfer has already finished or failed; nothing useful can be done here
            }
        }
    }

    /**
     * Fetches the given url with the configured user agent and returns the
     * response body as a string.
     *
     * @param url address to fetch
     * @return the response body
     * @throws IOException if the request fails or the body cannot be read
     */
    private String getStringFromUrl(String url) throws IOException, IllegalStateException {
        Request.Builder builder = new Request.Builder();
        builder.url(url);
        builder.addHeader("User-Agent", getOptions().getUserAgent());
        builder.tag(HTTP_REQUEST_TAG);

        Response response = client.newCall(builder.build()).execute();
        String body = response.body().string();
        response.body().close();
        return body;
    }

    /**
     * Writes the given text to the given file, encoded as UTF-8. A file which
     * already exists is left untouched.
     *
     * @param ToSave     text to write
     * @param outputFile file to create
     * @throws IOException if the file cannot be created or written
     */
    private void saveStringToFile(String ToSave, File outputFile) throws IOException {

        if (outputFile.exists()) {
            return;
        }

        //opening the stream creates the file
        FileOutputStream fos = new FileOutputStream(outputFile);
        try {
            //explicit charset, so the result does not depend on the platform default
            fos.write(ToSave.getBytes("UTF-8"));
            fos.flush();
        } finally {
            //release the file handle even when writing fails
            fos.close();
        }
    }

    /**
     * Parses a HTML document, queues every file it references for download (depending on
     * the options) and rewrites the references to point at the local copies.
     * The first parsed document also provides the page title and the page icon.
     *
     * References which cannot be downloaded (they do not resolve to a http(s) url, for
     * example data: uris) are left exactly as they are in the document.
     *
     * @param htmlToParse the HTML source
     * @param baseUrl     url used to resolve relative references
     * @return the rewritten HTML
     */
    private String parseHtmlForLinks(String htmlToParse, String baseUrl) {
        Document document = Jsoup.parse(htmlToParse, baseUrl);
        document.outputSettings().escapeMode(Entities.EscapeMode.extended);

        if (title.isEmpty()) {
            title = document.title();
            eventCallback.onPageTitleAvailable(title);
        }

        if (pageIconUrl == null || pageIconUrl.isEmpty()) {
            eventCallback.onProgressMessage("Getting icon...");
            String iconUrl = FaviconFetcher.getInstance().getFaviconUrl(document);
            //NOTE(review): guards against a null result so that the field stays non-null - confirm
            //whether FaviconFetcher can actually return null.
            pageIconUrl = (iconUrl == null) ? "" : iconUrl;
        }

        eventCallback.onProgressMessage("Processing HTML...");

        Elements links;

        if (getOptions().saveFrames()) {
            links = document.select("frame[src]");
            eventCallback.onLogMessage("Got " + links.size() + " frames");
            localizeAll(links, "src", framesToGrab);

            links = document.select("iframe[src]");
            eventCallback.onLogMessage("Got " + links.size() + " iframes");
            localizeAll(links, "src", framesToGrab);
        }

        if (getOptions().saveOther()) {
            // Get all the links
            links = document.select("link[href]");
            eventCallback.onLogMessage("Got " + links.size() + " link elements with a href attribute");
            for (Element link : links) {
                //if it is css, parse it later to extract urls (images referenced from "background" attributes for example)
                boolean isStylesheet = "stylesheet".equalsIgnoreCase(link.attr("rel").trim());
                localizeAttribute(link, "href", isStylesheet ? cssToGrab : filesToGrab);
            }

            //get links in embedded css also, and modify the links to point to local files
            links = document.select("style[type=text/css]");
            eventCallback.onLogMessage("Got " + links.size() + " embedded stylesheets, parsing CSS");
            for (Element link : links) {
                String cssToParse = link.data();
                String parsedCss = parseCssForLinks(cssToParse, baseUrl);
                if (link.dataNodes().size() != 0) {
                    link.dataNodes().get(0).setWholeData(parsedCss);
                }
            }

            //get input types with an image type
            links = document.select("input[type=image]");
            eventCallback.onLogMessage("Got " + links.size() + " input elements with type = image");
            localizeAll(links, "src", filesToGrab);

            //get everything which has a background attribute; the url lives in that attribute, not in src
            links = document.select("[background]");
            eventCallback.onLogMessage("Got " + links.size() + " elements with a background attribute");
            localizeAll(links, "background", filesToGrab);

            links = document.select("[style]");
            eventCallback.onLogMessage("Got " + links.size() + " elements with a style attribute, parsing CSS");
            for (Element link : links) {
                String cssToParse = link.attr("style");
                String parsedCss = parseCssForLinks(cssToParse, baseUrl);
                link.attr("style", parsedCss);
            }
        }

        if (getOptions().saveScripts()) {
            links = document.select("script[src]");
            eventCallback.onLogMessage("Got " + links.size() + " script elements");
            localizeAll(links, "src", filesToGrab);
        }

        if (getOptions().saveImages()) {
            links = document.select("img[src]");
            eventCallback.onLogMessage("Got " + links.size() + " image elements");
            for (Element link : links) {
                localizeAttribute(link, "src", filesToGrab);
                link.removeAttr("srcset"); //we don't use this for now, so remove it.
            }

            links = document.select("img[data-canonical-src]");
            eventCallback.onLogMessage("Got " + links.size() + " image elements, w. data-canonical-src");
            for (Element link : links) {
                localizeAttribute(link, "data-canonical-src", filesToGrab);
                link.removeAttr("srcset"); //we don't use this for now, so remove it.
            }
        }

        if (getOptions().saveVideo()) {
            //video src is sometimes in a child element
            links = document.select("video:not([src])");
            eventCallback.onLogMessage("Got " + links.size() + " video elements without src attribute");
            localizeAll(links.select("[src]"), "src", filesToGrab);

            links = document.select("video[src]");
            eventCallback.onLogMessage("Got " + links.size() + " video elements");
            localizeAll(links, "src", filesToGrab);
        }

        if (getOptions().makeLinksAbsolute()) {
            //make links absolute, so they are not broken
            links = document.select("a[href]");
            eventCallback.onLogMessage("Making " + links.size() + " links absolute");
            for (Element link : links) {
                String absUrl = link.attr("abs:href");
                //an empty result means the link could not be resolved; keep the original
                //instead of blanking it
                if (!absUrl.isEmpty()) {
                    link.attr("href", absUrl);
                }
            }
        }
        return document.outerHtml();
    }

    /** Applies {@link #localizeAttribute} to every one of the given elements. */
    private void localizeAll(Elements elements, String attribute, List<String> downloadList) {
        for (Element element : elements) {
            localizeAttribute(element, attribute, downloadList);
        }
    }

    /**
     * Queues the url held by the given attribute for download and points the attribute at
     * the local file name. The attribute is left unchanged when the url is not downloadable.
     */
    private void localizeAttribute(Element element, String attribute, List<String> downloadList) {
        String absoluteUrl = element.attr("abs:" + attribute);
        if (!isLinkValid(absoluteUrl)) {
            return;
        }
        addLinkToList(absoluteUrl, downloadList);
        element.attr(attribute, getFileName(absoluteUrl));
    }

    /**
     * Finds the files referenced from a piece of CSS through url(...) and through
     * quoted @import rules, queues them for download and rewrites the references
     * to point at the local copies.
     *
     * @param cssToParse the CSS source (a stylesheet, a style element or a style attribute)
     * @param baseUrl    url used to resolve relative references
     * @return the rewritten CSS
     */
    private String parseCssForLinks(String cssToParse, String baseUrl) {

        eventCallback.onLogMessage("Parsing CSS");

        //everything inside url(" ... "); group 2 is the reference itself
        Pattern urlPattern = Pattern.compile("url(\\s*\\(\\s*['\"]*\\s*)(.*?)\\s*['\"]*\\s*\\)"); //I hate regexes...
        String rewritten = localizeCssReferences(cssToParse, urlPattern, 2, baseUrl, filesToGrab);

        //css linked with @import "..."; group 3 is the reference. These are stylesheets
        //themselves, so they go on the list of css files to parse.
        Pattern importPattern = Pattern.compile("@(import\\s*['\"])()([^ '\"]*)");
        return localizeCssReferences(rewritten, importPattern, 3, baseUrl, cssToGrab);
    }

    /**
     * Replaces the given group of every match with the local file name of the file it
     * refers to, and queues that file for download. Only the span of the group is touched;
     * references which are not downloadable are copied through unchanged.
     */
    private String localizeCssReferences(String css, Pattern pattern, int urlGroup, String baseUrl, List<String> downloadList) {
        Matcher matcher = pattern.matcher(css);
        StringBuilder result = new StringBuilder(css.length());
        int copiedUpTo = 0;

        while (matcher.find()) {
            String absoluteUrl = resolveCssReference(matcher.group(urlGroup).trim(), baseUrl);
            if (absoluteUrl == null) {
                continue;
            }
            addLinkToList(absoluteUrl, downloadList);

            //name the local file after the resolved url, as the downloader does the same
            result.append(css, copiedUpTo, matcher.start(urlGroup));
            result.append(getFileName(absoluteUrl));
            copiedUpTo = matcher.end(urlGroup);
        }
        result.append(css, copiedUpTo, css.length());
        return result.toString();
    }

    /**
     * Resolves a reference found in CSS against the base url.
     *
     * @return the absolute url, or null if the reference is empty, embedded (data:),
     *         a pure fragment (#...) or does not resolve to a downloadable url
     */
    private String resolveCssReference(String reference, String baseUrl) {
        if (reference.isEmpty() || reference.startsWith("data:") || reference.startsWith("#")) {
            return null;
        }
        try {
            String absoluteUrl = new URL(new URL(baseUrl), reference).toString();
            return isLinkValid(absoluteUrl) ? absoluteUrl : null;
        } catch (MalformedURLException e) {
            return null;
        }
    }
	
	/**
	 * @param url the link to check, may be null
	 * @return true if the link is non-empty and starts with "http"
	 */
	private boolean isLinkValid(String url) {
		return url != null && url.length() != 0 && url.startsWith("http");
	}

    /**
     * Appends the link to the list, unless it is not a valid link or the list
     * already contains it.
     *
     * @param link absolute url to add
     * @param list list to add it to
     */
    private void addLinkToList(String link, List<String> list) {
        boolean shouldAdd = isLinkValid(link) && !list.contains(link);
        if (shouldAdd) {
            list.add(link);
        }
    }
	
	/**
	 * Resolves the link against the base url and appends the result to the list, unless it
	 * is embedded image data, cannot be resolved, is not a valid link or is already listed.
	 *
	 * @param link    possibly relative url
	 * @param baseUrl url to resolve the link against
	 * @param list    list to add the resolved url to
	 */
	private void addLinkToList(String link, String baseUrl, List<String> list) {
		if (link.startsWith("data:image")) {
			return;
		}

		String resolved;
		try {
			URL base = new URL(baseUrl);
			resolved = new URL(base, link).toString();
		} catch (MalformedURLException e) {
			return;
		}

		if (isLinkValid(resolved) && !list.contains(resolved)) {
			list.add(resolved);
		}
	}

    /**
     * Derives the local file name for a url: the part after the last '/', with a query
     * string replaced by its hash code, unsafe characters replaced by '_' and the
     * result cut off after 200 characters. If nothing follows the last '/', the
     * hash code of the whole url is used instead.
     *
     * @param url the url to derive the name from
     * @return the local file name
     */
    private String getFileName(String url) {
        int lastSlash = url.lastIndexOf('/');
        String name = url.substring(lastSlash + 1);

        if (name.trim().isEmpty()) {
            name = String.valueOf(url.hashCode());
        }

        int queryStart = name.indexOf("?");
        if (queryStart >= 0) {
            String query = name.substring(queryStart + 1);
            name = name.substring(0, queryStart) + query.hashCode();
        }

        String sanitized = fileNameReplacementPattern.matcher(name).replaceAll("_");
        if (sanitized.length() > 200) {
            sanitized = sanitized.substring(0, 200);
        }
        return sanitized;
    }
	
	/**
	 * Stops the executor from accepting new tasks and waits for the submitted ones to
	 * finish. If they do not finish in time, or if the waiting thread is interrupted,
	 * the executor is told to stop immediately.
	 *
	 * @param e            the executor to shut down
	 * @param waitTime     how long to wait for the submitted tasks
	 * @param waitTimeUnit unit of waitTime
	 */
	private void shutdownExecutor (ExecutorService e, int waitTime, TimeUnit waitTimeUnit) {
		e.shutdown();
		try {
			if (!e.awaitTermination(waitTime, waitTimeUnit)) {
				eventCallback.onError("Executor pool did not termimate after " + waitTime + " " + waitTimeUnit.toString() +", doing shutdownNow()");
				e.shutdownNow();
			}
		} catch (InterruptedException ie) {
			//we can no longer wait, so do not leave the workers running behind our back,
			//and keep the interrupt visible to our caller
			e.shutdownNow();
			Thread.currentThread().interrupt();
			eventCallback.onError(ie);
		}
	}
	
	/**
	 * A SynchronousQueue whose offer() waits, by delegating to put(), instead of
	 * returning immediately.
	 */
	private class BlockingDownloadTaskQueue<E> extends SynchronousQueue<E> {
		public BlockingDownloadTaskQueue () {
			super();
		}

		/**
		 * @return true once the element has been handed over, false if the
		 *         waiting thread was interrupted
		 */
		@Override
		public boolean offer (E e) {
			boolean handedOver = false;
			try {
				put(e);
				handedOver = true;
			} catch (InterruptedException ie) {
				Thread.currentThread().interrupt();
				eventCallback.onError(ie);
			}
			return handedOver;
		}
	}
	
	/**
	 * Settings which control what gets saved and how requests are made.
	 * The cache settings are applied directly to the enclosing saver's HTTP client.
	 */
	class Options {
		private boolean makeLinksAbsolute = true;

		private boolean saveImages = true;
		private boolean saveFrames = true;
		private boolean saveOther = true;
		private boolean saveScripts = true;
		private boolean saveVideo = false;

		private String userAgent = " ";

		/**
		 * Installs a response cache on the HTTP client.
		 *
		 * @param cacheDirectory directory to keep the cache in
		 * @param maxCacheSize   maximum size of the cache
		 */
		public void setCache (File cacheDirectory, long maxCacheSize) {
			Cache cache = (new Cache(cacheDirectory, maxCacheSize));
			client.setCache(cache);
		}

		/**
		 * Removes everything from the cache. Does nothing when no cache has been set.
		 *
		 * @throws IOException if the cache cannot be cleared
		 */
		public void clearCache() throws IOException {
			Cache cache = client.getCache();
			if (cache != null) {
				cache.evictAll();
			}
		}

		/** @return the value sent as the User-Agent header with every request */
		public String getUserAgent() {
			return userAgent;
		}

		/** @param userAgent the value to send as the User-Agent header */
		public void setUserAgent(final String userAgent) {
			this.userAgent = userAgent;
		}

		/** @return whether links (a href) are rewritten to absolute urls */
		public boolean makeLinksAbsolute() {
			return makeLinksAbsolute;
		}

		public void makeLinksAbsolute(final boolean makeLinksAbsolute) {
			this.makeLinksAbsolute = makeLinksAbsolute;
		}

		/** @return whether images are saved */
		public boolean saveImages() {
			return saveImages;
		}

		public void saveImages(final boolean saveImages) {
			this.saveImages = saveImages;
		}

		/** @return whether frames and iframes are saved */
		public boolean saveFrames() {
			return saveFrames;
		}

		public void saveFrames(final boolean saveFrames) {
			this.saveFrames = saveFrames;
		}

		/** @return whether external scripts are saved */
		public boolean saveScripts() {
			return saveScripts;
		}

		public void saveScripts(final boolean saveScripts) {
			this.saveScripts = saveScripts;
		}

		/** @return whether linked files, stylesheets and files referenced from css are saved */
		public boolean saveOther() {
			return saveOther;
		}

		public void saveOther(final boolean saveOther) {
			this.saveOther = saveOther;
		}

		/** @return whether videos are saved */
		public boolean saveVideo() {
			return saveVideo;
		}

		public void saveVideo(final boolean saveVideo) {
			this.saveVideo = saveVideo;
		}
	}
}

/**
 * Receives progress, log and error notifications while a page is being saved.
 */
interface EventCallback {
    /**
     * @param progress      current position
     * @param maxProgress   position at which the work is complete
     * @param indeterminate whether the amount of work is unknown
     */
    void onProgressChanged(int progress, int maxProgress, boolean indeterminate);

    /** @param fileName short text describing the current step */
    void onProgressMessage(String fileName);

    /** @param pageTitle title of the page being saved */
    void onPageTitleAvailable(String pageTitle);

    /** @param message diagnostic message */
    void onLogMessage(String message);

    /** @param error a failure which does not abort the save */
    void onError(Throwable error);

    /** @param errorMessage description of a failure which does not abort the save */
    void onError(String errorMessage);

    /**
     * @param error   the failure which aborted the save
     * @param pageUrl url of the page which was being saved
     */
    void onFatalError(Throwable error, String pageUrl);
}