/*
 * Copyright (C) 2015 hu
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package cn.edu.hfut.dmic.webcollector.util;

import cn.edu.hfut.dmic.webcollector.model.Page;
import java.io.File;
import java.net.URL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * FileSystemOutput并不属于WebCollector内核,它只是实现一个 简单的输出,将网页根据url路径,保存到本地目录,按照网站目录
 * 结构来存储网站内容。BreadthCrawler的visit函数中,默认使用
 * FileSystemOutput来保存网页。不推荐使用FileSystemOutput来 存储网页
 *
 * @author hu
 */
public class FileSystemOutput {

    public static final Logger LOG = LoggerFactory.getLogger(FileSystemOutput.class);

    protected String root;

    public FileSystemOutput(String root) {
        this.root = root;
    }

    public void output(Page page) {
        try {
            URL _URL = new URL(page.url());
            String query = "";
            if (_URL.getQuery() != null) {
                query = "_" + _URL.getQuery();
            }
            String path = _URL.getPath();
            if (path.length() == 0) {
                path = "index.html";
            } else {
                if (path.endsWith("/")) {
                    path = path + "index.html";
                } else {
                    int lastSlash = path.lastIndexOf("/");
                    int lastPoint = path.lastIndexOf(".");
                    if (lastPoint < lastSlash) {
                        path = path + ".html";
                    }
                }
            }
            path += query;
            File domain_path = new File(root, _URL.getHost());
            File f = new File(domain_path, path);
            FileUtils.write(f, page.content());
            LOG.info("output " + f.getAbsolutePath());
        } catch (Exception ex) {
            LOG.info("Exception", ex);
        }
    }
    

}