/**
 * 
 */
package org.howsun.util;


import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;
import static java.lang.System.out;
/**
 * Extracts image sources from HTML text: collects the {@code src} attribute of
 * every {@code <img>} tag reported by the Swing HTML parser.
 *
 * <p>Usage example:
 * <pre>
 * StringReader sr = new StringReader(src);
 * AnalizeWebParse parse = new AnalizeWebParse();
 * List&lt;String&gt; results = parse.parse(sr);
 * </pre>
 * Here {@code src} is the string to inspect; it may come from a textarea
 * control, be read from a file, be fetched from the Internet, and so on.
 * {@code results} is the collection of image sources found in that string.
 *
 * <p>Parsing state is kept in unsynchronized instance fields, so a single
 * instance must not run several parses at the same time. Sequential reuse is
 * fine: every call to {@link #parse(Reader)} starts from a clean state.
 *
 * @author 张纪豪
 * @version 0.1
 * Build Time Apr 10, 2009
 */
public class AnalizeWebParse extends ParserCallback {

	// Alternative pattern that only accepts absolute image URLs such as
	// http://image.sohu.com/2009/10/10/6278481.jpg:
	//String regex = "^(http://.+)";
	/** Regular expression an image source must match in full to be collected. */
	String regex = "^(.+)";

	/** Image sources collected by the current (or most recent) parse. */
	List<String> imgs = new ArrayList<String>();

	/** Becomes true once a {@code <div>} whose class attribute contains "body" is opened. */
	boolean start = false;
	/** Becomes true at the first {@code </div>} seen after {@link #start} was set. */
	boolean finished = false;
	// NOTE(review): start/finished are maintained by the tag handlers but
	// handleSimpleTag never consults them, so images are collected from the
	// whole document, not only from the "body" div. Confirm whether that is
	// intended before wiring the flags in.

	/**
	 * Prints a line to standard output (debug helper).
	 *
	 * @param s the text to print
	 */
	public void p(String s) {
		System.out.println(s);
	}

	/**
	 * Marks the start of the region of interest when a {@code <div>} whose
	 * {@code class} attribute contains "body" is opened. Does nothing once the
	 * region has already started or finished.
	 */
	@Override
	public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs, int pos) {
		if (finished || start || tag != HTML.Tag.DIV) {
			return;
		}

		String cla = stringAttribute(attribs, HTML.Attribute.CLASS);
		if (cla != null && cla.indexOf("body") != -1) {
			start = true;
		}
	}

	/**
	 * Marks the region of interest as finished at the first {@code </div>}
	 * encountered after it started.
	 */
	@Override
	public void handleEndTag(HTML.Tag tag, int pos) {
		if (tag == HTML.Tag.DIV && start && !finished) {
			finished = true;
		}
	}

	/** Text content is irrelevant for image extraction and is ignored. */
	@Override
	public void handleText(char[] text, int pos) {
		// intentionally empty
	}

	/**
	 * Records the {@code src} attribute of an {@code <img>} tag when it is
	 * present and matches {@link #regex} in full.
	 */
	@Override
	public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
		if (t != HTML.Tag.IMG) {
			return;
		}

		String src = stringAttribute(a, HTML.Attribute.SRC);
		if (src != null && Pattern.matches(regex, src)) {
			imgs.add(src);
		}
	}

	//public String parse(BufferedReader file) throws Exception {
	/**
	 * Parses the HTML supplied by {@code file} and returns the image sources
	 * found in it, in document order.
	 *
	 * <p>Each call starts with a fresh result list and reset flags, so results
	 * from an earlier call on the same instance are neither repeated in the new
	 * result nor modified in the list that was returned earlier.
	 *
	 * <p>This method does not close the reader itself.
	 *
	 * @param file source of the HTML text; may be {@code null}
	 * @return the collected image sources, or {@code null} when {@code file}
	 *         is {@code null}
	 * @throws Exception if the underlying parser fails, e.g. on a read error
	 */
	public List<String> parse(Reader file) throws Exception {
		if (file == null) {
			// Kept for backward compatibility: callers may test for null.
			return null;
		}

		// Reset per-parse state; a new list (rather than clear()) keeps lists
		// returned by previous calls untouched.
		imgs = new ArrayList<String>();
		start = false;
		finished = false;

		// The last argument (ignoreCharSet = true) tells the parser to ignore
		// charset directives embedded in the document.
		new ParserDelegator().parse(file, this, true);

		return imgs;
	}

	/** Returns the attribute value for {@code key} if it is a String, otherwise {@code null}. */
	private static String stringAttribute(MutableAttributeSet attribs, Object key) {
		Object value = attribs.getAttribute(key);
		return (value instanceof String) ? (String) value : null;
	}
}

/*
 * 
 * Example caller (commented out): runs the parser and saves the results to a file.
 * 
 * public class AnalizeIMG {

	public void p(String s) {
		System.out.println(s);
	}

	public void analizeFile(String infile, String outfile) throws Exception {
		File file = new File(infile);
		if (file == null || !file.exists()) {
			p("File " + infile + " not exits !");
		}

		if (!file.canRead()) {
			p("File " + infile + " can't read !");

		}

		FileReader frd = new FileReader(infile);
		BufferedReader bufferedReader = new BufferedReader(frd);
		try {
			AnalizeWebParse parse = new AnalizeWebParse();
			List<String> s = parse.parse(bufferedReader);
			System.out.println(s);
			createFile(outfile, s.toString());

		} catch (Exception ex) {
			throw ex;
		} finally {
			frd.close();
			bufferedReader.close();
		}
	}

	private void createFile(String filename, String content) {
		FileWriter f = null;
		try {
			f = new FileWriter(filename);
			if (f == null || content == null) {
				return;
			}

			f.write(content);
			f.flush();
			f.close();

		} catch (Exception e) {

		} finally {
			if (f != null) {
				try {
					f.close();
				} catch (Exception e) {

				}
			}
		}
	}

	public static void main(String arg[]) {
		AnalizeIMG ana = new AnalizeIMG();
		try {
			ana.analizeFile("E:\\1.txt", "E:\\out.lst");
		} catch (Exception ex) {
			ex.printStackTrace();
		}
	}
}
*/