/*
 * Copyright (C) 2011 ankus (http://www.openankus.org).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package org.ankus.crawler.core;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

import org.ankus.crawler.DEF.ElementDEF;
import org.ankus.crawler.DEF.ScriptDEF;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class ParseHTML {

	/** Selector type: look elements up by CSS class name. */
	private static final String TYPE_CLASS = "CLASS";

	/** Selector type: look elements up by tag name. */
	private static final String TYPE_TAG = "TAG";

	/** Value type: emit the outer HTML of the matched elements. */
	private static final String VALUE_HTML = "html";

	/** Value type: emit the combined text of the matched elements. */
	private static final String VALUE_TEXT = "text";

	/** Script key that is not an element definition and is skipped during conversion. */
	private static final String KEY_ADDR = "addr";

	/** Number of comma-separated fields an element definition must provide. */
	private static final int FIELD_COUNT = 4;

	/**
	 * Extracts the values described by one script entry from an HTML stream.
	 * Converts entry {@code idx} of {@code sDef} into element definitions and
	 * applies them to the HTML read from {@code br}.
	 *
	 * @param idx  index of the script entry inside {@code sDef.sDef}
	 * @param br   reader supplying the HTML; fully consumed and closed by this call
	 * @param sDef script definition holding the element definitions
	 * @return one {@code name<TAB>value} string per extracted element definition
	 * @throws IllegalArgumentException if an element definition is missing or has
	 *         fewer than four comma-separated fields
	 */
	public ArrayList<String> getValue(int idx, BufferedReader br, ScriptDEF sDef){

		ArrayList<ElementDEF> eList = convertSDEFtoEDEF(idx, sDef);
		return convertBrToDoc(br, eList);

	}

	/**
	 * Reads the whole HTML stream, parses it with jsoup and extracts one value
	 * per element definition.
	 * <p>
	 * Definitions whose {@code ElementType} is neither {@code CLASS} nor
	 * {@code TAG}, or whose {@code ElementValueType} is neither {@code html} nor
	 * {@code text}, contribute nothing to the result. A definition that matches
	 * no element still contributes a line with an empty value.
	 * <p>
	 * Reading is best-effort: if an {@link IOException} occurs, the stack trace
	 * is printed and whatever was read so far is parsed. The reader is always
	 * closed before this method returns.
	 *
	 * @param br   reader supplying the HTML; fully consumed and closed by this call
	 * @param list element definitions to apply, in output order
	 * @return one {@code ElementName<TAB>value} string per applicable definition
	 */
	public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
		Document doc = Jsoup.parse(readAll(br));
		ArrayList<String> retList = new ArrayList<String>();

		for(ElementDEF eDef : list){
			Elements eleList = selectElements(doc, eDef);
			if(eleList == null){
				// Unsupported selector type: nothing to extract for this definition.
				continue;
			}

			// Constant-first comparison so an unset value type is skipped instead of throwing.
			if(VALUE_HTML.equals(eDef.ElementValueType)){
				retList.add(eDef.ElementName+"\t"+eleList.toString());
			} else if(VALUE_TEXT.equals(eDef.ElementValueType)){
				retList.add(eDef.ElementName+"\t"+eleList.text());
			}
		}
		return retList;
	}

	/**
	 * Converts the raw script entry at {@code idx} into element definitions.
	 * Every key except {@code addr} must map to a comma-separated value of the
	 * form {@code type,value,name,valueType}, for example
	 * {@code CLASS,conStyle_Bold01,title,text}. Fields beyond the fourth are ignored.
	 *
	 * @param idx  index of the script entry inside {@code sDef.sDef}
	 * @param sDef script definition holding the raw entries
	 * @return element definitions in the iteration order of the entry's key set
	 * @throws IllegalArgumentException if a definition value is missing or has
	 *         fewer than four comma-separated fields
	 */
	public ArrayList<ElementDEF> convertSDEFtoEDEF(int idx, ScriptDEF sDef){
		HashMap<String, String> map = sDef.sDef.get(idx);
		ArrayList<ElementDEF> list = new ArrayList<ElementDEF>();

		for(String key : map.keySet()){
			if(KEY_ADDR.equals(key)){
				continue;
			}

			String raw = map.get(key);
			String[] valList = (raw == null) ? new String[0] : raw.split(",");
			if(valList.length < FIELD_COUNT){
				// Fail with context instead of a bare ArrayIndexOutOfBoundsException.
				throw new IllegalArgumentException(
						"Element definition '" + key + "' must have " + FIELD_COUNT
						+ " comma-separated fields (type,value,name,valueType) but was: " + raw);
			}

			ElementDEF eDef = new ElementDEF();
			eDef.ElementType = valList[0];
			eDef.ElementValue = valList[1];
			eDef.ElementName = valList[2];
			eDef.ElementValueType = valList[3];

			list.add(eDef);
		}

		return list;
	}

	/**
	 * Drains the reader into a single string, terminating every line with CRLF,
	 * and closes the reader even when reading fails.
	 */
	private String readAll(BufferedReader br){
		StringBuilder buf = new StringBuilder();
		try {
			String line;
			while((line = br.readLine()) != null){
				buf.append(line);
				buf.append("\r\n");
			}
		} catch (IOException e) {
			// Best-effort: report the failure and continue with the content read so far.
			e.printStackTrace();
		} finally {
			if(br != null){
				try {
					br.close();
				} catch (IOException e) {
					// A failed close must not discard content that was already read.
					e.printStackTrace();
				}
			}
		}
		return buf.toString();
	}

	/**
	 * Looks up the elements addressed by a definition, or returns {@code null}
	 * when the definition's selector type is not supported.
	 */
	private Elements selectElements(Document doc, ElementDEF eDef){
		if(TYPE_CLASS.equals(eDef.ElementType)){
			return doc.getElementsByClass(eDef.ElementValue);
		}
		if(TYPE_TAG.equals(eDef.ElementType)){
			return doc.getElementsByTag(eDef.ElementValue);
		}
		return null;
	}
}