package ch.epfl.bbp.uima.xml;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Maps.newHashMap;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

import ch.epfl.bbp.uima.XmlHelper;
import ch.epfl.bbp.uima.utils.ConceptFileWriter.Concept;

import com.google.common.collect.Sets;

/**
 * @author [email protected]
 * 
 * 
 *         <pre>
 * <neuroName xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 *     <concept brainInfoID="1" cNIDType="h" cNID="-2063754382" standardName="superior frontopolar gyrus" standardAcronym="FPGS">
 *         <brainInfoURL>http://braininfo.rprc.washington.edu/centraldirectory.aspx?ID=1</brainInfoURL>
 *         <synonyms>
 *             <synonym synonymLanguage="English" pubMedHits="0">superior frontopolar gyrus</synonym>
 *             <synonym synonymLanguage="English" pubMedHits="0">superior transverse frontopolar gyrus</synonym>
 *             <synonym synonymLanguage="Latin" pubMedHits="0">Gyrus frontopolaris transversus superior</synonym>
 *         </synonyms>
 * </pre>
 */
public class NeuronamesXmlParser {

	public static Map<String, Concept> parse() throws JDOMException,
			IOException {
		return parse(new File(XmlHelper.XML_RESOURCES
				+ "neuronames/NeuroNames.xml"));
	}

	public static Map<String, Concept> parse(File f) throws JDOMException,
			IOException {

		Map<String, Concept> concepts = newHashMap();

		InputStream corpusIs = new FileInputStream(f);

		SAXBuilder builder = new SAXBuilder();
		Document doc = builder.build(corpusIs);
		Element rootNode = doc.getRootElement();

		Iterator<Element> cs = rootNode.getChildren().iterator();
		while (cs.hasNext()) {
			Element concept = cs.next();

			String id = concept.getAttributeValue("brainInfoID");
			String canonical = concept.getAttributeValue("standardName");
			canonical = canonical.replaceAll("\\(.*\\)$", "").trim();

			Set<String> variantStrings = Sets.newHashSet();

			List<Element> variants = concept.getChild("synonyms").getChildren();
			for (Element variant : variants) {
				if (variant.getAttribute("synonymLanguage").getValue()
						.matches("Latin|English")) {

					variantStrings.add(variant.getText()
							.replaceAll("\\(.*\\)$", "").trim());
				}
			}
			concepts.put(canonical, new Concept(canonical, id, variantStrings));
		}
		checkArgument(concepts.size() > 0, "empty concepts!");
		return concepts;
	}
}