/**
 * **************************************************************************
 *
 * Contributor(s): 
 *	C. Heazel (WiSC): Added Fortify adjudication changes
 *
 ***************************************************************************
 */
package com.occamlab.te.parsers;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.net.ssl.SSLProtocolException;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.validation.Schema;
import javax.xml.validation.Validator;

import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;

import com.google.common.collect.ImmutableList;
import com.occamlab.te.ErrorHandlerImpl;
import com.occamlab.te.parsers.xml.CachingSchemaLoader;
import com.occamlab.te.parsers.xml.InMemorySchemaSupplier;
import com.occamlab.te.parsers.xml.XsdSchemaLoader;
import com.occamlab.te.parsers.xml.SchemaSupplier;
import com.occamlab.te.util.DomUtils;
import com.occamlab.te.util.URLConnectionUtils;

/**
 * Validates an XML resource against a set of W3C XML Schema or DTD schemas.
 * 
 */
public class XMLValidatingParser {
	/** Shared transformer factory; created on first construction of this class. */
	static TransformerFactory TF;
	/** Shared namespace-aware, non-validating builder factory; created on first construction. */
	static DocumentBuilderFactory nonValidatingDBF;
	/** Shared builder factory configured for W3C XML Schema validation; created on first construction. */
	static DocumentBuilderFactory schemaValidatingDBF;
	/** Shared builder factory configured for DTD validation; created on first construction. */
	static DocumentBuilderFactory dtdValidatingDBF;

	private static final Logger jlogger =
			Logger.getLogger("com.occamlab.te.parsers.XMLValidatingParser");

	/** XML Schema references that apply to every validation done by this instance. */
	ArrayList<SchemaSupplier> schemaList = new ArrayList<SchemaSupplier>();
	/** DTD references that apply to every validation done by this instance. */
	ArrayList<Object> dtdList = new ArrayList<>();

	/*
	 * For now we create a new cache per instance of XMLValidatingParser, which
	 * means a new cache per test run. These schemas could be cached for a
	 * longer period than that, but then the question becomes "how long?" Until
	 * the web app shuts down? Try to obey the caching headers in the HTTP
	 * responses?
	 *
	 * This solution at least fixes the major performance issue.
	 */
	private final CachingSchemaLoader schemaLoader =
			new CachingSchemaLoader(new XsdSchemaLoader());

	/**
	 * Collects the schema references of one kind from a configuration
	 * document.
	 *
	 * Every element named {@code schemaType} in the
	 * {@code http://www.occamlab.com/te/parsers} namespace is inspected. Its
	 * {@code type} attribute selects how the element text is interpreted:
	 * {@code url} yields a {@link URL}, {@code file} yields a {@link File} and
	 * {@code resource} yields the {@link URL} of a classpath resource.
	 *
	 * @param schemaLinks
	 *            The configuration document to search.
	 * @param schemaType
	 *            The local name of the reference elements to collect.
	 * @return The references in document order; an empty list if there are
	 *         none.
	 * @throws Exception
	 *             If a reference has an unknown type, a classpath resource
	 *             cannot be found, or a URL is malformed.
	 */
	private List<Object> loadSchemaList(Document schemaLinks,
			String schemaType) throws Exception {
		final NodeList refs = schemaLinks.getElementsByTagNameNS(
				"http://www.occamlab.com/te/parsers", schemaType);
		final int refCount = refs.getLength();
		if (refCount == 0) {
			return Collections.emptyList();
		}
		final ArrayList<Object> found = new ArrayList<>();
		for (int idx = 0; idx < refCount; idx++) {
			final Element ref = (Element) refs.item(idx);
			final String kind = ref.getAttribute("type");
			final Object location;
			switch (kind) {
			case "url":
				location = new URL(ref.getTextContent());
				break;
			case "file":
				location = new File(ref.getTextContent());
				break;
			case "resource": {
				final String resource = ref.getTextContent();
				final URL onClasspath = getClass().getClassLoader()
						.getResource(resource);
				if (onClasspath == null) {
					final String msg = "Can't find schema resource on classpath at "
							+ resource;
					jlogger.warning(msg);
					throw new Exception(msg);
				}
				location = onClasspath;
				break;
			}
			default:
				throw new Exception("Unknown schema resource type " + kind);
			}
			jlogger.finer("Adding schema reference " + location.toString());
			found.add(location);
		}
		return found;
	}

	/**
	 * Appends the schema references found in a configuration node to the
	 * supplied lists.
	 *
	 * Three sources are read: {@code schema} and {@code dtd} reference
	 * elements in the {@code http://www.occamlab.com/te/parsers} namespace,
	 * and any embedded {@code xsd:schema} elements, which are serialized and
	 * added as in-memory schemas. Note that when {@code schemaLinks} is not a
	 * Document, its whole owner document is searched, not just the node's
	 * subtree.
	 *
	 * @param schemaLinks
	 *            The configuration node; if null nothing is added.
	 * @param schemas
	 *            Receives the XML Schema suppliers.
	 * @param dtds
	 *            Receives the DTD references.
	 * @throws Exception
	 *             If a reference cannot be resolved or an embedded schema
	 *             cannot be serialized.
	 */
	private void loadSchemaLists(Node schemaLinks, ArrayList<SchemaSupplier> schemas,
			ArrayList<Object> dtds) throws Exception {
		if (null == schemaLinks) {
			return;
		}
		// Serializing the DOM is expensive and this method runs on every
		// validation, so only do it when the message will actually be logged.
		if (jlogger.isLoggable(Level.FINER)) {
			jlogger.finer("Received schemaLinks\n"
					+ DomUtils.serializeNode(schemaLinks));
		}
		final Document configDoc = (schemaLinks instanceof Document)
				? (Document) schemaLinks
				: schemaLinks.getOwnerDocument();

		// Build the suppliers in a scratch list first so that a failure part
		// way through leaves the caller's list untouched.
		final ArrayList<SchemaSupplier> schemaSuppliers = new ArrayList<>();
		for (final Object schemaObj : loadSchemaList(configDoc, "schema")) {
			schemaSuppliers.add(SchemaSupplier.makeSupplier(schemaObj));
		}
		schemas.addAll(schemaSuppliers);
		dtds.addAll(loadSchemaList(configDoc, "dtd"));

		// If instruction body is an embedded xsd:schema, add it to the
		// ArrayList
		final NodeList embedded = configDoc.getElementsByTagNameNS(
				"http://www.w3.org/2001/XMLSchema", "schema");
		final int embeddedCount = embedded.getLength();
		if (embeddedCount > 0) {
			// One transformer can serialize every embedded schema.
			final Transformer serializer = TF.newTransformer();
			for (int i = 0; i < embeddedCount; i++) {
				final Element e = (Element) embedded.item(i);
				final CharArrayWriter caw = new CharArrayWriter();
				serializer.transform(new DOMSource(e), new StreamResult(caw));
				schemas.add(new InMemorySchemaSupplier(caw.toCharArray()));
			}
		}
	}

	/**
	 * Creates a parser with no preconfigured schema references.
	 *
	 * On first use this also creates the shared builder factories and the
	 * shared transformer factory. The builder factories are forced to the
	 * Xerces implementation by temporarily overriding the
	 * {@code javax.xml.parsers.DocumentBuilderFactory} system property; the
	 * previous value is always restored, even if factory creation fails.
	 */
	public XMLValidatingParser() {

		if (nonValidatingDBF == null) {
			final String propertyName = "javax.xml.parsers.DocumentBuilderFactory";
			final String oldprop = System.getProperty(propertyName);
			System.setProperty(propertyName,
					"org.apache.xerces.jaxp.DocumentBuilderFactoryImpl");
			try {
				final DocumentBuilderFactory nonValidating = DocumentBuilderFactory.newInstance();
				// Fortify Mod: Disable entity expansion to foil External Entity Injections
				nonValidating.setExpandEntityReferences(false);
				nonValidating.setNamespaceAware(true);

				final DocumentBuilderFactory schemaValidating = DocumentBuilderFactory.newInstance();
				schemaValidating.setNamespaceAware(true);
				schemaValidating.setValidating(true);
				schemaValidating.setAttribute(
						"http://java.sun.com/xml/jaxp/properties/schemaLanguage",
						"http://www.w3.org/2001/XMLSchema");

				final DocumentBuilderFactory dtdValidating = DocumentBuilderFactory.newInstance();
				dtdValidating.setNamespaceAware(true);
				dtdValidating.setValidating(true);
				// Fortify Mod: Disable entity expansion to foil External Entity Injections
				dtdValidating.setExpandEntityReferences(false);

				// Publish only once every factory is fully configured, and set
				// the guard field last, so that a failure above cannot leave
				// the guard satisfied while the other factories are still null.
				schemaValidatingDBF = schemaValidating;
				dtdValidatingDBF = dtdValidating;
				nonValidatingDBF = nonValidating;
			} finally {
				// Always undo the temporary override of the JVM-wide property.
				if (oldprop == null) {
					System.clearProperty(propertyName);
				} else {
					System.setProperty(propertyName, oldprop);
				}
			}
		}

		if (TF == null) {
			// Fortify Mod: prevent external entity injection
			// includes try block to capture exceptions to setFeature.
			final TransformerFactory factory = TransformerFactory.newInstance();
			try {
				factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
			} catch (Exception e) {
				jlogger.log(Level.WARNING, "Failed to secure Transformer", e);
			}
			TF = factory;
		}
	}

	/**
	 * Creates a parser whose schema and DTD lists are preloaded from the given
	 * configuration document.
	 *
	 * @param schema_links
	 *            A document containing schema references; if null the parser
	 *            starts with empty lists.
	 * @throws Exception
	 *             If the schema references cannot be loaded.
	 */
	public XMLValidatingParser(Document schema_links) throws Exception {
		this();
		if (schema_links == null) {
			return;
		}
		loadSchemaLists(schema_links, schemaList, dtdList);
	}

	/**
	 * Attempts to parse and validate a resource read using the given
	 * connection to a URL.
	 * 
	 * @param uc
	 *            A connection for reading from some URL; must not be null.
	 * @param instruction
	 *            An Element node (ctlp:XMLValidatingParser) containing
	 *            instructions, usually schema references.
	 * @param logger
	 *            A log writer.
	 * @return The parsed Document, or null if validation reported errors or
	 *         warnings that the instruction does not allow to be ignored.
	 * @throws NullPointerException
	 *             If {@code uc} is null.
	 * @throws SSLProtocolException
	 *             If reading from the connection fails with an SSL protocol
	 *             error; the original exception is attached as the cause.
	 * @throws RuntimeException
	 *             If the resource cannot be read or parsed for any other
	 *             reason; the original exception is attached as the cause.
	 */
	public Document parse(URLConnection uc, Element instruction,
			PrintWriter logger) throws SSLProtocolException {
		if (null == uc) {
			throw new NullPointerException(
					"Unable to parse resource: URLConnection is null.");
		}
		jlogger.fine("Received URLConnection object for " + uc.getURL());
		Document doc = null;
		try (InputStream inStream = URLConnectionUtils.getInputStream(uc)) {
			doc = parse(inStream, instruction, logger);
		} catch (SSLProtocolException sslep) {
			SSLProtocolException wrapped = new SSLProtocolException("[SSL ERROR] Failed to connect with the requested URL due to \"Invalid server_name\" found!! :" + uc.getURL() +":" + sslep.getClass() +" : "+ sslep.getMessage());
			// SSLProtocolException has no (message, cause) constructor, so
			// attach the original failure explicitly to keep its stack trace.
			wrapped.initCause(sslep);
			throw wrapped;
		} catch (Exception e) {
			throw new RuntimeException(
					String.format("Failed to parse resource from %s", uc.getURL()),
					e);
		}
		return doc;
	}

	/**
	 * Parses and validates an XML resource using the given schema references.
	 * 
	 * @param input
	 *            The XML input to parse and validate. It must be either an
	 *            InputStream or a Document object. An InputStream is closed
	 *            by this method.
	 * @param parserConfig
	 *            An Element
	 *            ({http://www.occamlab.com/te/parsers}XMLValidatingParser)
	 *            containing configuration info. If it is {@code null} or empty
	 *            validation will be performed by using location hints in the
	 *            input document. Its {@code ignoreErrors} attribute (default
	 *            false) and {@code ignoreWarnings} attribute (default true)
	 *            control whether a document with validation problems is still
	 *            returned.
	 * @param logger
	 *            The PrintWriter to log all results to
	 * @return {@code null} If any non-ignorable errors or warnings occurred;
	 *         otherwise the resulting Document.
	 * @throws NullPointerException
	 *             If {@code input} is null.
	 * @throws IllegalArgumentException
	 *             If {@code input} is neither an InputStream nor a Document.
	 * @throws RuntimeException
	 *             If the input stream cannot be parsed into a Document; the
	 *             underlying parser exception, if any, is the cause.
	 * @throws Exception
	 *             If validation cannot be carried out.
	 */
	Document parse(Object input, Element parserConfig, PrintWriter logger)
			throws Exception {
		if (null == input) {
			throw new NullPointerException("XML input must not be null.");
		}
		jlogger.finer("Received XML resource of type "
				+ input.getClass().getName());
		Document resultDoc = null;
		// Remembered so the failure reported below carries the real reason.
		Exception parseFailure = null;
		ErrorHandlerImpl errHandler = new ErrorHandlerImpl("Parsing", logger);

		if (input instanceof InputStream) {
			DocumentBuilder db = nonValidatingDBF.newDocumentBuilder();
			db.setErrorHandler(errHandler);
			try (InputStream xmlInput = (InputStream) input) {
				resultDoc = db.parse(xmlInput);
			} catch (Exception e) {
				parseFailure = e;
				jlogger.log(Level.INFO, "Error parsing InputStream", e);
			}
		} else if (input instanceof Document) {
			resultDoc = (Document) input;
		} else {
			throw new IllegalArgumentException(
					"XML input must be an InputStream or a Document object.");
		}
		if (null == resultDoc) {
			throw new RuntimeException("Failed to parse input: "
					+ input.getClass().getName(), parseFailure);
		}

		errHandler.setRole("Validation");
		validate(resultDoc, parserConfig, errHandler);
		final int errorCount = errHandler.getErrorCount();
		final int warningCount = errHandler.getWarningCount();

		if (errorCount > 0 || warningCount > 0) {
			final StringBuilder msg = new StringBuilder();
			if (errorCount > 0) {
				msg.append(errorCount).append(" validation error")
						.append(errorCount == 1 ? "" : "s");
				if (warningCount > 0) {
					msg.append(" and ");
				}
			}
			if (warningCount > 0) {
				msg.append(warningCount).append(" warning")
						.append(warningCount == 1 ? "" : "s");
			}
			msg.append(" detected.");
			logger.println(msg.toString());
		}

		if (errorCount > 0) {
			// Errors are fatal unless ignoreErrors is explicitly "true".
			final boolean ignoreErrors = (null != parserConfig)
					&& Boolean.parseBoolean(parserConfig.getAttribute("ignoreErrors"));
			if (!ignoreErrors) {
				resultDoc = null;
			}
		}

		if (warningCount > 0) {
			// Warnings are ignored unless ignoreWarnings is present and is
			// anything other than "true".
			final String flag = (null != parserConfig)
					? parserConfig.getAttribute("ignoreWarnings") : "true";
			final boolean ignoreWarnings = flag.isEmpty() || Boolean.parseBoolean(flag);
			if (!ignoreWarnings) {
				resultDoc = null;
			}
		}
		return resultDoc;
	}

	/**
	 * Validates a document against a pool of schemas supplied outside of the
	 * request element. A summary of any validation problems is written to
	 * standard output.
	 * 
	 * @param doc
	 *            The document to validate.
	 * @param instruction
	 *            A document whose root element carries the schema information
	 *            (file locations); may be null, in which case no additional
	 *            configuration is applied.
	 * @return false if {@code doc} is null or empty, or if validation
	 *         reported non-ignorable errors or warnings; true otherwise.
	 * @throws Exception
	 *             If validation cannot be carried out.
	 */
	public boolean checkXMLRules(Document doc, Document instruction)
			throws Exception {

		if (doc == null || doc.getDocumentElement() == null) {
			return false;
		}
		final Element config = (instruction == null)
				? null : instruction.getDocumentElement();
		final PrintWriter logger = new PrintWriter(System.out);
		try {
			return parse(doc, config, logger) != null;
		} finally {
			// This PrintWriter does not auto-flush; without an explicit flush
			// the summary would stay in its buffer. Do not close it, because
			// that would close System.out.
			logger.flush();
		}
	}

	/**
	 * Checks a document against the schemas referenced by an instruction
	 * document and reports the problems found as a node list.
	 * 
	 * @param doc
	 *            The document to be validated.
	 * @param instruction
	 *            A document containing schema references; may be null, in which
	 *            case embedded schema references will be used instead.
	 * @return A list of Element nodes ({@code <error>}) containing error
	 *         messages.
	 * @throws Exception
	 *             If any error occurs.
	 */
	public NodeList validate(Document doc, Document instruction)
			throws Exception {
		final XmlErrorHandler collected = schemaValidation(doc, instruction);
		return collected.toNodeList();
	}

	/**
	 * Checks a document against the schemas referenced by an instruction
	 * document and reports the outcome as a single element, namely the root
	 * element produced by the error handler.
	 *
	 * @param doc
	 *            The document to be validated.
	 * @param instruction
	 *            A document containing schema references.
	 * @return The root element of the collected validation results.
	 * @throws Exception
	 *             If any error occurs.
	 */
	public Element validateSingleResult(Document doc, Document instruction)
			throws Exception {
		final XmlErrorHandler collected = schemaValidation(doc, instruction);
		return collected.toRootElement();
	}

	/**
	 * Runs validation and hands back the error handler holding the results.
	 *
	 * @param doc
	 *            The document to be validated; must have a document element.
	 * @param instruction
	 *            A document containing schema references.
	 * @return The handler on which all validation problems were recorded.
	 * @throws NullPointerException
	 *             If {@code doc} is null or has no document element.
	 * @throws Exception
	 *             If validation cannot be carried out.
	 */
	XmlErrorHandler schemaValidation(Document doc, Document instruction)
			throws Exception {
		final boolean noContent = (doc == null)
				|| (doc.getDocumentElement() == null);
		if (noContent) {
			throw new NullPointerException("Input document is null.");
		}
		final XmlErrorHandler collector = new XmlErrorHandler();
		validate(doc, instruction, collector);
		return collector;
	}

	/**
	 * Validates an XML {@link Document}, recording problems on the supplied
	 * error handler.
	 *
	 * The schema and DTD references held by this instance are combined with
	 * those found in the instruction node. DTD validation is chosen when the
	 * document has a DOCTYPE or any DTD reference is present; otherwise XML
	 * Schema validation is used.
	 * 
	 * @param doc must not be null
	 * @param instruction may be null to signify no special instructions
	 * @param errHandler errors will be recorded on this object
	 */
	private void validate(
			final Document doc, final Node instruction, final ErrorHandler errHandler)
			throws Exception {
		// Work on copies so per-call references never leak into the instance lists.
		final ArrayList<SchemaSupplier> xsdRefs = new ArrayList<>(schemaList);
		final ArrayList<Object> dtdRefs = new ArrayList<Object>(dtdList);
		loadSchemaLists(instruction, xsdRefs, dtdRefs);

		final boolean useDtd = (null != doc.getDoctype()) || !dtdRefs.isEmpty();
		if (useDtd) {
			validateAgainstDTDList(doc, dtdRefs, errHandler);
		} else {
			validateAgainstXMLSchemaList(doc, xsdRefs, errHandler);
		}
	}

	/**
	 * Checks a document against a set of XML Schemas, sending every validation
	 * problem to the supplied handler.
	 * 
	 * @param doc
	 *            The input Document node.
	 * @param xsdList
	 *            A list of XML schema references. Must be non-null, but if
	 *            empty, the schema loader's default schema is used instead.
	 * @param errHandler
	 *            An ErrorHandler that collects validation errors.
	 * @throws SAXException
	 *             If a schema cannot be read for some reason.
	 * @throws IOException
	 *             If an I/O error occurs.
	 */
	void validateAgainstXMLSchemaList(Document doc, List<SchemaSupplier> xsdList,
			ErrorHandler errHandler) throws SAXException, IOException {
		jlogger.fine("Validating XML resource from " + doc.getDocumentURI()
			+ " with these specified schemas: " + xsdList);
		final Schema compiled = xsdList.isEmpty()
				? schemaLoader.defaultSchema()
				: schemaLoader.loadSchema(ImmutableList.copyOf(xsdList));
		final Validator checker = compiled.newValidator();
		checker.setErrorHandler(errHandler);
		checker.validate(new DOMSource(doc, doc.getBaseURI()));
	}

	/**
	 * Validates an XML resource against a list of DTD schemas or as indicated by a
	 * DOCTYPE declaration. Validation errors are reported to the given handler. If
	 * no DTD references are provided the external schema reference in the DOCTYPE
	 * declaration is used (Note: an internal subset is ignored). If there is no
	 * DOCTYPE, or the DOCTYPE has no system identifier, there is nothing to
	 * validate against and the method returns without reporting anything.
	 *
	 * For each DTD the document is serialized with a DOCTYPE pointing at that
	 * DTD and re-parsed with a DTD-validating parser.
	 * 
	 * @param doc
	 *            The input Document.
	 * @param dtdList
	 *            A list of DTD schema references. May be empty but not null.
	 * @param errHandler
	 *            An ErrorHandler that collects validation errors.
	 * @throws Exception
	 *             If any errors occur while attempting to validate the document.
	 */
	private void validateAgainstDTDList(Document doc, ArrayList<Object> dtdList,
			ErrorHandler errHandler) throws Exception {
		jlogger.finer("Validating XML resource from " + doc.getDocumentURI());

		// Work out which system identifiers to validate against before
		// creating any parser or transformer.
		final List<String> systemIds = new ArrayList<>();
		if (dtdList.isEmpty()) {
			final DocumentType doctype = doc.getDoctype();
			if (null == doctype) {
				return;
			}
			final String declaredId = doctype.getSystemId();
			if (null == declaredId) {
				// DOCTYPE with only an internal subset: no external DTD to use.
				jlogger.warning("DOCTYPE declaration has no system identifier; "
						+ "skipping DTD validation of " + doc.getDocumentURI());
				return;
			}
			URI systemId = URI.create(declaredId);
			if (!systemId.isAbsolute() && null != doc.getBaseURI()) {
				systemId = URI.create(doc.getBaseURI()).resolve(systemId);
			}
			systemIds.add(systemId.toString());
		} else {
			for (Object dtdRef : dtdList) {
				systemIds.add(dtdRef.toString());
			}
		}

		final DocumentBuilder db = dtdValidatingDBF.newDocumentBuilder();
		db.setErrorHandler(errHandler);
		// Fortify Mod: prevent external entity injection
		// includes try block to capture exceptions to setFeature.
		final TransformerFactory tf = TransformerFactory.newInstance();
		try {
			tf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
		} catch (Exception e) {
			jlogger.warning("Failed to secure Transformer");
		}
		// End Fortify Mod
		final Transformer copier = tf.newTransformer();
		final ByteArrayOutputStream content = new ByteArrayOutputStream();
		final Result copy = new StreamResult(content);
		for (String systemId : systemIds) {
			content.reset();
			copier.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, systemId);
			copier.transform(new DOMSource(doc), copy);
			db.parse(new ByteArrayInputStream(content.toByteArray()));
		}
	}
}