package it.unimi.di.law.warc.records;

/*
 * Copyright (C) 2013-2017 Paolo Boldi, Massimo Santini, and Sebastiano Vigna
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// RELEASE-STATUS: DIST

import it.unimi.di.law.warc.io.WarcFormatException;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
import java.util.UUID;

import org.apache.commons.lang.time.FastDateFormat;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.HeaderGroup;

/**
 * A class used to represent WARC headers, with a set of static methods to handle them.
 *
 * <p>Dates are parsed and formatted in UTC using the <code>yyyy-MM-dd'T'HH:mm:ss'Z'</code> layout;
 * record ids are written as <code>&lt;urn:uuid:...&gt;</code>.
 */
@SuppressWarnings("serial")
public class WarcHeader extends BasicHeader {

	/** An enumeration of WARC headers. */
	public static enum Name {

		/* Mandatory */

		WARC_RECORD_ID("WARC-Record-ID"), // set by AbstractWarcRecord constructor
		WARC_DATE("WARC-Date"),			// set by AbstractWarcRecord constructor
		CONTENT_LENGTH("Content-Length"),	// set by AbstractWarcRecord.write
		WARC_TYPE("WARC-Type"),			// set by subclasses of AbstractWarcRecord

		/* Depending on type/case */

		CONTENT_TYPE("Content-Type"),
		WARC_CONCURRENT_TO("WARC-Concurrent-To"),
		WARC_BLOCK_DIGEST("WARC-Block-Digest"),
		WARC_PAYLOAD_DIGEST("WARC-Payload-Digest"),
		WARC_IP_ADDRESS("WARC-IP-Address"),
		WARC_REFERS_TO("WARC-Refers-To"),
		WARC_TARGET_URI("WARC-Target-URI"),						// set in HttpRequestWarcRecord and HttpResponseWarcRecord
		WARC_TRUNCATED("WARC-Truncated"),
		WARC_WARCINFO_ID("WARC-Warcinfo-ID"),
		WARC_IDENTIFIED_PAYLOAD_TYPE("WARC-Identified-Payload-Type"),
		WARC_SEGMENT_NUMBER("WARC-Segment-Number"),

		WARC_FILENAME("WARC-Filename"),							// only if warcinfo
		WARC_PROFILE("WARC-Profile"),								// only if revisit
		WARC_SEGMENT_ORIGIN_ID("WARC-Segment-Origin-ID"),			// only if continuation
		WARC_SEGMENT_TOTAL_LENGTH("WARC-Segment-Total-Length"),	// only if continuation

		/* BUbiNG headers */

		BUBING_GUESSED_CHARSET("BUbiNG-Guessed-Charset"),
		BUBING_IS_DUPLICATE("BUbiNG-Is-Duplicate");

		/** The header name as it appears on the wire. */
		protected final String value;

		Name(final String value) {
			this.value = value;
		}

		/** Returns the header name as it appears on the wire. */
		@Override
		public String toString() {
			return this.value;
		}
	}

	/** The date layout shared by parsing and formatting; the trailing <code>Z</code> is a literal, so the zone must be set explicitly. */
	private final static String W3C_ISO8601_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";
	/** The time zone designated by the literal <code>Z</code> suffix. */
	private final static TimeZone UTC = TimeZone.getTimeZone("GMT");
	/** Shared parser; not thread safe, always accessed while holding its monitor. */
	private final static DateFormat W3C_ISO8601_DATE_PARSE = new SimpleDateFormat(W3C_ISO8601_PATTERN, Locale.ROOT);
	static {
		// Without this the parser would interpret timestamps in the JVM default time zone.
		W3C_ISO8601_DATE_PARSE.setTimeZone(UTC);
	}
	/** Shared formatter, pinned to UTC so that the literal <code>Z</code> it emits is truthful. */
	private final static FastDateFormat W3C_ISO8601_DATE_FORMAT = FastDateFormat.getInstance(W3C_ISO8601_PATTERN, UTC);
	/** Prefix of a record id; <code>uuid</code> is the URN namespace registered for UUIDs. */
	private final static String UUID_HEAD = "<urn:uuid:";
	private final static int UUID_HEAD_LENGTH = UUID_HEAD.length();
	/** Misspelled prefix emitted by earlier versions of this class; still accepted when parsing. */
	private final static String LEGACY_UUID_HEAD = "<urn:uid:";
	private final static int LEGACY_UUID_HEAD_LENGTH = LEGACY_UUID_HEAD.length();
	private final static String UUID_TAIL = ">";
	private final static int UUID_TAIL_LENGTH = UUID_TAIL.length();

	/** Creates a WARC header.
	 *
	 * @param name the header name.
	 * @param value the header value.
	 */
	public WarcHeader(final WarcHeader.Name name, final String value) {
		super(name.value, value);
	}

	/**
	 * Adds the given header, if not present (otherwise does nothing).
	 *
	 * @param headers the headers where to add the new one.
	 * @param name the name of the header to add.
	 * @param value the value of the header to add.
	 */
	public static void addIfNotPresent(final HeaderGroup headers, final WarcHeader.Name name, final String value) {
		if (! headers.containsHeader(name.value)) headers.addHeader(new WarcHeader(name, value));
	}

	/**
	 * Returns the first header of given name.
	 *
	 * @param headers the headers to search from.
	 * @param name the name of the header to lookup.
	 * @return the header, as returned by {@link HeaderGroup#getFirstHeader(String)}.
	 */
	public static Header getFirstHeader(final HeaderGroup headers, final WarcHeader.Name name) {
		return headers.getFirstHeader(name.value);
	}

	/**
	 * Parses the date found in a {@link WarcHeader.Name#WARC_DATE} header, interpreting it as UTC.
	 *
	 * @param date the date.
	 * @return the parsed date.
	 * @throws WarcFormatException if {@code date} is {@code null} or cannot be parsed.
	 */
	public static Date parseDate(final String date) throws WarcFormatException {
		if (date == null) throw new WarcFormatException("Missing value for " + Name.WARC_DATE.value);
		try {
			// SimpleDateFormat keeps mutable parsing state, so access to the shared instance is serialized.
			synchronized (W3C_ISO8601_DATE_PARSE) {
				return W3C_ISO8601_DATE_PARSE.parse(date);
			}
		} catch (ParseException e) {
			throw new WarcFormatException("Error parsing date " + date, e);
		}
	}

	/**
	 * Formats the date to be written in the {@link WarcHeader.Name#WARC_DATE} header.
	 *
	 * <p>The instant represented by the calendar is rendered in UTC, whatever the calendar's own time zone.
	 *
	 * @param calendar the date.
	 * @return the formatted date.
	 */
	public static String formatDate(final Calendar calendar) {
		return W3C_ISO8601_DATE_FORMAT.format(calendar);
	}

	/**
	 * Parses the record id found in a {@link WarcHeader.Name#WARC_RECORD_ID} header.
	 *
	 * <p>Both the <code>&lt;urn:uuid:...&gt;</code> form and the legacy <code>&lt;urn:uid:...&gt;</code>
	 * form are accepted.
	 *
	 * @param id the record id.
	 * @return the parsed record id.
	 * @throws WarcFormatException if {@code id} is {@code null}, is not enclosed in a known prefix and
	 * <code>&gt;</code>, or does not contain a valid UUID.
	 */
	public static UUID parseId(final String id) throws WarcFormatException {
		if (id == null) throw new WarcFormatException("Missing value for " + Name.WARC_RECORD_ID.value);
		final int headLength;
		if (id.startsWith(UUID_HEAD)) headLength = UUID_HEAD_LENGTH;
		else if (id.startsWith(LEGACY_UUID_HEAD)) headLength = LEGACY_UUID_HEAD_LENGTH;
		else headLength = -1;
		if (headLength < 0 || ! id.endsWith(UUID_TAIL)) throw new WarcFormatException("'" + id + "' wrong format for " + Name.WARC_RECORD_ID.value);
		try {
			return UUID.fromString(id.substring(headLength, id.length() - UUID_TAIL_LENGTH));
		} catch (IllegalArgumentException e) {
			throw new WarcFormatException("Error parsing uuid " + id, e);
		}
	}

	/**
	 * Formats the record id to be written in the {@link WarcHeader.Name#WARC_RECORD_ID} header.
	 *
	 * @param id the record id.
	 * @return the formatted record id, in the form <code>&lt;urn:uuid:...&gt;</code>.
	 */
	public static String formatId(final UUID id) {
		return UUID_HEAD + id.toString() + UUID_TAIL;
	}
}