package nfa.transitionlabel;

import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.regex.PatternSyntaxException;

import util.RangeSet;
import util.RangeSet.Range;

/*
 * TODO: \Q ... \E For some reason Java throws an PatternSyntaxException: Unclosed character class on [\Q\E]
 * \ uhhhh (remove space)
 * (\Uhhhhhhhh not supported)
 * \0ooo There needn't be 3 o's 
 */

public class TransitionLabelParserRecursive {
	private static final int MIN_16UNICODE = 0;
	private static final int MAX_16UNICODE = 65536;

	private final Scanner labelScanner;
	private final String transitionLabelString;
	private String currentSymbol;
	private int index;
	private int depth;

	private CharacterPropertyParser characterPropertyParser;

	public TransitionLabelParserRecursive(String transitionLabelString) {
		this.transitionLabelString = transitionLabelString;
		this.labelScanner = new Scanner(transitionLabelString);
		labelScanner.useDelimiter("");
		this.index = 0;
		this.depth = 0;
	}

	private boolean consumeSymbol() {
		
		try {
			currentSymbol = labelScanner.next();
			index++;
		} catch (NoSuchElementException nse) {
			return false;
		}
		return true;
	}

	private void consumeSymbolIfHasNext() {
		if (labelScanner.hasNext()) {
			consumeSymbol();
		}
	}

	@SuppressWarnings("fallthrough")
	public TransitionLabel parseTransitionLabel() {
		TransitionLabel toReturn;
		RangeSet labelRanges;
		consumeSymbol();
		switch (currentSymbol) {
		case ".":
			labelRanges = CharacterClassTransitionLabel.predefinedRangeWildcard();
			toReturn = new CharacterClassTransitionLabel(labelRanges);
			break;
		case "[":
			/* parse character class */
			labelRanges = parseCharacterClass();
			if (depth != 0) {
				throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
			}
			toReturn = new CharacterClassTransitionLabel(labelRanges);
			break;
		case "\\":
			/* parse predefined character class, or backslash */
			consumeSymbol();
			if (currentSymbol.equals("\\")) {
				toReturn = new CharacterClassTransitionLabel("\\");
			} else if (currentSymbol.equals("-")) {
				toReturn = new CharacterClassTransitionLabel("-");
			} else {
				RangeSet predefinedCharacterClassRangeSet = parsePredefinedCharacterClass(currentSymbol);
				toReturn = new CharacterClassTransitionLabel(predefinedCharacterClassRangeSet);
			}

			break;
		default:
			/*
			 * parse character, we send the entire string, for epsilon
			 * subscripts
			 */
			if (transitionLabelString.matches("ε\\d+")) {
				toReturn = new EpsilonTransitionLabel(transitionLabelString);
			} else {
				toReturn = new CharacterClassTransitionLabel(transitionLabelString);
			}

		}
		labelScanner.close();
		return toReturn;
	}
	@SuppressWarnings("fallthrough")
	private RangeSet parsePredefinedCharacterClass(String firstSymbol) {
		RangeSet toReturn = null;
		boolean complement = false;
		char c;
		switch (firstSymbol) {
		case "a":
			consumeSymbolIfHasNext();
			return parseCharacterRange("" + ((char) 7));
		case "e":
			consumeSymbolIfHasNext();
			return parseCharacterRange("" + ((char) 27));
		case "f":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\f");
		case "n":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\n");
		//case "b":
		//	consumeSymbolIfHasNext();
		//	return parseCharacterRange("\b");
		case "r":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\r");
		case "t":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\t");
		case "\\":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\\");
		case "\'":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\'");
		case "\"":
			consumeSymbolIfHasNext();
			return parseCharacterRange("\"");
		case "[":
			consumeSymbolIfHasNext();
			return parseCharacterRange("[");
		case "]":
			consumeSymbolIfHasNext();
			return parseCharacterRange("]");
		case "-":
			consumeSymbolIfHasNext();
			return parseCharacterRange("-");
		case "Q":
			return parseQuotedSequence();
		case "0":
			c = parseEscapedOctalCharacter();
			return parseCharacterRange("" + c);
		case "u":
			c = parseEscapedUnicodeCharacter();
			consumeSymbolIfHasNext();
			return parseCharacterRange("" + c);
		case "x":
			c = parseEscapedHexCharacter();
			consumeSymbolIfHasNext();
			return parseCharacterRange("" + c);
		case "c":
			consumeSymbol();
			int charCode = (((currentSymbol.charAt(0) - '@') % 128 + 128) % 128);
			c = (char) charCode;		
			consumeSymbol();
			return parseCharacterRange("" + c);
		case "D":
			/* predefined class: non-digits */
			complement = true;
		case "d":
			/* predefined class: digits */
			toReturn = CharacterClassTransitionLabel.predefinedRangeSetDigits();
			break;
		case "S":
			/* predefined class: non-whitespace */
			complement = true;
		case "s":
			/* predefined class: whitespace */
			toReturn = CharacterClassTransitionLabel.predefinedRangeSetWhiteSpaces();
			break;
		case "W":
			/* predefined class: non-word */
			complement = true;
		case "w":
			/* predefined class: word */
			toReturn = CharacterClassTransitionLabel.predefinedRangeSetWordCharacters();
			break;
		case "V":
			complement = true;
		case "v":
			/* predefined class: vertical tab */
			toReturn = CharacterClassTransitionLabel.predefinedRangeSetVerticalTab();
			break;
		case "H":
			complement = true;
		case "h":
			toReturn = CharacterClassTransitionLabel.predefinedRangeSetHorizontalTab();
			break;
		case "P":
			complement = true;
		case "p":
			toReturn = parsePropertyCharacterClass();
			break;
		default:
			/*
			 * TODO: It seems any symbol except letters and numbers can be
			 * escaped, (other than those for predefined character classes, or
			 * escape characters)
			 */
			if (currentSymbol.matches("[A-Za-z0-9]")) {
				throw new PatternSyntaxException("Illegal/unsupported escape sequence", transitionLabelString, index);
			} else {
				String symbol = currentSymbol;
				consumeSymbol();
				return parseCharacterRange(symbol);
			}

		}
		consumeSymbolIfHasNext();

		if (complement) {
			toReturn.complement();
		}
		return toReturn;
	}
	
	private RangeSet parseQuotedSequence() {
		RangeSet toReturn = new RangeSet(MIN_16UNICODE, MAX_16UNICODE);
		consumeSymbol();
		LinkedList<Range> symbolSequence = new LinkedList<Range>();
		String lastChar = currentSymbol;
		
		while (true) {
			if (currentSymbol.equals("\\")) {
				if (!consumeSymbol()) {
					throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
				}
				if (currentSymbol.equals("E")) {
					if (!consumeSymbol()) {
						throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
					}
					break;
				} else {
					symbolSequence.add(toReturn.createRange((int) "\\".charAt(0)));
				}
			}
			symbolSequence.add(toReturn.createRange((int) currentSymbol.charAt(0)));
			lastChar = currentSymbol;
			if (!consumeSymbol()) {
				throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
			}
		}
		toReturn.union(symbolSequence);
		if (currentSymbol.equals("-")) {
			toReturn.union(parseCharacterRange(lastChar));
		}
		
		return toReturn;
	}
	
	private char parseEscapedOctalCharacter() {
		consumeSymbol();
		StringBuilder hexNumberStr = new StringBuilder();
		
		int i = 0;
		/* Read octal symbols until larger than allowed max up to a maximum of three characters */
		int tmpNum = 0;
		while (tmpNum < 0377 && currentSymbol.matches("[0-7]") && i < 3) {
			
			hexNumberStr.append(currentSymbol);			
			tmpNum = Integer.parseInt(hexNumberStr.toString(), 8);
			i++;
			if (!consumeSymbol()) {
				break;
			}
		}

		try {
			int hexNumber = Integer.parseInt(hexNumberStr.toString(), 8);

			if (hexNumber >= MAX_16UNICODE) {
				throw new PatternSyntaxException("Hexadecimal codepoint is too big", transitionLabelString, index);
			}
			return ((char) hexNumber);
		} catch (NumberFormatException nfe) {
			throw new PatternSyntaxException("Illegal hexadecimal escape sequence", transitionLabelString, index);
		}
	}
	
	private char parseEscapedUnicodeCharacter() {
		consumeSymbol();
		StringBuilder hexNumberStr = new StringBuilder();
		/* Read next four symbols as hex number */
		hexNumberStr.append(currentSymbol);
		for (int i = 0; i < 4; i++) {
			consumeSymbol();
			hexNumberStr.append(currentSymbol);			
			
		}

		try {
			int hexNumber = Integer.parseInt(hexNumberStr.toString(), 16);

			if (hexNumber >= MAX_16UNICODE) {
				throw new PatternSyntaxException("Hexadecimal codepoint is too big", transitionLabelString, index);
			}
			return ((char) hexNumber);
		} catch (NumberFormatException nfe) {
			throw new PatternSyntaxException("Illegal hexadecimal escape sequence", transitionLabelString, index);
		}
	}
	
	private char parseEscapedHexCharacter() {
		consumeSymbol();
		StringBuilder hexNumberStr = new StringBuilder();
		if (currentSymbol.equals("{")) {
			/* read until } is found */
			consumeSymbol();

			while (!currentSymbol.equals("}")) {
				hexNumberStr.append(currentSymbol);
				consumeSymbol();
			}
		} else {
			/* Read next two symbols as hex number */
			hexNumberStr.append(currentSymbol);
			consumeSymbol();
			hexNumberStr.append(currentSymbol);

		}
		try {
			int hexNumber = Integer.parseInt(hexNumberStr.toString(), 16);

			if (hexNumber >= MAX_16UNICODE) {
				throw new PatternSyntaxException("Hexadecimal codepoint is too big", transitionLabelString, index);
			}
			return ((char) hexNumber);
		} catch (NumberFormatException nfe) {
			throw new PatternSyntaxException("Illegal hexadecimal escape sequence", transitionLabelString, index);
		}
	}

	private RangeSet parsePropertyCharacterClass() {
		if (characterPropertyParser == null) {
			characterPropertyParser = new CharacterPropertyParser(transitionLabelString, index);
		} else {
			characterPropertyParser.setIndex(index);
		}

		RangeSet toReturn;

		consumeSymbol();
		if (!currentSymbol.equals("{")) {
			/* Single character character properties */
			toReturn = characterPropertyParser.parseCharacterProperty(currentSymbol);

		} else {
			StringBuilder sb = new StringBuilder();
			consumeSymbol(); /* eating the '{' */
			while (!currentSymbol.equals("}")) {
				sb.append(currentSymbol);
				consumeSymbol();
			}
			String characterPropertyString = sb.toString();
			toReturn = characterPropertyParser.parseCharacterProperty(characterPropertyString);
		}
		return toReturn;
	}

	private RangeSet parseCharacterClass() {
		depth++;
		/* eating [ */
		if (!consumeSymbol())  {
			throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
		}
		boolean isComplement = false;
		if (currentSymbol.equals("^")) {
			isComplement = true;
			consumeSymbol(); /* eating ^ */
		}

		RangeSet characterClassRangeSet = new RangeSet(MIN_16UNICODE, MAX_16UNICODE);
		if (currentSymbol.equals("]")) {
			/*
			 * since we make the assumption that empty character classes i.e. []
			 * are not allowed, we treat ] as a literal character.
			 */
			characterClassRangeSet.union(createCharacterRange("]"));
			if (!consumeSymbol()) {
				throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
			}
		}

		/*
		 * We, unlike Java, require that there must be at least one factor in
		 * the Character Class
		 */
		/* The ^ only applies to the first factor */
		characterClassRangeSet.union(parseCharacterClassFactor(characterClassRangeSet, isComplement));

		while (!currentSymbol.equals("]")) {
			/*
			 * this might be a problem, but the parseCharacterClassFactor will
			 * have to parse the &&
			 */
			RangeSet currentFactor = parseCharacterClassFactor(new RangeSet(MIN_16UNICODE, MAX_16UNICODE), false);
			characterClassRangeSet.intersection(currentFactor);
		}
		
		depth--;
		if (labelScanner.hasNext()) {
			consumeSymbol();
		} else if (depth != 0) {
			throw new PatternSyntaxException("Unclosed character class", transitionLabelString, index);
		}
		return characterClassRangeSet;
	}

	/* leaves after a && or on a ] */
	private RangeSet parseCharacterClassFactor(RangeSet characterClassFactorRangeSet, boolean isComplement) {

		boolean factorComplete = false;
		while (!factorComplete) {

			if (currentSymbol.equals("[")) {
				if (isComplement) {
					/* ^ only applies to first term of first factor */
					isComplement = false;
					/* ^ does not work if [ is directly after it */
					if (!characterClassFactorRangeSet.isEmpty()) { 
						characterClassFactorRangeSet.complement();
					}
				}
				RangeSet currentFactor = parseCharacterClass();
				characterClassFactorRangeSet.union(currentFactor);
			} else if (currentSymbol.equals("]")) {
				if (isComplement) {
					characterClassFactorRangeSet.complement();
				}
				/* leaving the ] for the parseCC to consume */
				factorComplete = true; 
			} else if (currentSymbol.equals("&")) {
				consumeSymbol(); /* eating the first & */

				if (currentSymbol.equals("&")) {

					/* we found &&, end of factor */
					factorComplete = true;
					if (isComplement) {
						characterClassFactorRangeSet.complement();
					}
					consumeSymbol(); /* eating the second & */
				} else {
					/* parsing the eaten & */
					characterClassFactorRangeSet.union(parseCharacterRange("&")); 
				}
			} else if (currentSymbol.equals("\\")) {
				consumeSymbol();
				/*
				 * for some reason predefined character classes do not count as
				 * nested character classes...
				 */
				characterClassFactorRangeSet.union(parsePredefinedCharacterClass(currentSymbol));

			} else {
				String firstSymbol = currentSymbol;
				consumeSymbol();
				characterClassFactorRangeSet.union(parseCharacterRange(firstSymbol));
			}
		}
		return characterClassFactorRangeSet;
	}

	/* firstSymbol is the symbol before currentSymbol */
	private RangeSet parseCharacterRange(String firstSymbol) {
		RangeSet characterRangeRangeSet;
		if (currentSymbol.equals("-")) {
			if (labelScanner.hasNext()) {
				consumeSymbol();
				if (currentSymbol.equals("\\")) {
					consumeSymbol();
					switch (currentSymbol) {
					case "a":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "" + ((char) 7));
						consumeSymbol();
						break;
					case "e":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "" + ((char) 27));
						consumeSymbol();
						break;
					case "f":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "\f");
						consumeSymbol();
						break;
					case "n":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "\n");
						consumeSymbol();
						break;
					case "r":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "\r");
						consumeSymbol();
						break;
					case "t":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "\t");
						consumeSymbol();
						break;
					case "[":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "[");
						consumeSymbol();
						break;
					case "]":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "]");
						consumeSymbol();
						break;
					case "\\":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "\\");
						consumeSymbol();
						break;
					case "-":
						characterRangeRangeSet = createCharacterRange(firstSymbol, "-");
						consumeSymbol();
						break;
					case "0":
						char c = parseEscapedOctalCharacter();
						characterRangeRangeSet = createCharacterRange(firstSymbol, "" + c);
						break;
					case "u":
						c = parseEscapedUnicodeCharacter();
						characterRangeRangeSet = createCharacterRange(firstSymbol, "" + c);
						consumeSymbol();
						break;
					case "x":
						c = parseEscapedHexCharacter();
						characterRangeRangeSet = createCharacterRange(firstSymbol, "" + c);
						consumeSymbol();
						break;
					case "c":
						consumeSymbol();
						int charCode = (((currentSymbol.charAt(0) - '@') % 128 + 128) % 128);
						c = (char) charCode;		
						consumeSymbol();
						characterRangeRangeSet =  createCharacterRange(firstSymbol, "" + c);
						consumeSymbol();
						break;
					default:
						if (currentSymbol.matches("[A-Za-z0-9]")) {
							throw new PatternSyntaxException("Illegal character range", transitionLabelString, index);
						} else {
							characterRangeRangeSet = createCharacterRange(firstSymbol, currentSymbol);
							consumeSymbol();
						}
						
					}

				} else if (currentSymbol.equals("]") || currentSymbol.equals("[")) {
					characterRangeRangeSet = createCharacterRange(firstSymbol);
					characterRangeRangeSet.union(createCharacterRange("-"));
				} else {
					characterRangeRangeSet = createCharacterRange(firstSymbol, currentSymbol);
					consumeSymbol();
				}
			} else {
				throw new PatternSyntaxException("Illegal character range", transitionLabelString, index);
			}

		} else if (currentSymbol.equals("\\")) {

			consumeSymbol();
			characterRangeRangeSet = createCharacterRange(firstSymbol);
			characterRangeRangeSet.union(parsePredefinedCharacterClass(currentSymbol));
		} else {
			characterRangeRangeSet = createCharacterRange(firstSymbol);
		}

		return characterRangeRangeSet;
	}

	private RangeSet createCharacterRange(String symbol) {
		int currentSymbolInt = (int) symbol.charAt(0);
		RangeSet characterRangeSet = new RangeSet(MIN_16UNICODE, MAX_16UNICODE);
		Range characterRange = characterRangeSet.createRange(currentSymbolInt, currentSymbolInt + 1);
		characterRangeSet.union(characterRange);
		return characterRangeSet;
	}

	private RangeSet createCharacterRange(String symbol1, String symbol2) {
		int currentSymbolInt1 = (int) symbol1.charAt(0);
		int currentSymbolInt2 = (int) symbol2.charAt(0);
		RangeSet characterRangeRangeSet = new RangeSet(MIN_16UNICODE, MAX_16UNICODE);
		try {
			Range characterRange = characterRangeRangeSet.createRange(currentSymbolInt1, currentSymbolInt2 + 1);
			characterRangeRangeSet.union(characterRange);
			return characterRangeRangeSet;
		} catch (IllegalArgumentException iae) {
			throw new PatternSyntaxException("Illegal character range", transitionLabelString, index);
		}

	}

	public static void main(String[] args) {
		TransitionLabelParserRecursive tpr = new TransitionLabelParserRecursive(args[0]);
		TransitionLabel parseTransitionLabel = tpr.parseTransitionLabel();
		System.out.println(parseTransitionLabel);
	}
}