/*
 * Copyright 2015-2016 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.cloud.stream.app.twitter.sentiment.processor;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.ConcurrentHashMap;

/**
 * @author Christian Tzolov
 */
public class WordVocabulary implements AutoCloseable {

	/**
	 *  This contains the word vocabulary used to train the TensorFlow model.
	 */
	private final ConcurrentHashMap<String, Integer> vocabulary;

	public WordVocabulary(InputStream vocabularyInputStream) throws IOException {
		vocabulary = buildVocabulary(vocabularyInputStream);
	}

	public int[][] vectorizeSentence(String sentence) {
		int[][] vectorizedText = new int[1][128];
		String[] words = clearText(sentence).split(" ");
		for (int i = 0; i < words.length; i++) {
			Integer v = vocabulary.get(words[i]);
			vectorizedText[0][i] = (v != null)? v : 0;
		}
		return vectorizedText;
	}

	private ConcurrentHashMap<String, Integer> buildVocabulary(InputStream input) throws IOException {

		ConcurrentHashMap<String, Integer> vocabulary = new ConcurrentHashMap<>();

		try (BufferedReader buffer = new BufferedReader(new InputStreamReader(input))) {
			String l = buffer.readLine();
			while (l != null ) {
				String p[] = l.split(",");
				if (p[1].length() > 1) {
					vocabulary.put(p[0], Integer.valueOf(p[1]));
				}
				l = buffer.readLine();
			}
		}
		return vocabulary;
	}

	private String clearText(String sentence) {
		return sentence
				.trim()
				.replaceAll("[^A-Za-z0-9(),!?\\'\\`]", " ")
				.replaceAll("(.)\\1+", "\\1\\1")
				.replaceAll("\\'s", " \\'s")
				.replaceAll("\\'ve", " \\'ve")
				.replaceAll("n\\'t", " n\\'t")
				.replaceAll("\\'re", " \\'re")
				.replaceAll("\\'d", " \\'d")
				.replaceAll("\\'ll", " \\'ll")
				.replaceAll(",", " , ")
				.replaceAll("!", " ! ")
				.replaceAll("\\(", " \\( ")
				.replaceAll("\\)", " \\) ")
				.replaceAll("\\?", " \\? ")
				.replaceAll("\\s{2,}", " ")
				.toLowerCase();
	}

	@Override
	public void close() throws Exception {
		vocabulary.clear();
	}
}