/*  Data Analysis with Java
 *  John R. Hubbard
 *  Aug 4, 2017
 */

package dawj.ch11;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

public class Example1 {
    public static void main(String[] args) {
        try {
            File tempFile = new File("/data/Temp.dat");
            map("data/sonnets/", 80, tempFile);

            Map<String,StringBuilder> hashTable = new HashMap(2500);
            combine(tempFile, hashTable);

            File outFile = new File("/data/Output.dat");
            reduce(hashTable, outFile);
        } catch (IOException e) {
            System.err.println(e);
        }
    }

    public static void map(String src, int n, File temp) throws IOException {
        PrintWriter writer = new PrintWriter(temp);
        for (int i = 0; i < n; i++) {
            String filename = String.format("%sSonnet%03d.txt", src, i+1);
            map(filename, writer);
        }
        writer.close();
    }

    public static void combine(File temp, Map<String,StringBuilder> table) 
            throws IOException {
        Scanner scanner = new Scanner(temp);
        while (scanner.hasNext()) {
            String word = scanner.next();
            StringBuilder value = table.get(word);
            if (value == null) {
                value = new StringBuilder("");
            }
            table.put(word, value.append(" 1"));
            scanner.nextLine();  // scan past the rest of the line (a "1")
        }
        scanner.close();
    }
        
    public static void reduce(Map<String,StringBuilder> table, File out) 
            throws IOException {
        PrintWriter writer = new PrintWriter(out);
        for (Map.Entry<String, StringBuilder> entry : table.entrySet()) {
            String key = entry.getKey();  // e.g., "speak"
            String value = entry.getValue().toString();  // e.g., "1 1 1 1 1"
            reduce(key, value, writer);
        }
        writer.close();
    }

    
    /*  Writes the pair (word, 1) for each word in the specified file.
    */
    public static void map(String filename, PrintWriter writer) 
            throws IOException {
        Scanner input = new Scanner(new File(filename));
        input.useDelimiter("[.,:;()?!\"\\s]+");
        while (input.hasNext()) {
            String word = input.next();
            writer.printf("%s 1%n", word.toLowerCase());
        }
        input.close();
    }

    /*  Counts the 1s in the value argument and writes (key, count) to file.
    */
    public static void reduce(String key, String value, PrintWriter writer)
            throws IOException {
        int count = (value.length() + 1)/2;  // e.g. "1 1 1 1 1" => 5
        writer.printf("%s %d%n", key, count);
    }
    
    private static void sort(File file) throws IOException {
        Scanner input = new Scanner(file);
        List<String> list = new ArrayList();
        while (input.hasNext()) {
            list.add(input.nextLine());
        }
        input.close();
        Collections.sort(list);
        PrintWriter output = new PrintWriter(file);
        for (String string : list) {
            output.println(string);
        }
        output.close();
    }
}