/*  Data Analysis with Java
 *  John R. Hubbard
 *  July 12, 2017
 */

package dawj.ch09;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.Scanner;

/**
 * Item-to-item collaborative filtering, step 1: builds a binary user-item
 * utility matrix from a purchases file, then derives the item-item cosine
 * similarity matrix, and writes both matrices to text files.
 *
 * <p>All matrices are 1-based: row 0 and column 0 are allocated but unused,
 * so a matrix for {@code m} users and {@code n} items has dimensions
 * {@code (m+1) x (n+1)}. Every method derives the user/item counts from the
 * array it is given, so the methods can be called independently of each
 * other. Matrices are assumed to be rectangular.
 */
public class Filter1 {

    /**
     * Reads {@code data/Purchases1.dat}, then writes the utility matrix to
     * {@code data/Utility1.dat} and the similarity matrix to
     * {@code data/Similarity1.dat}. A missing or unwritable file is reported
     * on standard error.
     */
    public static void main(String[] args) {
        File purchasesFile = new File("data/Purchases1.dat");
        File utilityFile = new File("data/Utility1.dat");
        File similarityFile = new File("data/Similarity1.dat");
        try {
            int[][] u = computeUtilityMatrix(purchasesFile);
            storeUtilityMatrix(u, utilityFile);
            double[][] s = computeSimilarityMatrix(u);
            storeSimilarityMatrix(s, similarityFile);
        } catch (FileNotFoundException e) {
            System.err.println(e);
        }
    }

    /**
     * Builds the utility matrix from a purchases file.
     *
     * <p>Expected layout: five header lines, of which the first begins with
     * the number of users and the second begins with the number of items
     * (the remaining three are skipped), followed by whitespace-separated
     * {@code user item} integer pairs.
     *
     * @param file the purchases file to read
     * @return an {@code (m+1) x (n+1)} matrix with {@code u[i][j] == 1} for
     *         every {@code (i, j)} pair listed in the file and 0 elsewhere
     * @throws FileNotFoundException if {@code file} cannot be opened
     * @throws java.util.NoSuchElementException if the file is truncated or
     *         contains a non-integer token where an integer is expected
     * @throws ArrayIndexOutOfBoundsException if a pair lies outside
     *         {@code 0..m} x {@code 0..n}
     */
    public static int[][] computeUtilityMatrix(File file)
            throws FileNotFoundException {
        //  try-with-resources: the scanner is closed even when the data is bad.
        try (Scanner in = new Scanner(file)) {
            //  Read the five header lines:
            int users = in.nextInt();  in.nextLine();
            int items = in.nextInt();  in.nextLine();
            in.nextLine();  in.nextLine();  in.nextLine();

            //  Read in the utility matrix:
            int[][] u = new int[users+1][items+1];
            //  hasNext() rather than hasNextInt(), so that a stray non-integer
            //  token raises an exception instead of silently ending the read.
            while (in.hasNext()) {
                int i = in.nextInt();  // user
                int j = in.nextInt();  // item
                u[i][j] = 1;
            }
            return u;
        }
    }

    /**
     * Writes the utility matrix as text: a "users" line, an "items" line,
     * then one line per user with each entry printed in a 2-character field.
     *
     * @param u    the 1-based utility matrix
     * @param file the file to create or overwrite
     * @throws FileNotFoundException if {@code file} cannot be opened for writing
     */
    public static void storeUtilityMatrix(int[][] u, File file)
            throws FileNotFoundException {
        int users = userCount(u);
        int items = itemCount(u);
        try (PrintWriter out = new PrintWriter(file)) {
            out.printf("%d users%n", users);
            out.printf("%d items%n", items);
            for (int i = 1; i <= users; i++) {
                for (int j = 1; j <= items; j++) {
                    out.printf("%2d", u[i][j]);
                }
                out.println();
            }
        }
    }

    /**
     * Computes the item-item cosine similarity matrix of a utility matrix.
     *
     * @param u the 1-based utility matrix
     * @return an {@code (n+1) x (n+1)} matrix {@code s} where {@code s[j][k]}
     *         is the cosine similarity of columns {@code j} and {@code k} of
     *         {@code u}; row 0 and column 0 are left at 0
     */
    public static double[][] computeSimilarityMatrix(int[][] u) {
        int items = itemCount(u);

        //  Each column norm is needed for every pair; compute it only once.
        double[] norms = new double[items+1];
        for (int j = 1; j <= items; j++) {
            norms[j] = norm(u, j);
        }

        double[][] s = new double[items+1][items+1];
        for (int j = 1; j <= items; j++) {
            //  Cosine similarity is symmetric: fill the upper triangle and
            //  mirror it into the lower one.
            for (int k = j; k <= items; k++) {
                double denominator = norms[j]*norms[k];
                double similarity =
                        (denominator == 0 ? 0 : dot(u,j,k)/denominator);
                s[j][k] = similarity;
                s[k][j] = similarity;
            }
        }
        return s;
    }

    /**
     * Writes the similarity matrix as text: an "items" line, then one line
     * per item with each entry printed using the format {@code %6.2f}.
     *
     * @param s    the 1-based, square similarity matrix
     * @param file the file to create or overwrite
     * @throws FileNotFoundException if {@code file} cannot be opened for writing
     */
    public static void storeSimilarityMatrix(double[][] s, File file)
            throws FileNotFoundException {
        int items = Math.max(0, s.length - 1);
        try (PrintWriter out = new PrintWriter(file)) {
            out.printf("%d items%n", items);
            for (int i = 1; i <= items; i++) {
                for (int j = 1; j <= items; j++) {
                    out.printf("%6.2f", s[i][j]);
                }
                out.println();
            }
        }
    }

    /**
     * Returns the cosine similarity of the jth and kth columns of u[][],
     * or 0 if either column has zero norm.
     */
    public static double cosine(int[][] u, int j, int k) {
        double denominator = norm(u,j)*norm(u,k);
        return (denominator == 0 ? 0 : dot(u,j,k)/denominator);
    }

    /**
     * Returns the dot product of the jth and kth columns of u[][],
     * taken over rows 1 through {@code u.length - 1} (row 0 is unused).
     */
    public static double dot(int[][] u, int j, int k) {
        double sum = 0.0;
        for (int i = 1; i < u.length; i++) {
            sum += u[i][j]*u[i][k];
        }
        return sum;
    }

    /**
     * Returns the Euclidean norm of the jth column of u[][].
     */
    public static double norm(int[][] u, int j) {
        return Math.sqrt(dot(u,j,j));
    }

    /**  Number of users (rows, excluding the unused row 0) in u[][]. */
    private static int userCount(int[][] u) {
        return Math.max(0, u.length - 1);
    }

    /**  Number of items (columns, excluding the unused column 0) in u[][]. */
    private static int itemCount(int[][] u) {
        return (u.length == 0 ? 0 : Math.max(0, u[0].length - 1));
    }
}