/*
 * pandoc.ts
 *
 * This module handles spawning Pandoc, passing it arguments, and streaming
 * to/from STDIN/STDOUT buffers if desired.
 *
 * Loosely based on https://github.com/eshinn/node-pandoc (MIT licensed)
 *
 */

import { stat, Stats } from 'fs';
import { spawn, ChildProcess } from 'child_process';
import * as path from 'path';
import * as fs from 'fs';
import { lookpath } from 'lookpath';

// Pandoc CLI syntax
// pandoc -f markdown -s -t html -o output.html input.md
// -f/--from: format of the source file (the source file is listed last; if no file is given, Pandoc reads STDIN)
// -t/--to: format of the destination file (the destination is given with -o; if omitted, Pandoc writes to STDOUT)
// -s: produces a standalone document (e.g. with HEAD tags for HTML)

// Alias for a string that is meant to hold an absolute file path.
// It is a plain `string` alias, so the compiler does not validate the contents.
type AbsoluteFilePath = string;
// Alias for a string that is meant to hold a URL; likewise not validated by the compiler.
type URLString = string;

// Pandoc knows several markdown flavours: markdown (Pandoc), commonmark,
// markdown_mmd (MultiMarkdown), gfm (GitHub markdown) and commonmark_x (extended CommonMark).
// Only a subset of Pandoc's input formats is listed in this type for now.
// 'json' is the JSON serialisation of the Pandoc AST, which can be used for filtering.
export type InputFormat =
  | 'markdown'
  | 'commonmark'
  | 'docx'
  | 'csv'
  | 'html'
  | 'json'
  | 'latex'
  | 'odt';

// File extensions (without the leading dot) associated with input files
export const inputExtensions: string[] = [
    'md',
    'docx',
    'csv',
    'html',
    'tex',
    'odt',
];

// Subset of Pandoc output formats (-t/--to); more will be added later.
// Every Pandoc format name used in the `outputFormats` table must be a member of this union.
// Note: Pandoc needs an explicit `-o -` to write the non-textual formats (odt, docx, epub, pdf) to STDOUT.
export type OutputFormat = 'asciidoc' | 'beamer' | 'commonmark_x' | 'docx' | 'dokuwiki'
  | 'epub' | 'html' | 'json' | 'latex' | 'markdown' | 'mediawiki' | 'odt' | 'pdf'
  | 'pptx' | 'revealjs' | 'rst' | 'rtf'
  // Deprecated: historical misspelling of 'dokuwiki'. Pandoc does not accept it as a format
  // name; it stays in the union only so that existing callers keep compiling.
  | 'docuwiki';

// Export targets, one row per format.
// Row layout: [pretty name, pandoc format name, file extension, shortened pretty name]
export const outputFormats: string[][] = [
    ['AsciiDoc (adoc)',         'asciidoc',  'adoc',        'AsciiDoc'],
    ['Word Document (docx)',    'docx',      'docx',        'Word'],
    // X.md -> X.pandoc.md so the export does not collide with the source file
    ['Pandoc Markdown',         'markdown',  'pandoc.md',   'markdown'],
    ['HTML (without Pandoc)',   'html',      'html',        'HTML'],
    ['LaTeX',                   'latex',     'tex',         'LaTeX'],
    ['OpenDocument (odt)',      'odt',       'odt',         'OpenDocument'],
    ['PowerPoint (pptx)',       'pptx',      'pptx',        'PowerPoint'],
    ['ePub',                    'epub',      'epub',        'ePub'],
    ['PDF (via LaTeX)',         'pdf',       'pdf',         'PDF'],
    ['Reveal.js Slides',        'revealjs',  'reveal.html', 'Reveal.js'],
    ['Beamer Slides',           'beamer',    'beamer.tex',  'Beamer'],
    ['reStructured Text (RST)', 'rst',       'rst',         'RST'],
    ['DokuWiki',                'dokuwiki',  'txt',         'DokuWiki'],
    ['MediaWiki',               'mediawiki', 'mediawiki',   'MediaWiki'],
];

/** Describes the document handed to Pandoc and the executables used to process it. */
export interface PandocInput {
    /** Source location; when this is 'STDIN' the `contents` field must be supplied. */
    file: AbsoluteFilePath | URLString | 'STDIN';
    /** Value for -f/--from; when omitted Pandoc infers the format itself. */
    format?: InputFormat;
    /** Document text that is piped to Pandoc when `file` is 'STDIN'. */
    contents?: string;
    /** Path to a YAML metadata file. */
    metadataFile?: string;
    /** Optional path to the Pandoc executable, for when it is not on the current PATH. */
    pandoc?: string;
    /** Optional path to the pdflatex executable, for when it is not on the current PATH. */
    pdflatex?: string;
}

/** Describes where Pandoc should write its result and in which format. */
export interface PandocOutput {
    /** Destination path, or 'STDOUT' to have the promise resolve with the output as a string. */
    file: AbsoluteFilePath | 'STDOUT';
    /** Value for -t/--to; inferred by Pandoc when left blank. */
    format?: OutputFormat;
}

/** Returns true only for the 'pdf' output format, the one format flagged here as needing LaTeX. */
export function needsLaTeX(format: OutputFormat): boolean {
    switch (format) {
        case 'pdf':
            return true;
        default:
            return false;
    }
}

/** Returns true for every output format except 'html', which is the one format flagged as not needing Pandoc. */
export function needsPandoc(format: OutputFormat): boolean {
    const handledWithoutPandoc = format === 'html';
    return !handledWithoutPandoc;
}

/**
 * Decides whether Pandoc's `-s` (standalone) flag should be passed.
 * True when the output file name ends in "html", or when the requested
 * format is one of html, revealjs, latex or beamer.
 */
export function needsStandaloneFlag(output: PandocOutput): boolean {
    // The file name is checked first, regardless of the declared format
    if (output.file.endsWith('html')) {
        return true;
    }
    switch (output.format) {
        case 'html':
        case 'revealjs':
        case 'latex':
        case 'beamer':
            return true;
        default:
            return false;
    }
}

// Note: Unicode stripping is applied to STDIN input only; for any other input you're on your own
/** Returns true when the requested output format is latex, pdf or beamer. */
export function needsUnicodeStripped(output: PandocOutput): boolean {
    switch (output.format) {
        case 'latex':
        case 'pdf':
        case 'beamer':
            return true;
        default:
            return false;
    }
}

// Resolves to true when `file` exists and is a regular file, false otherwise (never rejects)
const isRegularFile = (file: string): Promise<boolean> => new Promise(resolve => {
    stat(file, (err: NodeJS.ErrnoException | null, stats: Stats) => {
        // `stats` is undefined whenever `err` is set, so the error must be checked first
        resolve(!err && stats.isFile());
    });
});

// Builds the environment for the Pandoc child process from a COPY of process.env,
// appending the directory containing pdflatex to the PATH when one is given
const buildPandocEnv = (pdflatex?: string): NodeJS.ProcessEnv => {
    const env: NodeJS.ProcessEnv = { ...process.env };
    if (pdflatex) {
        // process.env is case-insensitive on Windows, but the plain copy is not,
        // so look up the real spelling of the key there (it is usually `Path`)
        const pathKey = process.platform === 'win32'
            ? Object.keys(env).find(key => key.toUpperCase() === 'PATH') || 'PATH'
            : 'PATH';
        const texDir = path.dirname(pdflatex);
        const current = env[pathKey];
        // path.delimiter is ';' on Windows and ':' everywhere else
        env[pathKey] = current ? current + path.delimiter + texDir : texDir;
    }
    return env;
};

/**
 * Spawns Pandoc to convert `input` into `output`.
 *
 * @param input  Source document: a file path, an http(s) URL, or 'STDIN' together with `contents`.
 * @param output Destination: a file path, or 'STDOUT' to receive the output in `result`.
 * @param extraParams Extra CLI arguments, e.g. ['-o', 'file.md']. Every entry is split on
 *   spaces, so an individual argument cannot contain a space.
 * @returns Resolves with Pandoc's stdout (`result`), its stderr (`error`) and the `command`
 *   line that was run, once the output file exists or, for STDOUT, once the output is non-empty.
 *
 * Rejections:
 *  - an Error if the input file does not exist, if `contents` is missing for STDIN input,
 *    or if the Pandoc process cannot be spawned;
 *  - Pandoc's stderr text (a string) if the output file did not get created;
 *  - the `{ result, error, command }` object if STDOUT output was requested but empty.
 */
export const pandoc = async (input: PandocInput, output: PandocOutput, extraParams?: string[])
    : Promise<{ result: string, command: string, error: string }> => {
    const stdin = input.file === 'STDIN';
    const stdout = output.file === 'STDOUT';

    // Validate the input before doing any work, so that failures reject the
    // returned promise instead of throwing from inside a callback
    let stdinContents = '';
    if (stdin) {
        if (typeof input.contents !== 'string')
            throw new Error('Input contents are required when reading from STDIN');
        // TODO: strip some unicode characters but not others
        // Currently we're stripping footnote back arrows but no
        // other characters to avoid localisation issues
        stdinContents = input.contents.replace(/[\u21a9\ufe0e]/g, '');
    } else if (!/^https?:\/\//i.test(input.file) && !(await isRegularFile(input.file))) {
        // http(s) URLs are fetched by Pandoc itself, so only local paths are checked
        throw new Error('Input file does not exist');
    }

    // Construct the Pandoc arguments list
    const args: string[] = [];
    if (input.format) args.push('--from', input.format);
    if (output.format) args.push('--to', output.format);
    if (needsStandaloneFlag(output)) args.push('-s');
    // `-o -` forces output to STDOUT
    args.push('-o', stdout ? '-' : output.file);
    // Support Unicode in the PDF output if XeLaTeX is installed
    if (output.format === 'pdf' && await lookpath('xelatex'))
        args.push('--pdf-engine=xelatex');
    if (!stdin) args.push(input.file);
    // The metadata title is needed for ePub and standalone HTML formats
    // We use a metadata file to avoid being vulnerable to command injection
    if (input.metadataFile) args.push('--metadata-file', input.metadataFile);
    // Extra parameters
    if (extraParams)
        args.push(...extraParams.flatMap(x => x.split(' ')).filter(x => x.length));

    return new Promise<{ result: string, command: string, error: string }>((resolve, reject) => {
        let result = '';
        let error = '';

        // Spawn a Pandoc child process
        // Assumes that the arguments are valid
        // The arguments aren't sanitised, so be careful!
        const child = spawn(input.pandoc || 'pandoc', args, { env: buildPandocEnv(input.pdflatex) });

        // Emitted when the process cannot be spawned, e.g. Pandoc is not installed
        child.on('error', reject);
        // A failed spawn or an early exit makes writes to stdin fail; record the
        // failure rather than letting it surface as an uncaught 'error' event
        child.stdin.on('error', (err: Error) => {
            error += err.message + '\n';
        });

        // Decode as UTF-8 so that characters split across chunks stay intact
        child.stdout.setEncoding('utf8');
        child.stderr.setEncoding('utf8');

        if (stdin) {
            child.stdin.write(stdinContents);
            child.stdin.end();
        }

        // Handlers
        child.stdout.on('data', (data: string) => {
            result += data;
        });
        child.stderr.on('data', (data: string) => {
            error += data;
        });
        child.stdout.on('end', () => {
            const value = {
                result, error,
                command: 'pandoc ' + args.join(' ')
            };
            if (stdout) {
                // Resolve iff there is a nonempty result
                if (result.length) resolve(value);
                else reject(value);
            } else {
                // Resolve if the output file exists, reject with stderr otherwise
                isRegularFile(output.file).then(created => {
                    if (created) resolve(value);
                    else reject(error);
                });
            }
        });
    });
};