#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2014-2019 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     Santiago DueƱas <sduenas@bitergia.com>
#

import json
import os
import shutil
import sys
import tempfile
import unittest
import unittest.mock

if '..' not in sys.path:
    sys.path.insert(0, '..')

from sortinghat import api
from sortinghat.command import CMD_SUCCESS
from sortinghat.cmd.unify import Unify
from sortinghat.exceptions import CODE_MATCHER_NOT_SUPPORTED_ERROR

from tests.base import TestCommandCaseBase


UNIFY_DEFAULT_OUTPUT_RECOVERY = """Loading matches from recovery file:.*
Unique identity 880b3dfcb3a08712e5831bddc3dfe81fc5d7b331 merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Total unique identities processed: 6
Total matches: 1
Total unique identities after merging: 5"""
UNIFY_DEFAULT_OUTPUT = """Unique identity 880b3dfcb3a08712e5831bddc3dfe81fc5d7b331 merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Total unique identities processed: 6
Total matches: 1
Total unique identities after merging: 5"""
UNIFY_SOURCES_OUTPUT = """Unique identity f30dc6a71730e37f03c7e27379febb219f7918de merged on 9cb28b6fb034393bbe4749081e0da6cc5a715b85
Total unique identities processed: 6
Total matches: 1
Total unique identities after merging: 5"""
UNIFY_NO_STRICT_OUTPUT = """Unique identity 9cb28b6fb034393bbe4749081e0da6cc5a715b85 merged on 54806f99212ac5de67684dabda6db139fc6507ee
Unique identity f30dc6a71730e37f03c7e27379febb219f7918de merged on 54806f99212ac5de67684dabda6db139fc6507ee
Unique identity 400fdfaab5918d1b7e0e0efba4797abdc378bd7d merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Unique identity 880b3dfcb3a08712e5831bddc3dfe81fc5d7b331 merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Total unique identities processed: 6
Total matches: 4
Total unique identities after merging: 2"""
UNIFY_EMAIL_NAME_OUTPUT = """Unique identity 400fdfaab5918d1b7e0e0efba4797abdc378bd7d merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Unique identity 880b3dfcb3a08712e5831bddc3dfe81fc5d7b331 merged on 178315df7941fc76a6ffb06fd5b00f6932ad9c41
Unique identity f30dc6a71730e37f03c7e27379febb219f7918de merged on 9cb28b6fb034393bbe4749081e0da6cc5a715b85
Total unique identities processed: 6
Total matches: 3
Total unique identities after merging: 3"""
UNIFY_EMPTY_OUTPUT = """Total unique identities processed: 0
Total matches: 0
Total unique identities after merging: 0"""


UNIFY_MATCHING_ERROR = "Error: mock identity matcher is not supported"


class TestUnifyCaseBase(TestCommandCaseBase):
    """Defines common setup and teardown methods on unify unit tests"""

    cmd_klass = Unify

    def load_test_dataset(self):
        # Add some unique identities

        uuid = api.add_identity(self.db, source='scm', email='jsmith@example.com', name='John Smith')
        api.add_identity(self.db, source='scm', name='John Smith', uuid=uuid)
        api.add_identity(self.db, source='scm', username='jsmith', uuid=uuid)

        uuid = api.add_identity(self.db, source='alt', name='J. Smith', username='john_smith')
        api.add_identity(self.db, source='alt', name='John Smith', username='jsmith', uuid=uuid)
        api.add_identity(self.db, source='alt', email='jsmith', uuid=uuid)

        uuid = api.add_identity(self.db, source='scm', name='Jane Rae')
        api.add_identity(self.db, source='mls', email='jane.rae@example.net', name='Jane Rae Doe', uuid=uuid)

        uuid = api.add_identity(self.db, source='scm', name='J. Smith', username='john_smith')
        api.add_identity(self.db, source='scm', username='john_smith', uuid=uuid)
        api.add_identity(self.db, source='mls', username='Smith. J', uuid=uuid)
        api.add_identity(self.db, source='mls', email='JSmith@example.com', name='Smith. J', uuid=uuid)

        uuid = api.add_identity(self.db, source='mls', email='jrae@example.net', name='Jane Rae Doe')
        api.add_identity(self.db, source='mls', name='jrae', uuid=uuid)

        uuid = api.add_identity(self.db, source='scm', name='jrae')


class TestUnifyCommand(TestUnifyCaseBase):
    """Unify command unit tests"""

    def setUp(self):
        super().setUp()
        self.recovery_path = os.path.join('/tmp', next(tempfile._get_candidate_names()))

    def tearDown(self):
        if os.path.exists(self.recovery_path):
            os.remove(self.recovery_path)

    def test_unify(self):
        """Test command"""

        code = self.cmd.run()
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)

    def test_unify_fast_matching(self):
        """Test command with fast matching"""

        code = self.cmd.run('--fast-matching')
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)

    def test_unify_no_strict(self):
        """Test command with no strict mode active"""

        code = self.cmd.run('--no-strict-matching', '--matching', 'email-name')
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_NO_STRICT_OUTPUT)

    def test_unify_sources_list(self):
        """Test command with a sources list"""

        code = self.cmd.run('--matching', 'email-name',
                            '--sources', 'mls', 'alt')
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        # Only jrae identities are merged
        self.assertEqual(output, UNIFY_SOURCES_OUTPUT)

    def test_unify_email_name_matcher(self):
        """Test command using the email-name matcher"""

        code = self.cmd.run('--matching', 'email-name')
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_EMAIL_NAME_OUTPUT)

    def test_unify_load_matches_from_recovery_file(self):
        """Test command when loading matches from the recovery file"""

        original_log = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/unify_matches.log')
        shutil.copyfile(original_log, self.recovery_path)

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertTrue(os.path.exists(self.recovery_path))
            code = self.cmd.run('--recovery')
            self.assertEqual(code, CMD_SUCCESS)
            output = sys.stdout.getvalue().strip()
            self.assertRegex(output, UNIFY_DEFAULT_OUTPUT_RECOVERY)
            self.assertFalse(os.path.exists(self.recovery_path))

    def test_unify_disabled_recovery(self):
        """Test command when the recovery file exists but the recovery mode is not active"""

        original_log = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/unify_matches.log')
        shutil.copyfile(original_log, self.recovery_path)

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertTrue(os.path.exists(self.recovery_path))
            code = self.cmd.run()
            self.assertEqual(code, CMD_SUCCESS)
            output = sys.stdout.getvalue().strip()
            self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)
            self.assertTrue(os.path.exists(self.recovery_path))

    def test_unify_success_no_recovery_file(self):
        """Test command when the recovery file does not exist, the recovery mode is active and the execution is ok"""

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path
            self.assertFalse(os.path.exists(self.recovery_path))

            self.assertFalse(os.path.exists(self.recovery_path))
            self.cmd.run('--recovery')
            self.assertFalse(os.path.exists(self.recovery_path))

    @unittest.mock.patch('sortinghat.api.merge_unique_identities')
    def test_unify_no_success_no_recovery_file(self, mock_merge_unique_identities):
        """Test command when the recovery file does not exist, the recovery mode is active and the execution isn't ok"""

        mock_merge_unique_identities.side_effect = Exception

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertFalse(os.path.exists(self.recovery_path))
            with self.assertRaises(Exception):
                self.cmd.run('--recovery')
            self.assertTrue(os.path.exists(self.recovery_path))

            with open(self.recovery_path, 'r') as f:
                count_objs = 0
                for line in f.readlines():
                    matches_obj = json.loads(line.strip("\n"))
                    self.assertTrue(all([isinstance(m, str) for m in matches_obj['identities']]))
                    self.assertFalse(matches_obj['processed'])
                    count_objs += 1

                self.assertEqual(count_objs, 1)

    @unittest.mock.patch('sortinghat.api.merge_unique_identities')
    def test_unify_no_success_no_recovery(self, mock_merge_unique_identities):
        """Test command when the the recovery mode is not active and the execution isn't ok"""

        mock_merge_unique_identities.side_effect = Exception

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertFalse(os.path.exists(self.recovery_path))
            with self.assertRaises(Exception):
                self.cmd.run()
            self.assertFalse(os.path.exists(self.recovery_path))

    def test_empty_registry(self):
        """Check output when the registry is empty"""

        # Delete the contents of the database
        self.db.clear()

        code = self.cmd.run()
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_EMPTY_OUTPUT)


class TestUnify(TestUnifyCaseBase):
    """Unit tests for unify"""

    def setUp(self):
        super().setUp()
        self.recovery_path = os.path.join('/tmp', next(tempfile._get_candidate_names()))

    def tearDown(self):
        if os.path.exists(self.recovery_path):
            os.remove(self.recovery_path)

    def test_unify(self):
        """Test unify method using a default matcher"""

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 5)

        # jsmith identities with same email address
        jsmith = after[0]
        self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

        identities = jsmith.identities
        identities.sort(key=lambda x: x.id)

        self.assertEqual(len(identities), 7)

        id_ = identities[1]
        self.assertEqual(id_.email, 'JSmith@example.com')
        self.assertEqual(id_.source, 'mls')

        id_ = identities[3]
        self.assertEqual(id_.email, 'jsmith@example.com')
        self.assertEqual(id_.source, 'scm')

        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)

    def test_unify_from_recovery_file(self):
        """Test unify method when reading matches from the recovery file"""

        original_log = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/unify_matches.log')
        shutil.copyfile(original_log, self.recovery_path)

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            before = api.unique_identities(self.db)
            self.assertEqual(len(before), 6)

            self.assertTrue(os.path.exists(self.recovery_path))
            code = self.cmd.unify(matching='default', recovery=True)
            self.assertEqual(code, CMD_SUCCESS)

            after = api.unique_identities(self.db)
            self.assertEqual(len(after), 5)

            # jsmith identities with same email address
            jsmith = after[0]
            self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

            identities = jsmith.identities
            identities.sort(key=lambda x: x.id)

            self.assertEqual(len(identities), 7)

            id_ = identities[1]
            self.assertEqual(id_.email, 'JSmith@example.com')
            self.assertEqual(id_.source, 'mls')

            id_ = identities[3]
            self.assertEqual(id_.email, 'jsmith@example.com')
            self.assertEqual(id_.source, 'scm')

            output = sys.stdout.getvalue().strip()
            self.assertRegex(output, UNIFY_DEFAULT_OUTPUT_RECOVERY)
            self.assertFalse(os.path.exists(self.recovery_path))

    def test_unify_success_no_recovery_mode(self):
        """Test unify method when the recovery file exists but the recovery mode is not active"""

        original_log = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/unify_matches.log')
        shutil.copyfile(original_log, self.recovery_path)

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            before = api.unique_identities(self.db)
            self.assertEqual(len(before), 6)

            self.assertTrue(os.path.exists(self.recovery_path))
            code = self.cmd.unify(matching='default')
            self.assertEqual(code, CMD_SUCCESS)

            after = api.unique_identities(self.db)
            self.assertEqual(len(after), 5)

            # jsmith identities with same email address
            jsmith = after[0]
            self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

            identities = jsmith.identities
            identities.sort(key=lambda x: x.id)

            self.assertEqual(len(identities), 7)

            id_ = identities[1]
            self.assertEqual(id_.email, 'JSmith@example.com')
            self.assertEqual(id_.source, 'mls')

            id_ = identities[3]
            self.assertEqual(id_.email, 'jsmith@example.com')
            self.assertEqual(id_.source, 'scm')

            output = sys.stdout.getvalue().strip()
            self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)
            self.assertTrue(os.path.exists(self.recovery_path))

    @unittest.mock.patch('sortinghat.api.merge_unique_identities')
    def test_unify_no_success_no_recovery_file(self, mock_merge_unique_identities):
        """Test command when the recovery file does not exist, the recovery mode is active and the execution isn't ok"""

        mock_merge_unique_identities.side_effect = Exception

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertFalse(os.path.exists(self.recovery_path))
            with self.assertRaises(Exception):
                self.cmd.unify(matching='default', recovery=True)
            self.assertTrue(os.path.exists(self.recovery_path))

            with open(self.recovery_path, 'r') as f:
                count_objs = 0
                for line in f.readlines():
                    matches_obj = json.loads(line.strip("\n"))
                    self.assertTrue(all([isinstance(m, str) for m in matches_obj['identities']]))
                    self.assertFalse(matches_obj['processed'])
                    count_objs += 1

                self.assertEqual(count_objs, 1)

    @unittest.mock.patch('sortinghat.api.merge_unique_identities')
    def test_unify_no_success_no_recovery_mode(self, mock_merge_unique_identities):
        """Test command when the the recovery mode is not active and the execution isn't ok"""

        mock_merge_unique_identities.side_effect = Exception

        with unittest.mock.patch('sortinghat.cmd.unify.RecoveryFile.location') as mock_location:
            mock_location.return_value = self.recovery_path

            self.assertFalse(os.path.exists(self.recovery_path))
            with self.assertRaises(Exception):
                self.cmd.unify(matching='default')
            self.assertFalse(os.path.exists(self.recovery_path))

    def test_unify_fast_matching(self):
        """Test unify method using a default matcher and fast matching mode"""

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='default', fast_matching=True)
        self.assertEqual(code, CMD_SUCCESS)

        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 5)

        # jsmith identities with same email address
        jsmith = after[0]
        self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

        identities = jsmith.identities
        identities.sort(key=lambda x: x.id)

        self.assertEqual(len(identities), 7)

        id_ = identities[1]
        self.assertEqual(id_.email, 'JSmith@example.com')
        self.assertEqual(id_.source, 'mls')

        id_ = identities[3]
        self.assertEqual(id_.email, 'jsmith@example.com')
        self.assertEqual(id_.source, 'scm')

        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_DEFAULT_OUTPUT)

    def test_unify_no_strict(self):
        """Test unify method with no strict mode set"""

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='email-name',
                              no_strict_matching=True)
        self.assertEqual(code, CMD_SUCCESS)

        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 2)

        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_NO_STRICT_OUTPUT)

    def test_unify_with_blacklist(self):
        """Test unify method using a blacklist"""

        # Add some entries to the blacklist
        api.add_to_matching_blacklist(self.db, 'Jane Rae Doe')
        api.add_to_matching_blacklist(self.db, 'jsmith@example.com')

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='default')
        self.assertEqual(code, CMD_SUCCESS)

        # No match was found
        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 6)

    def test_unify_with_sources_list(self):
        """Test unify method using a sources list"""

        sources = ['mls', 'alt']

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='email-name', sources=sources)
        self.assertEqual(code, CMD_SUCCESS)

        # Only jrae identities are merged
        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 5)

        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_SOURCES_OUTPUT)

    def test_unify_email_name_matcher(self):
        """Test unify method using the email-name matcher"""

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='email-name')
        self.assertEqual(code, CMD_SUCCESS)

        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 3)

        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_EMAIL_NAME_OUTPUT)

    def test_unify_email_name_matcher_with_blacklist(self):
        """Test unify method using a blacklist"""

        # Add some entries to the blacklist
        api.add_to_matching_blacklist(self.db, 'Jane Rae Doe')
        api.add_to_matching_blacklist(self.db, 'jsmith@example.com')

        before = api.unique_identities(self.db)
        self.assertEqual(len(before), 6)

        code = self.cmd.unify(matching='email-name')
        self.assertEqual(code, CMD_SUCCESS)

        after = api.unique_identities(self.db)
        self.assertEqual(len(after), 5)

        # Only two identities were merged due to the blacklist
        jsmith = after[0]
        self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')
        self.assertEqual(len(jsmith.identities), 4)

        jsmith = after[1]
        self.assertEqual(jsmith.uuid, '400fdfaab5918d1b7e0e0efba4797abdc378bd7d')
        self.assertEqual(len(jsmith.identities), 6)

    def test_empty_registry(self):
        """Check output when the registry is empty"""

        # Delete the contents of the database
        self.db.clear()

        code = self.cmd.unify()
        self.assertEqual(code, CMD_SUCCESS)
        output = sys.stdout.getvalue().strip()
        self.assertEqual(output, UNIFY_EMPTY_OUTPUT)

    def test_invalid_matching_method(self):
        """Check if it fails when an invalid matching method is given"""

        code = self.cmd.unify(matching='mock')
        self.assertEqual(code, CODE_MATCHER_NOT_SUPPORTED_ERROR)
        output = sys.stderr.getvalue().strip()
        self.assertEqual(output, UNIFY_MATCHING_ERROR)


if __name__ == "__main__":
    unittest.main(buffer=True, exit=False)