#!/usr/bin/env python
# -*- coding: utf-8 -*-

from operator import eq, ge, gt

import numpy as np
import pytest

from recordlinkage.index import Full, Block, SortedNeighbourhood
from recordlinkage.contrib.index import NeighbourhoodBlock

from tests.test_indexing import TestData


class TestNeighbourhoodBlock(TestData):
    """Cross-check NeighbourhoodBlock against the other indexing classes.

    Every test builds a list of indexers and compares the candidate pairs of
    neighbouring list entries. On the complete frames the pair sets are
    asserted to be equal. On the frames with injected NaN values the
    NeighbourhoodBlock indexer (configured with ``max_nulls=1``) is asserted
    to produce a strict superset of the reference indexer's pairs.
    """

    @classmethod
    def setup_class(cls):
        """Create the base fixtures and NaN-injected copies of both frames.

        Relies on ``TestData.setup_class`` to make ``a`` and ``b`` available
        on the class (presumably pandas DataFrames -- they are used via
        ``copy``/``apply`` below).
        """
        TestData.setup_class()

        def incomplete_df_copy(df, nan_proportion=0.1):
            """Return a copy of ``df`` with random cells replaced by NaN.

            In every column, ``round(len(df) * nan_proportion)`` distinct
            row positions are drawn from the global NumPy random state and
            set to NaN. When that count is not positive, a plain copy is
            returned.
            """
            n_missing = int(round(len(df) * nan_proportion))
            if n_missing <= 0:
                return df.copy()

            def blank_random_cells(column):
                # Work on a copy so the source frame keeps its values.
                holed = column.copy()
                positions = np.random.choice(
                    len(df), size=n_missing, replace=False)
                holed.iloc[positions] = np.nan
                return holed

            return df.apply(blank_random_cells)

        # Fixed seed so the NaN positions are reproducible between runs.
        np.random.seed(0)
        cls.incomplete_a = incomplete_df_copy(cls.a)
        cls.incomplete_b = incomplete_df_copy(cls.b)

    def assert_index_comparisons(self, pairwise_comparison, indexers, *args,
                                 **kwargs):
        """Assert a relation between the results of neighbouring indexers.

        Each indexer's ``index`` method is called with ``*args`` and
        ``**kwargs``. For every two consecutive results, the method asserts
        that neither contains duplicate pairs and that
        ``pairwise_comparison(first_pairs, second_pairs)`` is truthy, where
        both arguments are the results converted to sets.
        """
        results = [indexer.index(*args, **kwargs) for indexer in indexers]
        for left, right in zip(results, results[1:]):
            left_pairs = set(left)
            right_pairs = set(right)
            # Converting to a set drops duplicates, so equal sizes mean the
            # original index was free of repeated pairs.
            no_duplicates = (len(left_pairs) == len(left)
                             and len(right_pairs) == len(right))
            assert no_duplicates and pairwise_comparison(
                left_pairs, right_pairs)

    def test_dedup_vs_full(self):
        """Dedup with max_non_matches = column count must equal Full."""
        n_columns = len(self.a.columns)
        candidates = [NeighbourhoodBlock(max_non_matches=n_columns), Full()]
        self.assert_index_comparisons(eq, candidates, self.a)

    def test_link_vs_full(self):
        """Linking with max_non_matches = column count must equal Full."""
        n_columns = len(self.a.columns)
        candidates = [NeighbourhoodBlock(max_non_matches=n_columns), Full()]
        self.assert_index_comparisons(eq, candidates, self.a, self.b)

    def test_dedup_single_blocking_key_vs_block(self):
        """Dedup on one key: equal to Block, strict superset with NaNs."""
        key = 'var_block10'
        candidates = [
            NeighbourhoodBlock(key, max_nulls=1),
            NeighbourhoodBlock(left_on=key, right_on=key, max_nulls=1),
            Block(key),
        ]
        self.assert_index_comparisons(eq, candidates, self.a)
        # Only the keyword-style indexer and Block are compared on NaN data.
        self.assert_index_comparisons(gt, candidates[1:], self.incomplete_a)

    def test_link_single_blocking_key_vs_block(self):
        """Linking on one key: equal to Block, strict superset with NaNs."""
        key = 'var_arange'
        candidates = [
            NeighbourhoodBlock(key, max_nulls=1),
            NeighbourhoodBlock(left_on=key, right_on=key, max_nulls=1),
            Block(key),
        ]
        self.assert_index_comparisons(eq, candidates, self.a, self.b)
        # Only the keyword-style indexer and Block are compared on NaN data.
        self.assert_index_comparisons(
            gt, candidates[1:], self.incomplete_a, self.incomplete_b)

    def test_dedup_multiple_blocking_keys_vs_block(self):
        """Dedup on two keys: equal to Block, strict superset with NaNs."""
        first_key, second_key = 'var_single', 'var_block10'
        candidates = [
            NeighbourhoodBlock([first_key, second_key], max_nulls=1),
            NeighbourhoodBlock(
                left_on=[first_key, second_key],
                right_on=[first_key, second_key],
                max_nulls=1),
            Block([first_key, second_key]),
        ]
        self.assert_index_comparisons(eq, candidates, self.a)
        # Only the keyword-style indexer and Block are compared on NaN data.
        self.assert_index_comparisons(gt, candidates[1:], self.incomplete_a)

    def test_link_multiple_blocking_keys_vs_block(self):
        """Linking on two keys: equal to Block, strict superset with NaNs."""
        first_key, second_key = 'var_arange', 'var_block10'
        candidates = [
            NeighbourhoodBlock([first_key, second_key], max_nulls=1),
            NeighbourhoodBlock(
                left_on=[first_key, second_key],
                right_on=[first_key, second_key],
                max_nulls=1),
            Block([first_key, second_key]),
        ]
        self.assert_index_comparisons(eq, candidates, self.a, self.b)
        # Only the keyword-style indexer and Block are compared on NaN data.
        self.assert_index_comparisons(
            gt, candidates[1:], self.incomplete_a, self.incomplete_b)

    @pytest.mark.parametrize("window", [3, 5, 7, 9, 11])
    def test_dedup_single_sorting_key_vs_sortedneighbourhood(self, window):
        """Dedup with a window on one key vs. SortedNeighbourhood."""
        key = 'var_arange'
        candidates = [
            NeighbourhoodBlock(key, max_nulls=1, windows=window),
            NeighbourhoodBlock(
                left_on=key, right_on=key, max_nulls=1, windows=window),
            SortedNeighbourhood(key, window=window),
        ]
        self.assert_index_comparisons(eq, candidates, self.a)
        # Only the keyword-style indexer and the reference see NaN data.
        self.assert_index_comparisons(gt, candidates[1:], self.incomplete_a)

    @pytest.mark.parametrize("window", [3, 5, 7, 9, 11])
    def test_link_single_sorting_key_vs_sortedneighbourhood(self, window):
        """Linking with a window on one key vs. SortedNeighbourhood."""
        key = 'var_arange'
        candidates = [
            NeighbourhoodBlock(key, max_nulls=1, windows=window),
            NeighbourhoodBlock(
                left_on=key, right_on=key, max_nulls=1, windows=window),
            SortedNeighbourhood(key, window=window),
        ]
        self.assert_index_comparisons(eq, candidates, self.a, self.b)
        # Only the keyword-style indexer and the reference see NaN data.
        self.assert_index_comparisons(
            gt, candidates[1:], self.incomplete_a, self.incomplete_b)

    @pytest.mark.parametrize("window", [3, 5, 7, 9, 11])
    def test_dedup_with_blocking_vs_sortedneighbourhood(self, window):
        """Dedup with a windowed key plus a second key of window 1."""
        sort_key, block_key = 'var_arange', 'var_block10'
        candidates = [
            NeighbourhoodBlock(
                [sort_key, block_key], max_nulls=1, windows=[window, 1]),
            NeighbourhoodBlock(
                left_on=[sort_key, block_key],
                right_on=[sort_key, block_key],
                max_nulls=1,
                windows=[window, 1]),
            SortedNeighbourhood(
                sort_key, block_on=block_key, window=window),
        ]
        self.assert_index_comparisons(eq, candidates, self.a)
        # Only the keyword-style indexer and the reference see NaN data.
        self.assert_index_comparisons(gt, candidates[1:], self.incomplete_a)

    @pytest.mark.parametrize("window", [3, 5, 7, 9, 11])
    def test_link_with_blocking_vs_sortedneighbourhood(self, window):
        """Linking with a windowed key plus a second key of window 1."""
        sort_key, block_key = 'var_arange', 'var_block10'
        candidates = [
            NeighbourhoodBlock(
                [sort_key, block_key], max_nulls=1, windows=[window, 1]),
            NeighbourhoodBlock(
                left_on=[sort_key, block_key],
                right_on=[sort_key, block_key],
                max_nulls=1,
                windows=[window, 1]),
            SortedNeighbourhood(
                sort_key, block_on=block_key, window=window),
        ]
        self.assert_index_comparisons(eq, candidates, self.a, self.b)
        # Only the keyword-style indexer and the reference see NaN data.
        self.assert_index_comparisons(
            gt, candidates[1:], self.incomplete_a, self.incomplete_b)