#  Copyright (c) 2019. stewu5. All rights reserved.
#  Use of this source code is governed by a BSD-style
#  license that can be found in the LICENSE file.

from collections import Counter

import pandas as pd
from rdkit import Chem
from xenonpy.descriptor import Compositions
from xenonpy.descriptor.base import BaseFeaturizer


class OrganicCompDescriptor(BaseFeaturizer):
    """
    A featurizer for extracting XenonPy compositional descriptors from SMILES or MOL.

    Every molecule is reduced to an element-count dictionary (hydrogens are
    made explicit first, so they are counted as well) and the resulting list
    of dictionaries is handed to an inner XenonPy ``Compositions`` calculator.
    """

    def __init__(self, n_jobs=-1, *, featurizers='all', on_errors='raise', return_type='any'):
        """
        Parameters
        ----------
        n_jobs
            Forwarded to the inner ``Compositions`` calculator.
        featurizers
            Forwarded to the inner ``Compositions`` calculator.
        on_errors
            Forwarded to both ``BaseFeaturizer`` and the inner ``Compositions``.
        return_type
            Forwarded to ``BaseFeaturizer``.
        """
        # fix n_jobs to be 0 to skip automatic wrapper in XenonPy BaseFeaturizer class
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
        self._cal = Compositions(n_jobs=n_jobs, featurizers=featurizers, on_errors=on_errors)
        # Result of the latest non-empty ``featurize`` call; ``None`` until then.
        self.output = None

    @staticmethod
    def _to_mol(entry):
        """Return ``entry`` unchanged if it is an RDKit Mol, otherwise parse it as SMILES."""
        if isinstance(entry, Chem.rdchem.Mol):
            return entry
        mol = Chem.MolFromSmiles(entry)
        if mol is None:
            raise ValueError('can not convert Mol from SMILES %s' % entry)
        return mol

    def featurize(self, x):
        """
        Calculate compositional descriptors for one or more molecules.

        Parameters
        ----------
        x
            A single SMILES string / RDKit Mol, or a list, tuple or
            ``pandas.Series`` of them. SMILES and Mol entries may be mixed;
            each entry is checked on its own.

        Returns
        -------
        The object returned by ``Compositions.transform`` for the element-count
        dictionaries. It is also stored in ``self.output``. For an empty input
        an empty ``pandas.DataFrame`` is returned and ``self.output`` is left
        untouched.

        Raises
        ------
        ValueError
            If an entry that is not an RDKit Mol can not be parsed as SMILES.
            This is raised regardless of the ``on_errors`` setting.
        """
        # Normalise the input to a plain list of entries.
        if isinstance(x, pd.Series):
            x = x.tolist()
        elif isinstance(x, tuple):
            x = list(x)
        if not isinstance(x, list):
            x = [x]

        # Nothing to calculate; avoids indexing into an empty list.
        if not x:
            return pd.DataFrame()

        # Check every entry individually, assume SMILES if not RDKit-MOL.
        mols = [self._to_mol(entry) for entry in x]

        # Make hydrogens explicit so that they show up in the element counts.
        mols = [Chem.AddHs(mol) for mol in mols]
        # Convert each molecule to a counting dictionary {element symbol: count}.
        compositions = [dict(Counter(atom.GetSymbol() for atom in mol.GetAtoms())) for mol in mols]

        self.output = self._cal.transform(compositions)

        return self.output

    @property
    def feature_labels(self):
        """Column labels of the latest ``featurize`` output."""
        output = getattr(self, 'output', None)
        if output is None:
            # Labels are taken from the calculated output, so none exist yet.
            raise AttributeError('feature_labels is not available before featurize has been called')
        return output.columns