"""
   This code belongs to James Davidson and is discussed here:
   
   http://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01185.html
   http://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01162.html
   http://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01900.html   
"""


mb = [ """MolPort-000-036-699
  Marvin  05240819062D          

 39 44  0  0  0  0            999 V2000
   -0.9093   -1.0027    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.1895    0.3923    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.4328   -1.6783    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -0.6446   -0.2242    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9279    1.1739    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.6970   -1.2486    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -0.9248   -2.3416    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -1.7064   -2.0738    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -1.9991    0.2335    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.4698    1.7936    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.1619   -0.0623    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    1.7811    0.2584    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -2.5409    0.8532    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.5876    0.4204    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2762    1.6316    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.3768   -1.8309    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1183    1.3358    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    3.1356   -0.1962    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.7100   -0.6819    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4266    0.7193    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.5165   -0.5231    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.2362    0.8781    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8523    1.2051    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.3505    0.6881    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.8243    2.2513    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.9452   -0.0342    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.6619    1.3639    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8954    1.3078    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.6338    2.0925    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.2068    0.7473    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8678   -0.9777    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.9155   -1.2082    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.6508   -2.6094    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.7019    1.1459    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.9266    2.1454    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.9635    0.3643    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.7250   -1.3577    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.4604   -2.7619    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.9991   -2.1361    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  2  4  1  0  0  0  0
  3  1  4  0  0  0  0
  4  1  1  0  0  0  0
  5  2  4  0  0  0  0
  6  1  4  0  0  0  0
  7  3  4  0  0  0  0
  8  6  4  0  0  0  0
  9  2  4  0  0  0  0
 10  5  4  0  0  0  0
 11  4  1  0  0  0  0
 12 22  1  0  0  0  0
 13  9  4  0  0  0  0
 14 12  1  0  0  0  0
 15 10  4  0  0  0  0
 16  3  1  0  0  0  0
 17  5  2  0  0  0  0
 18 14  4  0  0  0  0
 19 11  1  0  0  0  0
 20 11  1  0  0  0  0
 21 19  1  0  0  0  0
 22 20  1  0  0  0  0
 23 14  4  0  0  0  0
 24 13  4  0  0  0  0
 25 15  4  0  0  0  0
 26 18  4  0  0  0  0
 27 23  4  0  0  0  0
 28 24  4  0  0  0  0
 29 25  4  0  0  0  0
 30 27  4  0  0  0  0
 31 18  1  0  0  0  0
 32 16  1  0  0  0  0
 33 16  1  0  0  0  0
 34 28  1  0  0  0  0
 35 27  1  0  0  0  0
 36 34  1  0  0  0  0
 37 32  1  0  0  0  0
 38 33  1  0  0  0  0
 39 37  1  0  0  0  0
  8  7  4  0  0  0  0
 12 21  1  0  0  0  0
 39 38  1  0  0  0  0
 15 13  4  0  0  0  0
 29 28  4  0  0  0  0
 30 26  4  0  0  0  0
M  END""",
 
"""MolPort-000-005-089
  Marvin  05210810082D          

 19 21  0  0  0  0            999 V2000
    1.4290    3.4338    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.4290    2.6088    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1434    2.1963    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8579    2.6088    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.8579    3.4338    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1434    3.8463    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.6425    2.3538    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    4.1274    3.0213    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.6425    3.6888    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    4.9525    3.0213    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    3.8974    1.5693    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.3454    0.9562    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.6004    0.1715    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.4073    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    4.9594    0.6131    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    4.7044    1.3977    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.7145    3.8463    0.0000 N   0  3  0  0  0  0  0  0  0  0  0  0
    0.7145    4.6714    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    3.4338    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
  1  2  4  0  0  0  0
  2  3  4  0  0  0  0
  3  4  4  0  0  0  0
  4  5  4  0  0  0  0
  5  6  4  0  0  0  0
  6  1  4  0  0  0  0
  4  7  4  0  0  0  0
  7  8  4  0  0  0  0
  8  9  4  0  0  0  0
  9  5  4  0  0  0  0
  8 10  2  0  0  0  0
  1 17  1  0  0  0  0
  7 11  1  0  0  0  0
 11 12  1  0  0  0  0
 12 13  1  0  0  0  0
 13 14  1  0  0  0  0
 14 15  1  0  0  0  0
 15 16  1  0  0  0  0
 16 11  1  0  0  0  0
 17 18  2  0  0  0  0
 17 19  1  0  0  0  0
M  CHG  2  17   1  19  -1
M  STY  1   1 DAT
M  SAL   1  1   9
M  SDT   1 MRV_IMPLICIT_H                                        
M  SDD   1     0.0000    0.0000    DR    ALL  0       0  
M  SED   1 IMPL_H1
M  END
> <PUBCHEM_EXT_DATASOURCE_REGID>
MolPort-000-005-089

> <PUBCHEM_EXT_SUBSTANCE_URL>
http://www.molport.com/buy-chemicals/molecule-link/MolPort-000-005-089

> <PUBCHEM_EXT_DATASOURCE_URL>
http://www.molport.com

$$$$""" ]


from rdkit import Chem
from rdkit.Chem import AllChem

def FragIndicesToMol(oMol,indices):
    em = Chem.EditableMol(Chem.Mol())

    newIndices={}
    for i,idx in enumerate(indices):
        em.AddAtom(oMol.GetAtomWithIdx(idx))
        newIndices[idx]=i

    for i,idx in enumerate(indices):
        at = oMol.GetAtomWithIdx(idx)
        for bond in at.GetBonds():
            if bond.GetBeginAtomIdx()==idx:
                oidx = bond.GetEndAtomIdx()
            else:
                oidx = bond.GetBeginAtomIdx()
            # make sure every bond only gets added once:
            if oidx<idx:
                continue
            em.AddBond(newIndices[idx],newIndices[oidx],bond.GetBondType())
    res = em.GetMol()
    res.ClearComputedProps()
    Chem.GetSymmSSSR(res)
    res.UpdatePropertyCache(False)
    res._idxMap=newIndices
    return res

def _recursivelyModifyNs(mol,matches,indices=None):
    if indices is None:
        indices=[]
    res=None
    while len(matches) and res is None:
        tIndices=indices[:]
        nextIdx = matches.pop(0)
        tIndices.append(nextIdx)
        nm = Chem.Mol(mol.ToBinary())
        nm.GetAtomWithIdx(nextIdx).SetNoImplicit(True)
        nm.GetAtomWithIdx(nextIdx).SetNumExplicitHs(1)
        cp = Chem.Mol(nm.ToBinary())
        try:
            Chem.SanitizeMol(cp)
        except ValueError:
            res,indices = _recursivelyModifyNs(nm,matches,indices=tIndices)
        else:
            indices=tIndices
            res=cp
    return res,indices

def AdjustAromaticNs(m,nitrogenPattern='[n&D2&H0;r5,r6]'):
    """
       default nitrogen pattern matches Ns in 5 rings and 6 rings in order to be able
       to fix: O=c1ccncc1
    """
    Chem.GetSymmSSSR(m)
    m.UpdatePropertyCache(False)

    # break non-ring bonds linking rings:
    em = Chem.EditableMol(m)
    linkers = m.GetSubstructMatches(Chem.MolFromSmarts('[r]!@[r]'))
    plsFix=set()
    for a,b in linkers:
        em.RemoveBond(a,b)
        plsFix.add(a)
        plsFix.add(b)
    nm = em.GetMol()
    for at in plsFix:
        at=nm.GetAtomWithIdx(at)
        if at.GetIsAromatic() and at.GetAtomicNum()==7:
            at.SetNumExplicitHs(1)
            at.SetNoImplicit(True)

    # build molecules from the fragments:
    fragLists = Chem.GetMolFrags(nm)
    frags = [FragIndicesToMol(nm,x) for x in fragLists]

    # loop through the fragments in turn and try to aromatize them:
    ok=True
    for i,frag in enumerate(frags):
        cp = Chem.Mol(frag.ToBinary())
        try:
            Chem.SanitizeMol(cp)
        except ValueError:
            matches = [x[0] for x in frag.GetSubstructMatches(Chem.MolFromSmarts(nitrogenPattern))]
            lres,indices=_recursivelyModifyNs(frag,matches)
            if not lres:
                #print 'frag %d failed (%s)'%(i,str(fragLists[i]))
                ok=False
                break
            else:
                revMap={}
                for k,v in frag._idxMap.iteritems():
                    revMap[v]=k
                for idx in indices:
                    oatom = m.GetAtomWithIdx(revMap[idx])
                    oatom.SetNoImplicit(True)
                    oatom.SetNumExplicitHs(1)
    if not ok:
        return None
    return m

if __name__=='__main__':
    ms= (
        Chem.MolFromMolBlock(mb[0],False),
        Chem.MolFromSmiles('O=c1ccc2ccccc2n1',False),
        Chem.MolFromSmiles('Cc1nnnn1C',False),
        Chem.MolFromSmiles('CCc1ccc2nc(=O)c(cc2c1)Cc1nnnn1C1CCCCC1',False),
        Chem.MolFromMolBlock(mb[1],False),
        Chem.MolFromSmiles('c1cnc2cc3ccnc3cc12',False),
        Chem.MolFromSmiles('c1cc2cc3ccnc3cc2n1',False),
        Chem.MolFromSmiles('O=c1ccnc(c1)-c1cnc2cc3ccnc3cc12',False),
        Chem.MolFromSmiles('O=c1ccnc(c1)-c1cc1',False),
        )
    for i,m in enumerate(ms):
        print '#---------------------'
        try:
            m.UpdatePropertyCache(False)
            cp = Chem.Mol(m.ToBinary())
            Chem.SanitizeMol(cp)
            m = cp
            print 'fine:',Chem.MolToSmiles(m)
        except ValueError:
            nm=AdjustAromaticNs(m)
            if nm is not None:
                Chem.SanitizeMol(nm)
                print 'fixed:',Chem.MolToSmiles(nm)
            else:
                print 'still broken:',i