Python rdkit.Chem.RemoveHs() Examples

The following are 25 code examples of rdkit.Chem.RemoveHs(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem , or try the search function .
Example #1
Source File: molecule.py    From chemml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _extra_docs(self):
        # method: to_smiles
        self._to_smiles_core_names = ("rdkit.Chem.MolToSmarts",)
        self._to_smiles_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: to_smarts
        self._to_smarts_core_names = ("rdkit.Chem.MolToSmarts",)
        self._to_smarts_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: to_inchi
        self._to_inchi_core_names = ("rdkit.Chem.MolToInchi",)
        self._to_inchi_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: hydrogens
        self._hydrogens_core_names = ("rdkit.Chem.AddHs","rdkit.Chem.RemoveHs")
        self._hydrogens_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs",
                                    "http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs")
        #
        self._to_xyz_core_names = ("rdkit.Chem.AllChem.MMFFOptimizeMolecule","rdkit.Chem.AllChem.UFFOptimizeMolecule")
        self._to_xyz_core_docs =(
            "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.MMFFOptimizeMolecule",
            "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.UFFOptimizeMolecule"
        ) 
Example #2
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def standardize(self, mol):
        """Return a standardized version the given molecule.

        The standardization process consists of the following stages: RDKit
        :rdkit:`RemoveHs <Chem.rdmolops-module.html#RemoveHs>`, RDKit
        :rdkit:`SanitizeMol <Chem.rdmolops-module.html#SanitizeMol>`, :class:`~molvs.metal.MetalDisconnector`,
        :class:`~molvs.normalize.Normalizer`, :class:`~molvs.charge.Reionizer`, RDKit
        :rdkit:`AssignStereochemistry <Chem.rdmolops-module.html#AssignStereochemistry>`.

        :param mol: The molecule to standardize.
        :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        :returns: The standardized molecule.
        :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        """
        mol = copy.deepcopy(mol)
        Chem.RemoveHs(mol)
        Chem.SanitizeMol(mol)
        mol = self.disconnect_metals(mol)
        mol = self.normalize(mol)
        mol = self.reionize(mol)
        Chem.AssignStereochemistry(mol, force=True, cleanIt=True)
        # TODO: Check this removes symmetric stereocenters
        return mol 
Example #3
Source File: standardize.py    From MolVS with MIT License 6 votes vote down vote up
def standardize(self, mol):
        """Return a standardized version the given molecule.

        The standardization process consists of the following stages: RDKit
        :py:func:`~rdkit.Chem.rdmolops.RemoveHs`, RDKit :py:func:`~rdkit.Chem.rdmolops.SanitizeMol`,
        :class:`~molvs.metal.MetalDisconnector`, :class:`~molvs.normalize.Normalizer`,
        :class:`~molvs.charge.Reionizer`, RDKit :py:func:`~rdkit.Chem.rdmolops.AssignStereochemistry`.

        :param mol: The molecule to standardize.
        :type mol: rdkit.Chem.rdchem.Mol
        :returns: The standardized molecule.
        :rtype: rdkit.Chem.rdchem.Mol
        """
        mol = copy.deepcopy(mol)
        Chem.SanitizeMol(mol)
        mol = Chem.RemoveHs(mol)
        mol = self.disconnect_metals(mol)
        mol = self.normalize(mol)
        mol = self.reionize(mol)
        Chem.AssignStereochemistry(mol, force=True, cleanIt=True)
        # TODO: Check this removes symmetric stereocenters
        return mol 
Example #4
Source File: test.py    From xyz2mol with MIT License 5 votes vote down vote up
def test_smiles_from_coord_huckel(smiles):

    # The answer
    mol = Chem.MolFromSmiles(smiles)
    charge = Chem.GetFormalCharge(mol)
    canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # generate forcefield coordinates
    atoms, coordinates = generate_structure_from_smiles(smiles)

    # Generate molobj from atoms, charge and coordinates
    mol = x2m.xyz2mol(atoms, coordinates, charge=charge, use_huckel=True)

    # For this test, remove chira. clean and canonical
    Chem.Kekulize(mol)
    mol = Chem.RemoveHs(mol)
    Chem.RemoveStereochemistry(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # Please look away. A small hack that removes the explicit hydrogens
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol)

    assert smiles == canonical_smiles

    return 
Example #5
Source File: data_utils.py    From MAT with MIT License 5 votes vote down vote up
def load_data_from_smiles(x_smiles, labels, add_dummy_node=True, one_hot_formal_charge=False):
    """Load and featurize data from lists of SMILES strings and labels.

    Args:
        x_smiles (list[str]): A list of SMILES strings.
        labels (list[float]): A list of the corresponding labels.
        add_dummy_node (bool): If True, a dummy node will be added to the molecular graph. Defaults to True.
        one_hot_formal_charge (bool): If True, formal charges on atoms are one-hot encoded. Defaults to False.

    Returns:
        A tuple (X, y) in which X is a list of graph descriptors (node features, adjacency matrices, distance matrices),
        and y is a list of the corresponding labels.
    """
    x_all, y_all = [], []

    for smiles, label in zip(x_smiles, labels):
        try:
            mol = MolFromSmiles(smiles)
            try:
                mol = Chem.AddHs(mol)
                AllChem.EmbedMolecule(mol, maxAttempts=5000)
                AllChem.UFFOptimizeMolecule(mol)
                mol = Chem.RemoveHs(mol)
            except:
                AllChem.Compute2DCoords(mol)

            afm, adj, dist = featurize_mol(mol, add_dummy_node, one_hot_formal_charge)
            x_all.append([afm, adj, dist])
            y_all.append([label])
        except ValueError as e:
            logging.warning('the SMILES ({}) can not be converted to a graph.\nREASON: {}'.format(smiles, e))

    return x_all, y_all 
Example #6
Source File: molecule.py    From chemml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def hydrogens(self, action='add', **kwargs):
        """
        This function adds/removes hydrogens to/from a prebuilt molecule object.

        Parameters
        ----------
        action: str
            Either 'add' or 'remove', to add hydrogns or remove them from the rdkit molecule.

        kwargs:
            The arguments that can be passed to the rdkit functions:
            - `Chem.AddHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs
            - `Chem.RemoveHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs

        Notes
        -----
            - The rdkit or pybel molecule object must be created in advance.
            - Only rdkit or pybel molecule object will be modified in place.
            - If you remove hydrogens from molecules, the atomic 3D coordinates might not be accurate for the conversion to xyz representation.

        """

        # molecule must exist
        _ = self._check_original_molecule()

        if action == 'add':
            if self.pybel_molecule:
                self.pybel_molecule.addh()
            # Note: just if not elif
            if self.rdkit_molecule:
                self.rdkit_molecule = Chem.AddHs(self.rdkit_molecule, **kwargs)
        elif action == 'remove':
            if self.pybel_molecule:
                self.pybel_molecule.removeh()
            if self.rdkit_molecule:
                self.rdkit_molecule = Chem.RemoveHs(self.rdkit_molecule, **kwargs)
        else:
            raise ValueError("The parameter 'action' must be either of 'add' or 'remove'.") 
Example #7
Source File: rdk.py    From oddt with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def removeh(self, **kwargs):
        """Remove hydrogens."""
        self.Mol = Chem.RemoveHs(self.Mol, **kwargs)
        self._clear_cache() 
Example #8
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rmhs(self, mol):
        from rdkit.Chem import RemoveHs

        return RemoveHs(mol) 
Example #9
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rmhs(self, mol):
        from rdkit.Chem import RemoveHs

        return RemoveHs(mol) 
Example #10
Source File: estate.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _CalculateEState(mol, skipH=1):
    """
    #################################################################
    **Internal used only**

    Get the EState value of each atom in a molecule
    #################################################################
    """
    mol = Chem.AddHs(mol)
    if skipH == 1:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    Is = numpy.zeros(nAtoms, numpy.float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            # dv=numpy.array(_AtomHKDeltas(at),'d')
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, numpy.float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res 
Example #11
Source File: cats2d.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def ContructLFromGraphSearch(mol):
    """
    #################################################################
    The last lipophilic pattern on page 55 of the book is realized as a graph
    search and not as a SMARTS search.

    "L" carbon atom adjacent only to carbon atoms.

    The result is a list format.
    #################################################################
    """

    AtomIndex = []
    Hmol = Chem.RemoveHs(mol)
    for atom in Hmol.GetAtoms():
        temp = []
        if atom.GetAtomicNum() == 6:
            for neighatom in atom.GetNeighbors():
                if neighatom.GetAtomicNum() == 6:
                    temp.append(0)
                elif neighatom.GetAtomicNum() == 1:
                    continue
                else:
                    temp.append(1)
            if sum(temp) == 0:
                AtomIndex.append(atom.GetIdx())

    return AtomIndex


################################### 
Example #12
Source File: test.py    From xyz2mol with MIT License 5 votes vote down vote up
def test_smiles_from_xyz_files(filename, charge, answer):

    charged_fragments = True
    quick = True

    atoms, charge_read, coordinates = x2m.read_xyz_file(filename)

    mol = x2m.xyz2mol(atoms, coordinates, charge=charge)
    mol = Chem.RemoveHs(mol)

    smiles = Chem.MolToSmiles(mol)

    assert smiles == answer

    return 
Example #13
Source File: test.py    From xyz2mol with MIT License 5 votes vote down vote up
def test_smiles_from_coord_vdw(smiles):

    # The answer
    mol = Chem.MolFromSmiles(smiles)
    charge = Chem.GetFormalCharge(mol)
    canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # generate forcefield coordinates
    atoms, coordinates = generate_structure_from_smiles(smiles)

    # Generate molobj from atoms, charge and coordinates
    mol = x2m.xyz2mol(atoms, coordinates, charge=charge)

    # For this test, remove chira. clean and canonical
    Chem.Kekulize(mol)
    mol = Chem.RemoveHs(mol)
    Chem.RemoveStereochemistry(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # Please look away. A small hack that removes the explicit hydrogens
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol)

    assert smiles == canonical_smiles

    return 
Example #14
Source File: test.py    From xyz2mol with MIT License 5 votes vote down vote up
def test_smiles_from_adjacent_matrix(smiles):

    charged_fragments = True
    quick = True

    # Cut apart the smiles
    mol = get_mol(smiles)
    atoms = get_atoms(mol)
    charge = Chem.GetFormalCharge(mol)
    adjacent_matrix = Chem.GetAdjacencyMatrix(mol)

    #
    mol = Chem.RemoveHs(mol)
    canonical_smiles = Chem.MolToSmiles(mol)

    # Define new molecule template from atoms
    new_mol = x2m.get_proto_mol(atoms)

    # reconstruct the molecule from adjacent matrix, atoms and total charge
    new_mol = x2m.AC2mol(new_mol, adjacent_matrix, atoms, charge, charged_fragments, quick)
    new_mol = Chem.RemoveHs(new_mol)
    new_mol_smiles = Chem.MolToSmiles(new_mol)

    assert new_mol_smiles == canonical_smiles

    return 
Example #15
Source File: PredX_MPNN.py    From dl4chem-geometry with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def getRMS(self, prb_mol, ref_pos, useFF=False):

        def optimizeWithFF(mol):

            molf = Chem.AddHs(mol, addCoords=True)
            AllChem.MMFFOptimizeMolecule(molf)
            molf = Chem.RemoveHs(molf)

            return molf

        n_est = prb_mol.GetNumAtoms()

        ref_cf = Chem.rdchem.Conformer(n_est)
        for k in range(n_est):
            ref_cf.SetAtomPosition(k, ref_pos[k].tolist())

        ref_mol = copy.deepcopy(prb_mol)
        ref_mol.RemoveConformer(0)
        ref_mol.AddConformer(ref_cf)

        if useFF:
            try:
                res = AllChem.AlignMol(prb_mol, optimizeWithFF(ref_mol))
            except:
                res = AllChem.AlignMol(prb_mol, ref_mol)
        else:
            res = AllChem.AlignMol(prb_mol, ref_mol)

        return res 
Example #16
Source File: fsapt_helper.py    From psikit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def make_feat_data(mol, offset=1):
    res = []
    check_atom = set()
    nohmol = Chem.RemoveHs(mol)
    recap_res = Recap.RecapDecompose(nohmol)
    leaves = [key.replace('*','').replace('()','') for key in recap_res.GetLeaves().keys()]
    leaves = [leave.replace('[H]', '') for leave in leaves if leave != '[H]']
    leaves = sorted(leaves, key=lambda x: Chem.MolFromSmarts(x).GetNumAtoms(), reverse=True)
    if len(leaves) == 0:

        line = [i for i in range(mol.GetNumAtoms())]
        line = [str(n + offset) for n in line]
        line = [Chem.MolToSmiles(mol)] + line
        return [line]
    for leavsmi in leaves:
        leav = Chem.MolFromSmarts(leavsmi)
        matches = mol.GetSubstructMatches(leav)
        for i, match in enumerate(matches):
            line = list(match)
            if len(check_atom & set(line)) > 0:
                continue
            check_atom = check_atom|set(line)
            for idx in match:
                nei = get_neighbor_h(idx, mol)
                line += nei
            line = [str(j + offset) for j in line]
            line = [leavsmi + '_' + str(i)] + line
            res.append(line)
    return res 
Example #17
Source File: evaluate.py    From GLN with MIT License 5 votes vote down vote up
def canonicalize(smiles):
    try:        
        tmp = Chem.MolFromSmiles(smiles)
    except:
        print('no mol')
        return smiles
    if tmp is None:
        return smiles
    tmp = Chem.RemoveHs(tmp)
    [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()]
    return Chem.MolToSmiles(tmp) 
Example #18
Source File: mol_utils.py    From GLN with MIT License 5 votes vote down vote up
def cano_smiles(smiles):
    try:
        tmp = Chem.MolFromSmiles(smiles)
        if tmp is None:
            return None, smiles        
        tmp = Chem.RemoveHs(tmp)
        if tmp is None:
            return None, smiles
        [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()]
        return tmp, Chem.MolToSmiles(tmp)            
    except:
        return None, smiles 
Example #19
Source File: test_coulomb_matrices.py    From deepchem with MIT License 5 votes vote down vote up
def test_coulomb_matrix_no_hydrogens(self):
    """
        Test hydrogen removal.
        """
    from rdkit import Chem
    mol = Chem.RemoveHs(self.mol)
    assert mol.GetNumAtoms() < self.mol.GetNumAtoms()
    f = cm.CoulombMatrix(
        max_atoms=mol.GetNumAtoms(), remove_hydrogens=True, upper_tri=True)
    rval = f([self.mol])  # use the version with hydrogens
    size = np.triu_indices(mol.GetNumAtoms())[0].size
    assert rval.shape == (1, mol.GetNumConformers(), size) 
Example #20
Source File: coulomb_matrices.py    From deepchem with MIT License 5 votes vote down vote up
def coulomb_matrix(self, mol):
    """
    Generate Coulomb matrices for each conformer of the given molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.
    """
    from rdkit import Chem
    if self.remove_hydrogens:
      mol = Chem.RemoveHs(mol)
    n_atoms = mol.GetNumAtoms()
    z = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    rval = []
    for conf in mol.GetConformers():
      d = self.get_interatomic_distances(conf)
      m = np.zeros((n_atoms, n_atoms))
      for i in range(mol.GetNumAtoms()):
        for j in range(mol.GetNumAtoms()):
          if i == j:
            m[i, j] = 0.5 * z[i]**2.4
          elif i < j:
            m[i, j] = (z[i] * z[j]) / d[i, j]
            m[j, i] = m[i, j]
          else:
            continue
      if self.randomize:
        for random_m in self.randomize_coulomb_matrix(m):
          random_m = pad_array(random_m, self.max_atoms)
          rval.append(random_m)
      else:
        m = pad_array(m, self.max_atoms)
        rval.append(m)
    rval = np.asarray(rval)
    return rval 
Example #21
Source File: dataset.py    From 3DGCN with MIT License 5 votes vote down vote up
def tensorize(self, batch_x, batch_c):
        atom_tensor = np.zeros((len(batch_x), self.num_atoms, self.get_num_features()))
        adjm_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms))
        posn_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms, 3))

        for mol_idx, mol in enumerate(batch_x):
            Chem.RemoveHs(mol)
            mol_atoms = mol.GetNumAtoms()

            # Atom features
            atom_tensor[mol_idx, :mol_atoms, :] = self.get_atom_features(mol)

            # Adjacency matrix
            adjms = np.array(rdmolops.GetAdjacencyMatrix(mol), dtype="float")

            # Normalize adjacency matrix by D^(-1/2) * A_hat * D^(-1/2), Kipf et al. 2016
            adjms += np.eye(mol_atoms)
            degree = np.array(adjms.sum(1))
            deg_inv_sqrt = np.power(degree, -0.5)
            deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0.
            deg_inv_sqrt = np.diag(deg_inv_sqrt)

            adjms = np.matmul(np.matmul(deg_inv_sqrt, adjms), deg_inv_sqrt)

            adjm_tensor[mol_idx, : mol_atoms, : mol_atoms] = adjms

            # Relative position matrix
            for atom_idx in range(mol_atoms):
                pos_c = batch_c[mol_idx][atom_idx]

                for neighbor_idx in range(mol_atoms):
                    pos_n = batch_c[mol_idx][neighbor_idx]

                    # Direction should be Neighbor -> Center
                    n_to_c = [pos_c[0] - pos_n[0], pos_c[1] - pos_n[1], pos_c[2] - pos_n[2]]
                    posn_tensor[mol_idx, atom_idx, neighbor_idx, :] = n_to_c

        return [atom_tensor, adjm_tensor, posn_tensor] 
Example #22
Source File: converter.py    From 3DGCN with MIT License 4 votes vote down vote up
def optimize_conformer(idx, m, prop, algo="MMFF"):
    print("Calculating {}: {} ...".format(idx, Chem.MolToSmiles(m)))

    mol = Chem.AddHs(m)

    if algo == "ETKDG":
        # Landrum et al. DOI: 10.1021/acs.jcim.5b00654
        k = AllChem.EmbedMolecule(mol, AllChem.ETKDG())

        if k != 0:
            return None, None

    elif algo == "UFF":
        # Universal Force Field
        AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5)
        try:
            arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=2000)
        except ValueError:
            return None, None

        if not arr:
            return None, None

        else:
            arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=20000)
            idx = np.argmin(arr, axis=0)[1]
            conf = mol.GetConformers()[idx]
            mol.RemoveAllConformers()
            mol.AddConformer(conf)

    elif algo == "MMFF":
        # Merck Molecular Force Field
        AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5)
        try:
            arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=2000)
        except ValueError:
            return None, None

        if not arr:
            return None, None

        else:
            arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=20000)
            idx = np.argmin(arr, axis=0)[1]
            conf = mol.GetConformers()[idx]
            mol.RemoveAllConformers()
            mol.AddConformer(conf)

    mol = Chem.RemoveHs(mol)

    return mol, prop 
Example #23
Source File: test_rdkitfixer.py    From oddt with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test_add_missing_atoms():
    # add missing atom at tryptophan
    molfile = os.path.join(test_dir, '5dhh_missingatomTRP.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 26
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 27

    atom = mol.GetAtomWithIdx(21)
    assert atom.GetAtomicNum() == 6
    info = atom.GetPDBResidueInfo()
    assert info.GetResidueName() == 'TRP'
    assert info.GetResidueNumber() == 175
    assert info.GetName().strip() == 'C9'
    assert atom.IsInRing()
    assert atom.GetIsAromatic()
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # add whole ring to tyrosine
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 23
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 29

    atom = mol.GetAtomWithIdx(17)
    assert atom.GetAtomicNum() == 6
    info = atom.GetPDBResidueInfo()
    assert info.GetResidueName() == 'TYR'
    assert info.GetResidueNumber() == 138
    assert info.GetName().strip() == 'C6'
    assert atom.IsInRing()
    assert atom.GetIsAromatic()
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # missing protein backbone atoms
    molfile = os.path.join(test_dir, '5ar7_HIS.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 21
    assert mol.GetNumBonds() == 19
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 25
    assert mol.GetNumBonds() == 25

    # missing nucleotide backbone atoms
    molfile = os.path.join(test_dir, '1bpx_missingBase.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 301
    assert mol.GetNumBonds() == 333
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 328
    assert mol.GetNumBonds() == 366 
Example #24
Source File: converter.py    From ARC with MIT License 4 votes vote down vote up
def to_rdkit_mol(mol, remove_h=False, sanitize=True):
    """
    Convert a molecular structure to an RDKit RDMol object. Uses
    `RDKit <http://rdkit.org/>`_ to perform the conversion.
    Perceives aromaticity.
    Adopted from rmgpy/molecule/converter.py

    Args:
        mol (Molecule): An RMG Molecule object for the conversion.
        remove_h (bool, optional): Whether to remove hydrogen atoms from the molecule, ``True`` to remove.
        sanitize (bool, optional): Whether to sanitize the RDKit molecule, ``True`` to sanitize.

    Returns:
        RDMol: An RDKit molecule object corresponding to the input RMG Molecule object.
    """
    atom_id_map = dict()

    # only manipulate a copy of ``mol``
    mol_copy = mol.copy(deep=True)
    if not mol_copy.atom_ids_valid():
        mol_copy.assign_atom_ids()
    for i, atom in enumerate(mol_copy.atoms):
        atom_id_map[atom.id] = i  # keeps the original atom order before sorting
    # mol_copy.sort_atoms()  # Sort the atoms before converting to ensure output is consistent between different runs
    atoms_copy = mol_copy.vertices

    rd_mol = Chem.rdchem.EditableMol(Chem.rdchem.Mol())
    for rmg_atom in atoms_copy:
        rd_atom = Chem.rdchem.Atom(rmg_atom.element.symbol)
        if rmg_atom.element.isotope != -1:
            rd_atom.SetIsotope(rmg_atom.element.isotope)
        rd_atom.SetNumRadicalElectrons(rmg_atom.radical_electrons)
        rd_atom.SetFormalCharge(rmg_atom.charge)
        if rmg_atom.element.symbol == 'C' and rmg_atom.lone_pairs == 1 and mol_copy.multiplicity == 1:
            # hard coding for carbenes
            rd_atom.SetNumRadicalElectrons(2)
        if not (remove_h and rmg_atom.symbol == 'H'):
            rd_mol.AddAtom(rd_atom)

    rd_bonds = Chem.rdchem.BondType
    orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC,
              'Q': rd_bonds.QUADRUPLE}
    # Add the bonds
    for atom1 in atoms_copy:
        for atom2, bond12 in atom1.edges.items():
            if bond12.is_hydrogen_bond():
                continue
            if atoms_copy.index(atom1) < atoms_copy.index(atom2):
                rd_mol.AddBond(atom_id_map[atom1.id], atom_id_map[atom2.id], orders[bond12.get_order_str()])

    # Make editable mol and rectify the molecule
    rd_mol = rd_mol.GetMol()
    if sanitize:
        Chem.SanitizeMol(rd_mol)
    if remove_h:
        rd_mol = Chem.RemoveHs(rd_mol, sanitize=sanitize)
    return rd_mol 
Example #25
Source File: chemistry.py    From guacamol with MIT License 4 votes vote down vote up
def filter_and_canonicalize(smiles: str, holdout_set, holdout_fps, neutralization_rxns, tanimoto_cutoff=0.5,
                            include_stereocenters=False):
    """
    Args:
        smiles: the molecule to process
        holdout_set: smiles of the holdout set
        holdout_fps: ECFP4 fingerprints of the holdout set
        neutralization_rxns: neutralization rdkit reactions
        tanimoto_cutoff: Remove molecules with a higher ECFP4 tanimoto similarity than this cutoff from the set
        include_stereocenters: whether to keep stereocenters during canonicalization

    Returns:
        list with canonical smiles as a list with one element, or a an empty list. This is to perform a flatmap:
    """
    try:
        # Drop out if too long
        if len(smiles) > 200:
            return []
        mol = Chem.MolFromSmiles(smiles)
        # Drop out if invalid
        if mol is None:
            return []
        mol = Chem.RemoveHs(mol)

        # We only accept molecules consisting of H, B, C, N, O, F, Si, P, S, Cl, aliphatic Se, Br, I.
        metal_smarts = Chem.MolFromSmarts('[!#1!#5!#6!#7!#8!#9!#14!#15!#16!#17!#34!#35!#53]')

        has_metal = mol.HasSubstructMatch(metal_smarts)

        # Exclude molecules containing the forbidden elements.
        if has_metal:
            print(f'metal {smiles}')
            return []

        canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters)

        # Drop out if too long canonicalized:
        if len(canon_smi) > 100:
            return []
        # Balance charges if unbalanced
        if canon_smi.count('+') - canon_smi.count('-') != 0:
            new_mol, changed = neutralise_charges(mol, reactions=neutralization_rxns)
            if changed:
                mol = new_mol
                canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters)

        # Get most similar to holdout fingerprints, and exclude too similar molecules.
        max_tanimoto = highest_tanimoto_precalc_fps(mol, holdout_fps)
        if max_tanimoto < tanimoto_cutoff and canon_smi not in holdout_set:
            return [canon_smi]
        else:
            print("Exclude: {} {}".format(canon_smi, max_tanimoto))
    except Exception as e:
        print(e)
    return []