Python rdkit.Chem.AllChem.MolFromSmiles() Examples

The following are 23 code examples of rdkit.Chem.AllChem.MolFromSmiles(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem.AllChem, or try the search function.
Example #1
Source File: encoder.py    From mhfp with MIT License 6 votes vote down vote up
def secfp_from_smiles(
        in_smiles, length=2048, radius=3, rings=True, kekulize=True, sanitize=False
    ):
        """Build a folded binary vector fingerprint from a SMILES string.

        The SMILES is parsed with ``AllChem.MolFromSmiles`` and the resulting
        molecule is forwarded to ``MHFPEncoder.secfp_from_mol`` together with
        the fingerprint options.

        Arguments:
          in_smiles {string} -- A valid SMILES string
          length {int} -- The length of the folded fingerprint (default: {2048})
          radius {int} -- The MHFP radius (a radius of 3 corresponds to SECFP6) (default: {3})
          rings {boolean} -- Whether or not to include rings in the shingling (default: {True})
          kekulize {boolean} -- Whether or not to kekulize the extracted SMILES (default: {True})
          sanitize {boolean} -- Whether or not to sanitize the SMILES when parsing it using RDKit (default: {False})

        Returns:
          numpy.ndarray -- The folded fingerprint.
        """
        parsed_mol = AllChem.MolFromSmiles(in_smiles, sanitize=sanitize)
        fingerprint_options = dict(
            length=length,
            radius=radius,
            rings=rings,
            kekulize=kekulize,
        )
        return MHFPEncoder.secfp_from_mol(parsed_mol, **fingerprint_options)
Example #2
Source File: encoder.py    From mhfp with MIT License 6 votes vote down vote up
def shingling_from_smiles(
        in_smiles, radius=3, rings=True, kekulize=True, min_radius=1, sanitize=False
    ):
        """Creates a molecular shingling from a SMILES string.

        The SMILES is parsed with ``AllChem.MolFromSmiles`` and the resulting
        molecule is forwarded to ``MHFPEncoder.shingling_from_mol``.

        Arguments:
          in_smiles {string} -- A valid SMILES string
          radius {int} -- The MHFP radius (a radius of 3 corresponds to MHFP6) (default: {3})
          rings {boolean} -- Whether or not to include rings in the shingling (default: {True})
          kekulize {boolean} -- Whether or not to kekulize the extracted SMILES (default: {True})
          min_radius {int} -- The minimum radius that is used to extract n-grams (default: {1})
          sanitize {boolean} -- Whether or not to sanitize the SMILES when parsing it using RDKit (default: {False})

        Returns:
          list -- The molecular shingling.
        """
        return MHFPEncoder.shingling_from_mol(
            AllChem.MolFromSmiles(in_smiles, sanitize=sanitize),
            rings=rings,
            radius=radius,
            # Forward the caller's choice; this was previously hard-coded to
            # True, so passing kekulize=False had no effect.
            kekulize=kekulize,
            min_radius=min_radius,
        )
Example #3
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 6 votes vote down vote up
def NP_score(smile):
    """Score a SMILES string using the ``NP_model`` lookup table.

    The contributions stored in ``NP_model`` for every non-zero element of
    the radius-2 Morgan fingerprint are summed (missing bits count as 0) and
    divided by the number of atoms. Values beyond +/-4 are compressed
    logarithmically, then the result is passed through ``remap(score, -3, 1)``
    and clipped to the interval [0, 1].
    """
    mol = Chem.MolFromSmiles(smile)
    nonzero_bits = Chem.GetMorganFingerprint(mol, 2).GetNonzeroElements()

    # accumulate the per-bit contributions and normalise by molecule size
    raw = sum((NP_model.get(bit, 0) for bit in nonzero_bits), 0.)
    raw /= float(mol.GetNumAtoms())

    # dampen extreme scores so exotic molecules cannot blow up the value
    if raw > 4:
        raw = 4. + math.log10(raw - 4. + 1.)
    elif raw < -4:
        raw = -4. - math.log10(-4. - raw + 1.)

    return np.clip(remap(raw, -3, 1), 0.0, 1.0)
Example #4
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 6 votes vote down vote up
def batch_mixed_diversity(smiles, set_smiles):
    """Score each SMILES by its diversity against two fingerprint pools.

    One pool is built from a random sample of ``set_smiles`` (up to 100
    entries), the other from a random sample of ``smiles`` itself (up to 500
    entries). For every entry of ``smiles`` that passes ``verify_sequence``
    the result is ``diversity(s, set pool) + diversity(s, generated pool)``;
    entries that fail verification score 0.0.

    The previous implementation overwrote the set fingerprints with the
    generated ones, never used the 500-element sample, and therefore added
    the same term twice.
    NOTE(review): summing the two terms (range [0, 2]) follows the original
    expression; confirm callers do not expect an average.

    Arguments:
      smiles -- sequence of SMILES strings to score
      set_smiles -- sequence of reference SMILES strings

    Returns:
      list with one score per entry of ``smiles``.
    """
    # reference pool: sample size is clamped so short inputs do not raise
    rand_smiles = random.sample(set_smiles, min(100, len(set_smiles)))
    set_mols = (Chem.MolFromSmiles(s) for s in rand_smiles)
    set_fps = [Chem.GetMorganFingerprintAsBitVect(m, 4, nBits=2048)
               for m in set_mols if m is not None]

    # generated pool: unparsable SMILES yield None and cannot be
    # fingerprinted, so they are skipped
    rand_gen_smiles = random.sample(smiles, min(500, len(smiles)))
    gen_mols = (Chem.MolFromSmiles(s) for s in rand_gen_smiles)
    gen_fps = [Chem.GetMorganFingerprintAsBitVect(m, 4, nBits=2048)
               for m in gen_mols if m is not None]

    vals = [diversity(s, set_fps) + diversity(s, gen_fps) if verify_sequence(s)
            else 0.0 for s in smiles]

    return vals
Example #5
Source File: mol_utils.py    From chemical_vae with Apache License 2.0 5 votes vote down vote up
def canon_smiles(smi):
    """Parse ``smi`` with RDKit and write it back as canonical, isomeric SMILES."""
    mol = Chem.MolFromSmiles(smi)
    return Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
Example #6
Source File: mol_utils.py    From chemical_vae with Apache License 2.0 5 votes vote down vote up
def CheckSmiFeasible(smi):
    """Report whether ``smi`` can be round-tripped through a mol object.

    The SMILES is parsed with ``Chem.MolFromSmiles`` and the result is handed
    to ``get_molecule_smi``; any exception raised along the way marks the
    SMILES as infeasible.

    Returns:
      True if no exception was raised, False otherwise.
    """
    try:
        get_molecule_smi(Chem.MolFromSmiles(smi))
    except Exception:
        # Exception (rather than a bare except) still treats every ordinary
        # failure as "infeasible" but lets KeyboardInterrupt/SystemExit through.
        return False
    return True
Example #7
Source File: mol_utils.py    From chemical_vae with Apache License 2.0 5 votes vote down vote up
def verify_smiles(smile):
    """Return True only for a non-empty, non-null SMILES that RDKit can parse."""
    # cheap checks first so RDKit is never asked to parse '' or a null value
    if smile == '' or pd.isnull(smile):
        return False
    return Chem.MolFromSmiles(smile) is not None
Example #8
Source File: mol_utils.py    From chemical_vae with Apache License 2.0 5 votes vote down vote up
def smiles_to_mol(smiles):
    """Parse ``smiles`` with RDKit, returning None instead of raising.

    Returns:
      Whatever ``Chem.MolFromSmiles`` returns, or None if the call raised.
    """
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        # Best-effort conversion: ordinary failures map to None, while
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
Example #9
Source File: 2_to_fingerprint.py    From mhfp with MIT License 5 votes vote down vote up
def convert(subset):
    """Write three fingerprint files for one ChEMBL subset.

    Reads the first column of ``/cluster/chembl/chembl.<subset>.smi`` and, for
    every row RDKit can parse, writes one comma-separated line to each of the
    ``.mhfp6``, ``.mhecfp4`` and ``.ecfp4`` files next to it. Rows whose
    SMILES does not yield a molecule are skipped.
    """
    prefix = '/cluster/chembl/chembl.' + str(subset)
    actives = pd.read_csv(prefix + '.smi', sep=' ', usecols=[0], header=None)

    mh = MHFPEncoder()

    def mhfp6(mol):
        return mh.encode_mol(mol)

    def mhecfp4(mol):
        nonzero = AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()
        return mh.from_sparse_array([*nonzero])

    def ecfp4(mol):
        return AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)

    # the files are produced one after another, in this order
    outputs = (('.mhfp6', mhfp6), ('.mhecfp4', mhecfp4), ('.ecfp4', ecfp4))
    for extension, fingerprint in outputs:
        with open(prefix + extension, 'w+') as f:
            for _, row in actives.iterrows():
                mol = AllChem.MolFromSmiles(row[0])
                if mol:
                    fp_vals = ','.join(map(str, fingerprint(mol)))
                    f.write(fp_vals + '\n')
Example #10
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def substructure_match(smile, train_smiles=None, sub_mol=None):
    """Return 1 if the molecule parsed from ``smile`` contains ``sub_mol``, else 0.

    ``train_smiles`` is accepted but not used by this function.
    """
    return int(Chem.MolFromSmiles(smile).HasSubstructMatch(sub_mol))

#====== NP-likeliness 
Example #11
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def druglikeliness(smile, train_smiles):
    """Return ``qed`` of the molecule parsed from ``smile``, or 0.0 on failure.

    ``train_smiles`` is accepted but not used by this function.
    """
    try:
        return qed(Chem.MolFromSmiles(smile))
    except Exception:
        # Any ordinary failure scores 0.0. The bare except was narrowed so
        # KeyboardInterrupt/SystemExit propagate, and the unreachable trailing
        # return was dropped.
        return 0.0
Example #12
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def diversity(smile, fps):
    """Score how far ``smile`` lies, on average, from the fingerprints in ``fps``.

    The mean Tanimoto distance between the radius-4, 2048-bit Morgan
    fingerprint of ``smile`` and every fingerprint in ``fps`` is remapped with
    ``remap(mean, 0.9, 0.945)`` and clipped to [0, 1].
    """
    low_rand_dst = 0.9
    mean_div_dst = 0.945

    query_fp = Chem.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(smile), 4, nBits=2048)
    distances = DataStructs.BulkTanimotoSimilarity(
        query_fp, fps, returnDistance=True)

    mean_dist = np.mean(np.array(distances))
    return np.clip(remap(mean_dist, low_rand_dst, mean_div_dst), 0.0, 1.0)

#============== 
Example #13
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def batch_diversity(smiles, set_smiles):
    """Score every entry of ``smiles`` against 100 random reference molecules.

    Entries that fail ``verify_sequence`` score 0.0; the others get
    ``diversity(s, fps)``, where ``fps`` are the fingerprints of 100 SMILES
    sampled from ``set_smiles``.
    """
    sampled = random.sample(set_smiles, 100)
    reference_mols = [Chem.MolFromSmiles(entry) for entry in sampled]
    reference_fps = [
        Chem.GetMorganFingerprintAsBitVect(mol, 4, nBits=2048)
        for mol in reference_mols
    ]

    scores = []
    for candidate in smiles:
        if verify_sequence(candidate):
            scores.append(diversity(candidate, reference_fps))
        else:
            scores.append(0.0)
    return scores
Example #14
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def verify_sequence(smile):
    """Return True if ``smile`` is non-empty and parses to a molecule with more than one atom."""
    parsed = Chem.MolFromSmiles(smile)
    if smile == '' or parsed is None:
        return False
    return parsed.GetNumAtoms() > 1


# def build_vocab(smiles, pad_char='_', start_char='^'):
#     i = 1
#     char_dict, ord_dict = {start_char: 0}, {0: start_char}
#     for smile in smiles:
#         for c in smile:
#             if c not in char_dict:
#                 char_dict[c] = i
#                 ord_dict[i] = c
#                 i += 1
#     char_dict[pad_char], ord_dict[i] = i, pad_char
#     return char_dict, ord_dict


# def pad(smile, n, pad_char='_'):
#     if n < len(smile):
#         return smile
#     return smile + pad_char * (n - len(smile))


# def unpad(smile, pad_char='_'): return smile.rstrip(pad_char)


# def encode(smile, max_len, char_dict): return [
#     char_dict[c] for c in pad(smile, max_len)]


# def decode(ords, ord_dict): return unpad(
#     ''.join([ord_dict[o] for o in ords])) 
Example #15
Source File: mol_metrics.py    From ORGAN with GNU General Public License v2.0 5 votes vote down vote up
def canon_smile(smile):
    """Round-trip ``smile`` through ``MolFromSmiles`` and ``MolToSmiles``."""
    parsed = MolFromSmiles(smile)
    return MolToSmiles(parsed)
Example #16
Source File: tests.py    From django-rdkit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUp(self):
        """Create two ``CtabModel`` rows for the tests.

        The first row stores the mol block RDKit generates for 'c1cocc1', the
        second stores the literal text 'rubbish'.
        """
        mol_block = Chem.MolToMolBlock(Chem.MolFromSmiles('c1cocc1'))
        for ctab in (mol_block, 'rubbish'):
            CtabModel.objects.create(ctab=ctab)
Example #17
Source File: tests.py    From django-rdkit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_pkl_io(self):
        """Fingerprints read back from ``BfpModel`` keep the on-bits that were stored."""
        stored = {}
        for smiles in SMILES_SAMPLE:
            fingerprint = Chem.GetMorganFingerprintAsBitVect(
                Chem.MolFromSmiles(smiles), 2, 512)
            record = BfpModel.objects.create(bfp=fingerprint)
            stored[record.pk] = fingerprint

        # every row in the table must correspond to a fingerprint saved above
        for record in BfpModel.objects.all():
            self.assertTrue(record.pk in stored)
            expected_bits = list(stored[record.pk].GetOnBits())
            loaded_bits = list(record.bfp.GetOnBits())
            self.assertEqual(expected_bits, loaded_bits)
Example #18
Source File: tests.py    From django-rdkit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_issubstruct_lookup(self):
        """Check ``molecule__issubstruct`` row counts for two query molecules.

        The larger query is issued twice, once as a SMILES string and once as
        an RDKit molecule, and must match the same number of rows both times.
        """
        small_query = 'CCN1c2ccccc2Sc2ccccc21'
        large_query = 'CC[N+]([O-])(CC)CCCN1c2ccccc2S(=O)c2ccccc21'

        matches = MoleculeModel.objects.filter(molecule__issubstruct=small_query)
        self.assertEqual(matches.count(), 2)

        matches = MoleculeModel.objects.filter(molecule__issubstruct=large_query)
        self.assertEqual(matches.count(), 4)

        # same query again, passed as a Mol object rather than a string
        matches = MoleculeModel.objects.filter(
            molecule__issubstruct=Chem.MolFromSmiles(large_query))
        self.assertEqual(matches.count(), 4)
Example #19
Source File: tests.py    From django-rdkit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_hassubstruct_lookup(self):
        """Check ``molecule__hassubstruct`` row counts for several query forms.

        Queries are issued as SMILES strings, as ``MOL_FROM_SMILES``/``QMOL``
        database expressions, as a ``Q`` disjunction and as an RDKit molecule;
        equivalent queries must return identical counts.
        """

        objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=C(C)C=CC=C1')
        self.assertEqual(objs.count(), 61)

        objs = MoleculeModel.objects.filter(
            molecule__hassubstruct=MOL_FROM_SMILES(Value('C1=C(C)C=CC=C1')))
        self.assertEqual(objs.count(), 61)

        objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CC=CC=C1')
        cnt1 = objs.count()
        self.assertEqual(cnt1, 70)

        objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CN=CC=C1')
        cnt2 = objs.count()
        self.assertEqual(cnt2, 7)

        # union of the two previous queries
        objs = MoleculeModel.objects.filter(
            Q(molecule__hassubstruct='C1=CC=CC=C1') |
            Q(molecule__hassubstruct='C1=CN=CC=C1'),
        )
        cnt3 = objs.count()
        self.assertEqual(cnt3, 73)
        self.assertTrue(cnt3 <= cnt1 + cnt2)

        # a single query molecule with a [c,n] atom list must match the union
        qmol = QMOL(Value('c1[c,n]cccc1'))
        objs = MoleculeModel.objects.filter(molecule__hassubstruct=qmol)
        self.assertEqual(objs.count(), cnt3)

        # The same query passed as a Mol object must agree with the string
        # form (cnt2). The old assertion re-checked cnt2 and ignored cnt4.
        objs = MoleculeModel.objects.filter(molecule__hassubstruct=Chem.MolFromSmiles('C1=CN=CC=C1'))
        cnt4 = objs.count()
        self.assertEqual(cnt4, cnt2)
Example #20
Source File: tests.py    From django-rdkit with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_exact_lookup(self):
        """Exact ``molecule=`` lookups return one row for each query form.

        The query is given as a SMILES string, as an RDKit molecule and as a
        ``MOL_FROM_SMILES`` database expression.
        """

        def matching_rows(query):
            return MoleculeModel.objects.filter(molecule=query).count()

        self.assertEqual(matching_rows('COC(c1ccccc1)c1ccccc1'), 1)
        self.assertEqual(matching_rows('Nc1ccc(Cl)nc1'), 1)
        self.assertEqual(matching_rows(Chem.MolFromSmiles('Nc1ccc(Cl)nc1')), 1)
        self.assertEqual(matching_rows(MOL_FROM_SMILES(Value('Nc1ccc(Cl)nc1'))), 1)
Example #21
Source File: neural_fp.py    From conv_qsar_fast with MIT License 5 votes vote down vote up
def sizeAttributeVectors(molecular_attributes = False):
	"""Return (node attribute count, edge attribute count) for a probe molecule.

	The molecule 'CC' is converted with ``molToGraph`` and the attribute
	lists of its first node and first edge are measured.
	"""
	graph = molToGraph(AllChem.MolFromSmiles('CC'), molecular_attributes = molecular_attributes)
	first_node, first_edge = graph.nodes[0], graph.edges[0]
	return len(first_node.attributes), len(first_edge.attributes)
Example #22
Source File: neural_fp.py    From conv_qsar_fast with MIT License 5 votes vote down vote up
def sizeAttributeVector(molecular_attributes = False):
	"""Return the combined node and edge attribute count for a probe molecule.

	The molecule 'CC' is converted with ``molToGraph``; the lengths of the
	attribute lists of its first node and first edge are added together.
	"""
	graph = molToGraph(AllChem.MolFromSmiles('CC'), molecular_attributes = molecular_attributes)
	first_node, first_edge = graph.nodes[0], graph.edges[0]
	return len(first_node.attributes) + len(first_edge.attributes)
Example #23
Source File: load_lowe_examples_into_db_details.py    From ochem_predict_nn with MIT License 4 votes vote down vote up
def mol_to_dic(node, withAmounts = False):
	'''Convert a DOM node describing a molecule into a dictionary.

	The result always has 'name', 'smiles' and 'inchi'. When withAmounts is
	true, 'yield' and 'amount(g)' are added if the node carries
	'unit:percentYield' / 'unit:g' amounts.

	Raises ValueError when the node has neither a 'cml:inchi' nor a
	'cml:smiles' identifier, or when the identifier cannot be parsed.'''
	name = str(node.getElementsByTagName('name')[0].firstChild.nodeValue)
	dic = {'name': name}

	# Map each <identifier> element's dictRef to its value.
	# (Branching on dl:entityType was disabled in the original code.)
	identifiers = {}
	for child in node.getElementsByTagName('identifier'):
		key = child.attributes.getNamedItem('dictRef').value
		identifiers[key] = child.attributes.getNamedItem('value').value

	# InChI takes precedence over SMILES when both are present.
	if 'cml:inchi' in identifiers:
		mol = AllChem.MolFromInchi(str(identifiers['cml:inchi']))
	elif 'cml:smiles' in identifiers:
		mol = AllChem.MolFromSmiles(str(identifiers['cml:smiles']))
	else:
		print('identifiers: {}'.format(identifiers.keys()))
		raise ValueError('No molecular identifier for {}'.format(dic['name']))
	if not mol:
		raise ValueError('Couldnt parse molecule: {}'.format(identifiers))

	Chem.SanitizeMol(mol)
	dic['smiles'] = AllChem.MolToSmiles(mol, isomericSmiles=True)
	dic['inchi'] = AllChem.MolToInchi(mol)

	if not withAmounts:
		return dic

	# Map each <amount> element's units to its text content.
	amounts = {}
	for child in node.getElementsByTagName('amount'):
		units = child.attributes.getNamedItem('units').value
		amounts[units] = child.firstChild.nodeValue
	if 'unit:percentYield' in amounts:
		dic['yield'] = float(amounts['unit:percentYield'])
	if 'unit:g' in amounts:
		dic['amount(g)'] = float(amounts['unit:g'])
	return dic