Python rdkit.Chem.AllChem.MolFromSmiles() Examples

The following are 23 code examples of rdkit.Chem.AllChem.MolFromSmiles(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem.AllChem, or try the search function.

Example #1

Source File: encoder.py From mhfp with MIT License

6 votes

def secfp_from_smiles(
    in_smiles, length=2048, radius=3, rings=True, kekulize=True, sanitize=False
):
    """Creates a folded binary vector fingerprint of an input SMILES string.

    The SMILES is parsed with RDKit and the resulting molecule is forwarded,
    together with the fingerprint options, to ``MHFPEncoder.secfp_from_mol``.

    Arguments:
      in_smiles {string} -- A valid SMILES string
      length {int} -- The length of the folded fingerprint (default: {2048})
      radius {int} -- The MHFP radius (a radius of 3 corresponds to SECFP6)  (default: {3})
      rings {boolean} -- Whether or not to include rings in the shingling (default: {True})
      kekulize {boolean} -- Whether or not to kekulize the extracted SMILES (default: {True})
      sanitize {boolean} -- Whether or not to sanitize the SMILES when parsing it using RDKit  (default: {False})

    Returns:
      numpy.ndarray -- The folded fingerprint.
    """
    parsed = AllChem.MolFromSmiles(in_smiles, sanitize=sanitize)
    options = dict(length=length, radius=radius, rings=rings, kekulize=kekulize)
    return MHFPEncoder.secfp_from_mol(parsed, **options)

Example #2

Source File: encoder.py From mhfp with MIT License

6 votes

def shingling_from_smiles(
    in_smiles, radius=3, rings=True, kekulize=True, min_radius=1, sanitize=False
):
    """Creates a molecular shingling from a SMILES string.

    The SMILES is parsed with RDKit and the resulting molecule is forwarded
    to ``MHFPEncoder.shingling_from_mol`` with the given options.

    Arguments:
      in_smiles {string} -- A valid SMILES string
      radius {int} -- The MHFP radius (a radius of 3 corresponds to MHFP6)  (default: {3})
      rings {boolean} -- Whether or not to include rings in the shingling (default: {True})
      kekulize {boolean} -- Whether or not to kekulize the extracted SMILES (default: {True})
      min_radius {int} -- The minimum radius that is used to extract n-grams (default: {1})
      sanitize {boolean} -- Whether or not to sanitize the SMILES when parsing it using RDKit  (default: {False})

    Returns:
      list -- The molecular shingling.
    """
    return MHFPEncoder.shingling_from_mol(
        AllChem.MolFromSmiles(in_smiles, sanitize=sanitize),
        rings=rings,
        radius=radius,
        # Forward the caller's choice; this used to be hard-coded to True,
        # which silently ignored ``kekulize=False``.
        kekulize=kekulize,
        min_radius=min_radius,
    )

Example #3

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

6 votes

def NP_score(smile):
    """Compute the NP score of a SMILES string, rescaled and clipped to [0, 1].

    The radius-2 Morgan fingerprint of the molecule is looked up element by
    element in the module-level ``NP_model`` mapping; elements missing from
    the mapping contribute 0. The summed contribution is divided by the
    number of atoms, damped logarithmically beyond +/-4, passed through
    ``remap(score, -3, 1)`` and finally clipped to the unit interval.
    """
    molecule = Chem.MolFromSmiles(smile)
    fingerprint = Chem.GetMorganFingerprint(molecule, 2)

    # Accumulate the per-element contributions, then normalise by atom count.
    total = 0.
    for element in fingerprint.GetNonzeroElements():
        total += NP_model.get(element, 0)
    score = total / float(molecule.GetNumAtoms())

    # Damp extreme scores so unusual molecules cannot blow up the value.
    if score > 4:
        score = 4. + math.log10(score - 4. + 1.)
    elif score < -4:
        score = -4. - math.log10(-4. - score + 1.)

    return np.clip(remap(score, -3, 1), 0.0, 1.0)

Example #4

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

6 votes

def batch_mixed_diversity(smiles, set_smiles):
    """Score each SMILES by its diversity against two reference samples.

    Two reference fingerprint lists are built: one from a random sample of
    100 entries of ``set_smiles`` and one from a random sample of 500 entries
    of ``smiles`` itself. Each entry of ``smiles`` that passes
    ``verify_sequence`` is scored as the sum of its ``diversity`` against
    both references; entries that fail verification score 0.0.

    Previously the first fingerprint list was overwritten by the second and
    the 500-entry sample was never used, so the result was simply twice the
    diversity against every generated molecule.

    Arguments:
      smiles -- sequence of SMILES strings to score (at least 500 entries)
      set_smiles -- sequence of reference SMILES strings (at least 100 entries)

    Returns:
      list -- one score per entry of ``smiles``, in order.

    Raises:
      ValueError -- from ``random.sample`` when either input is smaller than
      its sample size (unchanged from the original behavior).
    """
    def fingerprints(candidates):
        # Skip SMILES that RDKit cannot parse (MolFromSmiles returns None);
        # they cannot be fingerprinted.
        mols = (Chem.MolFromSmiles(s) for s in candidates)
        return [Chem.GetMorganFingerprintAsBitVect(m, 4, nBits=2048)
                for m in mols if m is not None]

    # Reference 1: sample of the provided set.
    set_fps = fingerprints(random.sample(set_smiles, 100))
    # Reference 2: sample of the scored SMILES themselves.
    gen_fps = fingerprints(random.sample(smiles, 500))

    return [diversity(s, set_fps) + diversity(s, gen_fps)
            if verify_sequence(s) else 0.0
            for s in smiles]

Example #5

Source File: mol_utils.py From chemical_vae with Apache License 2.0

5 votes

def canon_smiles(smi):
    """Return the canonical, isomeric SMILES RDKit writes for ``smi``."""
    parsed = Chem.MolFromSmiles(smi)
    return Chem.MolToSmiles(parsed, isomericSmiles=True, canonical=True)

Example #6

Source File: mol_utils.py From chemical_vae with Apache License 2.0

5 votes

def CheckSmiFeasible(smi):
    """Report whether ``smi`` survives a parse / re-serialise round trip.

    The SMILES is parsed with RDKit and the result is passed to
    ``get_molecule_smi``; any ordinary exception along the way means the
    string is not usable.

    Returns:
      bool -- True when no exception was raised, False otherwise.
    """
    try:
        get_molecule_smi(Chem.MolFromSmiles(smi))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return False
    return True

Example #7

Source File: mol_utils.py From chemical_vae with Apache License 2.0

5 votes

def verify_smiles(smile):
    """Check that ``smile`` is non-empty, non-null and parseable by RDKit.

    The checks run in that order and stop at the first failure, so RDKit is
    only invoked for values that are neither ``''`` nor null.
    """
    non_empty = smile != ''
    if not non_empty:
        return non_empty

    not_null = pd.notnull(smile)
    if not not_null:
        return not_null

    return Chem.MolFromSmiles(smile) is not None

Example #8

Source File: mol_utils.py From chemical_vae with Apache License 2.0

5 votes

def smiles_to_mol(smiles):
    """Parse ``smiles`` with RDKit, returning None instead of raising.

    Returns:
      The value of ``Chem.MolFromSmiles(smiles)``, or None when that call
      raises an ordinary exception.
    """
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        # Best-effort conversion: report failure as None. ``Exception``
        # rather than a bare ``except`` so KeyboardInterrupt / SystemExit
        # are not swallowed.
        return None

Example #9

Source File: 2_to_fingerprint.py From mhfp with MIT License

5 votes

def convert(subset):
    """Write three fingerprint files for one ChEMBL subset.

    Reads the first space-separated column of
    ``/cluster/chembl/chembl.<subset>.smi`` as SMILES and, for every SMILES
    RDKit can parse, writes one comma-separated line to each of:

      * ``.mhfp6``   -- ``MHFPEncoder.encode_mol`` of the molecule
      * ``.mhecfp4`` -- ``MHFPEncoder.from_sparse_array`` of the non-zero
                        element ids of the radius-2 Morgan fingerprint
      * ``.ecfp4``   -- the 2048-bit radius-2 Morgan bit vector

    Each SMILES is parsed once and fed to all three writers; the original
    parsed every SMILES three times, once per output file. The file
    contents are unchanged.

    Arguments:
      subset -- identifier inserted into the file names via ``str(subset)``
    """
    base = '/cluster/chembl/chembl.' + str(subset)
    actives = pd.read_csv(base + '.smi', sep=' ', usecols=[0], header=None)

    mh = MHFPEncoder()

    with open(base + '.mhfp6', 'w+') as mhfp6_file, \
            open(base + '.mhecfp4', 'w+') as mhecfp4_file, \
            open(base + '.ecfp4', 'w+') as ecfp4_file:
        for _, row in actives.iterrows():
            mol = AllChem.MolFromSmiles(row[0])
            if not mol:
                # Unparseable SMILES are skipped in every output file.
                continue

            mhfp6 = mh.encode_mol(mol)
            mhfp6_file.write(','.join(map(str, mhfp6)) + '\n')

            morgan_ids = [*AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()]
            mhecfp4 = mh.from_sparse_array(morgan_ids)
            mhecfp4_file.write(','.join(map(str, mhecfp4)) + '\n')

            ecfp4 = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
            ecfp4_file.write(','.join(map(str, ecfp4)) + '\n')

Example #10

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def substructure_match(smile, train_smiles=None, sub_mol=None):
    """Return 1 if the molecule parsed from ``smile`` contains ``sub_mol``, else 0.

    ``train_smiles`` is accepted but not used by this function.
    """
    return int(Chem.MolFromSmiles(smile).HasSubstructMatch(sub_mol))

#====== NP-likeliness

Example #11

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def druglikeliness(smile, train_smiles):
    """Return ``qed`` of the molecule parsed from ``smile``, or 0.0 on failure.

    ``train_smiles`` is accepted but not used by this function.

    Returns:
      The value of ``qed(Chem.MolFromSmiles(smile))``; 0.0 when parsing or
      scoring raises an ordinary exception.
    """
    try:
        return qed(Chem.MolFromSmiles(smile))
    except Exception:
        # Best-effort metric: any failure scores 0.0. ``Exception`` rather
        # than a bare ``except`` so KeyboardInterrupt / SystemExit propagate.
        return 0.0

Example #12

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def diversity(smile, fps):
    """Score how far ``smile`` lies from the reference fingerprints ``fps``.

    The mean Tanimoto distance between the molecule's radius-4, 2048-bit
    Morgan fingerprint and every fingerprint in ``fps`` is passed through
    ``remap`` with the bounds 0.9 and 0.945 and clipped to [0, 1].
    """
    low_rand_dst = 0.9
    mean_div_dst = 0.945

    reference = Chem.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(smile), 4, nBits=2048)
    distances = DataStructs.BulkTanimotoSimilarity(
        reference, fps, returnDistance=True)

    rescaled = remap(np.mean(np.array(distances)), low_rand_dst, mean_div_dst)
    return np.clip(rescaled, 0.0, 1.0)

#==============

Example #13

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def batch_diversity(smiles, set_smiles):
    """Score every entry of ``smiles`` against a random sample of ``set_smiles``.

    100 entries are drawn from ``set_smiles`` and fingerprinted (radius-4,
    2048-bit Morgan). Each entry of ``smiles`` that passes
    ``verify_sequence`` gets its ``diversity`` against those fingerprints;
    the others get 0.0.
    """
    sampled = random.sample(set_smiles, 100)
    reference_mols = list(map(Chem.MolFromSmiles, sampled))
    reference_fps = [
        Chem.GetMorganFingerprintAsBitVect(mol, 4, nBits=2048)
        for mol in reference_mols
    ]

    scores = []
    for candidate in smiles:
        if verify_sequence(candidate):
            scores.append(diversity(candidate, reference_fps))
        else:
            scores.append(0.0)
    return scores

Example #14

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def verify_sequence(smile):
    """Return True when ``smile`` is non-empty and parses to more than one atom."""
    parsed = Chem.MolFromSmiles(smile)
    if smile == '' or parsed is None:
        return False
    return parsed.GetNumAtoms() > 1


# def build_vocab(smiles, pad_char='_', start_char='^'):
#     i = 1
#     char_dict, ord_dict = {start_char: 0}, {0: start_char}
#     for smile in smiles:
#         for c in smile:
#             if c not in char_dict:
#                 char_dict[c] = i
#                 ord_dict[i] = c
#                 i += 1
#     char_dict[pad_char], ord_dict[i] = i, pad_char
#     return char_dict, ord_dict


# def pad(smile, n, pad_char='_'):
#     if n < len(smile):
#         return smile
#     return smile + pad_char * (n - len(smile))


# def unpad(smile, pad_char='_'): return smile.rstrip(pad_char)


# def encode(smile, max_len, char_dict): return [
#     char_dict[c] for c in pad(smile, max_len)]


# def decode(ords, ord_dict): return unpad(
#     ''.join([ord_dict[o] for o in ords]))

Example #15

Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0

5 votes

def canon_smile(smile):
    """Round-trip ``smile`` through RDKit and return the SMILES it writes back."""
    parsed = MolFromSmiles(smile)
    return MolToSmiles(parsed)

Example #16

Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License

5 votes

def setUp(self):
    """Create two CtabModel rows: a mol block for 'c1cocc1' and the string 'rubbish'."""
    parsed = Chem.MolFromSmiles('c1cocc1')
    for ctab in (Chem.MolToMolBlock(parsed), 'rubbish'):
        CtabModel.objects.create(ctab=ctab)

Example #17

Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License

5 votes

def test_pkl_io(self):
    """Fingerprints stored through BfpModel come back with the same on-bits."""
    stored = {}
    for smiles in SMILES_SAMPLE:
        fingerprint = Chem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(smiles), 2, 512)
        record = BfpModel.objects.create(bfp=fingerprint)
        # Remember what was written, keyed by primary key.
        stored[record.pk] = fingerprint

    for record in BfpModel.objects.all():
        self.assertTrue(record.pk in stored)
        written_bits = list(stored[record.pk].GetOnBits())
        loaded_bits = list(record.bfp.GetOnBits())
        self.assertEqual(written_bits, loaded_bits)

Example #18

Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License

5 votes

def test_issubstruct_lookup(self):
    """``molecule__issubstruct`` accepts both SMILES strings and Mol objects."""

    def hits(query):
        # Number of stored molecules matching the ``issubstruct`` lookup.
        return MoleculeModel.objects.filter(molecule__issubstruct=query).count()

    self.assertEqual(hits('CCN1c2ccccc2Sc2ccccc21'), 2)

    self.assertEqual(hits('CC[N+]([O-])(CC)CCCN1c2ccccc2S(=O)c2ccccc21'), 4)

    # Same query as above, supplied as an RDKit molecule instead of a string.
    self.assertEqual(
        hits(Chem.MolFromSmiles('CC[N+]([O-])(CC)CCCN1c2ccccc2S(=O)c2ccccc21')), 4)

Example #19

Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License

5 votes

def test_hassubstruct_lookup(self):
    """Exercise ``molecule__hassubstruct`` with strings, DB functions, Q objects and Mol objects."""

    # Plain SMILES string and the MOL_FROM_SMILES database function must agree.
    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=C(C)C=CC=C1')
    self.assertEqual(objs.count(), 61)

    objs = MoleculeModel.objects.filter(
        molecule__hassubstruct=MOL_FROM_SMILES(Value('C1=C(C)C=CC=C1')))
    self.assertEqual(objs.count(), 61)

    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CC=CC=C1')
    cnt1 = objs.count()
    self.assertEqual(cnt1, 70)

    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CN=CC=C1')
    cnt2 = objs.count()
    self.assertEqual(cnt2, 7)

    # The OR of both lookups cannot exceed the sum of the individual counts.
    objs = MoleculeModel.objects.filter(
        Q(molecule__hassubstruct='C1=CC=CC=C1') |
        Q(molecule__hassubstruct='C1=CN=CC=C1'),
    )
    cnt3 = objs.count()
    self.assertEqual(cnt3, 73)
    self.assertTrue(cnt3 <= cnt1 + cnt2)

    # A single query molecule covering both rings must match the OR count.
    qmol = QMOL(Value('c1[c,n]cccc1'))
    objs = MoleculeModel.objects.filter(molecule__hassubstruct=qmol)
    self.assertEqual(objs.count(), cnt3)

    # Same pattern as cnt2, supplied as an RDKit molecule. Assert on cnt4:
    # the original re-checked cnt2, so this lookup was never verified.
    objs = MoleculeModel.objects.filter(molecule__hassubstruct=Chem.MolFromSmiles('C1=CN=CC=C1'))
    cnt4 = objs.count()
    self.assertEqual(cnt4, 7)

Example #20

Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License

5 votes

def test_exact_lookup(self):
    """Exact ``molecule=`` lookups work for strings, Mol objects and DB functions."""

    def hits(query):
        # Number of stored molecules exactly matching ``query``.
        return MoleculeModel.objects.filter(molecule=query).count()

    self.assertEqual(hits('COC(c1ccccc1)c1ccccc1'), 1)

    self.assertEqual(hits('Nc1ccc(Cl)nc1'), 1)

    self.assertEqual(hits(Chem.MolFromSmiles('Nc1ccc(Cl)nc1')), 1)

    self.assertEqual(hits(MOL_FROM_SMILES(Value('Nc1ccc(Cl)nc1'))), 1)

Example #21

Source File: neural_fp.py From conv_qsar_fast with MIT License

5 votes

def sizeAttributeVectors(molecular_attributes = False):
	'''Return the attribute-vector lengths of the first node and the first
	edge of the graph built from the molecule 'CC', as a (node, edge) tuple.'''
	graph = molToGraph(AllChem.MolFromSmiles('CC'), molecular_attributes = molecular_attributes)
	first_node, first_edge = graph.nodes[0], graph.edges[0]
	return len(first_node.attributes), len(first_edge.attributes)

Example #22

Source File: neural_fp.py From conv_qsar_fast with MIT License

5 votes

def sizeAttributeVector(molecular_attributes = False):
	'''Return the combined attribute-vector length of the first node and
	the first edge of the graph built from the molecule 'CC'.'''
	graph = molToGraph(AllChem.MolFromSmiles('CC'), molecular_attributes = molecular_attributes)
	first_node, first_edge = graph.nodes[0], graph.edges[0]
	return len(first_node.attributes) + len(first_edge.attributes)

Example #23

Source File: load_lowe_examples_into_db_details.py From ochem_predict_nn with MIT License

4 votes

def mol_to_dic(node, withAmounts = False):
	'''Build a dictionary describing the molecule stored in an XML node.

	The result always holds 'name', 'smiles' and 'inchi'. When withAmounts
	is true, 'yield' and 'amount(g)' are added if the node carries
	<amount> children with units 'unit:percentYield' / 'unit:g'.

	Raises ValueError when the node has neither a 'cml:inchi' nor a
	'cml:smiles' identifier, or when the chosen identifier cannot be parsed.'''
	name = str(node.getElementsByTagName('name')[0].firstChild.nodeValue)
	dic = {'name': name}

	# Collect every <identifier dictRef=... value=...> child as dictRef -> value.
	# NOTE: branching on dl:entityType is disabled; all nodes take this path.
	identifiers = {}
	for child in node.getElementsByTagName('identifier'):
		ref = child.attributes.getNamedItem('dictRef').value
		identifiers[ref] = child.attributes.getNamedItem('value').value

	# InChI takes precedence over SMILES when both are present.
	if 'cml:inchi' in identifiers:
		mol = AllChem.MolFromInchi(str(identifiers['cml:inchi']))
	elif 'cml:smiles' in identifiers:
		mol = AllChem.MolFromSmiles(str(identifiers['cml:smiles']))
	else:
		print('identifiers: {}'.format(identifiers.keys()))
		raise ValueError('No molecular identifier for {}'.format(dic['name']))
	if not mol:
		raise ValueError('Couldnt parse molecule: {}'.format(identifiers))

	Chem.SanitizeMol(mol)
	dic['smiles'] = AllChem.MolToSmiles(mol, isomericSmiles=True)
	dic['inchi'] = AllChem.MolToInchi(mol)

	if not withAmounts:
		return dic

	# Map each <amount units=...> child to its text content.
	amounts = {}
	for child in node.getElementsByTagName('amount'):
		unit = child.attributes.getNamedItem('units').value
		amounts[unit] = child.firstChild.nodeValue
	for unit, field in (('unit:percentYield', 'yield'), ('unit:g', 'amount(g)')):
		if unit in amounts:
			dic[field] = float(amounts[unit])
	return dic