Python rdkit.Chem.SDWriter() Examples

The following are 13 code examples of rdkit.Chem.SDWriter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: converter.py    From 3DGCN with MIT License 7 votes vote down vote up
def rotate_molecule(path, target_path, count=10):
    """Write randomly rotated copies of every molecule in an SD file.

    Parameters
    ----------
    path : str
        SD file the molecules are read from.
    target_path : str
        SD file the rotated molecules are written to.
    count : int, optional
        Number of rotated copies generated per input molecule.
    """
    # Load dataset
    mols = Chem.SDMolSupplier(path)
    rotated_mols = []

    print("Loaded {} Molecules from {}".format(len(mols), path))

    print("Rotating Molecules...")
    for mol in mols:
        # Skip unreadable records before touching them; the previous code only
        # checked for None at write time, after GetAtoms() had already run.
        if mol is None:
            continue

        for _ in range(count):
            # Rotate a copy so every stored entry keeps its own coordinates
            # (appending the same object repeatedly made all entries identical).
            rotated = Chem.Mol(mol)
            conformer = rotated.GetConformer()

            # One matrix per copy: drawing a new matrix for every atom would
            # distort the geometry instead of rotating the molecule rigidly.
            # NOTE(review): assumes random_rotation_matrix() returns a 3x3
            # matrix -- confirm against its definition.
            rotation = random_rotation_matrix()

            for atom in rotated.GetAtoms():
                atom_idx = atom.GetIdx()
                pos = list(conformer.GetAtomPosition(atom_idx))
                conformer.SetAtomPosition(atom_idx, np.matmul(rotation, pos))

            rotated_mols.append(rotated)

    w = Chem.SDWriter(target_path)
    try:
        for m in rotated_mols:
            w.write(m)
    finally:
        # Closing flushes buffered records to disk.
        w.close()
    print("Saved {} Molecules to {}".format(len(rotated_mols), target_path))
Example #2
Source File: operations.py    From ScaffoldGraph with MIT License 6 votes vote down vote up
def __init__(self, args):
        """Keep the parsed arguments and open the output and mapping files.

        Reads ``input``, ``sdf``, ``output``, ``map_mols`` and
        ``map_annotations`` from ``args``.
        """
        self.args = args
        self.inputs = args.input

        # Plain-text output unless SDF was requested.
        if not args.sdf:
            self.output = open(args.output, 'w')
        else:
            rdlogger.setLevel(4)
            self.output = SDWriter(args.output)

        # Optional molecule -> scaffold mapping file, with its header row.
        self.mol_map = None
        if args.map_mols:
            self.mol_map = open(args.map_mols, 'w')
            self.mol_map.write('MOLECULE_ID\tSCAFFOLD_ID\n')

        # Optional scaffold -> annotations mapping file, with its header row.
        self.ann_map = None
        if args.map_annotations:
            self.ann_map = open(args.map_annotations, 'w')
            self.ann_map.write('SCAFFOLD_ID\tANNOTATIONS\n')

        # Bookkeeping state, all starting empty.
        self.current_id = 0
        self.duplicates = 0
        self.table = dict()
Example #3
Source File: sdf.py    From ScaffoldGraph with MIT License 6 votes vote down vote up
def write_sdf_file(scaffold_graph, output_file):
    """Write an SDF file from a scaffoldgraph

    Scaffolds are written in order of increasing hierarchy. Each record is
    named after its position in that order and carries ``HIERARCHY``,
    ``SMILES`` and ``SUBSCAFFOLDS`` (ids of the scaffold's predecessors)
    properties. Scaffolds whose SMILES cannot be parsed are skipped.

    Parameters
    ----------
    scaffold_graph (sg.ScaffoldGraph): graph to be converted
    output_file (str): path to output file
    """

    sorted_scaffolds = sorted(
        scaffold_graph.get_scaffold_nodes(data=True),
        key=lambda x: x[1]['hierarchy'],
    )
    # Scaffold SMILES -> running id in hierarchy order.
    mapping = {node: idx for idx, (node, _) in enumerate(sorted_scaffolds)}

    writer = SDWriter(output_file)
    try:
        for scaffold, data in sorted_scaffolds:
            molecule = MolFromSmiles(scaffold)
            if molecule is None:
                continue
            subscaffolds = scaffold_graph.predecessors(scaffold)
            # SetProp only accepts string values, so the integer id is converted.
            molecule.SetProp('_Name', str(mapping[scaffold]))
            # Use the same lower-case key the sort above reads.
            # NOTE(review): the previous code looked up 'HIERARCHY' on
            # scaffold_graph.nodes[scaffold]; presumably that is the same
            # attribute dict as `data` -- confirm.
            molecule.SetIntProp('HIERARCHY', data['hierarchy'])
            molecule.SetProp('SMILES', scaffold)
            molecule.SetProp(
                'SUBSCAFFOLDS',
                ', '.join(str(mapping[s]) for s in subscaffolds),
            )
            writer.write(molecule)
    finally:
        # Always release the file handle, even if a record fails to write.
        writer.close()
Example #4
Source File: rdk.py    From oddt with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __init__(self, format, filename, overwrite=False):
        """Open a writer for ``filename`` matching the requested ``format``.

        Raises IOError when the file already exists and ``overwrite`` is
        false, and ValueError when ``format`` is not handled here.
        """
        self.format = format
        self.filename = filename

        # Refuse to clobber an existing file unless explicitly allowed.
        if not overwrite:
            if os.path.isfile(self.filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)

        if format == "sdf":
            writer = Chem.SDWriter(self.filename)
        elif format == "smi":
            writer = Chem.SmilesWriter(self.filename, isomericSmiles=True, includeHeader=False)
        elif format in ('inchi', 'inchikey') and Chem.INCHI_AVAILABLE:
            writer = open(filename, 'w')
        elif format in ('mol2', 'pdbqt'):
            # A trailing "gz" extension selects gzip output.
            opener = gzip.open if filename.rsplit('.', 1)[-1] == 'gz' else open
            writer = opener(filename, 'w')
        elif format == "pdb":
            writer = Chem.PDBWriter(self.filename)
        else:
            raise ValueError("%s is not a recognised RDKit format" % format)

        self._writer = writer
        # Running count of molecules written to the file.
        self.total = 0
Example #5
Source File: dataset.py    From 3DGCN with MIT License 5 votes vote down vote up
def save_dataset(self, path, pred=None, target="test", filename=None):
        """Write the molecules of one dataset split to an SD file.

        Every molecule gets a ``true`` property (``y * self.std + self.mean``)
        and, when ``pred`` is given, a ``pred`` property rescaled the same way.

        Parameters
        ----------
        path : str
            Prefix of the output location; the file name is appended directly.
        pred : sequence, optional
            Predictions, indexed as ``pred[idx][0]`` per molecule.
        target : str, optional
            Key of the split to save; also the default file name.
        filename : str, optional
            Output file name without the ``.sdf`` extension.
        """
        mols = []
        # self.c is zipped only to keep the original truncation behaviour.
        for idx, (x, _c, y) in enumerate(zip(self.x[target], self.c[target], self.y[target])):
            x.SetProp("true", str(y * self.std + self.mean))
            if pred is not None:
                x.SetProp("pred", str(pred[idx][0] * self.std + self.mean))
            mols.append(x)

        stem = filename if filename is not None else target
        w = Chem.SDWriter(path + stem + ".sdf")
        try:
            # No None check needed: SetProp above was already called on each one.
            for mol in mols:
                w.write(mol)
        finally:
            # The writer was previously left open; closing flushes the file.
            w.close()
Example #6
Source File: scatter_plot.py    From 3DGCN with MIT License 5 votes vote down vote up
def find_confusion(dataset, base_path):
    """Save the best and worst predicted molecules of trials 1 to 10.

    For each ``trial_<i>/test.sdf`` under ``base_path`` the signed error
    ``true - pred`` is read from the molecule properties. The two largest,
    the two smallest and the one with the smallest absolute error are written
    to ``confusion_examples_<dataset>_trial<i>.sdf`` in the same directory.

    Parameters
    ----------
    dataset : str
        Dataset name used in the output file name.
    base_path : str
        Directory prefix containing the ``trial_<i>/`` folders.
    """
    for i in range(1, 11):
        path = base_path + "trial_{}/".format(i)

        # Signed prediction error per molecule
        mols = Chem.SDMolSupplier(path + "test.sdf")
        diff_y = np.array(
            [float(mol.GetProp("true")) - float(mol.GetProp("pred")) for mol in mols],
            dtype=float,
        )

        # Find largest, smallest error molecules
        idx = np.argsort(diff_y)
        top_1 = mols[int(idx[-1])]
        top_2 = mols[int(idx[-2])]
        btm_1 = mols[int(idx[0])]
        btm_2 = mols[int(idx[1])]

        best_idx = np.argsort(np.abs(diff_y))
        best = mols[int(best_idx[0])]

        # Save example molecules
        writer = Chem.SDWriter(path + "confusion_examples_" + dataset + "_trial" + str(i) + ".sdf")
        try:
            for mol in (top_1, top_2, btm_1, btm_2, best):
                writer.write(mol)
        finally:
            # Close per trial so each file is flushed and its handle released.
            writer.close()
Example #7
Source File: operations.py    From ScaffoldGraph with MIT License 5 votes vote down vote up
def __init__(self, args):
        """Keep the parsed arguments and open the graph input and the output.

        Reads ``input_query``, ``input_graph``, ``sdf`` and ``output`` from
        ``args``.
        """
        self.args = args
        self.q_input = args.input_query
        self.g_input = open(args.input_graph, 'r')

        # Plain-text output unless SDF was requested.
        if not args.sdf:
            self.output = open(args.output, 'w')
        else:
            rdlogger.setLevel(4)
            self.output = SDWriter(args.output)

        # Working state, all starting empty.
        self.query, self.matching_parents = set(), set()
        self.count = 0
Example #8
Source File: __init__.py    From ScaffoldGraph with MIT License 5 votes vote down vote up
def mock_sdf(tmp_path):
    """Create ``test_data/test.sdf`` with two molecules and return its path.

    ``tmp_path`` is expected to behave like a ``pathlib.Path`` (it is joined
    with ``/`` and asked to create a directory).
    """
    d = tmp_path / "test_data"
    # Tolerate an existing directory so this can share tmp_path with other
    # helpers that create "test_data" first.
    d.mkdir(exist_ok=True)
    p = d / "test.sdf"
    writer = Chem.SDWriter(str(p))
    try:
        writer.write(Chem.MolFromSmiles('CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3'))
        writer.write(Chem.MolFromSmiles('CCC1=CC2=C(S1)N(C(=O)CN=C2C3=CC=CC=C3Cl)C'))
    finally:
        writer.close()
    return str(p)
Example #9
Source File: __init__.py    From ScaffoldGraph with MIT License 5 votes vote down vote up
def mock_sdf_2(tmp_path):
    """Create ``test_data/test_2.sdf`` with two molecules and return its path."""
    data_dir = tmp_path / "test_data"
    try:
        data_dir.mkdir()
    except FileExistsError:
        # Directory is already there; reuse it.
        pass

    sdf_path = data_dir / "test_2.sdf"
    writer = Chem.SDWriter(str(sdf_path))
    for smiles in (
        'C1C(=O)NC2=C(C=C(C=C2)Br)C(=N1)C3=CC=CC=N3',
        'CC1=NN(C2=C1C(=NCC(=O)N2C)C3=CC=CC=C3F)C',
    ):
        writer.write(Chem.MolFromSmiles(smiles))
    writer.close()
    return str(sdf_path)
Example #10
Source File: test_sdf_file_parser.py    From chainer-chemistry with MIT License 5 votes vote down vote up
def sdf_file(tmpdir, mols):
    """Write ``mols`` to ``test.sdf`` inside ``tmpdir`` and return the path."""
    # Chem.AllChem.Compute2DCoords(mol1)
    fname = os.path.join(str(tmpdir), 'test.sdf')
    writer = Chem.SDWriter(fname)
    try:
        for mol in mols:
            writer.write(mol)
    finally:
        # Close before returning so the file is completely flushed by the
        # time a caller reads it back.
        writer.close()
    return fname
Example #11
Source File: test_sdf_file_parser.py    From chainer-chemistry with MIT License 5 votes vote down vote up
def sdf_file_long(tmpdir):
    """SDFFile with long smiles (ccc...)

    Writes five fixed molecules to ``test_long.sdf`` inside ``tmpdir`` and
    returns the file path.
    """
    fname = os.path.join(str(tmpdir), 'test_long.sdf')
    writer = Chem.SDWriter(fname)
    try:
        for smiles in ['CCCCCCCCCCCC', 'CN=C=O', 'CCCCCCCCCCCCCCCC',
                       'Cc1ccccc1', 'CC1=CC2CC(CC1)O2']:
            mol = Chem.MolFromSmiles(smiles)
            writer.write(mol)
    finally:
        # Close before returning so the file is completely flushed by the
        # time a caller reads it back.
        writer.close()
    return fname
Example #12
Source File: converter.py    From 3DGCN with MIT License 4 votes vote down vote up
def converter(path, target_path, name, target_name, process=20):
    """Optimize conformers for a dataset and save the result as an SD file.

    Parameters
    ----------
    path : str
        Input file; handled as CSV when it contains ".csv", as SDF when it
        contains ".sdf".
    target_path : str
        SD file the optimized molecules are written to.
    name : str
        Passed through to ``load_csv`` for CSV input.
    target_name : str
        Passed to ``load_csv`` for CSV input; for SDF input, the molecule
        property that holds the target value.
    process : int, optional
        Number of worker processes used for the optimization.

    Raises
    ------
    ValueError
        If ``path`` is neither a CSV nor an SDF file.
    """
    # Load dataset
    print("Loading Dataset...")
    mols, props = [], []
    if ".csv" in path:
        x, y = load_csv(path, name, target_name)
        for smi, prop in zip(x, y):
            mol = Chem.MolFromSmiles(smi)
            if mol is not None:
                mols.append(mol)
                props.append(prop)

    elif ".sdf" in path:
        for mol in Chem.SDMolSupplier(path):
            # Skip unreadable records, mirroring the CSV branch, instead of
            # failing on GetProp.
            if mol is not None:
                mols.append(mol)
                props.append(mol.GetProp(target_name))

    else:
        raise ValueError("Unsupported file type.")
    mol_idx = list(range(len(mols)))
    print("Loaded {} Molecules from {}".format(len(mols), path))

    # Optimize coordinate using multiprocessing
    print("Optimizing Conformers...")
    pool = mp.Pool(process)
    try:
        results = pool.starmap(optimize_conformer, zip(mol_idx, mols, props))
    finally:
        # Shut the workers down so they do not outlive this call.
        pool.close()
        pool.join()

    # Remove None and add properties
    mol_list_filtered = []
    for mol, prop in results:
        if mol is not None:
            mol.SetProp("target", str(prop))
            mol_list_filtered.append(mol)
    print("{} Molecules Optimized".format(len(mol_list_filtered)))

    # Save molecules
    print("Saving File...")
    w = Chem.SDWriter(target_path)
    try:
        for m in mol_list_filtered:
            w.write(m)
    finally:
        # Closing flushes buffered records to disk.
        w.close()
    print("Saved {} Molecules to {}".format(len(mol_list_filtered), target_path))
Example #13
Source File: rdkit_util.py    From deepchem with MIT License 4 votes vote down vote up
def write_molecule(mol, outfile, is_protein=False):
  """Save a molecule to disk in the format implied by the file name.

  The format is picked by looking for ".pdbqt", ".pdb" or ".sdf" (in that
  order) anywhere in `outfile`. PDBQT output is produced by first writing
  a PDB file and then converting it with the `pdbqt_utils` helpers.
  Nothing is returned.

  Parameters
  ----------
  mol: rdkit Mol
    Molecule to write
  outfile: str
    Filename to write mol to
  is_protein: bool, optional
    Selects the protein conversion helper for PDBQT output.

  Note
  ----
  This function requires RDKit to be installed.

  Raises
  ------
  ValueError: if `outfile` isn't of a supported format.
  """
  from rdkit import Chem

  # ".pdbqt" has to be tested first because ".pdb" is a substring of it.
  wants_pdbqt = ".pdbqt" in outfile
  if wants_pdbqt or ".pdb" in outfile:
    writer = Chem.PDBWriter(outfile)
  elif ".sdf" in outfile:
    writer = Chem.SDWriter(outfile)
  else:
    raise ValueError("Unsupported Format")

  writer.write(mol)
  writer.close()

  if wants_pdbqt:
    # Convert the PDB file that was just written.
    to_pdbqt = (pdbqt_utils.convert_protein_to_pdbqt
                if is_protein else pdbqt_utils.convert_mol_to_pdbqt)
    to_pdbqt(mol, outfile)